initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
411 additions: thirdparty/embree/kernels/builders/bvh_builder_hair.h (vendored, normal file)
@@ -0,0 +1,411 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_strand_array.h"

#define NUM_HAIR_OBJECT_BINS 32

namespace embree
{
namespace isa
{
struct BVHBuilderHair
{
/*! settings for builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}

public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
size_t finished_range_threshold; //!< finished range threshold
};

template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>

class BuilderT
{
ALIGNED_CLASS_(16);
friend struct BVHBuilderHair;

typedef FastAllocator::CachedAllocator Allocator;
typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
typedef HeuristicStrandSplit HeuristicStrandSplitSAH;

static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build

static const size_t travCostAligned = 1;
static const size_t travCostUnaligned = 5;
static const size_t intCost = 6;

BuilderT (Scene* scene,
PrimRef* prims,
const CreateAllocFunc& createAlloc,
const CreateAABBNodeFunc& createAABBNode,
const SetAABBNodeFunc& setAABBNode,
const CreateOBBNodeFunc& createOBBNode,
const SetOBBNodeFunc& setOBBNode,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const ReportFinishedRangeFunc& reportFinishedRange,
const Settings settings)

: cfg(settings),
prims(prims),
createAlloc(createAlloc),
createAABBNode(createAABBNode),
setAABBNode(setAABBNode),
createOBBNode(createOBBNode),
setOBBNode(setOBBNode),
createLeaf(createLeaf),
progressMonitor(progressMonitor),
reportFinishedRange(reportFinishedRange),
alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}

/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry(const PrimInfoRange& range)
{
if (range.size() == 0) return true;
unsigned int firstGeomID = prims[range.begin()].geomID();
for (size_t i=range.begin()+1; i<range.end(); i++) {
if (prims[i].geomID() != firstGeomID){
return false;
}
}
return true;
}

/*! creates a large leaf that could be larger than supported by the BVH */
NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (depth > cfg.maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

/* create leaf for few primitives */
if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
return createLeaf(prims,pinfo,alloc);

/* fill all children by always splitting the largest one */
PrimInfoRange children[MAX_BRANCHING_FACTOR];
unsigned numChildren = 1;
children[0] = pinfo;

do {

/* find best child with largest bounding box area */
int bestChild = -1;
size_t bestSize = 0;
for (unsigned i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
continue;

/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;

/*! split best child into left and right child */
__aligned(64) PrimInfoRange left, right;
if (!sameGeometry(children[bestChild])) {
alignedHeuristic.splitByGeometry(children[bestChild],left,right);
} else {
alignedHeuristic.splitFallback(children[bestChild],left,right);
}

/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;

} while (numChildren < cfg.branchingFactor);

/* create node */
auto node = createAABBNode(alloc);

for (size_t i=0; i<numChildren; i++) {
const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
setAABBNode(node,i,child,children[i].geomBounds);
}

return node;
}

/*! performs split */
__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
{
/* variable to track the SAH of the best splitting approach */
float bestSAH = inf;
const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);

/* try standard binning in aligned space */
float alignedObjectSAH = inf;
HeuristicBinningSAH::Split alignedObjectSplit;
if (aligned) {
alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
bestSAH = min(alignedObjectSAH,bestSAH);
}

/* try standard binning in unaligned space */
UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
LinearSpace3fa uspace;
float unalignedObjectSAH = inf;
if (bestSAH > 0.7f*leafSAH) {
uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
bestSAH = min(unalignedObjectSAH,bestSAH);
}

/* try splitting into two strands */
HeuristicStrandSplitSAH::Split strandSplit;
float strandSAH = inf;
if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
bestSAH = min(strandSAH,bestSAH);
}

/* fallback if SAH heuristics failed */
if (unlikely(!std::isfinite(bestSAH)))
{
alignedHeuristic.deterministic_order(pinfo);
alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
}

/* perform aligned split if this is best */
else if (bestSAH == alignedObjectSAH) {
alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
}

/* perform unaligned split if this is best */
else if (bestSAH == unalignedObjectSAH) {
unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
aligned = false;
}

/* perform strand split if this is best */
else if (bestSAH == strandSAH) {
strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
aligned = false;
}

/* can never happen */
else
assert(false);
}

/*! recursive build */
NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();

/* call memory monitor function to signal progress */
if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
progressMonitor(pinfo.size());

PrimInfoRange children[MAX_BRANCHING_FACTOR];

/* create leaf node */
if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
alignedHeuristic.deterministic_order(pinfo);
return createLargeLeaf(depth,pinfo,alloc);
}

/* fill all children by always splitting the one with the largest surface area */
size_t numChildren = 1;
children[0] = pinfo;
bool aligned = true;

do {

/* find best child with largest bounding box area */
ssize_t bestChild = -1;
float bestArea = neg_inf;
for (size_t i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.minLeafSize)
continue;

/* remember child with largest area */
if (area(children[i].geomBounds) > bestArea) {
bestArea = area(children[i].geomBounds);
bestChild = i;
}
}
if (bestChild == -1) break;

/*! split best child into left and right child */
PrimInfoRange left, right;
split(children[bestChild],left,right,aligned);

/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;

} while (numChildren < cfg.branchingFactor);

NodeRef node;

/* create aligned node */
if (aligned)
{
node = createAABBNode(alloc);

/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else {
for (size_t i=0; i<numChildren; i++) {
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
}
}
}

/* create unaligned node */
else
{
node = createOBBNode(alloc);

/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else
{
for (size_t i=0; i<numChildren; i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
}
}
}

/* reports a finished range of primrefs */
if (unlikely(alloc_barrier))
reportFinishedRange(pinfo);

return node;
}

private:
Settings cfg;
PrimRef* prims;
const CreateAllocFunc& createAlloc;
const CreateAABBNodeFunc& createAABBNode;
const SetAABBNodeFunc& setAABBNode;
const CreateOBBNodeFunc& createOBBNode;
const SetOBBNodeFunc& setOBBNode;
const CreateLeafFunc& createLeaf;
const ProgressMonitor& progressMonitor;
const ReportFinishedRangeFunc& reportFinishedRange;

private:
HeuristicBinningSAH alignedHeuristic;
UnalignedHeuristicBinningSAH unalignedHeuristic;
HeuristicStrandSplitSAH strandHeuristic;
};

template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>

static NodeRef build (const CreateAllocFunc& createAlloc,
const CreateAABBNodeFunc& createAABBNode,
const SetAABBNodeFunc& setAABBNode,
const CreateOBBNodeFunc& createOBBNode,
const SetOBBNodeFunc& setOBBNode,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const ReportFinishedRangeFunc& reportFinishedRange,
Scene* scene,
PrimRef* prims,
const PrimInfo& pinfo,
const Settings settings)
{
typedef BuilderT<NodeRef,
CreateAllocFunc,
CreateAABBNodeFunc,SetAABBNodeFunc,
CreateOBBNodeFunc,SetOBBNodeFunc,
CreateLeafFunc,ProgressMonitor,
ReportFinishedRangeFunc> Builder;

Builder builder(scene,prims,createAlloc,
createAABBNode,setAABBNode,
createOBBNode,setOBBNode,
createLeaf,progressMonitor,reportFinishedRange,settings);

NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}
};
}
}
502 additions: thirdparty/embree/kernels/builders/bvh_builder_morton.h (vendored, normal file)
@@ -0,0 +1,502 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../common/builder.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_sort.h"

namespace embree
{
namespace isa
{
struct BVHBuilderMorton
{
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth

/*! settings for morton builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {}

/*! initialize settings from API settings */
Settings (const RTCBuildArguments& settings)
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024)
{
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;

minLeafSize = min(minLeafSize,maxLeafSize);
}

Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold)
: branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold)
{
minLeafSize = min(minLeafSize,maxLeafSize);
}

public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
};

/*! Build primitive consisting of morton code and primitive ID. */
struct __aligned(8) BuildPrim
{
union {
struct {
unsigned int code; //!< morton code
unsigned int index; //!< i'th primitive
};
uint64_t t;
};

/*! interface for radix sort */
__forceinline operator unsigned() const { return code; }

/*! interface for standard sort */
__forceinline bool operator<(const BuildPrim &m) const { return code < m.code; }
};

/*! maps bounding box to morton code */
struct MortonCodeMapping
{
static const size_t LATTICE_BITS_PER_DIM = 10;
static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM;

vfloat4 base;
vfloat4 scale;

__forceinline MortonCodeMapping(const BBox3fa& bounds)
{
base = (vfloat4)bounds.lower;
const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower;
scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f));
}

__forceinline const vint4 bin (const BBox3fa& box) const
{
const vfloat4 lower = (vfloat4)box.lower;
const vfloat4 upper = (vfloat4)box.upper;
const vfloat4 centroid = lower+upper;
return vint4((centroid-base)*scale);
}

__forceinline unsigned int code (const BBox3fa& box) const
{
const vint4 binID = bin(box);
const unsigned int x = extract<0>(binID);
const unsigned int y = extract<1>(binID);
const unsigned int z = extract<2>(binID);
const unsigned int xyz = bitInterleave(x,y,z);
return xyz;
}
};

#if defined (__AVX2__) || defined(__SYCL_DEVICE_ONLY__)

/*! for AVX2 there is a fast scalar bitInterleave */
struct MortonCodeGenerator
{
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
: mapping(mapping), dest(dest) {}

__forceinline void operator() (const BBox3fa& b, const unsigned index)
{
dest->index = index;
dest->code = mapping.code(b);
dest++;
}

public:
const MortonCodeMapping mapping;
BuildPrim* dest;
size_t currentID;
};

#else

/*! before AVX2 it is better to use the SSE version of bitInterleave */
struct MortonCodeGenerator
{
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
: mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {}

__forceinline ~MortonCodeGenerator()
{
if (slots != 0)
{
const vint4 code = bitInterleave(ax,ay,az);
for (size_t i=0; i<slots; i++) {
dest[currentID-slots+i].index = ai[i];
dest[currentID-slots+i].code = code[i];
}
}
}

__forceinline void operator() (const BBox3fa& b, const unsigned index)
{
const vint4 binID = mapping.bin(b);
ax[slots] = extract<0>(binID);
ay[slots] = extract<1>(binID);
az[slots] = extract<2>(binID);
ai[slots] = index;
slots++;
currentID++;

if (slots == 4)
{
const vint4 code = bitInterleave(ax,ay,az);
vint4::storeu(&dest[currentID-4],unpacklo(code,ai));
vint4::storeu(&dest[currentID-2],unpackhi(code,ai));
slots = 0;
}
}

public:
const MortonCodeMapping mapping;
BuildPrim* dest;
size_t currentID;
size_t slots;
vint4 ax, ay, az, ai;
};

#endif

template<
typename ReductionTy,
typename Allocator,
typename CreateAllocator,
typename CreateNodeFunc,
typename SetNodeBoundsFunc,
typename CreateLeafFunc,
typename CalculateBounds,
typename ProgressMonitor>

class BuilderT : private Settings
{
ALIGNED_CLASS_(16);

public:

BuilderT (CreateAllocator& createAllocator,
CreateNodeFunc& createNode,
SetNodeBoundsFunc& setBounds,
CreateLeafFunc& createLeaf,
CalculateBounds& calculateBounds,
ProgressMonitor& progressMonitor,
const Settings& settings)

: Settings(settings),
createAllocator(createAllocator),
createNode(createNode),
setBounds(setBounds),
createLeaf(createLeaf),
calculateBounds(calculateBounds),
progressMonitor(progressMonitor),
morton(nullptr) {}

ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (depth > maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

/* create leaf for few primitives */
if (current.size() <= maxLeafSize)
return createLeaf(current,alloc);

/* fill all children by always splitting the largest one */
range<unsigned> children[MAX_BRANCHING_FACTOR];
size_t numChildren = 1;
children[0] = current;

do {

/* find best child with largest number of primitives */
size_t bestChild = -1;
size_t bestSize = 0;
for (size_t i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= maxLeafSize)
continue;

/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == size_t(-1)) break;

/*! split best child into left and right child */
auto split = children[bestChild].split();

/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = split.first;
children[numChildren+0] = split.second;
numChildren++;

} while (numChildren < branchingFactor);

/* create node */
auto node = createNode(alloc,numChildren);

/* recurse into each child */
ReductionTy bounds[MAX_BRANCHING_FACTOR];
for (size_t i=0; i<numChildren; i++)
bounds[i] = createLargeLeaf(depth+1,children[i],alloc);

return setBounds(node,bounds,numChildren);
}

/*! recreates morton codes when reaching a region where all codes are identical */
__noinline void recreateMortonCodes(const range<unsigned>& current) const
{
/* fast path for small ranges */
if (likely(current.size() < 1024))
{
/*! recalculate centroid bounds */
BBox3fa centBounds(empty);
for (size_t i=current.begin(); i<current.end(); i++)
centBounds.extend(center2(calculateBounds(morton[i])));

/* recalculate morton codes */
MortonCodeMapping mapping(centBounds);
for (size_t i=current.begin(); i<current.end(); i++)
morton[i].code = mapping.code(calculateBounds(morton[i]));

/* sort morton codes */
std::sort(morton+current.begin(),morton+current.end());
}
else
{
/*! recalculate centroid bounds */
auto calculateCentBounds = [&] ( const range<unsigned>& r ) {
BBox3fa centBounds = empty;
for (size_t i=r.begin(); i<r.end(); i++)
centBounds.extend(center2(calculateBounds(morton[i])));
return centBounds;
};
const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024),
BBox3fa(empty), calculateCentBounds, BBox3fa::merge);

/* recalculate morton codes */
MortonCodeMapping mapping(centBounds);
parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) {
for (size_t i=r.begin(); i<r.end(); i++) {
morton[i].code = mapping.code(calculateBounds(morton[i]));
}
});

/*! sort morton codes */
#if defined(TASKING_TBB)
tbb::parallel_sort(morton+current.begin(),morton+current.end());
#else
radixsort32(morton+current.begin(),current.size());
#endif
}
}

__forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const
{
const unsigned int code_start = morton[current.begin()].code;
const unsigned int code_end = morton[current.end()-1].code;
unsigned int bitpos = lzcnt(code_start^code_end);

/* if all items mapped to same morton code, then re-create new morton codes for the items */
if (unlikely(bitpos == 32))
{
recreateMortonCodes(current);
const unsigned int code_start = morton[current.begin()].code;
const unsigned int code_end = morton[current.end()-1].code;
bitpos = lzcnt(code_start^code_end);

/* if the morton code is still the same, goto fall back split */
if (unlikely(bitpos == 32)) {
current.split(left,right);
return;
}
}

/* split the items at the topmost different morton code bit */
const unsigned int bitpos_diff = 31-bitpos;
const unsigned int bitmask = 1 << bitpos_diff;

/* find location where bit differs using binary search */
unsigned begin = current.begin();
unsigned end = current.end();
while (begin + 1 != end) {
const unsigned mid = (begin+end)/2;
const unsigned bit = morton[mid].code & bitmask;
if (bit == 0) begin = mid; else end = mid;
}
unsigned center = end;
#if defined(DEBUG)
for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0);
for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask);
#endif

left = make_range(current.begin(),center);
right = make_range(center,current.end());
}

ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel)
{
/* get thread local allocator */
if (!alloc)
alloc = createAllocator();

/* call memory monitor function to signal progress */
if (toplevel && current.size() <= singleThreadThreshold)
progressMonitor(current.size());

/* create leaf node */
if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize))
return createLargeLeaf(depth,current,alloc);

/* fill all children by always splitting the one with the largest surface area */
range<unsigned> children[MAX_BRANCHING_FACTOR];
split(current,children[0],children[1]);
size_t numChildren = 2;

while (numChildren < branchingFactor)
{
/* find best child with largest number of primitives */
int bestChild = -1;
unsigned bestItems = 0;
for (unsigned int i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= minLeafSize)
continue;

/* remember child with largest area */
if (children[i].size() > bestItems) {
bestItems = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;

/*! split best child into left and right child */
range<unsigned> left, right;
split(children[bestChild],left,right);

/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;
}

/* create leaf node if no split is possible */
if (unlikely(numChildren == 1))
return createLeaf(current,alloc);

/* allocate node */
auto node = createNode(alloc,numChildren);

/* process top parts of tree parallel */
ReductionTy bounds[MAX_BRANCHING_FACTOR];
if (current.size() > singleThreadThreshold)
{
/*! parallel_for is faster than spawning sub-tasks */
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
bounds[i] = recurse(depth+1,children[i],nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}

/* finish tree sequentially */
else
{
for (size_t i=0; i<numChildren; i++)
bounds[i] = recurse(depth+1,children[i],alloc,false);
}

return setBounds(node,bounds,numChildren);
}

/* build function */
ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives)
{
/* sort morton codes */
morton = src;
radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold);

/* build BVH */
const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}

public:
CreateAllocator& createAllocator;
CreateNodeFunc& createNode;
SetNodeBoundsFunc& setBounds;
CreateLeafFunc& createLeaf;
CalculateBounds& calculateBounds;
ProgressMonitor& progressMonitor;

public:
BuildPrim* morton;
};


template<
typename ReductionTy,
typename CreateAllocFunc,
typename CreateNodeFunc,
typename SetBoundsFunc,
typename CreateLeafFunc,
typename CalculateBoundsFunc,
typename ProgressMonitor>

static ReductionTy build(CreateAllocFunc createAllocator,
CreateNodeFunc createNode,
SetBoundsFunc setBounds,
CreateLeafFunc createLeaf,
CalculateBoundsFunc calculateBounds,
ProgressMonitor progressMonitor,
BuildPrim* src,
BuildPrim* tmp,
size_t numPrimitives,
const Settings& settings)
{
typedef BuilderT<
ReductionTy,
decltype(createAllocator()),
CreateAllocFunc,
CreateNodeFunc,
SetBoundsFunc,
CreateLeafFunc,
CalculateBoundsFunc,
ProgressMonitor> Builder;

Builder builder(createAllocator,
createNode,
setBounds,
createLeaf,
calculateBounds,
progressMonitor,
settings);

return builder.build(src,tmp,numPrimitives);
}
};
}
}
693 additions: thirdparty/embree/kernels/builders/bvh_builder_msmblur.h (vendored, normal file)
@@ -0,0 +1,693 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#define MBLUR_NUM_TEMPORAL_BINS 2
#define MBLUR_NUM_OBJECT_BINS 32

#include "../bvh/bvh.h"
#include "../builders/primref_mb.h"
#include "heuristic_binning_array_aligned.h"
#include "heuristic_timesplit_array.h"

namespace embree
{
namespace isa
{
template<typename T>
struct SharedVector
{
__forceinline SharedVector() {}

__forceinline SharedVector(T* ptr, size_t refCount = 1)
: prims(ptr), refCount(refCount) {}

__forceinline void incRef() {
refCount++;
}

__forceinline void decRef()
{
if (--refCount == 0)
delete prims;
}

T* prims;
size_t refCount;
};

template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
struct LocalChildListT
{
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;

__forceinline LocalChildListT (const BuildRecord& record)
: numChildren(1), numSharedPrimVecs(1)
{
/* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
children[0] = record;
primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
}

__forceinline ~LocalChildListT()
{
for (size_t i = 0; i < numChildren; i++)
primvecs[i]->decRef();
}

__forceinline BuildRecord& operator[] ( const size_t i ) {
return children[i];
}

__forceinline size_t size() const {
return numChildren;
}

__forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
{
SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
if (lrecord.prims.prims == bsharedPrimVec->prims) {
primvecs[bestChild] = bsharedPrimVec;
bsharedPrimVec->incRef();
}
else {
primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
}

if (rrecord.prims.prims == bsharedPrimVec->prims) {
primvecs[numChildren] = bsharedPrimVec;
bsharedPrimVec->incRef();
}
else {
primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
}
bsharedPrimVec->decRef();
new_vector.release();

children[bestChild] = lrecord;
children[numChildren] = rrecord;
numChildren++;
}

public:
array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
size_t numChildren;

array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
size_t numSharedPrimVecs;
};

template<typename Mesh>
struct RecalculatePrimRef
{
Scene* scene;

__forceinline RecalculatePrimRef (Scene* scene)
: scene(scene) {}

__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Mesh* mesh = scene->get<Mesh>(geomID);
const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}

// __noinline is workaround for ICC16 bug under MacOSX
__noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Mesh* mesh = scene->get<Mesh>(geomID);
const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}

__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
}

// __noinline is workaround for ICC16 bug under MacOSX
__noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
}
};

struct VirtualRecalculatePrimRef
{
Scene* scene;
const SubGridBuildData * const sgrids;

__forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
: scene(scene), sgrids(sgrids) {}

__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Geometry* mesh = scene->get(geomID);
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}

__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Geometry* mesh = scene->get(geomID);
const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}

__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
}

__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
}
};

struct BVHBuilderMSMBlur
{
/*! settings for msmblur builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
singleThreadThreshold(1024) {}


Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
{
minLeafSize = min(minLeafSize,maxLeafSize);
}

public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
float travCost; //!< estimated cost of one traversal step
float intCost; //!< estimated cost of one primitive intersection
bool singleLeafTimeSegment; //!< split time to single time range
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
};

struct BuildRecord
{
public:
__forceinline BuildRecord () {}

__forceinline BuildRecord (size_t depth)
: depth(depth) {}

__forceinline BuildRecord (const SetMB& prims, size_t depth)
: depth(depth), prims(prims) {}

__forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
return a.prims.size() < b.prims.size();
}

__forceinline size_t size() const {
return prims.size();
}

public:
size_t depth; //!< Depth of the root of this subtree.
SetMB prims; //!< The list of primitives.
};

struct BuildRecordSplit : public BuildRecord
{
__forceinline BuildRecordSplit () {}

__forceinline BuildRecordSplit (size_t depth)
: BuildRecord(depth) {}

__forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
: BuildRecord(record), split(split) {}

BinSplit<MBLUR_NUM_OBJECT_BINS> split;
};

template<
typename NodeRef,
typename RecalculatePrimRef,
typename Allocator,
typename CreateAllocFunc,
typename CreateNodeFunc,
typename SetNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor>

class BuilderT
{
ALIGNED_CLASS_(16);
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth

typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
typedef mvector<PrimRefMB>* PrimRefVector;
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;

public:

BuilderT (MemoryMonitorInterface* device,
const RecalculatePrimRef recalculatePrimRef,
const CreateAllocFunc createAlloc,
const CreateNodeFunc createNode,
const SetNodeFunc setNode,
const CreateLeafFunc createLeaf,
const ProgressMonitor progressMonitor,
const Settings& settings)
: cfg(settings),
heuristicObjectSplit(),
heuristicTemporalSplit(device, recalculatePrimRef),
recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
progressMonitor(progressMonitor)
{
if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
}

/*! finds the best split */
const Split find(const SetMB& set)
{
/* first try standard object split */
const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
const float object_split_sah = object_split.splitSAH();

/* test temporal splits only when object split was bad */
const float leaf_sah = set.leafSAH(cfg.logBlockSize);
if (object_split_sah < 0.50f*leaf_sah)
return object_split;

/* do temporal splits only if the time range is big enough */
if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
{
const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
const float temporal_split_sah = temporal_split.splitSAH();

/* take temporal split if it improved SAH */
if (temporal_split_sah < object_split_sah)
return temporal_split;
}

return object_split;
}

/*! array partitioning */
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
{
/* perform object split */
if (likely(split.data == Split::SPLIT_OBJECT)) {
heuristicObjectSplit.split(split,set,lset,rset);
}
/* perform temporal split */
else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
return heuristicTemporalSplit.split(split,set,lset,rset);
}
/* perform fallback split */
else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
set.deterministic_order();
splitFallback(set,lset,rset);
}
/* split by geometry */
else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
set.deterministic_order();
splitByGeometry(set,lset,rset);
}
else
assert(false);

return std::unique_ptr<mvector<PrimRefMB>>();
}

/*! finds the best fallback split */
__noinline Split findFallback(const SetMB& set)
{
/* split if primitives are not from same geometry */
if (!sameGeometry(set))
return Split(0.0f,Split::SPLIT_GEOMID);

/* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
if (cfg.singleLeafTimeSegment)
{
/* test if one primitive has more than one time segment in time range, if so split time */
for (size_t i=set.begin(); i<set.end(); i++)
{
const PrimRefMB& prim = (*set.prims)[i];
const range<int> itime_range = prim.timeSegmentRange(set.time_range);
const int localTimeSegments = itime_range.size();
assert(localTimeSegments > 0);
if (localTimeSegments > 1) {
const int icenter = (itime_range.begin() + itime_range.end())/2;
const float splitTime = prim.timeStep(icenter);
return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
}
}
}

/* otherwise return fallback split */
return Split(0.0f,Split::SPLIT_FALLBACK);
}

/*! performs fallback split */
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
{
mvector<PrimRefMB>& prims = *set.prims;

const size_t begin = set.begin();
const size_t end = set.end();
const size_t center = (begin + end + 1) / 2;

PrimInfoMB linfo = empty;
for (size_t i=begin; i<center; i++)
linfo.add_primref(prims[i]);

PrimInfoMB rinfo = empty;
for (size_t i=center; i<end; i++)
rinfo.add_primref(prims[i]);

new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
}

/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry(const SetMB& set)
{
if (set.size() == 0) return true;
mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();
unsigned int firstGeomID = prims[begin].geomID();
for (size_t i=begin+1; i<end; i++) {
if (prims[i].geomID() != firstGeomID){
return false;
}
}
return true;
}

/* split by geometry ID */
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
{
assert(set.size() > 1);

mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();

PrimInfoMB left(empty);
PrimInfoMB right(empty);
unsigned int geomID = prims[begin].geomID();
size_t center = serial_partitioning(prims.data(),begin,end,left,right,
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
[ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });

new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
}

const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (in.depth > cfg.maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

/* replace already found split by fallback split */
const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

/* special case when directly creating leaf without any splits that could shrink time_range */
bool force_split = false;
if (current.depth == 1 && current.size() > 0)
{
BBox1f c = empty;
BBox1f p = current.prims.time_range;
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
mvector<PrimRefMB>& prims = *current.prims.prims;
c.extend(prims[i].time_range);
}

force_split = c.lower > p.lower || c.upper < p.upper;
}

/* create leaf for few primitives */
if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
return createLeaf(current,alloc);

/* fill all children by always splitting the largest one */
bool hasTimeSplits = false;
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
LocalChildListSplit children(current);

do {
/* find best child with largest bounding box area */
size_t bestChild = -1;
size_t bestSize = 0;
for (size_t i=0; i<children.size(); i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
continue;

force_split = false;

/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;

/* perform best found split */
BuildRecordSplit& brecord = children[bestChild];
BuildRecordSplit lrecord(current.depth+1);
BuildRecordSplit rrecord(current.depth+1);
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
hasTimeSplits |= new_vector != nullptr;

/* find new splits */
lrecord.split = findFallback(lrecord.prims);
rrecord.split = findFallback(rrecord.prims);
children.split(bestChild,lrecord,rrecord,std::move(new_vector));

} while (children.size() < cfg.branchingFactor);

/* detect time_ranges that have shrunken */
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = in.prims.time_range;
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
}

/* create node */
auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

/* recurse into each child and perform reduction */
LBBox3fa gbounds = empty;
for (size_t i=0; i<children.size(); i++) {
values[i] = createLargeLeaf(children[i],alloc);
gbounds.extend(values[i].lbounds);
}

setNode(current,children.children.data(),node,values,children.numChildren);

/* calculate geometry bounds of this node */
if (hasTimeSplits)
return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
else
return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}

const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();

/* call memory monitor function to signal progress */
if (toplevel && current.size() <= cfg.singleThreadThreshold)
progressMonitor(current.size());

/*! find best split */
const Split csplit = find(current.prims);

/*! compute leaf and split cost */
const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

/*! create a leaf node when threshold reached or SAH tells us to stop */
if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
current.prims.deterministic_order();
return createLargeLeaf(current,alloc);
}

/*! perform initial split */
SetMB lprims,rprims;
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
bool hasTimeSplits = new_vector != nullptr;
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
LocalChildList children(current);
{
BuildRecord lrecord(lprims,current.depth+1);
BuildRecord rrecord(rprims,current.depth+1);
children.split(0,lrecord,rrecord,std::move(new_vector));
}

/*! split until node is full or SAH tells us to stop */
while (children.size() < cfg.branchingFactor)
{
/*! find best child to split */
float bestArea = neg_inf;
ssize_t bestChild = -1;
for (size_t i=0; i<children.size(); i++)
{
if (children[i].size() <= cfg.minLeafSize) continue;
if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
}
}
if (bestChild == -1) break;

/* perform split */
BuildRecord& brecord = children[bestChild];
BuildRecord lrecord(current.depth+1);
BuildRecord rrecord(current.depth+1);
Split csplit = find(brecord.prims);
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
hasTimeSplits |= new_vector != nullptr;
children.split(bestChild,lrecord,rrecord,std::move(new_vector));
}

/* detect time_ranges that have shrunken */
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = current.prims.time_range;
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
}

/* sort buildrecords for simpler shadow ray traversal */
//std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (needs to be verified) !!
|
||||
|
||||
/*! create an inner node */
|
||||
auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
|
||||
LBBox3fa gbounds = empty;
|
||||
|
||||
/* spawn tasks */
|
||||
if (unlikely(current.size() > cfg.singleThreadThreshold))
|
||||
{
|
||||
/*! parallel_for is faster than spawning sub-tasks */
|
||||
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
values[i] = recurse(children[i],nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
|
||||
/*! merge bounding boxes */
|
||||
for (size_t i=0; i<children.size(); i++)
|
||||
gbounds.extend(values[i].lbounds);
|
||||
}
|
||||
/* recurse into each child */
|
||||
else
|
||||
{
|
||||
//for (size_t i=0; i<children.size(); i++)
|
||||
for (ssize_t i=children.size()-1; i>=0; i--) {
|
||||
values[i] = recurse(children[i],alloc,false);
|
||||
gbounds.extend(values[i].lbounds);
|
||||
}
|
||||
}
|
||||
|
||||
setNode(current,children.children.data(),node,values,children.numChildren);
|
||||
|
||||
/* calculate geometry bounds of this node */
|
||||
if (unlikely(hasTimeSplits))
|
||||
return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
|
||||
else
|
||||
return NodeRecordMB4D(node,gbounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
/*! builder entry function */
|
||||
__forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
|
||||
{
|
||||
const SetMB set(pinfo,&prims);
|
||||
auto ret = recurse(BuildRecord(set,1),nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
Settings cfg;
|
||||
HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
|
||||
HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
|
||||
const RecalculatePrimRef recalculatePrimRef;
|
||||
const CreateAllocFunc createAlloc;
|
||||
const CreateNodeFunc createNode;
|
||||
const SetNodeFunc setNode;
|
||||
const CreateLeafFunc createLeaf;
|
||||
const ProgressMonitor progressMonitor;
|
||||
};
|
||||
|
||||
template<typename NodeRef,
|
||||
typename RecalculatePrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename SetNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitorFunc>
|
||||
|
||||
static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
|
||||
const PrimInfoMB& pinfo,
|
||||
MemoryMonitorInterface* device,
|
||||
const RecalculatePrimRef recalculatePrimRef,
|
||||
const CreateAllocFunc createAlloc,
|
||||
const CreateNodeFunc createNode,
|
||||
const SetNodeFunc setNode,
|
||||
const CreateLeafFunc createLeaf,
|
||||
const ProgressMonitorFunc progressMonitor,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuilderT<
|
||||
NodeRef,
|
||||
RecalculatePrimRef,
|
||||
decltype(createAlloc()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
SetNodeFunc,
|
||||
CreateLeafFunc,
|
||||
ProgressMonitorFunc> Builder;
|
||||
|
||||
Builder builder(device,
|
||||
recalculatePrimRef,
|
||||
createAlloc,
|
||||
createNode,
|
||||
setNode,
|
||||
createLeaf,
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
|
||||
return builder(prims,pinfo);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
526
thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h
vendored
Normal file
@@ -0,0 +1,526 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../bvh/bvh.h"
|
||||
#include "../geometry/primitive.h"
|
||||
#include "../builders/bvh_builder_msmblur.h"
|
||||
#include "../builders/heuristic_binning_array_aligned.h"
|
||||
#include "../builders/heuristic_binning_array_unaligned.h"
|
||||
#include "../builders/heuristic_timesplit_array.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct BVHBuilderHairMSMBlur
|
||||
{
|
||||
/*! settings for msmblur builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
};
|
||||
|
||||
struct BuildRecord
|
||||
{
|
||||
public:
|
||||
__forceinline BuildRecord () {}
|
||||
|
||||
__forceinline BuildRecord (size_t depth)
|
||||
: depth(depth) {}
|
||||
|
||||
__forceinline BuildRecord (const SetMB& prims, size_t depth)
|
||||
: depth(depth), prims(prims) {}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return prims.size();
|
||||
}
|
||||
|
||||
public:
|
||||
size_t depth; //!< depth of the root of this subtree
|
||||
SetMB prims; //!< the list of primitives
|
||||
};
|
||||
|
||||
template<typename NodeRef,
|
||||
typename RecalculatePrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateAABBNodeMBFunc,
|
||||
typename SetAABBNodeMBFunc,
|
||||
typename CreateOBBNodeMBFunc,
|
||||
typename SetOBBNodeMBFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
class BuilderT
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
|
||||
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
|
||||
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
typedef FastAllocator::CachedAllocator Allocator;
|
||||
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
|
||||
|
||||
typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
|
||||
typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
|
||||
typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
|
||||
|
||||
public:
|
||||
|
||||
BuilderT (Scene* scene,
|
||||
const RecalculatePrimRef& recalculatePrimRef,
|
||||
const CreateAllocFunc& createAlloc,
|
||||
const CreateAABBNodeMBFunc& createAABBNodeMB,
|
||||
const SetAABBNodeMBFunc& setAABBNodeMB,
|
||||
const CreateOBBNodeMBFunc& createOBBNodeMB,
|
||||
const SetOBBNodeMBFunc& setOBBNodeMB,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings settings)
|
||||
|
||||
: cfg(settings),
|
||||
scene(scene),
|
||||
recalculatePrimRef(recalculatePrimRef),
|
||||
createAlloc(createAlloc),
|
||||
createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
|
||||
createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
|
||||
createLeaf(createLeaf),
|
||||
progressMonitor(progressMonitor),
|
||||
unalignedHeuristic(scene),
|
||||
temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
|
||||
|
||||
private:
|
||||
|
||||
/*! checks if all primitives are from the same geometry */
|
||||
__forceinline bool sameGeometry(const SetMB& set)
|
||||
{
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
unsigned int firstGeomID = prims[set.begin()].geomID();
|
||||
for (size_t i=set.begin()+1; i<set.end(); i++) {
|
||||
if (prims[i].geomID() != firstGeomID){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! performs a simple median split in case the SAH-based approaches fail */
|
||||
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
PrimInfoMB linfo = empty;
|
||||
for (size_t i=begin; i<center; i++)
|
||||
linfo.add_primref(prims[i]);
|
||||
|
||||
PrimInfoMB rinfo = empty;
|
||||
for (size_t i=center; i<end; i++)
|
||||
rinfo.add_primref(prims[i]);
|
||||
|
||||
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
assert(set.size() > 1);
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfoMB linfo(empty);
|
||||
PrimInfoMB rinfo(empty);
|
||||
unsigned int geomID = (*set.prims)[begin].geomID();
|
||||
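/* partition the range so that primitives of the first geometry come first, accumulating the PrimInfoMB of both halves on the fly */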
size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
|
||||
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
|
||||
[ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });
|
||||
|
||||
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
/*! creates a large leaf that could be larger than supported by the BVH */
|
||||
NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
|
||||
{
|
||||
/* this should never occur, but if it does it is a fatal error */
|
||||
if (current.depth > cfg.maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* special case when directly creating leaf without any splits that could shrink time_range */
|
||||
bool force_split = false;
|
||||
if (current.depth == 1 && current.size() > 0)
|
||||
{
|
||||
BBox1f c = empty;
|
||||
BBox1f p = current.prims.time_range;
|
||||
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
|
||||
mvector<PrimRefMB>& prims = *current.prims.prims;
|
||||
c.extend(prims[i].time_range);
|
||||
}
|
||||
|
||||
force_split = c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
|
||||
return createLeaf(current.prims,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
LocalChildList children(current);
|
||||
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
|
||||
|
||||
do {
|
||||
|
||||
/* find best child with the largest number of primitives */
|
||||
int bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (unsigned i=0; i<children.size(); i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
|
||||
continue;
|
||||
|
||||
force_split = false;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].size() > bestSize) {
|
||||
bestSize = children[i].size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
BuildRecord left(current.depth+1);
|
||||
BuildRecord right(current.depth+1);
|
||||
if (!sameGeometry(children[bestChild].prims)) {
|
||||
splitByGeometry(children[bestChild].prims,left.prims,right.prims);
|
||||
} else {
|
||||
splitFallback(children[bestChild].prims,left.prims,right.prims);
|
||||
}
|
||||
children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());
|
||||
|
||||
} while (children.size() < cfg.branchingFactor);
|
||||
|
||||
|
||||
/* detect time_ranges that have shrunken */
|
||||
bool timesplit = false;
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
const BBox1f c = children[i].prims.time_range;
|
||||
const BBox1f p = current.prims.time_range;
|
||||
timesplit |= c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* create node */
|
||||
NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
values[i] = createLargeLeaf(children[i],alloc);
|
||||
bounds.extend(values[i].lbounds);
|
||||
}
|
||||
|
||||
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
|
||||
|
||||
if (timesplit)
|
||||
bounds = current.prims.linearBounds(recalculatePrimRef);
|
||||
|
||||
return NodeRecordMB4D(node,bounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
/*! performs split */
|
||||
std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
|
||||
{
|
||||
/* variable to track the SAH of the best splitting approach */
|
||||
float bestSAH = inf;
|
||||
const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
|
||||
|
||||
/* perform standard binning in aligned space */
|
||||
HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
|
||||
float alignedObjectSAH = alignedObjectSplit.splitSAH();
|
||||
bestSAH = min(alignedObjectSAH,bestSAH);
|
||||
|
||||
/* perform standard binning in unaligned space */
|
||||
UnalignedHeuristicBinning::Split unalignedObjectSplit;
|
||||
LinearSpace3fa uspace;
|
||||
float unalignedObjectSAH = inf;
|
||||
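/* the unaligned (OBB) binning is more expensive, so only evaluate it when the aligned split did not already push the SAH well below the leaf cost */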
if (alignedObjectSAH > 0.7f*leafSAH) {
|
||||
uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
|
||||
const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
|
||||
unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
|
||||
unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
|
||||
bestSAH = min(unalignedObjectSAH,bestSAH);
|
||||
}
|
||||
|
||||
/* do temporal splits only if previous approaches failed to produce good SAH and the time range is large enough */
|
||||
float temporal_split_sah = inf;
|
||||
typename HeuristicTemporal::Split temporal_split;
|
||||
if (bestSAH > 0.5f*leafSAH) {
|
||||
if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
|
||||
temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
|
||||
temporal_split_sah = temporal_split.splitSAH();
|
||||
bestSAH = min(temporal_split_sah,bestSAH);
|
||||
}
|
||||
}
|
||||
|
||||
/* perform fallback split if SAH heuristics failed */
|
||||
if (unlikely(!std::isfinite(bestSAH))) {
|
||||
current.prims.deterministic_order();
|
||||
splitFallback(current.prims,lrecord.prims,rrecord.prims);
|
||||
}
|
||||
/* perform aligned split if this is best */
|
||||
else if (likely(bestSAH == alignedObjectSAH)) {
|
||||
alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
|
||||
}
|
||||
/* perform unaligned split if this is best */
|
||||
else if (likely(bestSAH == unalignedObjectSAH)) {
|
||||
unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
|
||||
aligned = false;
|
||||
}
|
||||
/* perform temporal split if this is best */
|
||||
else if (likely(bestSAH == temporal_split_sah)) {
|
||||
timesplit = true;
|
||||
return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
|
||||
return std::unique_ptr<mvector<PrimRefMB>>();
|
||||
}
|
||||
|
||||
/*! recursive build */
|
||||
NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAlloc();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
|
||||
progressMonitor(current.size());
|
||||
|
||||
/* create leaf node */
|
||||
if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
|
||||
current.prims.deterministic_order();
|
||||
return createLargeLeaf(current,alloc);
|
||||
}
|
||||
|
||||
/* fill all children by always splitting the one with the largest surface area */
|
||||
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
|
||||
LocalChildList children(current);
|
||||
bool aligned = true;
|
||||
bool timesplit = false;
|
||||
|
||||
do {
|
||||
|
||||
/* find best child with largest bounding box area */
|
||||
ssize_t bestChild = -1;
|
||||
float bestArea = neg_inf;
|
||||
for (size_t i=0; i<children.size(); i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= cfg.minLeafSize)
|
||||
continue;
|
||||
|
||||
/* remember child with largest area */
|
||||
const float A = children[i].prims.halfArea();
|
||||
if (A > bestArea) {
|
||||
bestArea = children[i].prims.halfArea();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
BuildRecord left(current.depth+1);
|
||||
BuildRecord right(current.depth+1);
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
|
||||
children.split(bestChild,left,right,std::move(new_vector));
|
||||
|
||||
} while (children.size() < cfg.branchingFactor);
|
||||
|
||||
/* detect time_ranges that have shrunken */
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
const BBox1f c = children[i].prims.time_range;
|
||||
const BBox1f p = current.prims.time_range;
|
||||
timesplit |= c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* create time split node */
|
||||
if (timesplit)
|
||||
{
|
||||
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
|
||||
|
||||
/* spawn tasks or ... */
|
||||
if (current.size() > SINGLE_THREADED_THRESHOLD)
|
||||
{
|
||||
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
values[i] = recurse(children[i],nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
}
|
||||
/* ... continue sequential */
|
||||
else {
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
values[i] = recurse(children[i],alloc,false);
|
||||
}
|
||||
}
|
||||
|
||||
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
|
||||
|
||||
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
|
||||
return NodeRecordMB4D(node,bounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
/* create aligned node */
|
||||
else if (aligned)
|
||||
{
|
||||
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
|
||||
|
||||
/* spawn tasks or ... */
|
||||
if (current.size() > SINGLE_THREADED_THRESHOLD)
|
||||
{
|
||||
LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
|
||||
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
values[i] = recurse(children[i],nullptr,true);
|
||||
cbounds[i] = values[i].lbounds;
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<children.size(); i++)
|
||||
bounds.extend(cbounds[i]);
|
||||
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
|
||||
return NodeRecordMB4D(node,bounds,current.prims.time_range);
|
||||
}
|
||||
/* ... continue sequentially */
|
||||
else
|
||||
{
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
values[i] = recurse(children[i],alloc,false);
|
||||
bounds.extend(values[i].lbounds);
|
||||
}
|
||||
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
|
||||
return NodeRecordMB4D(node,bounds,current.prims.time_range);
|
||||
}
|
||||
}
|
||||
|
||||
/* create unaligned node */
|
||||
else
|
||||
{
|
||||
const NodeRef node = createOBBNodeMB(alloc);
|
||||
|
||||
/* spawn tasks or ... */
|
||||
if (current.size() > SINGLE_THREADED_THRESHOLD)
|
||||
{
|
||||
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
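/* for each child: build a coordinate space aligned to its primitives, compute its linear bounds in that space, and store both in the OBB node */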
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
|
||||
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
|
||||
const auto child = recurse(children[i],nullptr,true);
|
||||
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
}
|
||||
/* ... continue sequentially */
|
||||
else
|
||||
{
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
|
||||
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
|
||||
const auto child = recurse(children[i],alloc,false);
|
||||
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
|
||||
}
|
||||
}
|
||||
|
||||
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
|
||||
return NodeRecordMB4D(node,bounds,current.prims.time_range);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*! entry point into builder */
|
||||
NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
|
||||
{
|
||||
BuildRecord record(SetMB(pinfo,&prims),1);
|
||||
auto root = recurse(record,nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
|
||||
private:
|
||||
Settings cfg;
|
||||
Scene* scene;
|
||||
const RecalculatePrimRef& recalculatePrimRef;
|
||||
const CreateAllocFunc& createAlloc;
|
||||
const CreateAABBNodeMBFunc& createAABBNodeMB;
|
||||
const SetAABBNodeMBFunc& setAABBNodeMB;
|
||||
const CreateOBBNodeMBFunc& createOBBNodeMB;
|
||||
const SetOBBNodeMBFunc& setOBBNodeMB;
|
||||
const CreateLeafFunc& createLeaf;
|
||||
const ProgressMonitor& progressMonitor;
|
||||
|
||||
private:
|
||||
HeuristicBinning alignedHeuristic;
|
||||
UnalignedHeuristicBinning unalignedHeuristic;
|
||||
HeuristicTemporal temporalSplitHeuristic;
|
||||
};
|
||||
|
||||
template<typename NodeRef,
|
||||
typename RecalculatePrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateAABBNodeMBFunc,
|
||||
typename SetAABBNodeMBFunc,
|
||||
typename CreateOBBNodeMBFunc,
|
||||
typename SetOBBNodeMBFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
|
||||
const RecalculatePrimRef& recalculatePrimRef,
|
||||
const CreateAllocFunc& createAlloc,
|
||||
const CreateAABBNodeMBFunc& createAABBNodeMB,
|
||||
const SetAABBNodeMBFunc& setAABBNodeMB,
|
||||
const CreateOBBNodeMBFunc& createOBBNodeMB,
|
||||
const SetOBBNodeMBFunc& setOBBNodeMB,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings settings)
|
||||
{
|
||||
typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
|
||||
CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
|
||||
CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
|
||||
CreateLeafFunc,ProgressMonitor> Builder;
|
||||
|
||||
Builder builder(scene,recalculatePrimRef,createAlloc,
|
||||
createAABBNodeMB,setAABBNodeMB,
|
||||
createOBBNodeMB,setOBBNodeMB,
|
||||
createLeaf,progressMonitor,settings);
|
||||
|
||||
return builder(prims,pinfo);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
663
thirdparty/embree/kernels/builders/bvh_builder_sah.h
vendored
Normal file
@@ -0,0 +1,663 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning_array_aligned.h"
|
||||
#include "heuristic_spatial_array.h"
|
||||
#include "heuristic_openmerge_array.h"
|
||||
|
||||
#define NUM_OBJECT_BINS 32
|
||||
#define NUM_SPATIAL_BINS 16
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
MAYBE_UNUSED static const float travCost = 1.0f;
|
||||
MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
|
||||
|
||||
struct GeneralBVHBuilder
|
||||
{
|
||||
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
|
||||
|
||||
/*! settings for SAH builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
|
||||
travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}
|
||||
|
||||
/*! initialize settings from API settings */
|
||||
Settings (const RTCBuildArguments& settings)
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
|
||||
travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
|
||||
{
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(settings.sahBlockSize);
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;
|
||||
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(min(minLeafSize,maxLeafSize)), maxLeafSize(maxLeafSize),
|
||||
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
float travCost; //!< estimated cost of one traversal step
|
||||
float intCost; //!< estimated cost of one primitive intersection
|
||||
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
|
||||
size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
|
||||
};
|
||||
|
||||
/*! recursive state of builder */
|
||||
template<typename Set, typename Split>
|
||||
struct BuildRecordT
|
||||
{
|
||||
public:
|
||||
__forceinline BuildRecordT () {}
|
||||
|
||||
__forceinline BuildRecordT (size_t depth)
|
||||
: depth(depth), alloc_barrier(false), prims(empty) {}
|
||||
|
||||
__forceinline BuildRecordT (size_t depth, const Set& prims)
|
||||
: depth(depth), alloc_barrier(false), prims(prims) {}
|
||||
|
||||
__forceinline BBox3fa bounds() const { return prims.geomBounds; }
|
||||
|
||||
__forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }
|
||||
__forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }
|
||||
|
||||
__forceinline size_t size() const { return prims.size(); }
|
||||
|
||||
public:
|
||||
size_t depth; //!< Depth of the root of this subtree.
|
||||
bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
|
||||
Set prims; //!< The list of primitives.
|
||||
};
|
||||
|
||||
template<typename PrimRef, typename Set>
|
||||
struct DefaultCanCreateLeafFunc
|
||||
{
|
||||
__forceinline bool operator()(const PrimRef*, const Set&) const { return true; }
|
||||
};
|
||||
|
||||
template<typename PrimRef, typename Set>
|
||||
struct DefaultCanCreateLeafSplitFunc
|
||||
{
|
||||
__forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { }
|
||||
};
|
||||
|
||||
template<typename BuildRecord,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename ReductionTy,
|
||||
typename Allocator,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
class BuilderT
|
||||
{
|
||||
friend struct GeneralBVHBuilder;
|
||||
|
||||
BuilderT (PrimRef* prims,
|
||||
Heuristic& heuristic,
|
||||
const CreateAllocFunc& createAlloc,
|
||||
const CreateNodeFunc& createNode,
|
||||
const UpdateNodeFunc& updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings) :
|
||||
cfg(settings),
|
||||
prims(prims),
|
||||
heuristic(heuristic),
|
||||
createAlloc(createAlloc),
|
||||
createNode(createNode),
|
||||
updateNode(updateNode),
|
||||
createLeaf(createLeaf),
|
||||
canCreateLeaf(canCreateLeaf),
|
||||
canCreateLeafSplit(canCreateLeafSplit),
|
||||
progressMonitor(progressMonitor)
|
||||
{
|
||||
if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
|
||||
}
|
||||
|
||||
const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
|
||||
{
|
||||
/* this should never occur, but if it does it is a fatal error */
|
||||
if (current.depth > cfg.maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
|
||||
return createLeaf(prims,current.prims,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
ReductionTy values[MAX_BRANCHING_FACTOR];
|
||||
BuildRecord children[MAX_BRANCHING_FACTOR];
|
||||
size_t numChildren = 1;
|
||||
children[0] = current;
|
||||
do {
|
||||
|
||||
/* find best child with the largest number of primitives */
|
||||
size_t bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
|
||||
continue;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].prims.size() > bestSize) {
|
||||
bestSize = children[i].prims.size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == (size_t)-1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
BuildRecord left(current.depth+1);
|
||||
BuildRecord right(current.depth+1);
|
||||
if (!canCreateLeaf(prims,children[bestChild].prims)) {
|
||||
canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
|
||||
} else {
|
||||
heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
|
||||
}
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = left;
|
||||
children[numChildren+0] = right;
|
||||
numChildren++;
|
||||
|
||||
} while (numChildren < cfg.branchingFactor);
|
||||
|
||||
/* set barrier for primrefarrayalloc */
|
||||
if (unlikely(current.size() > cfg.primrefarrayalloc))
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
|
||||
|
||||
/* create node */
|
||||
auto node = createNode(children,numChildren,alloc);
|
||||
|
||||
/* recurse into each child and perform reduction */
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
values[i] = createLargeLeaf(children[i],alloc);
|
||||
|
||||
/* perform reduction */
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
|
||||
const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAlloc();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && current.size() <= cfg.singleThreadThreshold)
|
||||
progressMonitor(current.size());
|
||||
|
||||
/*! find best split */
|
||||
auto split = heuristic.find(current.prims,cfg.logBlockSize);
|
||||
|
||||
/*! compute leaf and split cost */
|
||||
const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
|
||||
const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
|
||||
assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
|
||||
|
||||
/*! create a leaf node when threshold reached or SAH tells us to stop */
|
||||
if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
|
||||
heuristic.deterministic_order(current.prims);
|
||||
return createLargeLeaf(current,alloc);
|
||||
}
|
||||
|
||||
/*! perform initial split */
|
||||
Set lprims,rprims;
|
||||
heuristic.split(split,current.prims,lprims,rprims);
|
||||
|
||||
/*! initialize child list with initial split */
|
||||
ReductionTy values[MAX_BRANCHING_FACTOR];
|
||||
BuildRecord children[MAX_BRANCHING_FACTOR];
|
||||
children[0] = BuildRecord(current.depth+1,lprims);
|
||||
children[1] = BuildRecord(current.depth+1,rprims);
|
||||
size_t numChildren = 2;
|
||||
|
||||
/*! split until node is full or SAH tells us to stop */
|
||||
while (numChildren < cfg.branchingFactor)
|
||||
{
|
||||
/*! find best child to split */
|
||||
float bestArea = neg_inf;
|
||||
ssize_t bestChild = -1;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].prims.size() <= cfg.minLeafSize) continue;
|
||||
|
||||
/* find child with largest surface area */
|
||||
if (halfArea(children[i].prims.geomBounds) > bestArea) {
|
||||
bestChild = i;
|
||||
bestArea = halfArea(children[i].prims.geomBounds);
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/* perform best found split */
|
||||
BuildRecord& brecord = children[bestChild];
|
||||
BuildRecord lrecord(current.depth+1);
|
||||
BuildRecord rrecord(current.depth+1);
|
||||
auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
|
||||
heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
|
||||
children[bestChild ] = lrecord;
|
||||
children[numChildren] = rrecord;
|
||||
numChildren++;
|
||||
}
|
||||
|
||||
/* set barrier for primrefarrayalloc */
|
||||
if (unlikely(current.size() > cfg.primrefarrayalloc))
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
|
||||
|
||||
/* sort buildrecords for faster shadow ray traversal */
|
||||
std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());
|
||||
|
||||
/*! create an inner node */
|
||||
auto node = createNode(children,numChildren,alloc);
|
||||
|
||||
/* spawn tasks */
|
||||
if (current.size() > cfg.singleThreadThreshold)
|
||||
{
|
||||
/*! parallel_for is faster than spawning sub-tasks */
|
||||
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
values[i] = recurse(children[i],nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
/* recurse into each child */
|
||||
else
|
||||
{
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
values[i] = recurse(children[i],alloc,false);
|
||||
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Settings cfg;
|
||||
PrimRef* prims;
|
||||
Heuristic& heuristic;
|
||||
const CreateAllocFunc& createAlloc;
|
||||
const CreateNodeFunc& createNode;
|
||||
const UpdateNodeFunc& updateNode;
|
||||
const CreateLeafFunc& createLeaf;
|
||||
const CanCreateLeafFunc& canCreateLeaf;
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit;
|
||||
const ProgressMonitor& progressMonitor;
|
||||
};
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
__noinline static ReductionTy build(Heuristic& heuristic,
|
||||
PrimRef* prims,
|
||||
const Set& set,
|
||||
CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
|
||||
typedef BuilderT<
|
||||
BuildRecord,
|
||||
Heuristic,
|
||||
Set,
|
||||
PrimRef,
|
||||
ReductionTy,
|
||||
decltype(createAlloc()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
UpdateNodeFunc,
|
||||
CreateLeafFunc,
|
||||
DefaultCanCreateLeafFunc<PrimRef, Set>,
|
||||
DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
|
||||
ProgressMonitor> Builder;
|
||||
|
||||
/* instantiate builder */
|
||||
Builder builder(prims,
|
||||
heuristic,
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
DefaultCanCreateLeafFunc<PrimRef, Set>(),
|
||||
DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
/* build hierarchy */
|
||||
BuildRecord record(1,set);
|
||||
const ReductionTy root = builder.recurse(record,nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
__noinline static ReductionTy build(Heuristic& heuristic,
|
||||
PrimRef* prims,
|
||||
const Set& set,
|
||||
CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
|
||||
typedef BuilderT<
|
||||
BuildRecord,
|
||||
Heuristic,
|
||||
Set,
|
||||
PrimRef,
|
||||
ReductionTy,
|
||||
decltype(createAlloc()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
UpdateNodeFunc,
|
||||
CreateLeafFunc,
|
||||
CanCreateLeafFunc,
|
||||
CanCreateLeafSplitFunc,
|
||||
ProgressMonitor> Builder;
|
||||
|
||||
/* instantiate builder */
|
||||
Builder builder(prims,
|
||||
heuristic,
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
canCreateLeaf,
|
||||
canCreateLeafSplit,
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
/* build hierarchy */
|
||||
BuildRecord record(1,set);
|
||||
const ReductionTy root = builder.recurse(record,nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
};
|
||||
|
||||
/* SAH builder that operates on an array of BuildRecords */
|
||||
struct BVHBuilderBinnedSAH
|
||||
{
|
||||
typedef PrimInfoRange Set;
|
||||
typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
PrimRef* prims, const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
Heuristic heuristic(prims);
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoRange(0,pinfo.size(),pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
PrimRef* prims, const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
Heuristic heuristic(prims);
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoRange(0,pinfo.size(),pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
canCreateLeaf,
|
||||
canCreateLeafSplit,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
|
||||
|
||||
/* Spatial SAH builder that operates on a double-buffered array of BuildRecords */
|
||||
struct BVHBuilderBinnedFastSpatialSAH
|
||||
{
|
||||
typedef PrimInfoExtRange Set;
|
||||
typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
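/* the upper RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS bits of the geomID field hold the per-primitive spatial split budget; GEOMID_MASK selects the original geomID and SPLITS_MASK the budget bits */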
static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
template<typename ReductionTy, typename UserCreateLeaf>
|
||||
struct CreateLeafExt
|
||||
{
|
||||
__forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
|
||||
: userCreateLeaf(userCreateLeaf) {}
|
||||
|
||||
// __noinline is a workaround for an ICC2016 compiler bug
|
||||
template<typename Allocator>
|
||||
__noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
|
||||
{
|
||||
for (size_t i=range.begin(); i<range.end(); i++)
|
||||
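/* clear the reserved split-budget bits so the leaf is created with the original geomID */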
prims[i].lower.u &= GEOMID_MASK;
|
||||
|
||||
return userCreateLeaf(prims,range,alloc);
|
||||
}
|
||||
|
||||
const UserCreateLeaf userCreateLeaf;
|
||||
};
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename SplitPrimitiveFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode,
|
||||
UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
SplitPrimitiveFunc splitPrimitive,
|
||||
ProgressMonitor progressMonitor,
|
||||
PrimRef* prims,
|
||||
const size_t extSize,
|
||||
const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
|
||||
Heuristic heuristic(splitPrimitive,prims,pinfo);
|
||||
|
||||
/* calculate total surface area */ // FIXME: this sum is not deterministic
|
||||
const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {
|
||||
|
||||
double A = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
A += area(prim.bounds());
|
||||
}
|
||||
return A;
|
||||
},std::plus<double>());
|
||||
|
||||
|
||||
/* calculate maximum number of spatial splits per primitive */
|
||||
const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
|
||||
const float f = 10.0f;
|
||||
|
||||
const float invA = 1.0f / A;
|
||||
parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {
|
||||
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
assert((prim.geomID() & SPLITS_MASK) == 0);
|
||||
// FIXME: is there a better general heuristic ?
|
||||
const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
|
||||
unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
|
||||
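/* pack the split budget into the reserved upper bits of the geomID field; it is stripped again in CreateLeafExt before leaf creation */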
prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
}
|
||||
});
|
||||
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
|
||||
|
||||
/* Open/Merge SAH builder that operates on an array of BuildRecords */
|
||||
struct BVHBuilderBinnedOpenMergeSAH
|
||||
{
|
||||
static const size_t NUM_OBJECT_BINS_HQ = 32;
|
||||
typedef PrimInfoExtRange Set;
|
||||
typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename BuildRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename NodeOpenerFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode,
|
||||
UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
NodeOpenerFunc nodeOpenerFunc,
|
||||
ProgressMonitor progressMonitor,
|
||||
BuildRef* prims,
|
||||
const size_t extSize,
|
||||
const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
|
||||
Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);
|
||||
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
552
thirdparty/embree/kernels/builders/heuristic_binning.h
vendored
Normal file
@@ -0,0 +1,552 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
#include "priminfo_mb.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! mapping into bins */
|
||||
template<size_t BINS>
|
||||
struct BinMapping
|
||||
{
|
||||
public:
|
||||
__forceinline BinMapping() {}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline BinMapping(size_t N, const BBox3fa& centBounds)
|
||||
{
|
||||
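/* use fewer bins for small inputs (roughly 4 + N/20), capped at BINS */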
num = min(BINS,size_t(4.0f + 0.05f*N));
|
||||
assert(num >= 1);
|
||||
const vfloat4 eps = 1E-34f;
|
||||
const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
|
||||
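/* the 0.99f factor guarantees that a centroid on the upper bound maps to bin num-1 rather than num; degenerate dimensions get scale 0 so everything falls into bin 0 */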
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) centBounds.lower;
|
||||
}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline BinMapping(const BBox3fa& centBounds)
|
||||
{
|
||||
num = BINS;
|
||||
const vfloat4 eps = 1E-34f;
|
||||
const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
|
||||
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) centBounds.lower;
|
||||
}
|
||||
|
||||
/*! calculates the mapping */
|
||||
template<typename PrimInfo>
|
||||
__forceinline BinMapping(const PrimInfo& pinfo)
|
||||
{
|
||||
const vfloat4 eps = 1E-34f;
|
||||
num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
|
||||
const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
|
||||
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) pinfo.centBounds.lower;
|
||||
}
|
||||
|
||||
/*! returns number of bins */
|
||||
__forceinline size_t size() const { return num; }
|
||||
|
||||
/*! slower but safe binning */
|
||||
__forceinline Vec3ia bin(const Vec3fa& p) const
|
||||
{
|
||||
const vint4 i = floori((vfloat4(p)-ofs)*scale);
|
||||
assert(i[0] >= 0 && (size_t)i[0] < num);
|
||||
assert(i[1] >= 0 && (size_t)i[1] < num);
|
||||
assert(i[2] >= 0 && (size_t)i[2] < num);
|
||||
|
||||
// we clamp to handle corner cases that could compute an out-of-bounds bin
|
||||
return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
__forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
|
||||
return Vec3ia(floori((vfloat4(p)-ofs)*scale));
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
template<typename PrimRef>
|
||||
__forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
|
||||
return bin_unsafe(p.binCenter());
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
template<typename PrimRef, typename BinBoundsAndCenter>
|
||||
__forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
|
||||
return bin_unsafe(binBoundsAndCenter.binCenter(p));
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline bool bin_unsafe(const PrimRef& ref,
|
||||
const vint4& vSplitPos,
|
||||
const vbool4& splitDimMask) const // FIXME: rename to isLeft
|
||||
{
|
||||
return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
|
||||
}
|
||||
/*! calculates left spatial position of bin */
|
||||
__forceinline float pos(const size_t bin, const size_t dim) const {
|
||||
return madd(float(bin),1.0f / scale[dim],ofs[dim]);
|
||||
}
|
||||
|
||||
/*! returns true if the mapping is invalid in some dimension */
|
||||
__forceinline bool invalid(const size_t dim) const {
|
||||
return scale[dim] == 0.0f;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
|
||||
return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t num;
|
||||
vfloat4 ofs,scale; //!< linear function that maps to bin ID
|
||||
};
|
||||
|
||||
/*! stores all information to perform some split */
|
||||
template<size_t BINS>
|
||||
struct BinSplit
|
||||
{
|
||||
enum
|
||||
{
|
||||
SPLIT_OBJECT = 0,
|
||||
SPLIT_FALLBACK = 1,
|
||||
SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
|
||||
SPLIT_TEMPORAL = 2,
|
||||
SPLIT_GEOMID = 3,
|
||||
};
|
||||
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline BinSplit()
|
||||
: sah(inf), dim(-1), pos(0), data(0) {}
|
||||
|
||||
__forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
|
||||
: sah(sah), dim(dim), fpos(fpos), data(data) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
|
||||
|
||||
/*! tests if this split is valid */
|
||||
__forceinline bool valid() const { return dim != -1; }
|
||||
|
||||
/*! calculates surface area heuristic for performing the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
|
||||
return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
int dim; //!< split dimension
|
||||
union { int pos; float fpos; }; //!< bin index for splitting
|
||||
unsigned int data; //!< extra optional split data
|
||||
BinMapping<BINS> mapping; //!< mapping into bins
|
||||
};
|
||||
|
||||
/*! stores extended information about the split */
|
||||
template<typename BBox>
|
||||
struct SplitInfoT
|
||||
{
|
||||
|
||||
__forceinline SplitInfoT () {}
|
||||
|
||||
__forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
|
||||
: leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
|
||||
|
||||
public:
|
||||
size_t leftCount,rightCount;
|
||||
BBox leftBounds,rightBounds;
|
||||
};
|
||||
|
||||
typedef SplitInfoT<BBox3fa> SplitInfo;
|
||||
typedef SplitInfoT<LBBox3fa> SplitInfo2;
|
||||
|
||||
/*! stores all binning information */
|
||||
template<size_t BINS, typename PrimRef, typename BBox>
|
||||
struct __aligned(64) BinInfoT
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef vbool4 vbool;
|
||||
typedef vint4 vint;
|
||||
typedef vfloat4 vfloat;
|
||||
|
||||
__forceinline BinInfoT() {
|
||||
}
|
||||
|
||||
__forceinline BinInfoT(EmptyTy) {
|
||||
clear();
|
||||
}
|
||||
|
||||
/*! bin access function */
|
||||
__forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
|
||||
__forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
|
||||
|
||||
__forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
|
||||
__forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
|
||||
|
||||
__forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
|
||||
__forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
|
||||
|
||||
/*! clears the bin info */
|
||||
__forceinline void clear()
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++) {
|
||||
bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
|
||||
counts(i) = vuint4(zero);
|
||||
}
|
||||
}
|
||||
|
||||
/*! bins an array of primitives */
|
||||
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
|
||||
{
|
||||
if (unlikely(N == 0)) return;
|
||||
size_t i;
|
||||
for (i=0; i<N-1; i+=2)
|
||||
{
|
||||
/*! map even and odd primitive to bin */
|
||||
BBox prim0; Vec3fa center0;
|
||||
prims[i+0].binBoundsAndCenter(prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
BBox prim1; Vec3fa center1;
|
||||
prims[i+1].binBoundsAndCenter(prim1,center1);
|
||||
const vint4 bin1 = (vint4)mapping.bin(center1);
|
||||
|
||||
/*! increase bounds for bins for even primitive */
|
||||
const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
|
||||
const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
|
||||
const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
|
||||
const unsigned int s0 = (unsigned int)prims[i+0].size();
|
||||
counts(b00,0)+=s0;
|
||||
counts(b01,1)+=s0;
|
||||
counts(b02,2)+=s0;
|
||||
|
||||
/*! increase bounds of bins for odd primitive */
|
||||
const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
|
||||
const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
|
||||
const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
|
||||
const unsigned int s1 = (unsigned int)prims[i+1].size();
|
||||
counts(b10,0)+=s1;
|
||||
counts(b11,1)+=s1;
|
||||
counts(b12,2)+=s1;
|
||||
}
|
||||
/*! handle the last primitive if the number of primitives is odd */
|
||||
if (i < N)
|
||||
{
|
||||
/*! map primitive to bin */
|
||||
BBox prim0; Vec3fa center0;
|
||||
prims[i].binBoundsAndCenter(prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
/*! increase bounds of bins */
|
||||
const unsigned int s0 = (unsigned int)prims[i].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
}
|
||||
}
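// A scalar sketch (editorial, not taken from the Embree sources) of what one binning step
// above does for a single primitive; the real loop processes two primitives per iteration
// and handles all three dimensions at once through vint4/vfloat4 lanes:
//
//   BBox prim; Vec3fa center;
//   prims[i].binBoundsAndCenter(prim, center);   // bounds and binning centroid
//   for (int dim = 0; dim < 3; dim++) {
//     const int b = mapping.bin(center)[dim];    // bin index in this dimension
//     bounds(b, dim).extend(prim);               // grow that bin's bounds
//     counts(b, dim) += prims[i].size();         // count primitives falling into the bin
//   }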
|
||||
|
||||
/*! bins an array of primitives */
|
||||
template<typename BinBoundsAndCenter>
|
||||
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (N == 0) return;
|
||||
|
||||
size_t i;
|
||||
for (i=0; i<N-1; i+=2)
|
||||
{
|
||||
/*! map even and odd primitive to bin */
|
||||
BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
|
||||
const vint4 bin1 = (vint4)mapping.bin(center1);
|
||||
|
||||
/*! increase bounds for bins for even primitive */
|
||||
const unsigned int s0 = prims[i+0].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
|
||||
/*! increase bounds of bins for odd primitive */
|
||||
const unsigned int s1 = prims[i+1].size();
|
||||
const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
|
||||
const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
|
||||
const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
|
||||
}
|
||||
|
||||
/*! handle the last primitive if the number of primitives is odd */
|
||||
if (i < N)
|
||||
{
|
||||
/*! map primitive to bin */
|
||||
BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
/*! increase bounds of bins */
|
||||
const unsigned int s0 = prims[i+0].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
|
||||
bin(prims+begin,end-begin,mapping);
|
||||
}
|
||||
|
||||
template<typename BinBoundsAndCenter>
|
||||
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
|
||||
bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
__forceinline void merge (const BinInfoT& other, size_t numBins)
|
||||
{
|
||||
|
||||
for (size_t i=0; i<numBins; i++)
|
||||
{
|
||||
counts(i) += other.counts(i);
|
||||
bounds(i,0).extend(other.bounds(i,0));
|
||||
bounds(i,1).extend(other.bounds(i,1));
|
||||
bounds(i,2).extend(other.bounds(i,2));
|
||||
}
|
||||
}
|
||||
|
||||
/*! reduces binning information */
|
||||
static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
|
||||
{
|
||||
BinInfoT c;
|
||||
for (size_t i=0; i<numBins; i++)
|
||||
{
|
||||
c.counts(i) = a.counts(i)+b.counts(i);
|
||||
c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
|
||||
c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
|
||||
c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
__forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
|
||||
for (size_t i=mapping.size()-1; i>0; i--)
|
||||
{
|
||||
count += counts(i);
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = (1 << blocks_shift)-1;
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<mapping.size(); i++, ii+=1)
|
||||
{
|
||||
count += counts(i-1);
|
||||
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with a vint4, which would allow for faster vfloat4 conversions.
|
||||
const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
|
||||
//const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
|
||||
|
||||
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
|
||||
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
}
|
||||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
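// A sketch (editorial) of the cost the two sweeps above evaluate. Splitting between bin i-1
// and bin i in dimension d, with counts rounded up to primitive blocks of size 2^blocks_shift:
//
//   N_L(i) = ceil( (sum_{b <  i} counts(b,d)) / 2^blocks_shift )
//   N_R(i) = ceil( (sum_{b >= i} counts(b,d)) / 2^blocks_shift )
//   cost(i,d) = halfArea(mergedBounds(0..i-1,d)) * N_L(i)
//             + halfArea(mergedBounds(i..BINS-1,d)) * N_R(i)
//
// The constant traversal cost and the division by the parent surface area are omitted,
// since they do not change which candidate plane wins the comparison.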
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
|
||||
for (size_t i=mapping.size()-1; i>0; i--)
|
||||
{
|
||||
count += counts(i);
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = blockSize-1;
|
||||
vfloat4 blocks_factor = 1.0f/float(blockSize);
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<mapping.size(); i++, ii+=1)
|
||||
{
|
||||
count += counts(i-1);
|
||||
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);
|
||||
const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);
|
||||
const vfloat4 sah = madd(lArea,lCount,rArea*rCount);
|
||||
|
||||
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
|
||||
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
}
|
||||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
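// best_block_size() is the same sweep as best(), except the block size need not be a power
// of two: the count rounding above is floor((N + blockSize - 1) / blockSize), i.e.
// ceil(N / blockSize), computed in float to stay on the 4-wide SIMD path.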
|
||||
|
||||
/*! calculates extended split information */
|
||||
__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
|
||||
{
|
||||
if (split.dim == -1) {
|
||||
new (&info) SplitInfoT<BBox>(0,empty,0,empty);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t leftCount = 0;
|
||||
BBox leftBounds = empty;
|
||||
for (size_t i=0; i<(size_t)split.pos; i++) {
|
||||
leftCount += counts(i,split.dim);
|
||||
leftBounds.extend(bounds(i,split.dim));
|
||||
}
|
||||
size_t rightCount = 0;
|
||||
BBox rightBounds = empty;
|
||||
for (size_t i=split.pos; i<mapping.size(); i++) {
|
||||
rightCount += counts(i,split.dim);
|
||||
rightBounds.extend(bounds(i,split.dim));
|
||||
}
|
||||
new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
|
||||
}
|
||||
|
||||
/*! gets the number of primitives left of the split */
|
||||
__forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
|
||||
{
|
||||
if (unlikely(split.dim == -1)) return -1;
|
||||
|
||||
size_t leftCount = 0;
|
||||
for (size_t i = 0; i < (size_t)split.pos; i++) {
|
||||
leftCount += counts(i, split.dim);
|
||||
}
|
||||
return leftCount;
|
||||
}
|
||||
|
||||
/*! gets the number of primitives right of the split */
|
||||
__forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
|
||||
{
|
||||
if (unlikely(split.dim == -1)) return -1;
|
||||
|
||||
size_t rightCount = 0;
|
||||
for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
|
||||
rightCount += counts(i, split.dim);
|
||||
}
|
||||
return rightCount;
|
||||
}
|
||||
|
||||
private:
|
||||
BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
|
||||
vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
|
||||
};
|
||||
}
|
||||
|
||||
template<typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
binner.bin(prims,begin,end,mapping);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
|
||||
{
|
||||
if (!parallel) {
|
||||
binner.bin(prims,begin,end,mapping);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (!parallel) {
|
||||
binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
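// The serial and parallel paths above agree because binning disjoint ranges and merging the
// per-bin counts/bounds is order-independent. A sketch of that invariant (editorial,
// assuming two adjacent ranges [beginA,beginB) and [beginB,end)):
//
//   BinInfoT a(empty), b(empty), s(empty);
//   a.bin(prims + beginA, beginB - beginA, mapping);
//   b.bin(prims + beginB, end - beginB, mapping);
//   a.merge(b, mapping.size());                    // what parallel_reduce computes
//   s.bin(prims + beginA, end - beginA, mapping);  // what the serial path computes
//   // a and s now hold identical per-bin counts and bounds.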
|
||||
}
|
249
thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
|
||||
{
|
||||
__forceinline PrimInfoRange () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoRange(const PrimInfo& pinfo)
|
||||
: CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
|
||||
|
||||
__forceinline PrimInfoRange(EmptyTy)
|
||||
: CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
|
||||
|
||||
__forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
|
||||
|
||||
__forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline range<size_t> get_range() const {
|
||||
return range<size_t>(begin(),end());
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeomBBox3fa::extend_primref(prim);
|
||||
_end++;
|
||||
}
|
||||
};
|
||||
|
||||
inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
const size_t begin = pinfo.begin();
|
||||
const size_t end = pinfo.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend_center2(prims[i]);
|
||||
new (&linfo) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend_center2(prims[i]);
|
||||
new (&rinfo) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
template<typename Type, typename getTypeFunc>
|
||||
inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
CentGeomBBox3fa local_left(empty), local_right(empty);
|
||||
auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
|
||||
const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
|
||||
linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
|
||||
rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
|
||||
}
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRef, size_t BINS>
|
||||
struct HeuristicArrayBinningSAH
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
|
||||
typedef range<size_t> Set;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
__forceinline HeuristicArrayBinningSAH ()
|
||||
: prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArrayBinningSAH (PrimRef* prims)
|
||||
: prims(prims) {}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
return find_template<false>(pinfo,logBlockSize);
|
||||
else
|
||||
return find_template<true>(pinfo,logBlockSize);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(pinfo);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
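// A condensed sketch of how the builders in this directory typically drive this heuristic
// (the exact call sites and the bin count of 32 are an assumption, not copied verbatim):
//
//   HeuristicArrayBinningSAH<PrimRef,32> heuristic(prims);
//   const Split split = heuristic.find(pinfo, logBlockSize);  // bin + SAH sweep, read-only
//   PrimInfoRange lrange, rrange;
//   heuristic.split(split, pinfo, lrange, rrange);            // partitions prims in place
//
// find() never reorders the primitive array; split() does, which is why deterministic_order()
// re-sorts a range before the fallback split is applied.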
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
return find_block_size_template<false>(pinfo,blockSize);
|
||||
else
|
||||
return find_block_size_template<true>(pinfo,blockSize);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(pinfo);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
|
||||
return binner.best_block_size(mapping,blockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
split_template<false>(split,pinfo,linfo,rinfo);
|
||||
else
|
||||
split_template<true>(split,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename Binner::vint vSplitPos(splitPos);
|
||||
const typename Binner::vbool vSplitMask(splitDimMask);
|
||||
auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
size_t center = 0;
|
||||
if (!parallel)
|
||||
center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
|
||||
else
|
||||
center = parallel_partitioning(
|
||||
prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
|
||||
[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
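// A scalar sketch (editorial) of the predicate the SIMD partition above evaluates per
// primitive:
//
//   const bool isLeft = split.mapping.bin(center2(ref.bounds()))[split.dim] < split.pos;
//
// bin_unsafe(ref, vSplitPos, vSplitMask) performs the same comparison with vint4 compares
// masked to the split dimension, avoiding a per-primitive lane extract.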
|
||||
|
||||
void deterministic_order(const PrimInfoRange& pinfo)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
|
||||
performFallbackSplit(prims,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
assert(range.size() > 1);
|
||||
CentGeomBBox3fa left(empty);
|
||||
CentGeomBBox3fa right(empty);
|
||||
unsigned int geomID = prims[range.begin()].geomID();
|
||||
size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
|
||||
[&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
|
||||
[ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
|
||||
|
||||
new (&linfo) PrimInfoRange(range.begin(),center,left);
|
||||
new (&rinfo) PrimInfoRange(center,range.end(),right);
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, size_t BINS>
|
||||
struct HeuristicArrayBinningMB
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<BINS> mapping(set.size(),set.centBounds);
|
||||
bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
|
||||
Split osplit = binner.best(mapping,logBlockSize);
|
||||
osplit.sah *= set.time_range.size();
|
||||
if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return osplit;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfoMB left = empty;
|
||||
PrimInfoMB right = empty;
|
||||
const vint4 vSplitPos(split.pos);
|
||||
const vbool4 vSplitMask(1 << split.dim);
|
||||
auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
|
||||
auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
|
||||
auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
|
||||
size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
|
||||
new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
}
|
||||
}
|
302
thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h
vendored
Normal file
@@ -0,0 +1,302 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRef, size_t BINS>
|
||||
struct UnalignedHeuristicArrayBinningSAH
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
|
||||
typedef range<size_t> Set;
|
||||
|
||||
__forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
|
||||
: scene(nullptr), prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
|
||||
: scene(scene), prims(prims) {}
|
||||
|
||||
const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
|
||||
{
|
||||
Vec3fa axis(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/*! find curve with minimum ID that defines valid direction */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const unsigned int geomID = prims[i].geomID();
|
||||
const unsigned int primID = prims[i].primID();
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
|
||||
if (sqr_length(axis1) > 1E-18f) {
|
||||
axis = normalize(axis1);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
return frame(axis).transposed();
|
||||
}
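// computeAlignedSpace() deterministically picks the curve with the smallest 64-bit
// geomID/primID key that has a usable direction and builds an orthonormal basis around it,
// so repeated builds over the same input choose the same "unaligned" space. A sketch of how
// that basis is then used (vbounds() as in computePrimInfo() below):
//
//   const LinearSpace3fa space = frame(axis).transposed();        // curve-aligned basis
//   const BBox3fa b = scene->get(geomID)->vbounds(space, primID); // bounds in that basis
//
// Binning in the rotated space gives much tighter boxes for long diagonal hair segments
// than binning in world space.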
|
||||
|
||||
const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
|
||||
{
|
||||
auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
|
||||
{
|
||||
CentGeomBBox3fa bounds(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
Geometry* mesh = scene->get(prims[i].geomID());
|
||||
bounds.extend(mesh->vbounds(space,prims[i].primID()));
|
||||
}
|
||||
return bounds;
|
||||
};
|
||||
|
||||
const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
|
||||
CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
|
||||
|
||||
return PrimInfo(set.begin(),set.end(),bounds);
|
||||
}
|
||||
|
||||
struct BinBoundsAndCenter
|
||||
{
|
||||
__forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
|
||||
: scene(scene), space(space) {}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter(const PrimRef& ref) const
|
||||
{
|
||||
Geometry* mesh = (Geometry*) scene->get(ref.geomID());
|
||||
BBox3fa bounds = mesh->vbounds(space,ref.primID());
|
||||
return embree::center2(bounds);
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
Geometry* mesh = (Geometry*) scene->get(ref.geomID());
|
||||
BBox3fa bounds = mesh->vbounds(space,ref.primID());
|
||||
bounds_o = bounds;
|
||||
center_o = embree::center2(bounds);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
const LinearSpace3fa space;
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
__forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
if (likely(pinfo.size() < 10000))
|
||||
return find_template<false>(pinfo,logBlockSize,space);
|
||||
else
|
||||
return find_template<true>(pinfo,logBlockSize,space);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
template<bool parallel>
|
||||
const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(set);
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,space);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (likely(set.size() < 10000))
|
||||
split_template<false>(split,space,set,lset,rset);
|
||||
else
|
||||
split_template<true>(split,space,set,lset,rset);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
template<bool parallel>
|
||||
__forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
const int splitPos = split.pos;
|
||||
const int splitDim = split.dim;
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,space);
|
||||
|
||||
size_t center = 0;
|
||||
if (likely(set.size() < 10000))
|
||||
center = serial_partitioning(prims,begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
|
||||
else
|
||||
center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
|
||||
[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
128);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
|
||||
|
||||
void deterministic_order(const range<size_t>& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[set.begin()],&prims[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend_center2(prims[i]);
|
||||
new (&lset) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend_center2(prims[i]);
|
||||
new (&rset) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* const scene;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, size_t BINS>
|
||||
struct UnalignedHeuristicArrayBinningMB
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
UnalignedHeuristicArrayBinningMB(Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)
|
||||
{
|
||||
Vec3fa axis0(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/*! find curve with minimum ID that defines valid direction */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const PrimRefMB& prim = (*set.prims)[i];
|
||||
const unsigned int geomID = prim.geomID();
|
||||
const unsigned int primID = prim.primID();
|
||||
const uint64_t geomprimID = prim.ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
|
||||
const Geometry* mesh = scene->get(geomID);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
|
||||
if (tbounds.size() == 0) continue;
|
||||
|
||||
const size_t t = (tbounds.begin()+tbounds.end())/2;
|
||||
const Vec3fa axis1 = mesh->computeDirection(primID,t);
|
||||
if (sqr_length(axis1) > 1E-18f) {
|
||||
axis0 = normalize(axis1);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
return frame(axis0).transposed();
|
||||
}
|
||||
|
||||
struct BinBoundsAndCenter
|
||||
{
|
||||
__forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
|
||||
: scene(scene), time_range(time_range), space(space) {}
|
||||
|
||||
/*! returns center for binning */
|
||||
template<typename PrimRef>
|
||||
__forceinline Vec3fa binCenter(const PrimRef& ref) const
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
return center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
bounds_o = lbounds.interpolate(0.5f);
|
||||
center_o = center2(bounds_o);
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
bounds_o = lbounds;
|
||||
center_o = center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
BBox1f time_range;
|
||||
const LinearSpace3fa space;
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<BINS> mapping(set.size(),set.centBounds);
|
||||
bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
|
||||
Split osplit = binner.best(mapping,logBlockSize);
|
||||
osplit.sah *= set.time_range.size();
|
||||
if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return osplit;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfoMB left = empty;
|
||||
PrimInfoMB right = empty;
|
||||
const vint4 vSplitPos(split.pos);
|
||||
const vbool4 vSplitMask(1 << split.dim);
|
||||
auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
|
||||
auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
|
||||
auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
|
||||
size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
|
||||
new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
};
|
||||
}
|
||||
}
|
443
thirdparty/embree/kernels/builders/heuristic_openmerge_array.h
vendored
Normal file
@@ -0,0 +1,443 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// TODO:
|
||||
// - adjust parallel build thresholds
|
||||
// - openNodesBasedOnExtend should consider max extended size
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
#include "heuristic_spatial.h"
|
||||
|
||||
/* stop opening if all bref.geomIDs are the same */
|
||||
#define EQUAL_GEOMID_STOP_CRITERIA 1
|
||||
|
||||
/* 10% spatial extent threshold */
|
||||
#define MAX_EXTEND_THRESHOLD 0.1f
|
||||
|
||||
/* maximum is 8 children */
|
||||
#define MAX_OPENED_CHILD_NODES 8
|
||||
|
||||
/* open until all build refs are below threshold size in one step */
|
||||
#define USE_LOOP_OPENING 0
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
|
||||
struct HeuristicArrayOpenMergeSAH
|
||||
{
|
||||
typedef BinSplit<OBJECT_BINS> Split;
|
||||
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
static const size_t MOVE_STEP_SIZE = 64;
|
||||
static const size_t CREATE_SPLITS_STEP_SIZE = 128;
|
||||
|
||||
__forceinline HeuristicArrayOpenMergeSAH ()
|
||||
: prims0(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
|
||||
: prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
|
||||
{
|
||||
assert(max_open_size <= MAX_OPENED_CHILD_NODES);
|
||||
}
|
||||
|
||||
struct OpenHeuristic
|
||||
{
|
||||
__forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
|
||||
{
|
||||
const Vec3fa diag = pinfo.geomBounds.size();
|
||||
dim = maxDim(diag);
|
||||
assert(diag[dim] > 0.0f);
|
||||
inv_max_extend = 1.0f / diag[dim];
|
||||
}
|
||||
|
||||
__forceinline bool operator () ( PrimRef& prim ) const {
|
||||
return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t dim;
|
||||
float inv_max_extend;
|
||||
};
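// The functor above is the opening criterion: a build reference is opened only if it points
// at an inner node and its extent along the dominant axis of the current set exceeds
// MAX_EXTEND_THRESHOLD (10%) of the set's extent. Worked example (editorial): if the set
// spans 100 units in its largest dimension, a non-leaf reference spanning 25 units is opened
// (25/100 = 0.25 > 0.1), while one spanning 8 units is not (0.08 <= 0.1).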
|
||||
|
||||
/*! compute extended ranges */
|
||||
__forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
|
||||
{
|
||||
assert(set.ext_range_size() > 0);
|
||||
const float left_factor = (float)lweight / (lweight + rweight);
|
||||
const size_t ext_range_size = set.ext_range_size();
|
||||
const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
|
||||
const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
|
||||
lset.set_ext_range(lset.end() + left_ext_range_size);
|
||||
rset.set_ext_range(rset.end() + right_ext_range_size);
|
||||
}
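// setExtentedRanges() distributes the parent's spare (extended) array slots to the children
// in proportion to their primitive weights. Worked example (editorial): with
// set.ext_range_size() == 100, lweight == 30 and rweight == 70, left_factor is 0.3, so the
// left child receives floor(0.3 * 100) == 30 extra slots after lset.end() and the right
// child the remaining 70 after rset.end(); moveExtentedRange() below then shifts the right
// range so its slots stay contiguous with it.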
|
||||
|
||||
/*! move ranges */
|
||||
__forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t left_ext_range_size = lset.ext_range_size();
|
||||
const size_t right_size = rset.size();
|
||||
|
||||
/* does the left child have an extended range? */
|
||||
if (left_ext_range_size > 0)
|
||||
{
|
||||
/* is the left extended range smaller than the right range? */
|
||||
if (left_ext_range_size < right_size)
|
||||
{
|
||||
/* only move a small part of the beginning of the right range to the end */
|
||||
parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+right_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no overlap, move entire right range to new location, can be made fully parallel */
|
||||
parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+left_ext_range_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
/* update right range */
|
||||
assert(rset.ext_end() + left_ext_range_size == set.ext_end());
|
||||
rset.move_right(left_ext_range_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* estimates the extra space required when opening, and checks if all primitives are from the same geometry */
|
||||
__noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
const unsigned int geomID = prims0[set.begin()].geomID();
|
||||
|
||||
auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
|
||||
bool commonGeomID = true;
|
||||
size_t opens = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
commonGeomID &= prims0[i].geomID() == geomID;
|
||||
if (heuristic(prims0[i]))
|
||||
opens += prims0[i].node.getN()-1; // coarse approximation
|
||||
}
|
||||
return std::pair<size_t,bool>(opens,commonGeomID);
|
||||
};
|
||||
auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
|
||||
return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
|
||||
};
|
||||
return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
|
||||
}
|
||||
|
||||
// FIXME: should consider maximum available extended size
|
||||
__noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
if (false && set.size() < PARALLEL_THRESHOLD)
|
||||
{
|
||||
size_t extra_elements = 0;
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
assert(extra_elements + n-1 <= set.ext_range_size());
|
||||
for (size_t j=0; j<n; j++)
|
||||
set.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1; j<n; j++)
|
||||
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
|
||||
extra_elements += n-1;
|
||||
}
|
||||
}
|
||||
set._end += extra_elements;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::atomic<size_t> ext_elements;
|
||||
ext_elements.store(0);
|
||||
PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
|
||||
PrimInfo info(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
const size_t ID = ext_elements.fetch_add(n-1);
|
||||
assert(ID + n-1 <= set.ext_range_size());
|
||||
|
||||
for (size_t j=0; j<n; j++)
|
||||
info.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1; j<n; j++)
|
||||
prims0[ext_range_start+ID+j-1] = tmp[j];
|
||||
}
|
||||
return info;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
set.centBounds.extend(info.centBounds);
|
||||
assert(ext_elements.load() <= set.ext_range_size());
|
||||
set._end += ext_elements.load();
|
||||
}
|
||||
}
|
||||
|
||||
__noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
size_t next_iteration_extra_elements = est_new_elements;
|
||||
|
||||
while (next_iteration_extra_elements <= set.ext_range_size())
|
||||
{
|
||||
next_iteration_extra_elements = 0;
|
||||
size_t extra_elements = 0;
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
assert(extra_elements + n-1 <= set.ext_range_size());
|
||||
for (size_t j=0;j<n;j++)
|
||||
set.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1;j<n;j++)
|
||||
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
|
||||
extra_elements += n-1;
|
||||
|
||||
for (size_t j=0; j<n; j++)
|
||||
if (heuristic(tmp[j]))
|
||||
next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
|
||||
|
||||
}
|
||||
}
|
||||
assert( extra_elements <= set.ext_range_size());
|
||||
set._end += extra_elements;
|
||||
|
||||
for (size_t i=set.begin();i<set.end();i++)
|
||||
assert(prims0[i].numPrimitives() > 0);
|
||||
|
||||
if (unlikely(next_iteration_extra_elements == 0)) break;
|
||||
}
|
||||
}
|
||||
|
||||
__noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
/* single element */
|
||||
if (set.size() <= 1)
|
||||
return Split();
|
||||
|
||||
/* disable opening if there is no overlap */
|
||||
const size_t D = 4;
|
||||
if (unlikely(set.has_ext_range() && set.size() <= D))
|
||||
{
|
||||
bool disjoint = true;
|
||||
for (size_t j=set.begin(); j<set.end()-1; j++) {
|
||||
for (size_t i=set.begin()+1; i<set.end(); i++) {
|
||||
if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
|
||||
disjoint = false; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (disjoint) set.set_ext_range(set.end()); /* disables opening */
|
||||
}
|
||||
|
||||
std::pair<size_t,bool> p(0,false);
|
||||
|
||||
/* disable opening when all primitives are from the same geometry */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
p = getProperties(set);
|
||||
#if EQUAL_GEOMID_STOP_CRITERIA == 1
|
||||
if (p.second) set.set_ext_range(set.end()); /* disable opening */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* open nodes when we have sufficient space available */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
#if USE_LOOP_OPENING == 1
|
||||
openNodesBasedOnExtendLoop(set,p.first);
|
||||
#else
|
||||
if (p.first <= set.ext_range_size())
|
||||
openNodesBasedOnExtend(set);
|
||||
#endif
|
||||
|
||||
/* disable opening when insufficient space is available for opening a node */
|
||||
if (set.ext_range_size() < max_open_size-1)
|
||||
set.set_ext_range(set.end()); /* disable opening */
|
||||
}
|
||||
|
||||
/* find best split */
|
||||
return object_find(set,logBlockSize);
|
||||
}
|
||||
|
||||
|
||||
/*! finds the best object split */
|
||||
__forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
|
||||
else return parallel_object_find (set,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
|
||||
binner.bin(prims0,set.begin(),set.end(),mapping);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
|
||||
const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
auto body = [&] (const range<size_t>& r) -> Binner {
|
||||
Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
|
||||
};
|
||||
auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
|
||||
Binner r = b0; r.merge(b1,_mapping.size()); return r;
|
||||
};
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
PrimInfoExtRange set = set_i;
|
||||
|
||||
/* fall back if the split is not valid */
|
||||
if (unlikely(!split.valid())) {
|
||||
deterministic_order(set);
|
||||
splitFallback(set,lset,rset);
|
||||
return;
|
||||
}
|
||||
|
||||
std::pair<size_t,size_t> ext_weights(0,0);
|
||||
|
||||
/* object split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_object_split(split,set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_object_split(split,set,lset,rset);
|
||||
|
||||
/* if we have an extended range, set extended child ranges and move right split range */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(local_left.size(),local_right.size());
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
|
||||
[] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
|
||||
return std::pair<size_t,size_t>(left.size(),right.size());
|
||||
}
|
||||
|
||||
void deterministic_order(const extended_range<size_t>& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims0[set.begin()],&prims0[set.end()]);
|
||||
}
|
||||
|
||||
__forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
PrimInfo left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.add_center2(prims0[i]);
|
||||
|
||||
const size_t lweight = left.end;
|
||||
|
||||
PrimInfo right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.add_center2(prims0[i]);
|
||||
|
||||
const size_t rweight = right.end;
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
/* if we have an extended range */
|
||||
if (set.has_ext_range())
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,lweight,rweight);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims0;
|
||||
const NodeOpenerFunc& nodeOpenerFunc;
|
||||
size_t max_open_size;
|
||||
};
|
||||
}
|
||||
}
|
366
thirdparty/embree/kernels/builders/heuristic_spatial.h
vendored
Normal file
@@ -0,0 +1,366 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
|
||||
|
||||
namespace isa
|
||||
{
|
||||
|
||||
/*! mapping into bins */
|
||||
template<size_t BINS>
|
||||
struct SpatialBinMapping
|
||||
{
|
||||
public:
|
||||
__forceinline SpatialBinMapping() {}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
|
||||
{
|
||||
const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
|
||||
const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
|
||||
const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
|
||||
const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
|
||||
scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
|
||||
ofs = (vfloat4) pinfo.geomBounds.lower;
|
||||
inv_scale = 1.0f / scale;
|
||||
}
|
||||
|
||||
/*! slower but safe binning */
|
||||
__forceinline vint4 bin(const Vec3fa& p) const
|
||||
{
|
||||
const vint4 i = floori((vfloat4(p)-ofs)*scale);
|
||||
return clamp(i,vint4(0),vint4(BINS-1));
|
||||
}
|
||||
|
||||
__forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
|
||||
{
|
||||
#if defined(__AVX__)
|
||||
const vfloat8 ofs8(ofs);
|
||||
const vfloat8 scale8(scale);
|
||||
const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
|
||||
const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
|
||||
return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
|
||||
#else
|
||||
const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
|
||||
const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
|
||||
const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
|
||||
const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
|
||||
return std::pair<vint4,vint4>(c_lower,c_upper);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*! calculates left spatial position of bin */
|
||||
__forceinline float pos(const size_t bin, const size_t dim) const {
|
||||
return madd(float(bin),inv_scale[dim],ofs[dim]);
|
||||
}
|
||||
|
||||
/*! calculates left spatial position of bin */
|
||||
template<size_t N>
|
||||
__forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
|
||||
return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
|
||||
}
|
||||
|
||||
/*! returns true if the mapping is invalid in some dimension */
|
||||
__forceinline bool invalid(const size_t dim) const {
|
||||
return scale[dim] == 0.0f;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
|
||||
};
|
||||
|
||||
/*! stores all information required to perform some split */
|
||||
template<size_t BINS>
|
||||
struct SpatialBinSplit
|
||||
{
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline SpatialBinSplit()
|
||||
: sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
|
||||
|
||||
/*! tests if this split is valid */
|
||||
__forceinline bool valid() const { return dim != -1; }
|
||||
|
||||
/*! calculates surface area heuristic for performing the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
|
||||
return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
int dim; //!< split dimension
|
||||
int pos; //!< split position
|
||||
int left; //!< number of elements on the left side
|
||||
int right; //!< number of elements on the right side
|
||||
float factor; //!< factor splitting the extended range
|
||||
SpatialBinMapping<BINS> mapping; //!< mapping into bins
|
||||
};
|
||||
|
||||
/*! stores all binning information */
|
||||
template<size_t BINS, typename PrimRef>
|
||||
struct __aligned(64) SpatialBinInfo
|
||||
{
|
||||
SpatialBinInfo() {
|
||||
}
|
||||
|
||||
__forceinline SpatialBinInfo(EmptyTy) {
|
||||
clear();
|
||||
}
|
||||
|
||||
/*! clears the bin info */
|
||||
__forceinline void clear()
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++) {
|
||||
bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
|
||||
numBegin[i] = numEnd[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*! adds binning data */
|
||||
__forceinline void add(const size_t dim,
|
||||
const size_t beginID,
|
||||
const size_t endID,
|
||||
const size_t binID,
|
||||
const BBox3fa &b,
|
||||
const size_t n = 1)
|
||||
{
|
||||
assert(beginID < BINS);
|
||||
assert(endID < BINS);
|
||||
assert(binID < BINS);
|
||||
|
||||
numBegin[beginID][dim]+=(unsigned int)n;
|
||||
numEnd [endID][dim]+=(unsigned int)n;
|
||||
bounds [binID][dim].extend(b);
|
||||
}
|
||||
|
||||
/*! extends binning bounds */
|
||||
__forceinline void extend(const size_t dim,
|
||||
const size_t binID,
|
||||
const BBox3fa &b)
|
||||
{
|
||||
assert(binID < BINS);
|
||||
bounds [binID][dim].extend(b);
|
||||
}
|
||||
|
||||
/*! bins an array of primitives */
|
||||
template<typename PrimitiveSplitterFactory>
|
||||
__forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
|
||||
{
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
const PrimRef& prim = source[i];
|
||||
unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
if (unlikely(splits <= 1))
|
||||
{
|
||||
const vint4 bin = mapping.bin(center(prim.bounds()));
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
|
||||
add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const vint4 bin0 = mapping.bin(prim.bounds().lower);
|
||||
const vint4 bin1 = mapping.bin(prim.bounds().upper);
|
||||
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
size_t bin;
|
||||
size_t l = bin0[dim];
|
||||
size_t r = bin1[dim];
|
||||
|
||||
// same bin optimization
|
||||
if (likely(l == r))
|
||||
{
|
||||
add(dim,l,l,l,prim.bounds());
|
||||
continue;
|
||||
}
|
||||
size_t bin_start = bin0[dim];
|
||||
size_t bin_end = bin1[dim];
|
||||
BBox3fa rest = prim.bounds();
|
||||
|
||||
/* assure that split position always overlaps the primitive bounds */
|
||||
while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
|
||||
while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--;
|
||||
|
||||
const auto splitter = splitterFactory(prim);
|
||||
for (bin=bin_start; bin<bin_end; bin++)
|
||||
{
|
||||
const float pos = mapping.pos(bin+1,dim);
|
||||
BBox3fa left,right;
|
||||
splitter(rest,dim,pos,left,right);
|
||||
|
||||
if (unlikely(left.empty())) l++;
|
||||
extend(dim,bin,left);
|
||||
rest = right;
|
||||
}
|
||||
if (unlikely(rest.empty())) r--;
|
||||
add(dim,l,r,bin,rest);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! bins an array of primitives */
|
||||
__forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
|
||||
{
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
const PrimRef &prim = source[i];
|
||||
const vint4 bin0 = mapping.bin(prim.bounds().lower);
|
||||
const vint4 bin1 = mapping.bin(prim.bounds().upper);
|
||||
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
const size_t l = bin0[dim];
|
||||
const size_t r = bin1[dim];
|
||||
|
||||
const unsigned int n = prim.primID();
|
||||
|
||||
// same bin optimization
|
||||
if (likely(l == r))
|
||||
{
|
||||
add(dim,l,l,l,prim.bounds(),n);
|
||||
continue;
|
||||
}
|
||||
const size_t bin_start = bin0[dim];
|
||||
const size_t bin_end = bin1[dim];
|
||||
for (size_t bin=bin_start; bin<bin_end; bin++)
|
||||
add(dim,l,r,bin,prim.bounds(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
void merge (const SpatialBinInfo& other)
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++)
|
||||
{
|
||||
numBegin[i] += other.numBegin[i];
|
||||
numEnd [i] += other.numEnd [i];
|
||||
bounds[i][0].extend(other.bounds[i][0]);
|
||||
bounds[i][1].extend(other.bounds[i][1]);
|
||||
bounds[i][2].extend(other.bounds[i][2]);
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
|
||||
{
|
||||
SpatialBinInfo c(empty);
|
||||
for (size_t i=0; i<BINS; i++)
|
||||
{
|
||||
c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
|
||||
c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
|
||||
c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
|
||||
c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
|
||||
c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
|
||||
for (size_t i=BINS-1; i>0; i--)
|
||||
{
|
||||
count += numEnd[i];
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
|
||||
by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
|
||||
bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = (1 << blocks_shift)-1;
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<BINS; i++, ii+=1)
|
||||
{
|
||||
count += numBegin[i-1];
|
||||
bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
|
||||
by.extend(bounds[i-1][1]); float Ay = halfArea(by);
|
||||
bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
|
||||
// const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
|
||||
const vbool4 mask = sah < vbestSAH;
|
||||
vbestPos = select(mask,ii ,vbestPos);
|
||||
vbestSAH = select(mask,sah,vbestSAH);
|
||||
vbestlCount = select(mask,count,vbestlCount);
|
||||
vbestrCount = select(mask,rCounts[i],vbestrCount);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
unsigned int bestlCount = 0;
|
||||
unsigned int bestrCount = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
bestlCount = vbestlCount[dim];
|
||||
bestrCount = vbestrCount[dim];
|
||||
}
|
||||
}
|
||||
assert(bestSAH >= 0.0f);
|
||||
|
||||
/* return invalid split if no split found */
|
||||
if (bestDim == -1)
|
||||
return SpatialBinSplit<BINS>(inf,-1,0,mapping);
|
||||
|
||||
/* return best found split */
|
||||
return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
|
||||
}
|
||||
|
||||
private:
|
||||
BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
|
||||
vuint4 numBegin[BINS]; //!< number of primitives starting in bin
|
||||
vuint4 numEnd[BINS]; //!< number of primitives ending in bin
|
||||
};
|
||||
}
|
||||
}
|
||||
|
545
thirdparty/embree/kernels/builders/heuristic_spatial_array.h
vendored
Normal file
545
thirdparty/embree/kernels/builders/heuristic_spatial_array.h
vendored
Normal file
@@ -0,0 +1,545 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
#include "heuristic_spatial.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
#if 0
|
||||
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
|
||||
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
|
||||
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
|
||||
#else
|
||||
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
|
||||
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
|
||||
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
|
||||
#endif
|
||||
|
||||
struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
|
||||
{
|
||||
__forceinline PrimInfoExtRange() {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoExtRange(EmptyTy)
|
||||
: CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}
|
||||
|
||||
__forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ObjectSplit, typename SpatialSplit>
|
||||
struct Split2
|
||||
{
|
||||
__forceinline Split2 () {}
|
||||
|
||||
__forceinline Split2 (const Split2& other)
|
||||
{
|
||||
spatial = other.spatial;
|
||||
sah = other.sah;
|
||||
if (spatial) spatialSplit() = other.spatialSplit();
|
||||
else objectSplit() = other.objectSplit();
|
||||
}
|
||||
|
||||
__forceinline Split2& operator= (const Split2& other)
|
||||
{
|
||||
spatial = other.spatial;
|
||||
sah = other.sah;
|
||||
if (spatial) spatialSplit() = other.spatialSplit();
|
||||
else objectSplit() = other.objectSplit();
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }
|
||||
__forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }
|
||||
|
||||
__forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }
|
||||
__forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }
|
||||
|
||||
__forceinline Split2 (const ObjectSplit& objectSplit, float sah)
|
||||
: spatial(false), sah(sah)
|
||||
{
|
||||
new (data) ObjectSplit(objectSplit);
|
||||
}
|
||||
|
||||
__forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
|
||||
: spatial(true), sah(sah)
|
||||
{
|
||||
new (data) SpatialSplit(spatialSplit);
|
||||
}
|
||||
|
||||
__forceinline float splitSAH() const {
|
||||
return sah;
|
||||
}
|
||||
|
||||
__forceinline bool valid() const {
|
||||
return sah < float(inf);
|
||||
}
|
||||
|
||||
public:
|
||||
__aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
|
||||
bool spatial;
|
||||
float sah;
|
||||
};
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>
|
||||
struct HeuristicArraySpatialSAH
|
||||
{
|
||||
typedef BinSplit<OBJECT_BINS> ObjectSplit;
|
||||
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
|
||||
|
||||
typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
|
||||
typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
|
||||
|
||||
//typedef extended_range<size_t> Set;
|
||||
typedef Split2<ObjectSplit,SpatialSplit> Split;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3*1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
static const size_t MOVE_STEP_SIZE = 64;
|
||||
static const size_t CREATE_SPLITS_STEP_SIZE = 64;
|
||||
|
||||
__forceinline HeuristicArraySpatialSAH ()
|
||||
: prims0(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
|
||||
: prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
|
||||
|
||||
|
||||
/*! compute extended ranges */
|
||||
__noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
|
||||
{
|
||||
assert(set.ext_range_size() > 0);
|
||||
const float left_factor = (float)lweight / (lweight + rweight);
|
||||
const size_t ext_range_size = set.ext_range_size();
|
||||
const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
|
||||
const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
|
||||
lset.set_ext_range(lset.end() + left_ext_range_size);
|
||||
rset.set_ext_range(rset.end() + right_ext_range_size);
|
||||
}
|
||||
|
||||
/*! move ranges */
|
||||
__noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t left_ext_range_size = lset.ext_range_size();
|
||||
const size_t right_size = rset.size();
|
||||
|
||||
/* has the left child an extended range? */
|
||||
if (left_ext_range_size > 0)
|
||||
{
|
||||
/* left extended range smaller than right range ? */
|
||||
if (left_ext_range_size < right_size)
|
||||
{
|
||||
/* only move a small part of the beginning of the right range to the end */
|
||||
parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+right_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no overlap, move entire right range to new location, can be made fully parallel */
|
||||
parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+left_ext_range_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
/* update right range */
|
||||
assert(rset.ext_end() + left_ext_range_size == set.ext_end());
|
||||
rset.move_right(left_ext_range_size);
|
||||
}
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SplitInfo oinfo;
|
||||
const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
|
||||
const float object_split_sah = object_split.splitSAH();
|
||||
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);
|
||||
|
||||
/* do only spatial splits if the child bounds overlap */
|
||||
if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
|
||||
safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
|
||||
{
|
||||
const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
|
||||
const float spatial_split_sah = spatial_split.splitSAH();
|
||||
|
||||
/* valid spatial split, better SAH and number of splits do not exceed extended range */
|
||||
if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
|
||||
spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
|
||||
{
|
||||
return Split(spatial_split,spatial_split_sah);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Split(object_split,object_split_sah);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);
|
||||
else return parallel_object_find (set,logBlockSize,info);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set);
|
||||
binner.bin(prims0,set.begin(),set.end(),mapping);
|
||||
ObjectSplit s = binner.best(mapping,logBlockSize);
|
||||
binner.getSplitInfo(mapping, s, info);
|
||||
return s;
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set);
|
||||
const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
|
||||
[&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
|
||||
[&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
|
||||
ObjectSplit s = binner.best(mapping,logBlockSize);
|
||||
binner.getSplitInfo(mapping, s, info);
|
||||
return s;
|
||||
}
|
||||
|
||||
/*! finds the best spatial split */
|
||||
__forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);
|
||||
else return parallel_spatial_find (set, logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best spatial split */
|
||||
__noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SpatialBinner binner(empty);
|
||||
const SpatialBinMapping<SPATIAL_BINS> mapping(set);
|
||||
binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
|
||||
/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
|
||||
return binner.best(mapping,logBlockSize); //,set.ext_size());
|
||||
}
|
||||
|
||||
__noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SpatialBinner binner(empty);
|
||||
const SpatialBinMapping<SPATIAL_BINS> mapping(set);
|
||||
const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
|
||||
[&] (const range<size_t>& r) -> SpatialBinner {
|
||||
SpatialBinner binner(empty);
|
||||
binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
|
||||
return binner; },
|
||||
[&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
|
||||
/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
|
||||
return binner.best(mapping,logBlockSize); //,set.ext_size());
|
||||
}
|
||||
|
||||
|
||||
/*! subdivides primitives based on a spatial split */
|
||||
__noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
|
||||
{
|
||||
assert(set.has_ext_range());
|
||||
const size_t max_ext_range_size = set.ext_range_size();
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
/* atomic counter for number of primref splits */
|
||||
std::atomic<size_t> ext_elements;
|
||||
ext_elements.store(0);
|
||||
|
||||
const float fpos = split.mapping.pos(split.pos,split.dim);
|
||||
|
||||
const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
|
||||
parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin();i<r.end();i++)
|
||||
{
|
||||
const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
|
||||
|
||||
const int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
|
||||
const int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
|
||||
if (unlikely(bin0 < split.pos && bin1 >= split.pos))
|
||||
{
|
||||
assert(splits > 1);
|
||||
|
||||
PrimRef left,right;
|
||||
const auto splitter = splitterFactory(prims0[i]);
|
||||
splitter(prims0[i],split.dim,fpos,left,right);
|
||||
|
||||
// no empty splits
|
||||
if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;
|
||||
|
||||
left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
|
||||
const size_t ID = ext_elements.fetch_add(1);
|
||||
|
||||
/* break if the number of subdivided elements are greater than the maximum allowed size */
|
||||
if (unlikely(ID >= max_ext_range_size))
|
||||
break;
|
||||
|
||||
/* only write within the correct bounds */
|
||||
assert(ID < max_ext_range_size);
|
||||
prims0[i] = left;
|
||||
prims0[ext_range_start+ID] = right;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
|
||||
assert(set.end()+numExtElements<=set.ext_end());
|
||||
set._end += numExtElements;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
PrimInfoExtRange set = set_i;
|
||||
|
||||
/* valid split */
|
||||
if (unlikely(!split.valid())) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
std::pair<size_t,size_t> ext_weights(0,0);
|
||||
|
||||
if (unlikely(split.spatial))
|
||||
{
|
||||
create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);
|
||||
|
||||
/* spatial split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* object split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
|
||||
}
|
||||
|
||||
/* if we have an extended range, set extended child ranges and move right split range */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename ObjectBinner::vint vSplitPos(splitPos);
|
||||
const typename ObjectBinner::vbool vSplitMask(splitDimMask);
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) {
|
||||
return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
|
||||
},
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
|
||||
const size_t left_weight = local_left.end;
|
||||
const size_t right_weight = local_right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
|
||||
assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
|
||||
assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
/* init spatial mapping */
|
||||
const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) {
|
||||
const Vec3fa c = ref.bounds().center();
|
||||
return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
|
||||
},
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
|
||||
|
||||
const size_t left_weight = local_left.end;
|
||||
const size_t right_weight = local_right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
|
||||
assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename ObjectBinner::vint vSplitPos(splitPos);
|
||||
const typename ObjectBinner::vbool vSplitMask(splitDimMask);
|
||||
auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
|
||||
[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
const size_t left_weight = left.end;
|
||||
const size_t right_weight = right.end;
|
||||
|
||||
left.begin = begin; left.end = center;
|
||||
right.begin = center; right.end = end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
assert(area(left.geomBounds) >= 0.0f);
|
||||
assert(area(right.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
/* init spatial mapping */
|
||||
const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
auto isLeft = [&] (const PrimRef &ref) {
|
||||
const Vec3fa c = ref.bounds().center();
|
||||
return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
|
||||
[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
const size_t left_weight = left.end;
|
||||
const size_t right_weight = right.end;
|
||||
|
||||
left.begin = begin; left.end = center;
|
||||
right.begin = center; right.end = end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
assert(area(left.geomBounds) >= 0.0f);
|
||||
assert(area(right.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
void deterministic_order(const PrimInfoExtRange& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims0[set.begin()],&prims0[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const PrimInfoExtRange& set,
|
||||
PrimInfoExtRange& lset,
|
||||
PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
PrimInfo left(empty);
|
||||
for (size_t i=begin; i<center; i++) {
|
||||
left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
}
|
||||
const size_t lweight = left.end;
|
||||
|
||||
PrimInfo right(empty);
|
||||
for (size_t i=center; i<end; i++) {
|
||||
right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
}
|
||||
const size_t rweight = right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
/* if we have an extended range */
|
||||
if (set.has_ext_range()) {
|
||||
setExtentedRanges(set,lset,rset,lweight,rweight);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims0;
|
||||
const PrimitiveSplitterFactory& splitterFactory;
|
||||
const CentGeomBBox3fa& root_info;
|
||||
};
|
||||
}
|
||||
}
|
188
thirdparty/embree/kernels/builders/heuristic_strand_array.h
vendored
Normal file
188
thirdparty/embree/kernels/builders/heuristic_strand_array.h
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
struct HeuristicStrandSplit
|
||||
{
|
||||
typedef range<size_t> Set;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 10000;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
|
||||
|
||||
/*! stores all information to perform some split */
|
||||
struct Split
|
||||
{
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline Split()
|
||||
: sah(inf), axis0(zero), axis1(zero) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
|
||||
: sah(sah), axis0(axis0), axis1(axis1) {}
|
||||
|
||||
/*! calculates standard surface area heuristic for the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! test if this split is valid */
|
||||
__forceinline bool valid() const { return sah != float(inf); }
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
Vec3fa axis0, axis1; //!< axis the two strands are aligned into
|
||||
};
|
||||
|
||||
__forceinline HeuristicStrandSplit () // FIXME: required?
|
||||
: scene(nullptr), prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
|
||||
: scene(scene), prims(prims) {}
|
||||
|
||||
__forceinline const Vec3fa direction(const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->computeDirection(prim.primID());
|
||||
}
|
||||
|
||||
__forceinline const BBox3fa bounds(const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->vbounds(prim.primID());
|
||||
}
|
||||
|
||||
__forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->vbounds(space,prim.primID());
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const range<size_t>& set, size_t logBlockSize)
|
||||
{
|
||||
Vec3fa axis0(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/* curve with minimum ID determines first axis */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
const Vec3fa axis = direction(prims[i]);
|
||||
if (sqr_length(axis) > 1E-18f) {
|
||||
axis0 = normalize(axis);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
/* find 2nd axis that is most misaligned with first axis and has minimum ID */
|
||||
float bestCos = 1.0f;
|
||||
Vec3fa axis1 = axis0;
|
||||
bestGeomPrimID = -1;
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
Vec3fa axisi = direction(prims[i]);
|
||||
float leni = length(axisi);
|
||||
if (leni == 0.0f) continue;
|
||||
axisi /= leni;
|
||||
float cos = abs(dot(axisi,axis0));
|
||||
if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
|
||||
bestCos = cos; axis1 = axisi;
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
/* partition the two strands */
|
||||
size_t lnum = 0, rnum = 0;
|
||||
BBox3fa lbounds = empty, rbounds = empty;
|
||||
const LinearSpace3fa space0 = frame(axis0).transposed();
|
||||
const LinearSpace3fa space1 = frame(axis1).transposed();
|
||||
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
const Vec3fa axisi = normalize(direction(prim));
|
||||
const float cos0 = abs(dot(axisi,axis0));
|
||||
const float cos1 = abs(dot(axisi,axis1));
|
||||
|
||||
if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
|
||||
else { rnum++; rbounds.extend(bounds(space1,prim)); }
|
||||
}
|
||||
|
||||
/*! return an invalid split if we do not partition */
|
||||
if (lnum == 0 || rnum == 0)
|
||||
return Split(inf,axis0,axis1);
|
||||
|
||||
/*! calculate sah for the split */
|
||||
const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
|
||||
const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
|
||||
const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
|
||||
return Split(sah,axis0,axis1);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
|
||||
auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
|
||||
const Vec3fa axisi = normalize(direction(prim));
|
||||
const float cos0 = abs(dot(axisi,split.axis0));
|
||||
const float cos1 = abs(dot(axisi,split.axis1));
|
||||
return cos0 > cos1;
|
||||
};
|
||||
|
||||
auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
|
||||
pinfo.extend(bounds(ref));
|
||||
};
|
||||
|
||||
size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
|
||||
|
||||
void deterministic_order(const Set& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[set.begin()],&prims[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend(bounds(prims[i]));
|
||||
new (&lset) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend(bounds(prims[i]));
|
||||
new (&rset) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* const scene;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
}
|
||||
}
|
237
thirdparty/embree/kernels/builders/heuristic_timesplit_array.h
vendored
Normal file
237
thirdparty/embree/kernels/builders/heuristic_timesplit_array.h
vendored
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../builders/primref_mb.h"
|
||||
#include "../../common/algorithms/parallel_filter.h"
|
||||
|
||||
#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS>
|
||||
struct HeuristicMBlurTemporalSplit
|
||||
{
|
||||
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef)
|
||||
: device(device), recalculatePrimRef(recalculatePrimRef) {}
|
||||
|
||||
struct TemporalBinInfo
|
||||
{
|
||||
__forceinline TemporalBinInfo () {
|
||||
}
|
||||
|
||||
__forceinline TemporalBinInfo (EmptyTy)
|
||||
{
|
||||
for (size_t i=0; i<BINS-1; i++)
|
||||
{
|
||||
count0[i] = count1[i] = 0;
|
||||
bounds0[i] = bounds1[i] = empty;
|
||||
}
|
||||
}
|
||||
|
||||
void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
|
||||
{
|
||||
for (int b=0; b<BINS-1; b++)
|
||||
{
|
||||
const float t = float(b+1)/float(BINS);
|
||||
const float ct = lerp(time_range.lower,time_range.upper,t);
|
||||
const float center_time = set.align_time(ct);
|
||||
if (center_time <= time_range.lower) continue;
|
||||
if (center_time >= time_range.upper) continue;
|
||||
const BBox1f dt0(time_range.lower,center_time);
|
||||
const BBox1f dt1(center_time,time_range.upper);
|
||||
|
||||
/* find linear bounds for both time segments */
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
if (prims[i].time_range_overlap(dt0))
|
||||
{
|
||||
const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0);
|
||||
#if MBLUR_BIN_LBBOX
|
||||
bounds0[b].extend(bn0);
|
||||
#else
|
||||
bounds0[b].extend(bn0.interpolate(0.5f));
|
||||
#endif
|
||||
count0[b] += prims[i].timeSegmentRange(dt0).size();
|
||||
}
|
||||
|
||||
if (prims[i].time_range_overlap(dt1))
|
||||
{
|
||||
const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1);
|
||||
#if MBLUR_BIN_LBBOX
|
||||
bounds1[b].extend(bn1);
|
||||
#else
|
||||
bounds1[b].extend(bn1.interpolate(0.5f));
|
||||
#endif
|
||||
count1[b] += prims[i].timeSegmentRange(dt1).size();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
bin(prims,begin,end,time_range,set,recalculatePrimRef);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {
|
||||
TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;
|
||||
};
|
||||
*this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2);
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
__forceinline void merge (const TemporalBinInfo& other)
|
||||
{
|
||||
for (size_t i=0; i<BINS-1; i++)
|
||||
{
|
||||
count0[i] += other.count0[i];
|
||||
count1[i] += other.count1[i];
|
||||
bounds0[i].extend(other.bounds0[i]);
|
||||
bounds1[i].extend(other.bounds1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) {
|
||||
TemporalBinInfo r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
Split best(int logBlockSize, BBox1f time_range, const SetMB& set)
|
||||
{
|
||||
float bestSAH = inf;
|
||||
float bestPos = 0.0f;
|
||||
for (int b=0; b<BINS-1; b++)
|
||||
{
|
||||
float t = float(b+1)/float(BINS);
|
||||
float ct = lerp(time_range.lower,time_range.upper,t);
|
||||
const float center_time = set.align_time(ct);
|
||||
if (center_time <= time_range.lower) continue;
|
||||
if (center_time >= time_range.upper) continue;
|
||||
const BBox1f dt0(time_range.lower,center_time);
|
||||
const BBox1f dt1(center_time,time_range.upper);
|
||||
|
||||
/* calculate sah */
|
||||
const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
|
||||
const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
|
||||
float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size();
|
||||
float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size();
|
||||
if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time
|
||||
if (unlikely(rCount == 0)) sah1 = 0.0f;
|
||||
const float sah = sah0+sah1;
|
||||
if (sah < bestSAH) {
|
||||
bestSAH = sah;
|
||||
bestPos = center_time;
|
||||
}
|
||||
}
|
||||
return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t count0[BINS-1];
|
||||
size_t count1[BINS-1];
|
||||
BBox bounds0[BINS-1];
|
||||
BBox bounds1[BINS-1];
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize)
|
||||
{
|
||||
assert(set.size() > 0);
|
||||
TemporalBinInfo binner(empty);
|
||||
binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef);
|
||||
Split tsplit = binner.best((int)logBlockSize,set.time_range,set);
|
||||
if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return tsplit;
|
||||
}
|
||||
|
||||
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
assert(tsplit.sah != float(inf));
|
||||
assert(tsplit.fpos > set.time_range.lower);
|
||||
assert(tsplit.fpos < set.time_range.upper);
|
||||
|
||||
float center_time = tsplit.fpos;
|
||||
const BBox1f time_range0(set.time_range.lower,center_time);
|
||||
const BBox1f time_range1(center_time,set.time_range.upper);
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
|
||||
/* calculate primrefs for first time range */
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size()));
|
||||
PrimRefVector lprims = new_vector.get();
|
||||
|
||||
auto reduction_func0 = [&] (const range<size_t>& r) {
|
||||
PrimInfoMB pinfo = empty;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (likely(prims[i].time_range_overlap(time_range0)))
|
||||
{
|
||||
const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0);
|
||||
(*lprims)[i-set.begin()] = prim;
|
||||
pinfo.add_primref(prim);
|
||||
}
|
||||
else
|
||||
{
|
||||
(*lprims)[i-set.begin()] = prims[i];
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
};
|
||||
PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2);
|
||||
|
||||
/* primrefs for first time range are in lprims[0 .. set.size()) */
|
||||
/* some primitives may need to be filtered out */
|
||||
if (linfo.size() != set.size())
|
||||
linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024),
|
||||
[&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); });
|
||||
|
||||
lset = SetMB(linfo,lprims,time_range0);
|
||||
|
||||
/* calculate primrefs for second time range */
|
||||
auto reduction_func1 = [&] (const range<size_t>& r) {
|
||||
PrimInfoMB pinfo = empty;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (likely(prims[i].time_range_overlap(time_range1)))
|
||||
{
|
||||
const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1);
|
||||
prims[i] = prim;
|
||||
pinfo.add_primref(prim);
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
};
|
||||
PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2);
|
||||
rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size());
|
||||
|
||||
/* primrefs for second time range are in prims[set.begin() .. set.end()) */
|
||||
/* some primitives may need to be filtered out */
|
||||
if (rinfo.size() != set.size())
|
||||
rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024),
|
||||
[&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); });
|
||||
|
||||
rset = SetMB(rinfo,&prims,time_range1);
|
||||
|
||||
return new_vector;
|
||||
}
|
||||
|
||||
private:
|
||||
MemoryMonitorInterface* device; // device to report memory usage to
|
||||
const RecalculatePrimRef recalculatePrimRef;
|
||||
};
|
||||
}
|
||||
}
|
167
thirdparty/embree/kernels/builders/priminfo.h
vendored
Normal file
167
thirdparty/embree/kernels/builders/priminfo.h
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "primref.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
// FIXME: maybe there's a better place for this util fct
|
||||
__forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2)
|
||||
{
|
||||
const Vec3fa e0 = v1-v0;
|
||||
const Vec3fa e1 = v2-v0;
|
||||
const Vec3fa d = cross(e0,e1);
|
||||
return fabs(d.x) + fabs(d.y) + fabs(d.z);
|
||||
}
|
||||
|
||||
//namespace isa
|
||||
//{
|
||||
template<typename BBox>
|
||||
class CentGeom
|
||||
{
|
||||
public:
|
||||
__forceinline CentGeom () {}
|
||||
|
||||
__forceinline CentGeom (EmptyTy)
|
||||
: geomBounds(empty), centBounds(empty) {}
|
||||
|
||||
__forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds)
|
||||
: geomBounds(geomBounds), centBounds(centBounds) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void extend_primref(const PrimRef& prim)
|
||||
{
|
||||
BBox bounds; Vec3fa center;
|
||||
prim.binBoundsAndCenter(bounds,center);
|
||||
geomBounds.extend(bounds);
|
||||
centBounds.extend(center);
|
||||
}
|
||||
|
||||
static void extend_ref (CentGeom& pinfo, const PrimRef& ref) {
|
||||
pinfo.extend_primref(ref);
|
||||
};
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void extend_center2(const PrimRef& prim)
|
||||
{
|
||||
BBox3fa bounds = prim.bounds();
|
||||
geomBounds.extend(bounds);
|
||||
centBounds.extend(bounds.center2());
|
||||
}
|
||||
|
||||
__forceinline void extend(const BBox& geomBounds_) {
|
||||
geomBounds.extend(geomBounds_);
|
||||
centBounds.extend(center2(geomBounds_));
|
||||
}
|
||||
|
||||
__forceinline void merge(const CentGeom& other)
|
||||
{
|
||||
geomBounds.extend(other.geomBounds);
|
||||
centBounds.extend(other.centBounds);
|
||||
}
|
||||
|
||||
static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) {
|
||||
CentGeom r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
public:
|
||||
BBox geomBounds; //!< geometry bounds of primitives
|
||||
BBox3fa centBounds; //!< centroid bounds of primitives
|
||||
};
|
||||
|
||||
typedef CentGeom<BBox3fa> CentGeomBBox3fa;
|
||||
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoT () {}
|
||||
|
||||
__forceinline PrimInfoT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), begin(0), end(0) {}
|
||||
|
||||
__forceinline PrimInfoT (size_t N)
|
||||
: CentGeom<BBox>(empty), begin(0), end(N) {}
|
||||
|
||||
__forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
end++;
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_center2(const PrimRef& prim) {
|
||||
CentGeom<BBox>::extend_center2(prim);
|
||||
end++;
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_center2(const PrimRef& prim, const size_t i) {
|
||||
CentGeom<BBox>::extend_center2(prim);
|
||||
end+=i;
|
||||
}
|
||||
|
||||
/*__forceinline void add(const BBox& geomBounds_) {
|
||||
CentGeom<BBox>::extend(geomBounds_);
|
||||
end++;
|
||||
}
|
||||
|
||||
__forceinline void add(const BBox& geomBounds_, const size_t i) {
|
||||
CentGeom<BBox>::extend(geomBounds_);
|
||||
end+=i;
|
||||
}*/
|
||||
|
||||
__forceinline void merge(const PrimInfoT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
begin += other.begin;
|
||||
end += other.end;
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) {
|
||||
PrimInfoT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return end-begin;
|
||||
}
|
||||
|
||||
__forceinline float halfArea() {
|
||||
return expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
//return halfArea(geomBounds)*blocks(num);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
//return halfArea(geomBounds)*float((num+3) >> 2);
|
||||
//return halfArea(geomBounds)*blocks(num);
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) {
|
||||
return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t begin,end; //!< number of primitives
|
||||
};
|
||||
|
||||
typedef PrimInfoT<BBox3fa> PrimInfo;
|
||||
//typedef PrimInfoT<LBBox3fa> PrimInfoMB;
|
||||
//}
|
||||
}
|
210
thirdparty/embree/kernels/builders/priminfo_mb.h
vendored
Normal file
210
thirdparty/embree/kernels/builders/priminfo_mb.h
vendored
Normal file
@@ -0,0 +1,210 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "primref_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoMBT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoMBT () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoMBT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
__forceinline PrimInfoMBT (size_t begin, size_t end)
|
||||
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
time_range.extend(prim.time_range);
|
||||
object_range._end++;
|
||||
num_time_segments += prim.size();
|
||||
if (max_num_time_segments < prim.totalTimeSegments()) {
|
||||
max_num_time_segments = prim.totalTimeSegments();
|
||||
max_time_range = prim.time_range;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void merge(const PrimInfoMBT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
time_range.extend(other.time_range);
|
||||
object_range._begin += other.object_range.begin();
|
||||
object_range._end += other.object_range.end();
|
||||
num_time_segments += other.num_time_segments;
|
||||
if (max_num_time_segments < other.max_num_time_segments) {
|
||||
max_num_time_segments = other.max_num_time_segments;
|
||||
max_time_range = other.max_time_range;
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
|
||||
PrimInfoMBT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t begin() const {
|
||||
return object_range.begin();
|
||||
}
|
||||
|
||||
__forceinline size_t end() const {
|
||||
return object_range.end();
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return object_range.size();
|
||||
}
|
||||
|
||||
__forceinline float halfArea() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline float align_time(float ct) const
|
||||
{
|
||||
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
|
||||
float t0 = (ct-max_time_range.lower)/max_time_range.size();
|
||||
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
|
||||
return t1*max_time_range.size()+max_time_range.lower;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
|
||||
{
|
||||
return cout << "PrimInfo { " <<
|
||||
"object_range = " << pinfo.object_range <<
|
||||
", time_range = " << pinfo.time_range <<
|
||||
", time_segments = " << pinfo.num_time_segments <<
|
||||
", geomBounds = " << pinfo.geomBounds <<
|
||||
", centBounds = " << pinfo.centBounds <<
|
||||
"}";
|
||||
}
|
||||
|
||||
public:
|
||||
range<size_t> object_range; //!< primitive range
|
||||
size_t num_time_segments; //!< total number of time segments of all added primrefs
|
||||
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
|
||||
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
|
||||
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
|
||||
};
|
||||
|
||||
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
|
||||
|
||||
struct SetMB : public PrimInfoMB
|
||||
{
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
|
||||
__forceinline SetMB() {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
|
||||
: PrimInfoMB(pinfo_i), prims(prims) {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
object_range = object_range_in;
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
void deterministic_order() const
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
PrimRefMB* prim = prims->data();
|
||||
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
|
||||
{
|
||||
PrimInfoMB pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
|
||||
pinfo.add_primref(ref1);
|
||||
};
|
||||
return pinfo;
|
||||
};
|
||||
|
||||
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
|
||||
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
|
||||
|
||||
return SetMB(pinfo,prims,object_range,time_range);
|
||||
}
|
||||
|
||||
public:
|
||||
PrimRefVector prims;
|
||||
};
|
||||
//}
|
||||
}
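// Illustrative sketch (not part of upstream Embree): PrimInfoMBT::align_time() written out with
// plain floats. A global time is mapped into the normalized range of the primitive with the most
// time segments, snapped to the nearest segment boundary, and mapped back to global time.
inline float align_time_sketch(float ct, float range_lower, float range_upper, unsigned num_segments)
{
  const float size = range_upper - range_lower;
  const float t0 = (ct - range_lower) / size;                               // normalize to [0,1]
  const float t1 = roundf(t0 * float(num_segments)) / float(num_segments);  // snap to a segment boundary
  return t1 * size + range_lower;                                           // back to global time
}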
139
thirdparty/embree/kernels/builders/primref.h
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct __aligned(32) PrimRef
|
||||
{
|
||||
__forceinline PrimRef () {}
|
||||
|
||||
#if defined(__AVX__)
|
||||
__forceinline PrimRef(const PrimRef& v) {
|
||||
vfloat8::store((float*)this,vfloat8::load((float*)&v));
|
||||
}
|
||||
__forceinline PrimRef& operator=(const PrimRef& v) {
|
||||
vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
lower = Vec3fx(bounds.lower, geomID);
|
||||
upper = Vec3fx(bounds.upper, primID);
|
||||
}
|
||||
|
||||
__forceinline PrimRef (const BBox3fa& bounds, size_t id)
|
||||
{
|
||||
#if defined(__64BIT__)
|
||||
lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF));
|
||||
upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF));
|
||||
#else
|
||||
lower = Vec3fx(bounds.lower, (unsigned)id);
|
||||
upper = Vec3fx(bounds.upper, (unsigned)0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! calculates twice the center of the primitive */
|
||||
__forceinline const Vec3fa center2() const {
|
||||
return lower+upper;
|
||||
}
|
||||
|
||||
/*! return the bounding box of the primitive */
|
||||
__forceinline const BBox3fa bounds() const {
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! size for bin heuristic is 1 */
|
||||
__forceinline unsigned size() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = embree::center2(bounds_o);
|
||||
}
|
||||
|
||||
__forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!!
|
||||
return lower.u;
|
||||
}
|
||||
__forceinline unsigned& primIDref() { // FIXME: remove !!!!!!!
|
||||
return upper.u;
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned geomID() const {
|
||||
return lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned primID() const {
|
||||
return upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(lower.u) + (size_t(upper.u) << 32);
|
||||
#else
|
||||
return size_t(lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) {
|
||||
return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
Vec3fx lower; //!< lower bounds and geomID
|
||||
Vec3fx upper; //!< upper bounds and primID
|
||||
};
|
||||
|
||||
/*! fast exchange for PrimRefs */
|
||||
__forceinline void xchg(PrimRef& a, PrimRef& b)
|
||||
{
|
||||
#if defined(__AVX__)
|
||||
const vfloat8 aa = vfloat8::load((float*)&a);
|
||||
const vfloat8 bb = vfloat8::load((float*)&b);
|
||||
vfloat8::store((float*)&a,bb);
|
||||
vfloat8::store((float*)&b,aa);
|
||||
#else
|
||||
std::swap(a,b);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
struct SubGridBuildData {
|
||||
unsigned short sx,sy;
|
||||
unsigned int primID;
|
||||
|
||||
__forceinline SubGridBuildData() {};
|
||||
__forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID) : sx(sx), sy(sy), primID(primID) {};
|
||||
|
||||
__forceinline size_t x() const { return (size_t)sx & 0x7fff; }
|
||||
__forceinline size_t y() const { return (size_t)sy & 0x7fff; }
|
||||
|
||||
};
|
||||
}
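// Illustrative sketch (not part of upstream Embree): round-tripping a 64-bit ID through PrimRef.
// The (bounds,id) constructor stores the low and high 32 bits of the ID in the spare lanes of the
// lower and upper bounds, and ID() reassembles them; the check below is expected to hold on
// 64-bit builds, where the low half is reported as geomID() and the high half as primID().
namespace embree
{
  inline bool primref_id_roundtrip_sketch()
  {
    const uint64_t id64 = (uint64_t(7) << 32) | uint64_t(42);
    const PrimRef ref(BBox3fa(Vec3fa(0.0f), Vec3fa(1.0f)), size_t(id64));
    return ref.ID() == size_t(id64) && ref.geomID() == 42u && ref.primID() == 7u;
  }
}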
262
thirdparty/embree/kernels/builders/primref_mb.h
vendored
Normal file
@@ -0,0 +1,262 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
#define MBLUR_BIN_LBBOX 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#if MBLUR_BIN_LBBOX
|
||||
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct PrimRefMB
|
||||
{
|
||||
typedef LBBox3fa BBox;
|
||||
|
||||
__forceinline PrimRefMB () {}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
lbounds.bounds0.lower.a = geomID;
|
||||
lbounds.bounds0.upper.a = primID;
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
lbounds.bounds0.lower.a = id & 0xFFFFFFFF;
|
||||
lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
lbounds.bounds0.lower.a = id;
|
||||
lbounds.bounds0.upper.a = 0;
|
||||
#endif
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
|
||||
lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
lbounds.bounds0.lower.u = id;
|
||||
lbounds.bounds0.upper.u = 0;
|
||||
#endif
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
/*! returns bounds for binning */
|
||||
__forceinline LBBox3fa bounds() const {
|
||||
return lbounds;
|
||||
}
|
||||
|
||||
/*! returns the number of time segments of this primref */
|
||||
__forceinline unsigned size() const {
|
||||
return lbounds.bounds1.lower.a;
|
||||
}
|
||||
|
||||
__forceinline unsigned totalTimeSegments() const {
|
||||
return lbounds.bounds1.upper.a;
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
|
||||
return getTimeSegmentRange(range,time_range,float(totalTimeSegments()));
|
||||
}
|
||||
|
||||
/* returns time that corresponds to time step */
|
||||
__forceinline float timeStep(const int i) const {
|
||||
assert(i>=0 && i<=(int)totalTimeSegments());
|
||||
return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments());
|
||||
}
|
||||
|
||||
/*! checks if time range overlaps */
|
||||
__forceinline bool time_range_overlap(const BBox1f& range) const
|
||||
{
|
||||
if (0.9999f*time_range.upper <= range.lower) return false;
|
||||
if (1.0001f*time_range.lower >= range.upper) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter() const {
|
||||
return center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = binCenter();
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned geomID() const {
|
||||
return lbounds.bounds0.lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned primID() const {
|
||||
return lbounds.bounds0.upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32);
|
||||
#else
|
||||
return size_t(lbounds.bounds0.lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
|
||||
return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
LBBox3fx lbounds;
|
||||
BBox1f time_range; // entire geometry time range
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct __aligned(16) PrimRefMB
|
||||
{
|
||||
typedef BBox3fa BBox;
|
||||
|
||||
__forceinline PrimRefMB () {}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
|
||||
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
bbox.lower.a = geomID;
|
||||
bbox.upper.a = primID;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
bbox.lower.u = id & 0xFFFFFFFF;
|
||||
bbox.upper.u = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
bbox.lower.u = id;
|
||||
bbox.upper.u = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! returns bounds for binning */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
return bbox;
|
||||
}
|
||||
|
||||
/*! returns the number of time segments of this primref */
|
||||
__forceinline unsigned int size() const {
|
||||
return _activeTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline unsigned int totalTimeSegments() const {
|
||||
return _totalTimeSegments;
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
|
||||
return getTimeSegmentRange(range,time_range,float(_totalTimeSegments));
|
||||
}
|
||||
|
||||
/* returns time that corresponds to time step */
|
||||
__forceinline float timeStep(const int i) const {
|
||||
assert(i>=0 && i<=(int)_totalTimeSegments);
|
||||
return time_range.lower + time_range.size()*float(i)/float(_totalTimeSegments);
|
||||
}
|
||||
|
||||
/*! checks if time range overlaps */
|
||||
__forceinline bool time_range_overlap(const BBox1f& range) const
|
||||
{
|
||||
if (0.9999f*time_range.upper <= range.lower) return false;
|
||||
if (1.0001f*time_range.lower >= range.upper) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter() const {
|
||||
return center2(bounds());
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = center2(bounds());
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned int geomID() const {
|
||||
return bbox.lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned int primID() const {
|
||||
return bbox.upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32);
|
||||
#else
|
||||
return size_t(bbox.lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
|
||||
return cout << "{ bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
BBox3fa bbox; // bounds, geomID, primID
|
||||
unsigned int _activeTimeSegments;
|
||||
unsigned int _totalTimeSegments;
|
||||
BBox1f time_range; // entire geometry time range
|
||||
};
|
||||
|
||||
#endif
|
||||
}
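// Illustrative sketch (not part of upstream Embree): PrimRefMB::timeStep() as a plain function.
// A time-step index is mapped to an absolute time by linear interpolation over the geometry time
// range; e.g. with time_range=[0.2,0.8] and 3 total segments, step 1 maps to 0.4.
inline float time_step_sketch(float range_lower, float range_upper, unsigned total_segments, int i)
{
  const float size = range_upper - range_lower;
  return range_lower + size * float(i) / float(total_segments);
}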
359
thirdparty/embree/kernels/builders/primrefgen.cpp
vendored
Normal file
@@ -0,0 +1,359 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "primrefgen.h"
|
||||
#include "primrefgen_presplit.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,mblur);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,mblur);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
}
|
||||
|
||||
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
|
||||
pinfo.time_range = t0t1;
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
}
|
||||
|
||||
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
|
||||
pinfo.time_range = t0t1;
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
template<typename Mesh>
|
||||
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
size_t numPrimitives = morton.size();
|
||||
|
||||
/* compute scene bounds */
|
||||
std::pair<size_t,BBox3fa> cb_empty(0,empty);
|
||||
auto cb = parallel_reduce
|
||||
( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa>
|
||||
{
|
||||
size_t num = 0;
|
||||
BBox3fa bounds = empty;
|
||||
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa prim_bounds = empty;
|
||||
if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue;
|
||||
bounds.extend(center2(prim_bounds));
|
||||
num++;
|
||||
}
|
||||
return std::make_pair(num,bounds);
|
||||
}, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) {
|
||||
return std::make_pair(a.first + b.first,merge(a.second,b.second));
|
||||
});
|
||||
|
||||
|
||||
size_t numPrimitivesGen = cb.first;
|
||||
const BBox3fa centBounds = cb.second;
|
||||
|
||||
/* compute morton codes */
|
||||
if (likely(numPrimitivesGen == numPrimitives))
|
||||
{
|
||||
/* fast path if all primitives were valid */
|
||||
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
|
||||
parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void {
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
generator(mesh->bounds(j),unsigned(j));
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* slow path, fallback in case some primitives were invalid */
|
||||
ParallelPrefixSumState<size_t> pstate;
|
||||
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
|
||||
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
|
||||
size_t num = 0;
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (unlikely(!mesh->buildBounds(j,&bounds))) continue;
|
||||
generator(bounds,unsigned(j));
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}, std::plus<size_t>());
|
||||
|
||||
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
|
||||
size_t num = 0;
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!mesh->buildBounds(j,&bounds)) continue;
|
||||
generator(bounds,unsigned(j));
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}, std::plus<size_t>());
|
||||
}
|
||||
return numPrimitivesGen;
|
||||
}
|
||||
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
|
||||
// special variants for grid meshes
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_GRID)
|
||||
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
size_t numPrimitives = 0;
|
||||
|
||||
/* first run to get #primitives */
|
||||
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator<GridMesh,false> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
|
||||
/* iterate over all meshes in the scene */
|
||||
pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
|
||||
BBox3fa bounds = empty;
|
||||
const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j);
|
||||
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
numPrimitives = pinfo.size();
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
assert(pinfo.size() == numPrimitives);
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
|
||||
{
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
size_t numPrimitives = 0;
|
||||
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
/* iterate over all grids in a single mesh */
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
|
||||
BBox3fa bounds = empty;
|
||||
const PrimRef prim(bounds,geomID_,unsigned(j));
|
||||
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
numPrimitives = pinfo.size();
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID_);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
/* first run to get #primitives */
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator<GridMesh,true> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
/* iterate over all meshes in the scene */
|
||||
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
|
||||
|
||||
PrimInfoMB pinfoMB(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
|
||||
LBBox3fa bounds(empty);
|
||||
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
|
||||
pinfoMB.merge(gridMB);
|
||||
}
|
||||
return pinfoMB;
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
size_t numPrimitives = pinfoMB.size();
|
||||
if (numPrimitives == 0) return pinfoMB;
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
assert(pinfoMB.size() == numPrimitives);
|
||||
pinfoMB.time_range = t0t1;
|
||||
return pinfoMB;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
|
||||
IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE_ARRAY (template size_t createMortonCodeArray<InstanceArray>(InstanceArray* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
}
|
||||
}
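// Illustrative sketch (not part of upstream Embree): the two-pass pattern used by the
// createPrimRefArray* functions above, reduced to a serial loop. Pass 1 optimistically writes
// each valid primitive at its own index; only if some primitives were filtered out is pass 2
// run, writing the survivors compacted at running offsets. "isValid" and "writePrim" are
// hypothetical stand-ins for the per-geometry createPrimRefArray callbacks.
template<typename IsValid, typename WritePrim>
inline size_t two_pass_compaction_sketch(size_t numPrims, const IsValid& isValid, const WritePrim& writePrim)
{
  size_t numValid = 0;
  for (size_t i = 0; i < numPrims; i++)               // pass 1: write at the original index
    if (isValid(i)) { writePrim(i, i); numValid++; }
  if (numValid == numPrims) return numValid;          // nothing was filtered, the array is dense
  size_t offset = 0;
  for (size_t i = 0; i < numPrims; i++)               // pass 2: compact to consecutive slots
    if (isValid(i)) writePrim(i, offset++);
  return offset;
}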
37
thirdparty/embree/kernels/builders/primrefgen.h
vendored
Normal file
@@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../common/scene.h"
#include "priminfo.h"
#include "priminfo_mb.h"
#include "bvh_builder_morton.h"

namespace embree
{
namespace isa
{
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);

PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);

PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor);

PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);

PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));

PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));

template<typename Mesh>
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);

/* special variants for grids */
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); // FIXME: remove

PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);

PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
}
}
468
thirdparty/embree/kernels/builders/primrefgen_presplit.h
vendored
Normal file
@@ -0,0 +1,468 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_sort.h"
|
||||
#include "../builders/heuristic_spatial.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
#define DBG_PRESPLIT(x)
|
||||
#define CHECK_PRESPLIT(x)
|
||||
|
||||
#define GRID_SIZE 1024
|
||||
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
|
||||
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
|
||||
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct SplittingGrid
|
||||
{
|
||||
__forceinline SplittingGrid(const BBox3fa& bounds)
|
||||
{
|
||||
base = bounds.lower;
|
||||
const Vec3fa diag = bounds.size();
|
||||
extend = max(diag.x,max(diag.y,diag.z));
|
||||
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
|
||||
}
|
||||
|
||||
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
|
||||
{
|
||||
/* compute morton code */
|
||||
const Vec3fa lower = prim.lower;
|
||||
const Vec3fa upper = prim.upper;
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
|
||||
/* if all bits are equal then we cannot split */
|
||||
if (unlikely(lower_code == upper_code))
|
||||
return false;
|
||||
|
||||
/* compute octree level and dimension to perform the split in */
|
||||
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
|
||||
const unsigned int level = diff / 3;
|
||||
const unsigned int dim = diff % 3;
|
||||
|
||||
/* now we compute the grid position of the split */
|
||||
const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
|
||||
|
||||
/* compute world space position of split */
|
||||
const float inv_grid_size = 1.0f / GRID_SIZE;
|
||||
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
|
||||
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
|
||||
|
||||
dim_o = dim;
|
||||
fsplit_o = fsplit;
|
||||
return true;
|
||||
}
|
||||
|
||||
__forceinline Vec2i computeMC(const PrimRef& ref) const
|
||||
{
|
||||
const Vec3fa lower = ref.lower;
|
||||
const Vec3fa upper = ref.upper;
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
return Vec2i(lower_code,upper_code);
|
||||
}
|
||||
|
||||
Vec3fa base;
|
||||
float scale;
|
||||
float extend;
|
||||
};
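// Illustrative sketch (not part of upstream Embree): how split_pos() above derives the split
// plane from the two interleaved Morton codes. The highest differing bit gives both the octree
// level and (via modulo 3) the axis to split; a portable count-leading-zeros loop stands in for
// embree's lzcnt here.
inline bool morton_split_plane_sketch(unsigned lower_code, unsigned upper_code,
                                      unsigned& level_o, unsigned& dim_o)
{
  if (lower_code == upper_code) return false;    // identical codes: no split possible
  const unsigned x = lower_code ^ upper_code;
  unsigned lz = 0;                                // count leading zeros of x (x != 0 here)
  for (unsigned bit = 1u << 31; bit && !(x & bit); bit >>= 1) lz++;
  const unsigned diff = 31 - lz;                  // index of the highest differing bit
  level_o = diff / 3;                             // every 3 bits is one octree level
  dim_o   = diff % 3;                             // which of the interleaved axes owns that bit
  return true;
}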
|
||||
|
||||
struct PresplitItem
|
||||
{
|
||||
union {
|
||||
float priority;
|
||||
unsigned int data;
|
||||
};
|
||||
unsigned int index;
|
||||
|
||||
__forceinline operator unsigned() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
template<typename ProjectedPrimitiveAreaFunc>
|
||||
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
|
||||
{
|
||||
const float area_aabb = area(ref.bounds());
|
||||
const float area_prim = primitiveArea(ref);
|
||||
if (area_prim == 0.0f) return 0.0f;
|
||||
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
|
||||
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
|
||||
const float area_diff = max(0.0f, area_aabb - area_prim);
|
||||
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
|
||||
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
|
||||
//const float priority = sqrtf(sqrtf( area_diff ) );
|
||||
//const float priority = sqrtf(area_diff);
|
||||
//const float priority = area_diff; // 104 fps !!!!!!!!!!
|
||||
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
|
||||
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
|
||||
//const float priority = area_prim;
|
||||
assert(priority >= 0.0f && priority < FLT_LARGE);
|
||||
return priority;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
|
||||
return cout << "index " << item.index << " priority " << item.priority;
|
||||
};
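// Illustrative sketch (not part of upstream Embree): the presplit priority from
// PresplitItem::compute_priority() written as a plain function of its inputs. A primitive ranks
// high when its AABB area greatly exceeds its true surface area (lots of empty box space) and
// when its Morton codes differ in a high bit; "diff" is the 31-lzcnt(mc.x^mc.y) value computed
// above.
inline float presplit_priority_sketch(float area_aabb, float area_prim, unsigned diff)
{
  if (area_prim == 0.0f) return 0.0f;                              // degenerate primitive: never split
  const float area_diff = area_aabb > area_prim ? area_aabb - area_prim : 0.0f;
  const float w = powf(PRIORITY_SPLIT_POS_WEIGHT, float(diff));    // 1.5^diff split-position weight
  return sqrtf(sqrtf(area_diff * w));                              // fourth root damps the dynamic range
}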
|
||||
|
||||
#if 1
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int splitprims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
if (splitprims == 1)
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(prim, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
PrimRef left,right;
|
||||
splitter(prim,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
const unsigned int splitprims_left = splitprims/2;
|
||||
const unsigned int splitprims_right = splitprims - splitprims_left;
|
||||
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
|
||||
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int targetSubPrims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
|
||||
return area(a.bounds()) < area(b.bounds());
|
||||
};
|
||||
|
||||
subPrims[numSubPrims++] = prim;
|
||||
|
||||
while (numSubPrims < targetSubPrims)
|
||||
{
|
||||
/* get top heap element */
|
||||
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
|
||||
PrimRef top = subPrims[--numSubPrims];
|
||||
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(top, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = top;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
PrimRef left,right;
|
||||
splitter(top,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
subPrims[numSubPrims++] = left;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
|
||||
subPrims[numSubPrims++] = right;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
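// Illustrative sketch (not part of upstream Embree): the budget halving used by the recursive
// splitPrimitive() variant above. A budget of n sub-primitives yields at most n leaves, and
// exactly n when every grid split succeeds, since n is always divided into n/2 and n-n/2.
inline unsigned split_budget_leaf_count_sketch(unsigned splitprims, bool splits_always_succeed)
{
  if (splitprims == 1 || !splits_always_succeed) return 1;   // leaf, or split position not found
  const unsigned left  = splitprims / 2;
  const unsigned right = splitprims - left;
  return split_budget_leaf_count_sketch(left,  splits_always_succeed)
       + split_budget_leaf_count_sketch(right, splits_always_succeed);
}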
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
template<typename Mesh, typename SplitterFactory>
|
||||
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
|
||||
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
|
||||
PrimVector& prims,
|
||||
const PrimInfo& pinfo,
|
||||
const SplitPrimitiveFunc& splitPrimitive,
|
||||
const ProjectedPrimitiveAreaFunc& primitiveArea)
|
||||
{
|
||||
static const size_t MIN_STEP_SIZE = 128;
|
||||
|
||||
/* use correct number of primitives */
|
||||
size_t numPrimitives = pinfo.size();
|
||||
const size_t numPrimitivesExt = prims.size();
|
||||
const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;
|
||||
|
||||
/* allocate double buffer presplit items */
|
||||
avector<PresplitItem> preSplitItem0(numPrimitivesExt);
|
||||
avector<PresplitItem> preSplitItem1(numPrimitivesExt);
|
||||
|
||||
/* compute grid */
|
||||
SplittingGrid grid(pinfo.geomBounds);
|
||||
|
||||
/* init presplit items and get total sum */
|
||||
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
|
||||
float sum = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
preSplitItem0[i].index = (unsigned int)i;
|
||||
const Vec2i mc = grid.computeMC(prims[i]);
|
||||
/* if all bits are equal then we cannot split */
|
||||
preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
|
||||
/* FIXME: sum is non-deterministic */
|
||||
sum += preSplitItem0[i].priority;
|
||||
}
|
||||
return sum;
|
||||
},[](const float& a, const float& b) -> float { return a+b; });
|
||||
|
||||
/* compute number of splits per primitive */
|
||||
const float inv_psum = 1.0f / psum;
|
||||
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (preSplitItem0[i].priority <= 0.0f) {
|
||||
preSplitItem0[i].data = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
|
||||
if (rel_p < 1) {
|
||||
preSplitItem0[i].data = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
//preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
|
||||
preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
|
||||
preSplitItem0[i].data = 1 << preSplitItem0[i].data;
|
||||
assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
}
|
||||
});
|
||||
|
||||
auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
|
||||
size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
|
||||
assert(center <= numPrimitives);
|
||||
|
||||
/* anything to split ? */
|
||||
if (center >= numPrimitives)
|
||||
return pinfo;
|
||||
|
||||
size_t numPrimitivesToSplit = numPrimitives - center;
|
||||
assert(preSplitItem0[center].data >= 1);
|
||||
|
||||
/* sort presplit items in ascending order */
|
||||
radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);
|
||||
|
||||
CHECK_PRESPLIT(
|
||||
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
|
||||
});
|
||||
);
|
||||
|
||||
unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
|
||||
unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;
|
||||
|
||||
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
|
||||
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
|
||||
size_t sum = 0;
|
||||
for (size_t i=t.begin(); i<t.end(); i++)
|
||||
{
|
||||
const unsigned int primrefID = preSplitItem0[i].index;
|
||||
const unsigned int splitprims = preSplitItem0[i].data;
|
||||
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
unsigned int numSubPrims = 0;
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
|
||||
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
|
||||
assert(numSubPrims);
|
||||
|
||||
numSubPrims--; // can reuse slot
|
||||
sum+=numSubPrims;
|
||||
preSplitItem0[i].data = (numSubPrims << 16) | splitprims;
|
||||
|
||||
primOffset0[i-center] = numSubPrims;
|
||||
}
|
||||
return sum;
|
||||
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });

      /* if we are over budget, need to shrink the range */
      if (totalNumSubPrims > numSplitPrimitivesBudget)
      {
        size_t new_center = numPrimitives-1;
        size_t sum = 0;
        for (;new_center>=center;new_center--)
        {
          const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
          if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
          sum += numSubPrims;
        }
        new_center++;

        primOffset0 += new_center - center;
        numPrimitivesToSplit -= new_center - center;
        center = new_center;
        assert(numPrimitivesToSplit == (numPrimitives - center));
      }
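      /* Note: the shrink walks backwards from the end of the sorted range and keeps
         the largest suffix whose extra sub-primitives still fit into the budget;
         thanks to the ascending sort this suffix holds the primitives with the
         highest split counts. Primitives that drop out of the range simply keep
         their single, unsplit PrimRef. */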

      /* parallel prefix sum to compute offsets for storing sub-primitives */
      const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
      assert(numPrimitives+offset <= numPrimitivesExt);
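      /* Note: parallel_prefix_sum writes the exclusive running sum into primOffset1
         and returns the total, which is how much the prims array will grow.
         Illustrative example (made-up counts): extra counts {2,0,3,1} yield
         offsets {0,2,2,5} and offset = 6. */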

      /* iterate over range, and split primitives into sub primitives and append them to prims array */
      parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
        for (size_t j=rn.begin(); j<rn.end(); j++)
        {
          const unsigned int primrefID = preSplitItem0[j].index;
          const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
          assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);

          unsigned int numSubPrims = 0;
          PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
          splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);

          const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
          assert(numSubPrims-1 == numSubPrimsExpected);

          const size_t newID = numPrimitives + primOffset1[j-center];
          assert(newID+numSubPrims-1 <= numPrimitivesExt);

          prims[primrefID] = subPrims[0];
          for (size_t i=1;i<numSubPrims;i++)
            prims[newID+i-1] = subPrims[i];
        }
      });
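      /* Note: splitPrimitive runs a second time here with the same inputs as in the
         counting pass; recomputing the sub-primitives avoids buffering them between
         the two passes, and the assert above checks that both passes agree on the
         count. */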

      numPrimitives += offset;

      /* recompute centroid bounding boxes */
      const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
        PrimInfo p(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
          p.add_center2(prims[j]);
        return p;
      }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

      assert(pinfo1.size() == numPrimitives);
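      /* Note: splitting changes both the number of primitives and their centroids,
         so the PrimInfo (count plus geometry and centroid bounds) is rebuilt over
         the whole array before being returned to the builder. */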

      return pinfo1;
    }

#if !defined(RTHWIF_STANDALONE)

    template<typename Mesh, typename SplitterFactory>
    PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
    {
      ParallelForForPrefixSumState<PrimInfo> pstate;
      Scene::Iterator2 iter(scene,types,mblur);

      /* first try */
      progressMonitor(0);
      pstate.init(iter,size_t(1024));
      PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
        return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
      }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

      /* if we need to filter out geometry, run again */
      if (pinfo.size() != numPrimRefs)
      {
        progressMonitor(0);
        pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
          return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
        }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
      }
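      /* Note: the first pass assumes every primitive in the iterated ranges emits a
         PrimRef; if some were filtered out (the size mismatch above), the second
         pass re-runs using per-range base offsets from the prefix-sum state so the
         PrimRef array stays densely packed. */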

      SplitterFactory Splitter(scene);

      auto split_primitive = [&] (const PrimRef &prim,
                                  const unsigned int splitprims,
                                  const SplittingGrid& grid,
                                  PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
                                  unsigned int& numSubPrims)
      {
        const auto splitter = Splitter(prim);
        splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
      };

      auto primitiveArea = [&] (const PrimRef &ref) {
        const unsigned int geomID = ref.geomID();
        const unsigned int primID = ref.primID();
        return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
      };
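      /* Note: the presplit core above is primitive-agnostic; it only needs a
         callback that splits one PrimRef into sub-primitives and a callback that
         returns a projected area, which is presumably what the split priorities are
         derived from. Different SplitterFactory types (triangles, quads) plug into
         the same machinery. */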

      return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
    }
#endif
  }
}
240
thirdparty/embree/kernels/builders/splitter.h
vendored
Normal file
@@ -0,0 +1,240 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#if !defined(RTHWIF_STANDALONE)
#include "../common/scene.h"
#endif

#include "../builders/primref.h"

namespace embree
{
  namespace isa
  {
    template<size_t N>
    __forceinline void splitPolygon(const BBox3fa& bounds,
                                    const size_t dim,
                                    const float pos,
                                    const Vec3fa (&v)[N+1],
                                    BBox3fa& left_o,
                                    BBox3fa& right_o)
    {
      BBox3fa left = empty, right = empty;
      /* clip triangle to left and right box by processing all edges */
      for (size_t i=0; i<N; i++)
      {
        const Vec3fa &v0 = v[i];
        const Vec3fa &v1 = v[i+1];
        const float v0d = v0[dim];
        const float v1d = v1[dim];

        if (v0d <= pos) left. extend(v0); // this point is on left side
        if (v0d >= pos) right.extend(v0); // this point is on right side

        if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
        {
          assert((v1d-v0d) != 0.0f);
          const float inv_length = 1.0f/(v1d-v0d);
          const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
          left.extend(c);
          right.extend(c);
        }
      }

      /* clip against current bounds */
      left_o = intersect(left,bounds);
      right_o = intersect(right,bounds);
    }
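    /* Illustrative usage (not part of this header): clipping a unit right triangle
       at x = 0.5. The vertex array repeats the first vertex, matching the N+1
       convention expected above.

         Vec3fa tri[4] = { Vec3fa(0,0,0), Vec3fa(1,0,0), Vec3fa(0,1,0), Vec3fa(0,0,0) };
         BBox3fa bounds(Vec3fa(0,0,0), Vec3fa(1,1,0));
         BBox3fa l, r;
         splitPolygon<3>(bounds, 0, 0.5f, tri, l, r); // dim = 0 selects the x axis
         // l spans x in [0,0.5] and y in [0,1]; r spans x in [0.5,1] and y in [0,0.5]
    */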

    template<size_t N>
    __forceinline void splitPolygon(const BBox3fa& bounds,
                                    const size_t dim,
                                    const float pos,
                                    const Vec3fa (&v)[N+1],
                                    const Vec3fa (&inv_length)[N],
                                    BBox3fa& left_o,
                                    BBox3fa& right_o)
    {
      BBox3fa left = empty, right = empty;
      /* clip triangle to left and right box by processing all edges */
      for (size_t i=0; i<N; i++)
      {
        const Vec3fa &v0 = v[i];
        const Vec3fa &v1 = v[i+1];
        const float v0d = v0[dim];
        const float v1d = v1[dim];

        if (v0d <= pos) left. extend(v0); // this point is on left side
        if (v0d >= pos) right.extend(v0); // this point is on right side

        if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
        {
          assert((v1d-v0d) != 0.0f);
          const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0);
          left.extend(c);
          right.extend(c);
        }
      }

      /* clip against current bounds */
      left_o = intersect(left,bounds);
      right_o = intersect(right,bounds);
    }
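    /* Note: this overload takes reciprocal edge vectors that the splitter structs
       below precompute once per primitive, replacing the per-edge division of the
       overload above with a multiply; the result is the same up to floating-point
       rounding. */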

    template<size_t N>
    __forceinline void splitPolygon(const PrimRef& prim,
                                    const size_t dim,
                                    const float pos,
                                    const Vec3fa (&v)[N+1],
                                    PrimRef& left_o,
                                    PrimRef& right_o)
    {
      BBox3fa left = empty, right = empty;
      for (size_t i=0; i<N; i++)
      {
        const Vec3fa &v0 = v[i];
        const Vec3fa &v1 = v[i+1];
        const float v0d = v0[dim];
        const float v1d = v1[dim];

        if (v0d <= pos) left. extend(v0); // this point is on left side
        if (v0d >= pos) right.extend(v0); // this point is on right side

        if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
        {
          assert((v1d-v0d) != 0.0f);
          const float inv_length = 1.0f/(v1d-v0d);
          const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
          left.extend(c);
          right.extend(c);
        }
      }

      /* clip against current bounds */
      new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID());
      new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
    }

#if !defined(RTHWIF_STANDALONE)

    struct TriangleSplitter
    {
      __forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
      {
        const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
        const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask );
        TriangleMesh::Triangle tri = mesh->triangle(prim.primID());
        v[0] = mesh->vertex(tri.v[0]);
        v[1] = mesh->vertex(tri.v[1]);
        v[2] = mesh->vertex(tri.v[2]);
        v[3] = mesh->vertex(tri.v[0]);
        inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
        inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
        inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]);
      }
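      /* Note: the first vertex is duplicated into v[3] so the N+1 vertex layout of
         splitPolygon<3> closes the triangle's edge loop. The geomID is masked
         because its upper RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS bits are used by
         the spatial-split builder as scratch space and are not part of the actual
         geometry ID. */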

      __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
        splitPolygon<3>(prim,dim,pos,v,left_o,right_o);
      }

      __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
        splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o);
      }

    private:
      Vec3fa v[4];
      Vec3fa inv_length[3];
    };

    struct TriangleSplitterFactory
    {
      __forceinline TriangleSplitterFactory(const Scene* scene)
        : scene(scene) {}

      __forceinline TriangleSplitter operator() (const PrimRef& prim) const {
        return TriangleSplitter(scene,prim);
      }

    private:
      const Scene* scene;
    };

    struct QuadSplitter
    {
      __forceinline QuadSplitter(const Scene* scene, const PrimRef& prim)
      {
        const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
        const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );
        QuadMesh::Quad quad = mesh->quad(prim.primID());
        v[0] = mesh->vertex(quad.v[1]);
        v[1] = mesh->vertex(quad.v[2]);
        v[2] = mesh->vertex(quad.v[3]);
        v[3] = mesh->vertex(quad.v[0]);
        v[4] = mesh->vertex(quad.v[1]);
        v[5] = mesh->vertex(quad.v[3]);
        inv_length[0] = Vec3fa(1.0f) / (v[1] - v[0]);
        inv_length[1] = Vec3fa(1.0f) / (v[2] - v[1]);
        inv_length[2] = Vec3fa(1.0f) / (v[3] - v[2]);
        inv_length[3] = Vec3fa(1.0f) / (v[4] - v[3]);
        inv_length[4] = Vec3fa(1.0f) / (v[5] - v[4]);
      }
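      /* Note: the six stored vertices trace the closed path q1-q2-q3-q0-q1-q3, so
         the five clipped edges cover all four boundary edges of the quad plus the
         q1-q3 diagonal; the diagonal is presumably included because an embree quad
         is a pair of triangles sharing that edge and need not be planar. */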

      __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
        splitPolygon<5>(prim,dim,pos,v,left_o,right_o);
      }

      __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
        splitPolygon<5>(prim,dim,pos,v,inv_length,left_o,right_o);
      }

    private:
      Vec3fa v[6];
      Vec3fa inv_length[5];
    };

    struct QuadSplitterFactory
    {
      __forceinline QuadSplitterFactory(const Scene* scene)
        : scene(scene) {}

      __forceinline QuadSplitter operator() (const PrimRef& prim) const {
        return QuadSplitter(scene,prim);
      }

    private:
      const Scene* scene;
    };

    struct DummySplitter
    {
      __forceinline DummySplitter(const Scene* scene, const PrimRef& prim)
      {
      }

      __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
      }

      __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
      }

    };

    struct DummySplitterFactory
    {
      __forceinline DummySplitterFactory(const Scene* scene)
        : scene(scene) {}

      __forceinline DummySplitter operator() (const PrimRef& prim) const {
        return DummySplitter(scene,prim);
      }

    private:
      const Scene* scene;
    };
#endif
  }
}