initial commit, 4.5 stable
thirdparty/embree/kernels/bvh/bvh.cpp (vendored, new file, 190 lines)
@@ -0,0 +1,190 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_statistics.h"

namespace embree
{
  template<int N>
  BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene)
    : AccelData((N==4) ? AccelData::TY_BVH4 : (N==8) ? AccelData::TY_BVH8 : AccelData::TY_UNKNOWN),
      primTy(&primTy), device(scene->device), scene(scene),
      root(emptyNode), alloc(scene->device,scene->isStaticAccel()), numPrimitives(0), numVertices(0)
  {
  }

  template<int N>
  BVHN<N>::~BVHN ()
  {
    for (size_t i=0; i<objects.size(); i++)
      delete objects[i];
  }

  template<int N>
  void BVHN<N>::clear()
  {
    set(BVHN::emptyNode,empty,0);
    alloc.clear();
  }

  template<int N>
  void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives)
  {
    this->root = root;
    this->bounds = bounds;
    this->numPrimitives = numPrimitives;
  }

  template<int N>
  void BVHN<N>::clearBarrier(NodeRef& node)
  {
    if (node.isBarrier())
      node.clearBarrier();
    else if (!node.isLeaf()) {
      BaseNode* n = node.baseNode(); // FIXME: flags should be stored in BVH
      for (size_t c=0; c<N; c++)
        clearBarrier(n->child(c));
    }
  }

  template<int N>
  void BVHN<N>::layoutLargeNodes(size_t num)
  {
#if defined(__64BIT__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
    struct NodeArea
    {
      __forceinline NodeArea() {}

      __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
        : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {}

      __forceinline bool operator< (const NodeArea& other) const {
        return this->A < other.A;
      }

      NodeRef* node;
      float A;
    };
    std::vector<NodeArea> lst;
    lst.reserve(num);
    lst.push_back(NodeArea(root,empty));

    while (lst.size() < num)
    {
      std::pop_heap(lst.begin(), lst.end());
      NodeArea n = lst.back(); lst.pop_back();
      if (!n.node->isAABBNode()) break;
      AABBNode* node = n.node->getAABBNode();
      for (size_t i=0; i<N; i++) {
        if (node->child(i) == BVHN::emptyNode) continue;
        lst.push_back(NodeArea(node->child(i),node->bounds(i)));
        std::push_heap(lst.begin(), lst.end());
      }
    }

    for (size_t i=0; i<lst.size(); i++)
      lst[i].node->setBarrier();

    root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
#endif
  }

  template<int N>
  typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator)
  {
    if (node.isBarrier()) {
      node.clearBarrier();
      return node;
    }
    else if (node.isAABBNode())
    {
      AABBNode* oldnode = node.getAABBNode();
      AABBNode* newnode = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
      *newnode = *oldnode;
      for (size_t c=0; c<N; c++)
        newnode->child(c) = layoutLargeNodesRecursion(oldnode->child(c),allocator);
      return encodeNode(newnode);
    }
    else return node;
  }

  template<int N>
  double BVHN<N>::preBuild(const std::string& builderName)
  {
    if (builderName == "")
      return inf;

    if (device->verbosity(2))
    {
      Lock<MutexSys> lock(g_printMutex);
      std::cout << "building BVH" << N << (builderName.find("MBlur") != std::string::npos ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
    }

    double t0 = 0.0;
    if (device->benchmark || device->verbosity(2)) t0 = getSeconds();
    return t0;
  }

  template<int N>
  void BVHN<N>::postBuild(double t0)
  {
    if (t0 == double(inf))
      return;

    double dt = 0.0;
    if (device->benchmark || device->verbosity(2))
      dt = getSeconds()-t0;

    std::unique_ptr<BVHNStatistics<N>> stat;

    /* print statistics */
    if (device->verbosity(2))
    {
      if (!stat) stat.reset(new BVHNStatistics<N>(this));
      const size_t usedBytes = alloc.getUsedBytes();
      Lock<MutexSys> lock(g_printMutex);
      std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;

      if (device->verbosity(2))
        std::cout << stat->str();

      if (device->verbosity(2))
      {
        FastAllocator::AllStatistics stat(&alloc);
        for (size_t i=0; i<objects.size(); i++)
          if (objects[i])
            stat = stat + FastAllocator::AllStatistics(&objects[i]->alloc);

        stat.print(numPrimitives);
      }

      if (device->verbosity(3))
      {
        alloc.print_blocks();
        for (size_t i=0; i<objects.size(); i++)
          if (objects[i])
            objects[i]->alloc.print_blocks();
      }

      std::cout << std::flush;
    }

    /* benchmark mode */
    if (device->benchmark)
    {
      if (!stat) stat.reset(new BVHNStatistics<N>(this));
      Lock<MutexSys> lock(g_printMutex);
      std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
    }
  }

#if defined(__AVX__)
  template class BVHN<8>;
#endif

#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__)
  template class BVHN<4>;
#endif
}
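A note on the layout pass above: layoutLargeNodes() keeps a binary max-heap ordered by surface area (std::push_heap/std::pop_heap over NodeArea entries, with leaves forced to negative infinity), grows a frontier of up to num nodes by always expanding the largest node first, marks that frontier with the barrier bit, and layoutLargeNodesRecursion() then copies everything above the barriers into freshly allocated nodes so the top of the tree ends up contiguous and cache friendly. The sketch below isolates only the largest-area-first selection on a simplified node type; ToyNode and selectLargeNodes are illustrative stand-ins, not Embree API.

// Minimal sketch of the "largest nodes first" selection used by layoutLargeNodes().
// ToyNode and the tree below are hypothetical stand-ins for BVHN<N>::AABBNode.
#include <cstdio>
#include <queue>
#include <vector>

struct ToyNode {
  float area;                     // surface area of the node's bounds
  std::vector<ToyNode*> children; // empty vector == leaf
};

// Collect up to `num` inner nodes, always expanding the largest node seen so far.
static std::vector<ToyNode*> selectLargeNodes(ToyNode* root, size_t num) {
  auto cmp = [](ToyNode* a, ToyNode* b) { return a->area < b->area; }; // max-heap by area
  std::priority_queue<ToyNode*, std::vector<ToyNode*>, decltype(cmp)> heap(cmp);
  std::vector<ToyNode*> selected;
  heap.push(root);
  while (!heap.empty() && selected.size() < num) {
    ToyNode* n = heap.top(); heap.pop();
    if (n->children.empty()) continue;   // leaves are never re-laid out
    selected.push_back(n);               // in the real code this region gets re-allocated
    for (ToyNode* c : n->children) heap.push(c);
  }
  return selected;  // the recursion pass would copy exactly these nodes
}

int main() {
  ToyNode l0{1.0f, {}}, l1{2.0f, {}}, l2{3.0f, {}}, l3{4.0f, {}};
  ToyNode inner0{5.0f, {&l0, &l1}}, inner1{9.0f, {&l2, &l3}};
  ToyNode root{10.0f, {&inner0, &inner1}};
  for (ToyNode* n : selectLargeNodes(&root, 3))
    std::printf("selected node with area %.1f\n", n->area);
  return 0;
}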
thirdparty/embree/kernels/bvh/bvh.h (vendored, new file, 235 lines)
@@ -0,0 +1,235 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

/* include all node types */
#include "bvh_node_aabb.h"
#include "bvh_node_aabb_mb.h"
#include "bvh_node_aabb_mb4d.h"
#include "bvh_node_obb.h"
#include "bvh_node_obb_mb.h"
#include "bvh_node_qaabb.h"

namespace embree
{
  /*! flags used to enable specific node types in intersectors */
  enum BVHNodeFlags
  {
    BVH_FLAG_ALIGNED_NODE = 0x00001,
    BVH_FLAG_ALIGNED_NODE_MB = 0x00010,
    BVH_FLAG_UNALIGNED_NODE = 0x00100,
    BVH_FLAG_UNALIGNED_NODE_MB = 0x01000,
    BVH_FLAG_QUANTIZED_NODE = 0x100000,
    BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000,

    /* short versions */
    BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
    BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
    BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
    BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
    BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
    BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
    BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB,
    BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
  };

  /*! Multi BVH with N children. Each node stores the bounding box of
   *  its N children as well as N child references. */
  template<int N>
  class BVHN : public AccelData
  {
    ALIGNED_CLASS_(16);
  public:

    /*! forward declaration of node ref type */
    typedef NodeRefPtr<N> NodeRef;
    typedef BaseNode_t<NodeRef,N> BaseNode;
    typedef AABBNode_t<NodeRef,N> AABBNode;
    typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
    typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
    typedef OBBNode_t<NodeRef,N> OBBNode;
    typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
    typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
    typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
    typedef QuantizedNode_t<NodeRef,N> QuantizedNode;

    /*! Number of bytes the nodes and primitives are minimally aligned to. */
    static const size_t byteAlignment = 16;
    static const size_t byteNodeAlignment = 4*N;

    /*! Empty node */
    static const size_t emptyNode = NodeRef::emptyNode;

    /*! Invalid node, used as marker in traversal */
    static const size_t invalidNode = NodeRef::invalidNode;
    static const size_t popRay = NodeRef::popRay;

    /*! Maximum depth of the BVH. */
    static const size_t maxBuildDepth = 32;
    static const size_t maxBuildDepthLeaf = maxBuildDepth+8;
    static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder

    /*! Maximum number of primitive blocks in a leaf. */
    static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;

  public:

    /*! Builder interface to create allocator */
    struct CreateAlloc : public FastAllocator::Create {
      __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
    };

    typedef BVHNodeRecord<NodeRef> NodeRecord;
    typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
    typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

  public:

    /*! BVHN default constructor. */
    BVHN (const PrimitiveType& primTy, Scene* scene);

    /*! BVHN destruction */
    ~BVHN ();

    /*! clears the acceleration structure */
    void clear();

    /*! sets BVH members after build */
    void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);

    /*! Clears the barrier bits of a subtree. */
    void clearBarrier(NodeRef& node);

    /*! lays out num large nodes of the BVH */
    void layoutLargeNodes(size_t num);
    NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);

    /*! called by all builders before build starts */
    double preBuild(const std::string& builderName);

    /*! called by all builders after build ended */
    void postBuild(double t0);

    /*! allocator class */
    struct Allocator {
      BVHN* bvh;
      Allocator (BVHN* bvh) : bvh(bvh) {}
      __forceinline void* operator() (size_t bytes) const {
        return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
      }
    };

    /*! post build cleanup */
    void cleanup() {
      alloc.cleanup();
    }

  public:

    /*! Encodes a node */
    static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
    static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
    static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }

  public:

    /*! Prefetches the node this reference points to */
    __forceinline static void prefetch(const NodeRef ref, int types=0)
    {
#if defined(__AVX512PF__) // MIC
      if (types != BVH_FLAG_QUANTIZED_NODE) {
        prefetchL2(((char*)ref.ptr)+0*64);
        prefetchL2(((char*)ref.ptr)+1*64);
        if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
          prefetchL2(((char*)ref.ptr)+2*64);
          prefetchL2(((char*)ref.ptr)+3*64);
        }
        if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
          /* KNL still needs L2 prefetches for large nodes */
          prefetchL2(((char*)ref.ptr)+4*64);
          prefetchL2(((char*)ref.ptr)+5*64);
          prefetchL2(((char*)ref.ptr)+6*64);
          prefetchL2(((char*)ref.ptr)+7*64);
        }
      }
      else
      {
        /* todo: reduce if 32bit offsets are enabled */
        prefetchL2(((char*)ref.ptr)+0*64);
        prefetchL2(((char*)ref.ptr)+1*64);
        prefetchL2(((char*)ref.ptr)+2*64);
      }
#else
      if (types != BVH_FLAG_QUANTIZED_NODE) {
        prefetchL1(((char*)ref.ptr)+0*64);
        prefetchL1(((char*)ref.ptr)+1*64);
        if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
          prefetchL1(((char*)ref.ptr)+2*64);
          prefetchL1(((char*)ref.ptr)+3*64);
        }
        if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
          /* deactivate for large nodes on Xeon, as it introduces regressions */
          //prefetchL1(((char*)ref.ptr)+4*64);
          //prefetchL1(((char*)ref.ptr)+5*64);
          //prefetchL1(((char*)ref.ptr)+6*64);
          //prefetchL1(((char*)ref.ptr)+7*64);
        }
      }
      else
      {
        /* todo: reduce if 32bit offsets are enabled */
        prefetchL1(((char*)ref.ptr)+0*64);
        prefetchL1(((char*)ref.ptr)+1*64);
        prefetchL1(((char*)ref.ptr)+2*64);
      }
#endif
    }

    __forceinline static void prefetchW(const NodeRef ref, int types=0)
    {
      embree::prefetchEX(((char*)ref.ptr)+0*64);
      embree::prefetchEX(((char*)ref.ptr)+1*64);
      if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
        embree::prefetchEX(((char*)ref.ptr)+2*64);
        embree::prefetchEX(((char*)ref.ptr)+3*64);
      }
      if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
        embree::prefetchEX(((char*)ref.ptr)+4*64);
        embree::prefetchEX(((char*)ref.ptr)+5*64);
        embree::prefetchEX(((char*)ref.ptr)+6*64);
        embree::prefetchEX(((char*)ref.ptr)+7*64);
      }
    }

    /*! bvh type information */
  public:
    const PrimitiveType* primTy; //!< primitive type stored in the BVH

    /*! bvh data */
  public:
    Device* device; //!< device pointer
    Scene* scene;   //!< scene pointer
    NodeRef root;   //!< root node
    FastAllocator alloc; //!< allocator used to allocate nodes

    /*! statistics data */
  public:
    size_t numPrimitives; //!< number of primitives the BVH is built over
    size_t numVertices;   //!< number of vertices the BVH references

    /*! data arrays for special builders */
  public:
    std::vector<BVHN*> objects;
    vector_t<char,aligned_allocator<char,32>> subdiv_patches;
  };

  typedef BVHN<4> BVH4;
  typedef BVHN<8> BVH8;
}
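A note on the prefetch helpers declared above: a BVHN node spans several consecutive 64-byte cache lines (an 8-wide AABB node is 256 bytes, i.e. four lines), so prefetch()/prefetchW() issue one software prefetch per line and use the node-type flags to decide how many lines are worth touching. The sketch below shows the same per-cache-line pattern with the GCC/Clang __builtin_prefetch intrinsic; prefetch_lines() and ToyAABBNode8 are hypothetical stand-ins, not the prefetchL1/prefetchL2/prefetchEX wrappers Embree actually uses.

// Sketch of the per-cache-line prefetch pattern used by BVHN<N>::prefetch().
// prefetch_lines() is a hypothetical helper for illustration only.
#include <cstddef>

constexpr std::size_t kCacheLine = 64;

// Prefetch `lines` consecutive cache lines starting at `addr` for reading.
inline void prefetch_lines(const void* addr, std::size_t lines) {
  const char* p = static_cast<const char*>(addr);
  for (std::size_t i = 0; i < lines; i++)
    __builtin_prefetch(p + i * kCacheLine, /*rw=*/0, /*locality=*/3);
}

// Example: an 8-wide AABB node (8x lower/upper bounds plus 8 child refs) spans
// 4 cache lines, so traversal would prefetch all of them before decoding.
struct ToyAABBNode8 {
  float lower_x[8], lower_y[8], lower_z[8];
  float upper_x[8], upper_y[8], upper_z[8];
  void* children[8];
};

int main() {
  static ToyAABBNode8 node = {};
  prefetch_lines(&node, sizeof(node) / kCacheLine);
  return 0;
}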
thirdparty/embree/kernels/bvh/bvh4_factory.cpp (vendored, new file, 1348 lines): diff suppressed because it is too large
thirdparty/embree/kernels/bvh/bvh4_factory.h (vendored, new file, 318 lines)
@@ -0,0 +1,318 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh_factory.h"

namespace embree
{
  /*! BVH4 instantiations */
  class BVH4Factory : public BVHFactory
  {
  public:
    BVH4Factory(int bfeatures, int ifeatures);

  public:
    Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
    Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
    Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
    Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
    Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);

    Accel* BVH4Triangle4   (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4Triangle4v  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
    Accel* BVH4Triangle4i  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

    Accel* BVH4Quad4v  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4Quad4i  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

    Accel* BVH4QuantizedTriangle4i(Scene* scene);
    Accel* BVH4QuantizedQuad4i(Scene* scene);

    Accel* BVH4SubdivPatch1(Scene* scene);
    Accel* BVH4SubdivPatch1MB(Scene* scene);

    Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH4UserGeometryMB(Scene* scene);

    Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);

    Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH4InstanceArrayMB(Scene* scene);

    Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

  private:
    void selectBuilders(int features);
    void selectIntersectors(int features);

  private:
    Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
    Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);

    Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);

    Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);

    Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
    Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);

    Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
    Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);

    Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
    Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);

    Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
    Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);

    Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
    Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);

    Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);

  private:

    DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);

    // ==============

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);

    // ==============

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);

    // SAH scene builders
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
    DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

    DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
    DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

    DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    // spatial scene builder
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);

    // twolevel scene builders
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
  };
}
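The factory above is essentially a table of per-ISA entry points: each DEFINE_SYMBOL2/DEFINE_ISA_FUNCTION line declares a slot that selectBuilders()/selectIntersectors() later fill with the SSE/AVX/AVX2 variant compiled for the CPU that is actually running. Below is a minimal sketch of that runtime-dispatch idea using plain function pointers; Kernel, Factory and has_avx2 are made-up names for illustration and do not correspond to Embree's macros.

// Minimal sketch of per-ISA symbol selection, the pattern behind DEFINE_SYMBOL2
// and selectIntersectors(). All names here are illustrative, not Embree API.
#include <cstdio>

using Kernel = void (*)();            // one "intersector" entry point

void kernel_sse2() { std::puts("running SSE2 kernel"); }
void kernel_avx2() { std::puts("running AVX2 kernel"); }

struct Factory {
  Kernel intersect1 = nullptr;        // slot declared once, filled at startup

  // Mirrors selectIntersectors(features): pick the best variant the CPU supports.
  void selectIntersectors(bool has_avx2) {
    intersect1 = has_avx2 ? kernel_avx2 : kernel_sse2;
  }
};

int main() {
  Factory factory;
  factory.selectIntersectors(/*has_avx2=*/true);  // detected via cpuid in reality
  factory.intersect1();                           // dispatched call
  return 0;
}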
thirdparty/embree/kernels/bvh/bvh8_factory.cpp (vendored, new file, 1190 lines): diff suppressed because it is too large
thirdparty/embree/kernels/bvh/bvh8_factory.h (vendored, new file, 284 lines)
@@ -0,0 +1,284 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh_factory.h"

namespace embree
{
  /*! BVH8 instantiations */
  class BVH8Factory : public BVHFactory
  {
  public:
    BVH8Factory(int bfeatures, int ifeatures);

  public:
    Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
    Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
    DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);

    Accel* BVH8Triangle4   (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Triangle4v  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Triangle4i  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

    Accel* BVH8Quad4v  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Quad4i  (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

    Accel* BVH8QuantizedTriangle4i(Scene* scene);
    Accel* BVH8QuantizedTriangle4(Scene* scene);
    Accel* BVH8QuantizedQuad4i(Scene* scene);

    Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH8UserGeometryMB(Scene* scene);

    Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);

    Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
    Accel* BVH8InstanceArrayMB(Scene* scene);

    Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
    Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);

  private:
    void selectBuilders(int features);
    void selectIntersectors(int features);

  private:
    Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
    Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);

    Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);

    Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);

    Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
    Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
    Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);

    Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
    Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);

    Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
    Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);

    Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
    Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);

    Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
    Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);

  private:
    DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
    DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);

    DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
    DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);

    DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);

    DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);

    DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
    DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);

    // SAH scene builders
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
    DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

    DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
    DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

    DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

    // SAH spatial scene builders
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
    DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);

    // twolevel scene builders
  private:
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
    DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
  };
}
thirdparty/embree/kernels/bvh/bvh_builder.cpp (vendored, new file, 60 lines)
@@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh_builder.h"

namespace embree
{
  namespace isa
  {
    template<int N>
    typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
    {
      auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
        return createLeaf(prims,set,alloc);
      };

      settings.branchingFactor = N;
      settings.maxDepth = BVH::maxBuildDepthLeaf;
      return BVHBuilderBinnedSAH::build<NodeRef>
        (FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),createLeafFunc,progressFunc,prims,pinfo,settings);
    }

    template<int N>
    typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
    {
      auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
        return createLeaf(prims,set,alloc);
      };

      settings.branchingFactor = N;
      settings.maxDepth = BVH::maxBuildDepthLeaf;
      return BVHBuilderBinnedSAH::build<NodeRef>
        (FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),createLeafFunc,progressFunc,prims,pinfo,settings);
    }

    template<int N>
    typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
    {
      auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRecordMB {
        return createLeaf(prims,set,alloc);
      };

      settings.branchingFactor = N;
      settings.maxDepth = BVH::maxBuildDepthLeaf;
      return BVHBuilderBinnedSAH::build<NodeRecordMB>
        (FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),createLeafFunc,progressFunc,prims,pinfo,settings);
    }

    template struct BVHNBuilderVirtual<4>;
    template struct BVHNBuilderQuantizedVirtual<4>;
    template struct BVHNBuilderMblurVirtual<4>;

#if defined(__AVX__)
    template struct BVHNBuilderVirtual<8>;
    template struct BVHNBuilderQuantizedVirtual<8>;
    template struct BVHNBuilderMblurVirtual<8>;
#endif
  }
}
115
thirdparty/embree/kernels/bvh/bvh_builder.h
vendored
Normal file
@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_msmblur.h"

namespace embree
{
namespace isa
{
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

template<int N>
struct BVHNBuilderVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};

template<int N>
struct BVHNBuilderQuantizedVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};

template<int N>
struct BVHNBuilderMblurVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNodeMB AABBNodeMB;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
}
};
}
}
583
thirdparty/embree/kernels/bvh/bvh_builder_morton.cpp
vendored
Normal file
@@ -0,0 +1,583 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_statistics.h"
#include "bvh_rotate.h"
#include "../common/profile.h"
#include "../../common/algorithms/parallel_prefix_sum.h"

#include "../builders/primrefgen.h"
#include "../builders/bvh_builder_morton.h"

#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"

#if defined(__64BIT__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
#else
# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
#endif

namespace embree
{
namespace isa
{
template<int N>
struct SetBVHNBounds
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
typedef typename BVH::AABBNode AABBNode;

BVH* bvh;
__forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}

__forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
{
AABBNode* node = ref.getAABBNode();

BBox3fa res = empty;
for (size_t i=0; i<num; i++) {
const BBox3fa b = children[i].bounds;
res.extend(b);
node->setRef(i,children[i].ref);
node->setBounds(i,b);
}

BBox3fx result = (BBox3fx&)res;
#if ROTATE_TREE
if (N == 4)
{
size_t n = 0;
for (size_t i=0; i<num; i++)
n += children[i].bounds.lower.a;

if (n >= 4096) {
for (size_t i=0; i<num; i++) {
if (children[i].bounds.lower.a < 4096) {
for (int j=0; j<ROTATE_TREE; j++)
BVHNRotate<N>::rotate(node->child(i));
node->child(i).setBarrier();
}
}
}
result.lower.a = unsigned(n);
}
#endif

return NodeRecord(ref,result);
}
};

template<int N, typename Primitive>
struct CreateMortonLeaf;

template<int N>
struct CreateMortonLeaf<N,Triangle4>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ const mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}

Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = unsigned(current.size());
#endif
return NodeRecord(ref,box_o);
}

private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Triangle4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Triangle4i>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);

vuint4 v0 = zero, v1 = zero, v2 = zero;
vuint4 vgeomID = -1, vprimID = -1;
const TriangleMesh* __restrict__ const mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID[i] = geomID_;
vprimID[i] = primID;
unsigned int int_stride = mesh->vertices0.getStride()/4;
v0[i] = tri.v[0] * int_stride;
v1[i] = tri.v[1] * int_stride;
v2[i] = tri.v[2] * int_stride;
}

for (size_t i=items; i<4; i++)
{
vgeomID[i] = vgeomID[0];
vprimID[i] = -1;
v0[i] = 0;
v1[i] = 0;
v2[i] = 0;
}
Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Quad4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);

vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
const QuadMesh* __restrict__ mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const QuadMesh::Quad& tri = mesh->quad(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
const Vec3fa& p3 = mesh->vertex(tri.v[3]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
}
Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
QuadMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Object>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();

/* allocate leaf node */
Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const UserGeometry* mesh = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int index = morton[start+i].index;
const unsigned int primID = index;
bounds.extend(mesh->bounds(primID));
new (&accel[i]) Object(geomID_,primID);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
UserGeometry* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,InstancePrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);

/* allocate leaf node */
InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const Instance* instance = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstancePrimitive(instance, geomID_);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
Instance* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);

/* allocate leaf node */
InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const InstanceArray* instance = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
InstanceArray* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<typename Mesh>
struct CalculateMeshBounds
{
__forceinline CalculateMeshBounds (Mesh* mesh)
: mesh(mesh) {}

__forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
return mesh->bounds(morton.index);
}

private:
Mesh* mesh;
};

template<int N, typename Mesh, typename Primitive>
class BVHNMeshBuilderMorton : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

public:

BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
: bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}

/* build function */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
morton.clear();
}
size_t numPrimitives = mesh->size();
numPreviousPrimitives = numPrimitives;

/* skip build for empty scene */
if (numPrimitives == 0) {
bvh->set(BVH::emptyNode,empty,0);
return;
}

/* preallocate arrays */
morton.resize(numPrimitives);
size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);

/* create morton code array */
BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);

/* create BVH */
SetBVHNBounds<N> setBounds(bvh);
CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
CalculateMeshBounds<Mesh> calculateBounds(mesh);
auto root = BVHBuilderMorton::build<NodeRecord>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create(),
setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
morton.data(),dest,numPrimitivesGen,settings);

bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);

#if ROTATE_TREE
if (N == 4)
{
for (int i=0; i<ROTATE_TREE; i++)
BVHNRotate<N>::rotate(bvh->root);
bvh->clearBarrier(bvh->root);
}
#endif

/* clear temporary data for static geometry */
if (bvh->scene->isStaticAccel()) {
morton.clear();
}
bvh->cleanup();
}

void clear() {
morton.clear();
}

private:
BVH* bvh;
Mesh* mesh;
mvector<BVHBuilderMorton::BuildPrim> morton;
BVHBuilderMorton::Settings settings;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
};

#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

}
}
565
thirdparty/embree/kernels/bvh/bvh_builder_sah.cpp
vendored
Normal file
@@ -0,0 +1,565 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/primrefgen.h"
#include "../builders/splitter.h"

#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"

#include "../common/state.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"

#define PROFILE 0
#define PROFILE_RUNS 20

namespace embree
{
namespace isa
{
template<int N, typename Primitive>
struct CreateLeaf
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;

__forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}

__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t n = set.size();
size_t items = Primitive::blocks(n);
size_t start = set.begin();
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
for (size_t i=0; i<items; i++) {
accel[i].fill(prims,start,set.end(),bvh->scene);
}
return node;
}

BVH* bvh;
};


template<int N, typename Primitive>
struct CreateLeafQuantized
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;

__forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}

__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t n = set.size();
size_t items = Primitive::blocks(n);
size_t start = set.begin();
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
for (size_t i=0; i<items; i++) {
accel[i].fill(prims,start,set.end(),bvh->scene);
}
return node;
}

BVH* bvh;
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

template<int N, typename Primitive>
struct BVHNBuilderSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;

BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
bool primrefarrayalloc;
unsigned int numPreviousPrimitives = 0;

BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}

BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}

// FIXME: shrink bvh->alloc in destructor here and in other builders too

void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}

/* if we use the primrefarray for allocations we have to take it back from the BVH */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);

/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
return;
}

double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");

#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif

/* create primref array */
if (primrefarrayalloc) {
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;
}

/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);

/* initialize allocator */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
prims.resize(numPrimitives);

PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);

/* pinfo might has zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
prims.clear();
return;
}

/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));

#if PROFILE
});
#endif

/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);

/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}

void clear() {
prims.clear();
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

template<int N, typename Primitive>
struct BVHNBuilderSAHQuantized : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;

BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;

BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}

BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}

// FIXME: shrink bvh->alloc in destructor here and in other builders too

void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}

/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
prims.clear();
bvh->clear();
return;
}

double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");

#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
/* create primref array */
prims.resize(numPrimitives);
PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);

/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);

/* call BVH builder */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
//bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
#if PROFILE
});
#endif

/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}

void clear() {
prims.clear();
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/


template<int N, typename Primitive>
struct CreateLeafGrid
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;

__forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}

__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = set.size(); //Primitive::blocks(n);
const size_t start = set.begin();

/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();

for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}

/* allocate all leaf memory in one single block */
SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,num_geomIDs);

for (size_t g=0;g<num_geomIDs;g++)
{
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;

const SubGridBuildData& sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
bounds[pos] = prims[start+i].bounds();
pos++;
}
assert(pos <= N);
new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,geomIDs[g],pos);
}

return node;
}

BVH* bvh;
const SubGridBuildData * const sgrids;
};


template<int N>
struct BVHNBuilderSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;

BVH* bvh;
Scene* scene;
GridMesh* mesh;
mvector<PrimRef> prims;
mvector<SubGridBuildData> sgrids;
GeneralBVHBuilder::Settings settings;
const unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;

BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}

BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}

void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}

/* if we use the primrefarray for allocations we have to take it back from the BVH */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);

const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
numPreviousPrimitives = numGridPrimitives;


PrimInfo pinfo = mesh ? createPrimRefArrayGrids(mesh,prims,sgrids) : createPrimRefArrayGrids(scene,prims,sgrids);
const size_t numPrimitives = pinfo.size();
/* no primitives */
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
sgrids.clear();
return;
}

double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");

/* create primref array */
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;

/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);

/* initialize allocator */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));

bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);

/* pinfo might has zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
sgrids.clear();
prims.clear();
return;
}

/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));

/* clear temporary array */
sgrids.clear();

/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);

/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}

void clear() {
prims.clear();
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/


#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }

Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }

Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }

Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }



#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }

#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }

#endif
#endif

#if defined(EMBREE_GEOMETRY_USER)

Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}

Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
}
#if defined(__AVX__)

Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}

Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); }
Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct

#if defined(__AVX__)
Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); }
Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct
#endif
#endif
}
}
713
thirdparty/embree/kernels/bvh/bvh_builder_sah_mb.cpp
vendored
Normal file
@@ -0,0 +1,713 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_builder.h"
|
||||
#include "../builders/bvh_builder_msmblur.h"
|
||||
|
||||
#include "../builders/primrefgen.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../geometry/linei.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglev_mb.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
|
||||
#include "../common/state.h"
|
||||
|
||||
// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
|
||||
#if 0
|
||||
template<int N, typename Primitive>
|
||||
struct CreateMBlurLeaf
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::NodeRecordMB NodeRecordMB;
|
||||
|
||||
__forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}
|
||||
|
||||
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
size_t items = Primitive::blocks(set.size());
|
||||
size_t start = set.begin();
|
||||
for (size_t i=start; i<set.end(); i++) assert(prims[start].geomID() == prims[i].geomID()); // assert that all geomIDs are identical
|
||||
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
|
||||
NodeRef node = bvh->encodeLeaf((char*)accel,items);
|
||||
|
||||
LBBox3fa allBounds = empty;
|
||||
for (size_t i=0; i<items; i++)
|
||||
allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));
|
||||
|
||||
return NodeRecordMB(node,allBounds);
|
||||
}
|
||||
|
||||
BVH* bvh;
|
||||
PrimRef* prims;
|
||||
size_t time;
|
||||
};
|
||||
#endif
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct CreateMSMBlurLeaf
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
|
||||
|
||||
__forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}
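// Leaf creation for the multi-segment motion blur builder: packs the primitives of the
// current build record into Primitive blocks and accumulates their linear bounds over the
// record's time range, returning a 4D node record (bounds plus time interval).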
|
||||
|
||||
__forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
size_t items = Primitive::blocks(current.prims.size());
|
||||
size_t start = current.prims.begin();
|
||||
size_t end = current.prims.end();
|
||||
for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
|
||||
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteNodeAlignment);
|
||||
NodeRef node = bvh->encodeLeaf((char*)accel,items);
|
||||
LBBox3fa allBounds = empty;
|
||||
for (size_t i=0; i<items; i++)
|
||||
allBounds.extend(accel[i].fillMB(current.prims.prims->data(), start, current.prims.end(), bvh->scene, current.prims.time_range));
|
||||
return NodeRecordMB4D(node,allBounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
BVH* bvh;
|
||||
};
|
||||
|
||||
/* Motion blur BVH with 4D nodes and internal time splits */
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct BVHNBuilderMBlurSAH : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVHN<N>::NodeRef NodeRef;
|
||||
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
|
||||
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
|
||||
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
const size_t sahBlockSize;
|
||||
const float intCost;
|
||||
const size_t minLeafSize;
|
||||
const size_t maxLeafSize;
|
||||
const Geometry::GTypeMask gtype_;
|
||||
|
||||
BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
|
||||
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}
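// Note that maxLeafSize is clamped above to Primitive::max_size()*BVH::maxLeafBlocks,
// presumably so a single leaf never holds more blocks than a leaf NodeRef can encode.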
|
||||
|
||||
void build()
|
||||
{
|
||||
/* skip build for empty scene */
|
||||
const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
|
||||
if (numPrimitives == 0) { bvh->clear(); return; }
|
||||
|
||||
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");
|
||||
|
||||
#if PROFILE
|
||||
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
|
||||
#endif
|
||||
|
||||
//const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
|
||||
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
|
||||
|
||||
/*if (numTimeSegments == 1)
|
||||
buildSingleSegment(numPrimitives);
|
||||
else*/
|
||||
buildMultiSegment(numPrimitives);
|
||||
|
||||
#if PROFILE
|
||||
});
|
||||
#endif
|
||||
|
||||
/* clear temporary data for static geometry */
|
||||
bvh->cleanup();
|
||||
bvh->postBuild(t0);
|
||||
}
|
||||
|
||||
#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into a leaf.
|
||||
void buildSingleSegment(size_t numPrimitives)
|
||||
{
|
||||
/* create primref array */
|
||||
mvector<PrimRef> prims(scene->device,numPrimitives);
|
||||
const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface,0);
|
||||
/* early out if no valid primitives */
|
||||
if (pinfo.size() == 0) { bvh->clear(); return; }
|
||||
/* estimate acceleration structure size */
|
||||
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
|
||||
/* settings for BVH build */
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
settings.logBlockSize = bsr(sahBlockSize);
|
||||
settings.minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
settings.maxLeafSize = maxLeafSize;
|
||||
settings.travCost = travCost;
|
||||
settings.intCost = intCost;
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
/* build hierarchy */
|
||||
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
|
||||
(typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
|
||||
CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
|
||||
prims.data(),pinfo,settings);
|
||||
|
||||
bvh->set(root.ref,root.lbounds,pinfo.size());
|
||||
}
|
||||
#endif
|
||||
|
||||
void buildMultiSegment(size_t numPrimitives)
|
||||
{
|
||||
/* create primref array */
|
||||
mvector<PrimRefMB> prims(scene->device,numPrimitives);
|
||||
PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface);
|
||||
|
||||
/* early out if no valid primitives */
|
||||
if (pinfo.size() == 0) { bvh->clear(); return; }
|
||||
|
||||
/* estimate acceleration structure size */
|
||||
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
|
||||
/* settings for BVH build */
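// logBlockSize = bsr(sahBlockSize) makes the SAH cost count primitives in blocks of
// sahBlockSize (log2 of the power-of-two block size used here); singleLeafTimeSegment is
// taken from the primitive type and presumably forces time splits down to a single
// segment for primitives that cannot store more than one motion segment per leaf.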
|
||||
BVHBuilderMSMBlur::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxDepth;
|
||||
settings.logBlockSize = bsr(sahBlockSize);
|
||||
settings.minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
settings.maxLeafSize = maxLeafSize;
|
||||
settings.travCost = travCost;
|
||||
settings.intCost = intCost;
|
||||
settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
/* build hierarchy */
|
||||
auto root =
|
||||
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
|
||||
RecalculatePrimRef<Mesh>(scene),
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNodeMB4D::Create(),
|
||||
typename BVH::AABBNodeMB4D::Set(),
|
||||
CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
|
||||
bvh->scene->progressInterface,
|
||||
settings);
|
||||
|
||||
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
}
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
struct GridRecalculatePrimRef
|
||||
{
|
||||
Scene* scene;
|
||||
const SubGridBuildData * const sgrids;
|
||||
|
||||
__forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
|
||||
: scene(scene), sgrids(sgrids) {}
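// Recomputes a sub-grid PrimRefMB for a narrowed time range: the build-time primID indexes
// the SubGridBuildData table, which maps back to the real grid primitive and its (x,y)
// offset inside that grid.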
|
||||
|
||||
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
||||
{
|
||||
const unsigned int geomID = prim.geomID();
|
||||
const GridMesh* mesh = scene->get<GridMesh>(geomID);
|
||||
const unsigned int buildID = prim.primID();
|
||||
const SubGridBuildData &subgrid = sgrids[buildID];
|
||||
const unsigned int primID = subgrid.primID;
|
||||
const size_t x = subgrid.x();
|
||||
const size_t y = subgrid.y();
|
||||
const LBBox3fa lbounds = mesh->linearBounds(mesh->grid(primID),x,y,time_range);
|
||||
const unsigned num_time_segments = mesh->numTimeSegments();
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID);
|
||||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
||||
const unsigned int geomID = prim.geomID();
|
||||
const GridMesh* mesh = scene->get<GridMesh>(geomID);
|
||||
const unsigned int buildID = prim.primID();
|
||||
const SubGridBuildData &subgrid = sgrids[buildID];
|
||||
const unsigned int primID = subgrid.primID;
|
||||
const size_t x = subgrid.x();
|
||||
const size_t y = subgrid.y();
|
||||
return mesh->linearBounds(mesh->grid(primID),x,y,time_range);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<int N>
|
||||
struct CreateMSMBlurLeafGrid
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
|
||||
|
||||
__forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) : scene(scene), bvh(bvh), sgrids(sgrids) {}
|
||||
|
||||
__forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
const size_t items = current.prims.size();
|
||||
const size_t start = current.prims.begin();
|
||||
|
||||
const PrimRefMB* prims = current.prims.prims->data();
|
||||
/* collect all subsets with unique geomIDs */
|
||||
assert(items <= N);
|
||||
unsigned int geomIDs[N];
|
||||
unsigned int num_geomIDs = 1;
|
||||
geomIDs[0] = prims[start].geomID();
|
||||
|
||||
for (size_t i=1;i<items;i++)
|
||||
{
|
||||
bool found = false;
|
||||
const unsigned int new_geomID = prims[start+i].geomID();
|
||||
for (size_t j=0;j<num_geomIDs;j++)
|
||||
if (new_geomID == geomIDs[j])
|
||||
{ found = true; break; }
|
||||
if (!found)
|
||||
geomIDs[num_geomIDs++] = new_geomID;
|
||||
}
|
||||
|
||||
/* allocate all leaf memory in one single block */
|
||||
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
|
||||
|
||||
LBBox3fa allBounds = empty;
|
||||
|
||||
for (size_t g=0;g<num_geomIDs;g++)
|
||||
{
|
||||
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
|
||||
unsigned int x[N];
|
||||
unsigned int y[N];
|
||||
unsigned int primID[N];
|
||||
BBox3fa bounds0[N];
|
||||
BBox3fa bounds1[N];
|
||||
unsigned int pos = 0;
|
||||
for (size_t i=0;i<items;i++)
|
||||
{
|
||||
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
|
||||
|
||||
const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
|
||||
x[pos] = sgrid_bd.sx;
|
||||
y[pos] = sgrid_bd.sy;
|
||||
primID[pos] = sgrid_bd.primID;
|
||||
const size_t x = sgrid_bd.x();
|
||||
const size_t y = sgrid_bd.y();
|
||||
LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),x,y,current.prims.time_range);
|
||||
allBounds.extend(newBounds);
|
||||
bounds0[pos] = newBounds.bounds0;
|
||||
bounds1[pos] = newBounds.bounds1;
|
||||
pos++;
|
||||
}
|
||||
assert(pos <= N);
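// Each SubGridMBQBVHN block stores up to N sub-grids of a single mesh; the constructor
// receives the start of the time range and 1/range, presumably so the two bounds arrays
// can be interpolated to an arbitrary time inside that range during traversal.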
|
||||
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos);
|
||||
}
|
||||
return NodeRecordMB4D(node,allBounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
Scene *scene;
|
||||
BVH* bvh;
|
||||
const SubGridBuildData * const sgrids;
|
||||
};
|
||||
|
||||
#if 0
|
||||
template<int N>
|
||||
struct CreateLeafGridMB
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::NodeRecordMB NodeRecordMB;
|
||||
|
||||
__forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
|
||||
: scene(scene), bvh(bvh), sgrids(sgrids) {}
|
||||
|
||||
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
const size_t items = set.size();
|
||||
const size_t start = set.begin();
|
||||
|
||||
/* collect all subsets with unique geomIDs */
|
||||
assert(items <= N);
|
||||
unsigned int geomIDs[N];
|
||||
unsigned int num_geomIDs = 1;
|
||||
geomIDs[0] = prims[start].geomID();
|
||||
|
||||
for (size_t i=1;i<items;i++)
|
||||
{
|
||||
bool found = false;
|
||||
const unsigned int new_geomID = prims[start+i].geomID();
|
||||
for (size_t j=0;j<num_geomIDs;j++)
|
||||
if (new_geomID == geomIDs[j])
|
||||
{ found = true; break; }
|
||||
if (!found)
|
||||
geomIDs[num_geomIDs++] = new_geomID;
|
||||
}
|
||||
|
||||
/* allocate all leaf memory in one single block */
|
||||
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
|
||||
|
||||
LBBox3fa allBounds = empty;
|
||||
|
||||
for (size_t g=0;g<num_geomIDs;g++)
|
||||
{
|
||||
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
|
||||
|
||||
unsigned int x[N];
|
||||
unsigned int y[N];
|
||||
unsigned int primID[N];
|
||||
BBox3fa bounds0[N];
|
||||
BBox3fa bounds1[N];
|
||||
unsigned int pos = 0;
|
||||
for (size_t i=0;i<items;i++)
|
||||
{
|
||||
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
|
||||
|
||||
const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
|
||||
x[pos] = sgrid_bd.sx;
|
||||
y[pos] = sgrid_bd.sy;
|
||||
primID[pos] = sgrid_bd.primID;
|
||||
const size_t x = sgrid_bd.x();
|
||||
const size_t y = sgrid_bd.y();
|
||||
bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
|
||||
bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
|
||||
assert(valid0);
|
||||
assert(valid1);
|
||||
allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
|
||||
pos++;
|
||||
}
|
||||
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
|
||||
}
|
||||
return NodeRecordMB(node,allBounds);
|
||||
}
|
||||
|
||||
Scene *scene;
|
||||
BVH* bvh;
|
||||
const SubGridBuildData * const sgrids;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/* Motion blur BVH with 4D nodes and internal time splits */
|
||||
template<int N>
|
||||
struct BVHNBuilderMBlurSAHGrid : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVHN<N>::NodeRef NodeRef;
|
||||
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
|
||||
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
|
||||
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
const size_t sahBlockSize;
|
||||
const float intCost;
|
||||
const size_t minLeafSize;
|
||||
const size_t maxLeafSize;
|
||||
mvector<SubGridBuildData> sgrids;
|
||||
|
||||
|
||||
BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
|
||||
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {}
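// sgrids is a persistent side table mapping build-time primIDs to sub-grid coordinates
// (x, y, grid primID); it is shared by primref generation, the leaf creator and
// GridRecalculatePrimRef.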
|
||||
|
||||
|
||||
PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
|
||||
{
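// Classic two-pass prefix-sum pattern: the first parallel_for_for pass only counts
// sub-grids per mesh range, the second pass writes primrefs and SubGridBuildData entries
// at stable offsets (k = base.size() supplies each range's start index).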
|
||||
/* first run to get #primitives */
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator<GridMesh,true> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
|
||||
/* iterate over all meshes in the scene */
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j,range<size_t>(0,1))) continue;
|
||||
BBox3fa bounds = empty;
|
||||
const PrimRef prim(bounds,unsigned(geomID),unsigned(j));
|
||||
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
size_t numPrimitives = pinfo.size();
|
||||
if (numPrimitives == 0) return pinfo;
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
|
||||
k = base.size();
|
||||
size_t p_index = k;
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
const GridMesh::Grid &g = mesh->grid(j);
|
||||
if (!mesh->valid(j,range<size_t>(0,1))) continue;
|
||||
|
||||
for (unsigned int y=0; y<g.resY-1u; y+=2)
|
||||
for (unsigned int x=0; x<g.resX-1u; x+=2)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid
|
||||
const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index));
|
||||
pinfo.add_center2(prim);
|
||||
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
|
||||
prims[p_index++] = prim;
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
assert(pinfo.size() == numPrimitives);
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
|
||||
{
|
||||
/* first run to get #primitives */
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator<GridMesh,true> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
/* iterate over all meshes in the scene */
|
||||
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
|
||||
|
||||
PrimInfoMB pinfoMB(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
|
||||
LBBox3fa bounds(empty);
|
||||
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
|
||||
pinfoMB.merge(gridMB);
|
||||
}
|
||||
return pinfoMB;
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
size_t numPrimitives = pinfoMB.size();
|
||||
if (numPrimitives == 0) return pinfoMB;
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
|
||||
k = base.size();
|
||||
size_t p_index = k;
|
||||
PrimInfoMB pinfoMB(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
|
||||
const GridMesh::Grid &g = mesh->grid(j);
|
||||
|
||||
for (unsigned int y=0; y<g.resY-1u; y+=2)
|
||||
for (unsigned int x=0; x<g.resX-1u; x+=2)
|
||||
{
|
||||
const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
|
||||
pinfoMB.add_primref(prim);
|
||||
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
|
||||
prims[p_index++] = prim;
|
||||
}
|
||||
}
|
||||
return pinfoMB;
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
assert(pinfoMB.size() == numPrimitives);
|
||||
pinfoMB.time_range = t0t1;
|
||||
return pinfoMB;
|
||||
}
|
||||
|
||||
void build()
|
||||
{
|
||||
/* skip build for empty scene */
|
||||
const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
|
||||
if (numPrimitives == 0) { bvh->clear(); return; }
|
||||
|
||||
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");
|
||||
|
||||
//const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>();
|
||||
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
|
||||
//if (numTimeSegments == 1)
|
||||
// buildSingleSegment(numPrimitives);
|
||||
//else
|
||||
buildMultiSegment(numPrimitives);
|
||||
|
||||
/* clear temporary data for static geometry */
|
||||
bvh->cleanup();
|
||||
bvh->postBuild(t0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void buildSingleSegment(size_t numPrimitives)
|
||||
{
|
||||
/* create primref array */
|
||||
mvector<PrimRef> prims(scene->device,numPrimitives);
|
||||
const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
|
||||
/* early out if no valid primitives */
|
||||
if (pinfo.size() == 0) { bvh->clear(); return; }
|
||||
|
||||
/* estimate acceleration structure size */
|
||||
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
|
||||
//TODO: check leaf_bytes
|
||||
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
|
||||
/* settings for BVH build */
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
settings.logBlockSize = bsr(sahBlockSize);
|
||||
settings.minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
settings.maxLeafSize = maxLeafSize;
|
||||
settings.travCost = travCost;
|
||||
settings.intCost = intCost;
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
/* build hierarchy */
|
||||
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
|
||||
(typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNodeMB::Create(),
|
||||
typename BVH::AABBNodeMB::Set(),
|
||||
CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
|
||||
bvh->scene->progressInterface,
|
||||
prims.data(),pinfo,settings);
|
||||
|
||||
bvh->set(root.ref,root.lbounds,pinfo.size());
|
||||
}
|
||||
#endif
|
||||
|
||||
void buildMultiSegment(size_t numPrimitives)
|
||||
{
|
||||
/* create primref array */
|
||||
mvector<PrimRefMB> prims(scene->device,numPrimitives);
|
||||
PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);
|
||||
|
||||
/* early out if no valid primitives */
|
||||
if (pinfo.size() == 0) { bvh->clear(); return; }
|
||||
|
||||
|
||||
|
||||
GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());
|
||||
|
||||
/* estimate acceleration structure size */
|
||||
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
|
||||
//FIXME: check leaf_bytes
|
||||
//const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>));
|
||||
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
|
||||
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
|
||||
/* settings for BVH build */
|
||||
BVHBuilderMSMBlur::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxDepth;
|
||||
settings.logBlockSize = bsr(sahBlockSize);
|
||||
settings.minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
settings.maxLeafSize = maxLeafSize;
|
||||
settings.travCost = travCost;
|
||||
settings.intCost = intCost;
|
||||
settings.singleLeafTimeSegment = false;
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
/* build hierarchy */
|
||||
auto root =
|
||||
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
|
||||
recalculatePrimRef,
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNodeMB4D::Create(),
|
||||
typename BVH::AABBNodeMB4D::Set(),
|
||||
CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
|
||||
bvh->scene->progressInterface,
|
||||
settings);
|
||||
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
}
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
|
||||
Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
|
||||
Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
|
||||
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
|
||||
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
|
||||
return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
|
||||
}
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
|
||||
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
|
||||
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
|
||||
return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH4InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_GRID)
|
||||
Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8); }
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
201
thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_builder.h"
|
||||
|
||||
#include "../builders/primrefgen.h"
|
||||
#include "../builders/primrefgen_presplit.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../geometry/linei.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglev_mb.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
|
||||
#include "../common/state.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, typename Primitive>
|
||||
struct CreateLeafSpatial
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
__forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}
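// Packs the primref range into Primitive::blocks(n) primitive blocks and encodes them as a
// single leaf; n counts the (possibly spatially duplicated) primrefs in the set.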
|
||||
|
||||
__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
size_t n = set.size();
|
||||
size_t items = Primitive::blocks(n);
|
||||
size_t start = set.begin();
|
||||
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
|
||||
for (size_t i=0; i<items; i++) {
|
||||
accel[i].fill(prims,start,set.end(),bvh->scene);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
BVH* bvh;
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive, typename Splitter>
|
||||
struct BVHNBuilderFastSpatialSAH : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
Mesh* mesh;
|
||||
mvector<PrimRef> prims0;
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
const float splitFactor;
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
|
||||
unsigned int numPreviousPrimitives = 0;
|
||||
|
||||
BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
|
||||
: bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
|
||||
splitFactor(scene->device->max_spatial_split_replications) {}
|
||||
|
||||
BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
|
||||
: bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
|
||||
splitFactor(scene->device->max_spatial_split_replications), geomID_(geomID) {}
|
||||
|
||||
// FIXME: shrink bvh->alloc in destructor here and in other builders too
|
||||
|
||||
void build()
|
||||
{
|
||||
/* we reset the allocator when the mesh size changed */
|
||||
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
|
||||
bvh->alloc.clear();
|
||||
}
|
||||
|
||||
/* skip build for empty scene */
|
||||
const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
|
||||
numPreviousPrimitives = numOriginalPrimitives;
|
||||
if (numOriginalPrimitives == 0) {
|
||||
prims0.clear();
|
||||
bvh->clear();
|
||||
return;
|
||||
}
|
||||
|
||||
const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
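// Pre-splits are used either when explicitly enabled on the device or when the largest
// geomID would no longer fit once RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS are reserved in
// the 32-bit geomID field for spatial-split bookkeeping.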
|
||||
const bool usePreSplits = scene->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
|
||||
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));
|
||||
|
||||
/* create primref array */
|
||||
const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
|
||||
prims0.resize(numSplitPrimitives);
|
||||
|
||||
/* enable os_malloc for two level build */
|
||||
if (mesh)
|
||||
bvh->alloc.setOSallocation(true);
|
||||
|
||||
NodeRef root(0);
|
||||
PrimInfo pinfo;
|
||||
|
||||
|
||||
if (likely(usePreSplits))
|
||||
{
|
||||
/* spatial presplit SAH BVH builder */
|
||||
pinfo = mesh ?
|
||||
createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
|
||||
createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);
|
||||
|
||||
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
|
||||
/* call BVH builder */
|
||||
root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* standard spatial split SAH BVH builder */
|
||||
pinfo = mesh ?
|
||||
createPrimRefArray(mesh,geomID_,numSplitPrimitives,prims0,bvh->scene->progressInterface) :
|
||||
createPrimRefArray(scene,Mesh::geom_type,false,numSplitPrimitives,prims0,bvh->scene->progressInterface);
|
||||
|
||||
Splitter splitter(scene);
|
||||
|
||||
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
|
||||
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
|
||||
/* call BVH builder */
|
||||
root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNode::Create2(),
|
||||
typename BVH::AABBNode::Set2(),
|
||||
CreateLeafSpatial<N,Primitive>(bvh),
|
||||
splitter,
|
||||
bvh->scene->progressInterface,
|
||||
prims0.data(),
|
||||
numSplitPrimitives,
|
||||
pinfo,settings);
|
||||
|
||||
/* ==================== */
|
||||
}
|
||||
|
||||
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
|
||||
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
|
||||
|
||||
/* clear temporary data for static geometry */
|
||||
if (scene && scene->isStaticAccel()) {
|
||||
prims0.clear();
|
||||
}
|
||||
bvh->cleanup();
|
||||
bvh->postBuild(t0);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
prims0.clear();
|
||||
}
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
|
||||
Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
|
||||
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
|
||||
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
|
||||
Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
385
thirdparty/embree/kernels/bvh/bvh_builder_twolevel.cpp
vendored
Normal file
@@ -0,0 +1,385 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include "bvh_builder_twolevel.h"
|
||||
#include "bvh_statistics.h"
|
||||
#include "../builders/bvh_builder_sah.h"
|
||||
#include "../common/scene_line_segments.h"
|
||||
#include "../common/scene_triangle_mesh.h"
|
||||
#include "../common/scene_quad_mesh.h"
|
||||
|
||||
#define PROFILE 0
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
|
||||
: bvh(bvh), scene(scene), refs(scene->device,0), prims(scene->device,0), singleThreadThreshold(singleThreadThreshold), gtype(gtype), useMortonBuilder_(useMortonBuilder) {}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () {
|
||||
}
|
||||
|
||||
// ===========================================================================
|
||||
// ===========================================================================
|
||||
// ===========================================================================
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
|
||||
{
|
||||
/* delete some objects */
|
||||
size_t num = scene->size();
|
||||
if (num < bvh->objects.size()) {
|
||||
parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
builders[i].reset();
|
||||
delete bvh->objects[i]; bvh->objects[i] = nullptr;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#if PROFILE
|
||||
while(1)
|
||||
#endif
|
||||
{
|
||||
/* reset memory allocator */
|
||||
bvh->alloc.reset();
|
||||
|
||||
/* skip build for empty scene */
|
||||
const size_t numPrimitives = scene->getNumPrimitives(gtype,false);
|
||||
|
||||
if (numPrimitives == 0) {
|
||||
prims.resize(0);
|
||||
bvh->set(BVH::emptyNode,empty,0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* calculate the size of the entire BVH */
|
||||
const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
|
||||
const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
|
||||
const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
|
||||
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");
|
||||
|
||||
/* resize object array if scene got larger */
|
||||
if (bvh->objects.size() < num) bvh->objects.resize(num);
|
||||
if (builders.size() < num) builders.resize(num);
|
||||
resizeRefsList ();
|
||||
nextRef.store(0);
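// The first parallel loop below decides, per mesh, whether to take the small path
// (presumably referencing the mesh's primitives directly from the top level) or the large
// path (a dedicated per-mesh BVH); the second loop then lets each builder append its build
// refs to the shared refs array, using the atomic nextRef counter as the append cursor.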
|
||||
|
||||
/* create acceleration structures */
|
||||
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
|
||||
{
|
||||
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
|
||||
{
|
||||
Mesh* mesh = scene->getSafe<Mesh>(objectID);
|
||||
|
||||
/* ignore meshes we do not support */
|
||||
if (mesh == nullptr || mesh->numTimeSteps != 1)
|
||||
continue;
|
||||
|
||||
if (isSmallGeometry(mesh)) {
|
||||
setupSmallBuildRefBuilder (objectID, mesh);
|
||||
} else {
|
||||
setupLargeBuildRefBuilder (objectID, mesh);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* parallel build of acceleration structures */
|
||||
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
|
||||
{
|
||||
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
|
||||
{
|
||||
/* ignore meshes of unsupported type, disabled meshes, and meshes with multiple time steps */
|
||||
Mesh* mesh = scene->getSafe<Mesh>(objectID);
|
||||
if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
|
||||
continue;
|
||||
|
||||
builders[objectID]->attachBuildRefs (this);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
#if PROFILE
|
||||
double d0 = getSeconds();
|
||||
#endif
|
||||
/* fast path for single geometry scenes */
|
||||
if (nextRef == 1) {
|
||||
bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* open all large nodes */
|
||||
refs.resize(nextRef);
|
||||
|
||||
/* this probably needs some more tuning */
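// extSize reserves extra BuildRef slots so that per-object BVH roots can be opened into
// their children during top-level construction; the reserve constants are heuristic.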
|
||||
const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
|
||||
|
||||
#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
#if ENABLE_OPEN_SEQUENTIAL
|
||||
open_sequential(extSize);
|
||||
#endif
|
||||
/* compute PrimRefs */
|
||||
prims.resize(refs.size());
|
||||
#endif
|
||||
|
||||
{
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
pinfo.add_center2(refs[i]);
|
||||
}
|
||||
return pinfo;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
|
||||
#else
|
||||
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
pinfo.add_center2(refs[i]);
|
||||
prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
|
||||
}
|
||||
return pinfo;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
#endif
|
||||
|
||||
/* skip if all objects were empty */
|
||||
if (pinfo.size() == 0)
|
||||
bvh->set(BVH::emptyNode,empty,0);
|
||||
|
||||
/* otherwise build toplevel hierarchy */
|
||||
else
|
||||
{
|
||||
/* settings for BVH build */
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
settings.logBlockSize = bsr(N);
|
||||
settings.minLeafSize = 1;
|
||||
settings.maxLeafSize = 1;
|
||||
settings.travCost = 1.0f;
|
||||
settings.intCost = 1.0f;
|
||||
settings.singleThreadThreshold = singleThreadThreshold;
|
||||
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
refs.resize(extSize);
|
||||
|
||||
NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNode::Create2(),
|
||||
typename BVH::AABBNode::Set2(),
|
||||
|
||||
[&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
|
||||
assert(range.size() == 1);
|
||||
return (NodeRef) refs[range.begin()].node;
|
||||
},
|
||||
[&] (BuildRef &bref, BuildRef *refs) -> size_t {
|
||||
return openBuildRef(bref,refs);
|
||||
},
|
||||
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
|
||||
refs.data(),extSize,pinfo,settings);
|
||||
#else
|
||||
NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNode::Create2(),
|
||||
typename BVH::AABBNode::Set2(),
|
||||
|
||||
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
|
||||
assert(range.size() == 1);
|
||||
return (NodeRef) prims[range.begin()].ID();
|
||||
},
|
||||
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
|
||||
prims.data(),pinfo,settings);
|
||||
#endif
|
||||
|
||||
|
||||
bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bvh->alloc.cleanup();
|
||||
bvh->postBuild(t0);
|
||||
#if PROFILE
|
||||
double d1 = getSeconds();
|
||||
std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
|
||||
{
|
||||
if (geomID >= bvh->objects.size()) return;
|
||||
if (builders[geomID]) builders[geomID].reset();
|
||||
delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr;
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
|
||||
{
|
||||
for (size_t i=0; i<bvh->objects.size(); i++)
|
||||
if (bvh->objects[i]) bvh->objects[i]->clear();
|
||||
|
||||
for (size_t i=0; i<builders.size(); i++)
|
||||
if (builders[i]) builders[i].reset();
|
||||
|
||||
refs.clear();
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
|
||||
{
|
||||
if (refs.size() == 0)
|
||||
return;
|
||||
|
||||
refs.reserve(extSize);
|
||||
|
||||
#if 1
|
||||
for (size_t i=0;i<refs.size();i++)
|
||||
{
|
||||
NodeRef ref = refs[i].node;
|
||||
if (ref.isAABBNode())
|
||||
BVH::prefetch(ref);
|
||||
}
|
||||
#endif
|
||||
|
||||
std::make_heap(refs.begin(),refs.end());
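// Greedy opening: the heap is ordered by bounds_area (leaves get area 0), so the largest
// inner-node references are opened first, until either a leaf reaches the top or opening
// one more node could exceed the extSize budget.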
|
||||
while (refs.size()+N-1 <= extSize)
|
||||
{
|
||||
std::pop_heap (refs.begin(),refs.end());
|
||||
NodeRef ref = refs.back().node;
|
||||
if (ref.isLeaf()) break;
|
||||
refs.pop_back();
|
||||
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (node->child(i) == BVH::emptyNode) continue;
|
||||
refs.push_back(BuildRef(node->bounds(i),node->child(i)));
|
||||
|
||||
#if 1
|
||||
NodeRef ref_pre = node->child(i);
|
||||
if (ref_pre.isAABBNode())
|
||||
ref_pre.prefetch();
|
||||
#endif
|
||||
std::push_heap (refs.begin(),refs.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
|
||||
{
|
||||
if (builders[objectID] == nullptr || // new mesh
|
||||
dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change
|
||||
{
|
||||
builders[objectID].reset (new RefBuilderSmall(objectID));
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
|
||||
{
|
||||
if (bvh->objects[objectID] == nullptr || // new mesh
|
||||
builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality
|
||||
dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change
|
||||
{
|
||||
Builder* builder = nullptr;
|
||||
delete bvh->objects[objectID];
|
||||
createMeshAccel(objectID, builder);
|
||||
builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH4BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH8BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
262
thirdparty/embree/kernels/bvh/bvh_builder_twolevel.h
vendored
Normal file
@@ -0,0 +1,262 @@
|
||||
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <type_traits>

#include "bvh_builder_twolevel_internal.h"
#include "bvh.h"
#include "../builders/priminfo.h"
#include "../builders/primrefgen.h"

/* new open/merge builder */
#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
#define ENABLE_OPEN_SEQUENTIAL 0
#define SPLIT_MEMORY_RESERVE_FACTOR 1000
#define SPLIT_MEMORY_RESERVE_SCALE 2
#define SPLIT_MIN_EXT_SPACE 1000

namespace embree
{
  namespace isa
  {
    template<int N, typename Mesh, typename Primitive>
    class BVHNBuilderTwoLevel : public Builder
    {
      typedef BVHN<N> BVH;
      typedef typename BVH::AABBNode AABBNode;
      typedef typename BVH::NodeRef NodeRef;

      __forceinline static bool isSmallGeometry(Mesh* mesh) {
        return mesh->size() <= 4;
      }

    public:

      typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);

      struct BuildRef : public PrimRef
      {
      public:
        __forceinline BuildRef () {}

        __forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
          : PrimRef(bounds,(size_t)node), node(node)
        {
          if (node.isLeaf())
            bounds_area = 0.0f;
          else
            bounds_area = area(this->bounds());
        }

        /* used by the open/merge bvh builder */
        __forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
          : PrimRef(bounds,geomID,numPrimitives), node(node)
        {
          /* important for relative buildref ordering */
          if (node.isLeaf())
            bounds_area = 0.0f;
          else
            bounds_area = area(this->bounds());
        }

        __forceinline size_t size() const {
          return primID();
        }

        friend bool operator< (const BuildRef& a, const BuildRef& b) {
          return a.bounds_area < b.bounds_area;
        }

        friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
          return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
        }

        __forceinline unsigned int numPrimitives() const { return primID(); }

      public:
        NodeRef node;
        float bounds_area;
      };


      __forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
        if (bref.node.isLeaf())
        {
          refs[0] = bref;
          return 1;
        }
        NodeRef ref = bref.node;
        unsigned int geomID = bref.geomID();
        unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
        AABBNode* node = ref.getAABBNode();
        size_t n = 0;
        for (size_t i=0; i<N; i++) {
          if (node->child(i) == BVH::emptyNode) continue;
          refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
          n++;
        }
        assert(n > 1);
        return n;
      }

      /*! Constructor. */
      BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);

      /*! Destructor */
      ~BVHNBuilderTwoLevel ();

      /*! builder entry point */
      void build();
      void deleteGeometry(size_t geomID);
      void clear();

      void open_sequential(const size_t extSize);

    private:

      class RefBuilderBase {
      public:
        virtual ~RefBuilderBase () {}
        virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
        virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
      };

      class RefBuilderSmall : public RefBuilderBase {
      public:

        RefBuilderSmall (size_t objectID)
          : objectID_ (objectID) {}

        void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) {

          Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
          size_t meshSize = mesh->size();
          assert(isSmallGeometry(mesh));

          mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
          auto pinfo = createPrimRefArray(mesh,objectID_,meshSize,prefs,topBuilder->bvh->scene->progressInterface);

          size_t begin=0;
          while (begin < pinfo.size())
          {
            Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
            typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
            accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);

            /* create build primitive */
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
            topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
#else
            topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
#endif
          }
          assert(begin == pinfo.size());
        }

        bool meshQualityChanged (RTCBuildQuality /*currQuality*/) {
          return false;
        }

        size_t objectID_;
      };

      class RefBuilderLarge : public RefBuilderBase {
      public:

        RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
          : objectID_ (objectID), builder_ (builder), quality_ (quality) {}

        void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder)
        {
          BVH* object = topBuilder->getBVH(objectID_); assert(object);

          /* build object if it got modified */
          if (topBuilder->isGeometryModified(objectID_))
            builder_->build();

          /* create build primitive */
          if (!object->getBounds().empty())
          {
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
            Mesh* mesh = topBuilder->getMesh(objectID_);
            topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
#else
            topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
#endif
          }
        }

        bool meshQualityChanged (RTCBuildQuality currQuality) {
          return currQuality != quality_;
        }

      private:
        size_t objectID_;
        Ref<Builder> builder_;
        RTCBuildQuality quality_;
      };

      void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
      void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);

      BVH* getBVH (size_t objectID) {
        return this->bvh->objects[objectID];
      }
      Mesh* getMesh (size_t objectID) {
        return this->scene->template getSafe<Mesh>(objectID);
      }
      bool isGeometryModified (size_t objectID) {
        return this->scene->isGeometryModified(objectID);
      }

      void resizeRefsList ()
      {
        size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
          [this](const range<size_t>& r)->size_t {
            size_t c = 0;
            for (auto i=r.begin(); i<r.end(); ++i) {
              Mesh* mesh = scene->getSafe<Mesh>(i);
              if (mesh == nullptr || mesh->numTimeSteps != 1)
                continue;
              size_t meshSize = mesh->size();
              c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
            }
            return c;
          },
          std::plus<size_t>()
        );

        if (refs.size() < num) {
          refs.resize(num);
        }
      }

      void createMeshAccel (size_t geomID, Builder*& builder)
      {
        bvh->objects[geomID] = new BVH(Primitive::type,scene);
        BVH* accel = bvh->objects[geomID];
        auto mesh = scene->getSafe<Mesh>(geomID);
        if (nullptr == mesh) {
          throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
          return;
        }

        __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
      }

      using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;

      BuilderList builders;
      BVH* bvh;
      Scene* scene;
      mvector<BuildRef> refs;
      mvector<PrimRef> prims;
      std::atomic<int> nextRef;
      const size_t singleThreadThreshold;
      Geometry::GTypeMask gtype;
      bool useMortonBuilder_ = false;
    };
  }
}
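A useful way to read openBuildRef above: a build reference pointing at an inner node can be "opened" into up to N references, one per child, which gives the top-level SAH builder finer-grained boxes to partition. A stand-alone sketch of the same idea on generic types (SimpleRef and SimpleNode are illustrative, not Embree types):

    // Stand-alone sketch of reference opening (illustrative types, not Embree's).
    struct SimpleNode { static constexpr int N = 4; const SimpleNode* child[N]; bool leaf; };
    struct SimpleRef  { const SimpleNode* node; /* bounds, geomID, ... omitted */ };

    // Replace one reference by references to its children; returns how many were written.
    inline size_t openRef(const SimpleRef& ref, SimpleRef* out)
    {
      if (ref.node == nullptr || ref.node->leaf) { out[0] = ref; return 1; }
      size_t n = 0;
      for (int i = 0; i < SimpleNode::N; i++)
        if (ref.node->child[i] != nullptr)
          out[n++] = SimpleRef{ ref.node->child[i] };
      return n;
    }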
304
thirdparty/embree/kernels/bvh/bvh_builder_twolevel_internal.h
vendored
Normal file
@@ -0,0 +1,304 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
|
||||
namespace isa
|
||||
{
|
||||
|
||||
namespace __internal_two_level_builder__ {
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct MortonBuilder {};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4i> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,QuadMesh,Quad4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,UserGeometry,Object> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,Instance,InstancePrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4i> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,QuadMesh,Quad4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,UserGeometry,Object> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,Instance,InstancePrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct SAHBuilder {};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4i> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,QuadMesh,Quad4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,UserGeometry,Object> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,Instance,InstancePrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4i> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,QuadMesh,Quad4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,UserGeometry,Object> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,Instance,InstancePrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct RefitBuilder {};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4i> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,QuadMesh,Quad4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,UserGeometry,Object> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,Instance,InstancePrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4i> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,QuadMesh,Quad4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,UserGeometry,Object> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,Instance,InstancePrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct MeshBuilder {
|
||||
MeshBuilder () {}
|
||||
void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) {
|
||||
if(useMortonBuilder) {
|
||||
builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
|
||||
return;
|
||||
}
|
||||
switch (mesh->quality) {
|
||||
case RTC_BUILD_QUALITY_LOW: builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
case RTC_BUILD_QUALITY_MEDIUM:
|
||||
case RTC_BUILD_QUALITY_HIGH: builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
case RTC_BUILD_QUALITY_REFIT: builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
default: throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
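The MeshBuilder functor defined at the end of this header picks a per-mesh builder from the requested RTCBuildQuality: LOW maps to the Morton builder, MEDIUM and HIGH to the SAH builder, and REFIT to the refit path. A hedged usage sketch that mirrors the call made from BVHNBuilderTwoLevel::createMeshAccel (the wrapper itself is hypothetical, not Embree API):

    // Illustrative only: driving the quality-based dispatch above.
    // The real call site is createMeshAccel() in bvh_builder_twolevel.h.
    template<int N, typename Mesh, typename Primitive>
    Builder* makePerMeshBuilder(void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype)
    {
      Builder* builder = nullptr;
      const bool useMortonBuilder = false; /* true would bypass the quality switch */
      __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(bvh, mesh, geomID, gtype, useMortonBuilder, builder);
      return builder;
    }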
377
thirdparty/embree/kernels/bvh/bvh_collider.cpp
vendored
Normal file
@@ -0,0 +1,377 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_collider.h"
|
||||
|
||||
#include "../geometry/triangle_triangle_intersector.h"
|
||||
#include "../../common/algorithms/parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
#define CSTAT(x)
|
||||
|
||||
size_t parallel_depth_threshold = 3;
|
||||
CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
|
||||
|
||||
struct Collision
|
||||
{
|
||||
__forceinline Collision() {}
|
||||
|
||||
__forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
|
||||
: geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {}
|
||||
|
||||
unsigned geomID0;
|
||||
unsigned primID0;
|
||||
unsigned geomID1;
|
||||
unsigned primID1;
|
||||
};
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
|
||||
|
||||
bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
|
||||
{
|
||||
CSTAT(bvh_collide_prim_intersections1++);
|
||||
const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
|
||||
const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);
|
||||
const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
|
||||
const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);
|
||||
|
||||
/* special culling for scene intersection with itself */
|
||||
if (scene0 == scene1 && geomID0 == geomID1)
|
||||
{
|
||||
/* ignore self intersections */
|
||||
if (primID0 == primID1)
|
||||
return false;
|
||||
}
|
||||
CSTAT(bvh_collide_prim_intersections2++);
|
||||
|
||||
if (scene0 == scene1 && geomID0 == geomID1)
|
||||
{
|
||||
/* ignore intersection with topological neighbors */
|
||||
const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
|
||||
if (any(vint4(tri1.v[0]) == t0)) return false;
|
||||
if (any(vint4(tri1.v[1]) == t0)) return false;
|
||||
if (any(vint4(tri1.v[2]) == t0)) return false;
|
||||
}
|
||||
CSTAT(bvh_collide_prim_intersections3++);
|
||||
|
||||
const Vec3fa a0 = mesh0->vertex(tri0.v[0]);
|
||||
const Vec3fa a1 = mesh0->vertex(tri0.v[1]);
|
||||
const Vec3fa a2 = mesh0->vertex(tri0.v[2]);
|
||||
const Vec3fa b0 = mesh1->vertex(tri1.v[0]);
|
||||
const Vec3fa b1 = mesh1->vertex(tri1.v[1]);
|
||||
const Vec3fa b2 = mesh1->vertex(tri1.v[2]);
|
||||
|
||||
return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
|
||||
{
|
||||
Collision collisions[16];
|
||||
size_t num_collisions = 0;
|
||||
|
||||
size_t N0; Object* leaf0 = (Object*) node0.leaf(N0);
|
||||
size_t N1; Object* leaf1 = (Object*) node1.leaf(N1);
|
||||
for (size_t i=0; i<N0; i++) {
|
||||
for (size_t j=0; j<N1; j++) {
|
||||
const unsigned geomID0 = leaf0[i].geomID();
|
||||
const unsigned primID0 = leaf0[i].primID();
|
||||
const unsigned geomID1 = leaf1[j].geomID();
|
||||
const unsigned primID1 = leaf1[j].primID();
|
||||
if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue;
|
||||
collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1);
|
||||
if (num_collisions == 16) {
|
||||
this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
|
||||
num_collisions = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_collisions)
|
||||
this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
|
||||
{
|
||||
CSTAT(bvh_collide_traversal_steps++);
|
||||
if (unlikely(ref0.isLeaf())) {
|
||||
if (unlikely(ref1.isLeaf())) {
|
||||
CSTAT(bvh_collide_leaf_pairs++);
|
||||
processLeaf(ref0,ref1);
|
||||
return;
|
||||
} else goto recurse_node1;
|
||||
|
||||
} else {
|
||||
if (unlikely(ref1.isLeaf())) {
|
||||
goto recurse_node0;
|
||||
} else {
|
||||
if (area(bounds0) > area(bounds1)) {
|
||||
goto recurse_node0;
|
||||
}
|
||||
else {
|
||||
goto recurse_node1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node0:
|
||||
AABBNode* node0 = ref0.getAABBNode();
|
||||
size_t mask = overlap<N>(bounds1,*node0);
|
||||
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
//for (size_t i=0; i<N; i++) {
|
||||
#if 0
|
||||
if (depth0 < parallel_depth_threshold)
|
||||
{
|
||||
parallel_for(size_t(N), [&] ( size_t i ) {
|
||||
if (mask & ( 1 << i)) {
|
||||
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node1:
|
||||
AABBNode* node1 = ref1.getAABBNode();
|
||||
size_t mask = overlap<N>(bounds0,*node1);
|
||||
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
//for (size_t i=0; i<N; i++) {
|
||||
#if 0
|
||||
if (depth1 < parallel_depth_threshold)
|
||||
{
|
||||
parallel_for(size_t(N), [&] ( size_t i ) {
|
||||
if (mask & ( 1 << i)) {
|
||||
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
|
||||
{
|
||||
if (unlikely(job.ref0.isLeaf())) {
|
||||
if (unlikely(job.ref1.isLeaf())) {
|
||||
jobs.push_back(job);
|
||||
return;
|
||||
} else goto recurse_node1;
|
||||
} else {
|
||||
if (unlikely(job.ref1.isLeaf())) {
|
||||
goto recurse_node0;
|
||||
} else {
|
||||
if (area(job.bounds0) > area(job.bounds1)) {
|
||||
goto recurse_node0;
|
||||
}
|
||||
else {
|
||||
goto recurse_node1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node0:
|
||||
const AABBNode* node0 = job.ref0.getAABBNode();
|
||||
size_t mask = overlap<N>(job.bounds1,*node0);
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node1:
|
||||
const AABBNode* node1 = job.ref1.getAABBNode();
|
||||
size_t mask = overlap<N>(job.bounds0,*node1);
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
|
||||
{
|
||||
CSTAT(bvh_collide_traversal_steps = 0);
|
||||
CSTAT(bvh_collide_leaf_pairs = 0);
|
||||
CSTAT(bvh_collide_leaf_iterations = 0);
|
||||
CSTAT(bvh_collide_prim_intersections1 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections2 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections3 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections4 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections5 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections = 0);
|
||||
#if 0
|
||||
collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
|
||||
#else
|
||||
const int M = 2048;
|
||||
jobvector jobs[2];
|
||||
jobs[0].reserve(M);
|
||||
jobs[1].reserve(M);
|
||||
jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
|
||||
int source = 0;
|
||||
int target = 1;
|
||||
|
||||
/* try to split job until job list is full */
|
||||
while (jobs[source].size()+8 <= M)
|
||||
{
|
||||
for (size_t i=0; i<jobs[source].size(); i++)
|
||||
{
|
||||
const CollideJob& job = jobs[source][i];
|
||||
size_t remaining = jobs[source].size()-i;
|
||||
if (jobs[target].size()+remaining+8 > M) {
|
||||
jobs[target].push_back(job);
|
||||
} else {
|
||||
split(job,jobs[target]);
|
||||
}
|
||||
}
|
||||
|
||||
/* stop splitting jobs if we reached only leaves and cannot make progress anymore */
|
||||
if (jobs[target].size() == jobs[source].size())
|
||||
break;
|
||||
|
||||
jobs[source].resize(0);
|
||||
std::swap(source,target);
|
||||
}
|
||||
|
||||
/* parallel processing of all jobs */
|
||||
parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
|
||||
CollideJob& j = jobs[source][i];
|
||||
collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
|
||||
});
|
||||
|
||||
|
||||
#endif
|
||||
CSTAT(PRINT(bvh_collide_traversal_steps));
|
||||
CSTAT(PRINT(bvh_collide_leaf_pairs));
|
||||
CSTAT(PRINT(bvh_collide_leaf_iterations));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections1));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections2));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections3));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections4));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections5));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
|
||||
{
|
||||
BVHNColliderUserGeom<N>(bvh0->scene,bvh1->scene,callback,userPtr).
|
||||
collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds());
|
||||
}
|
||||
|
||||
#if defined (EMBREE_LOWEST_ISA)
|
||||
struct collision_regression_test : public RegressionTest
|
||||
{
|
||||
collision_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
|
||||
Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
collision_regression_test collision_regression("collision_regression_test");
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Collider Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);
|
||||
|
||||
#if defined(__AVX__)
|
||||
DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
|
||||
#endif
|
||||
}
|
||||
}
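BVHNColliderUserGeom::processLeaf above reports overlapping leaf pairs to the user callback in batches of at most 16 collisions. On the application side this is reachable through rtcCollide; a minimal user-side sketch of such a callback, assuming <vector> is included (CollisionLog is illustrative, only the RTCCollideFunc shape matters):

    // Sketch of a user callback fed by processLeaf() above; not part of the vendored sources.
    struct CollisionLog { std::vector<std::pair<unsigned,unsigned>> pairs; };

    void collideCallback(void* userPtr, RTCCollision* collisions, unsigned int num)
    {
      CollisionLog* log = (CollisionLog*)userPtr;
      for (unsigned int i = 0; i < num; i++)
        log->pairs.emplace_back(collisions[i].primID0, collisions[i].primID1);
    }
    // usage (sketch): rtcCollide(scene0, scene1, collideCallback, &log);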
72
thirdparty/embree/kernels/bvh/bvh_collider.h
vendored
Normal file
@@ -0,0 +1,72 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"
#include "../geometry/trianglev.h"
#include "../geometry/object.h"

namespace embree
{
  namespace isa
  {
    template<int N>
    class BVHNCollider
    {
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;

      struct CollideJob
      {
        CollideJob () {}

        CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
                    NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
          : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}

        NodeRef ref0;
        BBox3fa bounds0;
        size_t depth0;
        NodeRef ref1;
        BBox3fa bounds1;
        size_t depth1;
      };

      typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;

      void split(const CollideJob& job, jobvector& jobs);

    public:
      __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
        : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}

    public:
      virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
      void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
      void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);

    protected:
      Scene* scene0;
      Scene* scene1;
      RTCCollideFunc callback;
      void* userPtr;
    };

    template<int N>
    class BVHNColliderUserGeom : public BVHNCollider<N>
    {
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;

      __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
        : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}

      virtual void processLeaf(NodeRef leaf0, NodeRef leaf1);
    public:
      static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
    };
  }
}
21
thirdparty/embree/kernels/bvh/bvh_factory.h
vendored
Normal file
@@ -0,0 +1,21 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../bvh/bvh.h"
#include "../common/isa.h"
#include "../common/accel.h"
#include "../common/scene.h"
#include "../geometry/curve_intersector_virtual.h"

namespace embree
{
  /*! BVH instantiations */
  class BVHFactory
  {
  public:
    enum class BuildVariant { STATIC, DYNAMIC, HIGH_QUALITY };
    enum class IntersectVariant { FAST, ROBUST };
  };
}
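BVHFactory only fixes the two axes along which the concrete factories specialize their BVHs: how the tree is built and how it is traversed. A sketch of how downstream code might branch on these enums (the helper and the string names are illustrative assumptions, not Embree API):

    // Illustrative only: dispatching on the factory enums above.
    inline const char* chooseBuilder(BVHFactory::BuildVariant v)
    {
      switch (v) {
        case BVHFactory::BuildVariant::STATIC:       return "binned SAH builder";
        case BVHFactory::BuildVariant::DYNAMIC:      return "two-level builder";
        case BVHFactory::BuildVariant::HIGH_QUALITY: return "spatial-split SAH builder";
        default:                                     return "unknown";
      }
    }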
322
thirdparty/embree/kernels/bvh/bvh_intersector1.cpp
vendored
Normal file
@@ -0,0 +1,322 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh_intersector1.h"
#include "node_intersector1.h"
#include "bvh_traverser1.h"

#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"

namespace embree
{
  namespace isa
  {
    template<int N, int types, bool robust, typename PrimitiveIntersector1>
    void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
                                                                              RayHit& __restrict__ ray,
                                                                              RayQueryContext* __restrict__ context)
    {
      const BVH* __restrict__ bvh = (const BVH*)This->ptr;

      /* we may traverse an empty BVH in case all geometry was invalid */
      if (bvh->root == BVH::emptyNode)
        return;

      /* perform per ray precalculations required by the primitive intersector */
      Precalculations pre(ray, bvh);

      /* stack state */
      StackItemT<NodeRef> stack[stackSize];    // stack of nodes
      StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
      StackItemT<NodeRef>* stackEnd = stack+stackSize;
      stack[0].ptr  = bvh->root;
      stack[0].dist = neg_inf;

      if (bvh->root == BVH::emptyNode)
        return;

      /* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
      if (!ray.valid()) return;
#endif
      /* verify correct input */
      assert(ray.valid());
      assert(ray.tnear() >= 0.0f);
      assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));

      /* load the ray into SIMD registers */
      TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));

      /* initialize the node traverser */
      BVHNNodeTraverser1Hit<N, types> nodeTraverser;

      /* pop loop */
      while (true) pop:
      {
        /* pop next node */
        if (unlikely(stackPtr == stack)) break;
        stackPtr--;
        NodeRef cur = NodeRef(stackPtr->ptr);

        /* if popped node is too far, pop next one */
        if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
          continue;

        /* downtraversal loop */
        while (true)
        {
          /* intersect node */
          size_t mask; vfloat<N> tNear;
          STAT3(normal.trav_nodes,1,1,1);
          bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
          if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }

          /* if no child is hit, pop next node */
          if (unlikely(mask == 0))
            goto pop;

          /* select next child and push other children */
          nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
        }

        /* this is a leaf node */
        assert(cur != BVH::emptyNode);
        STAT3(normal.trav_leaves,1,1,1);
        size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
        size_t lazy_node = 0;
        PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);
        tray.tfar = ray.tfar;

        /* push lazy node onto stack */
        if (unlikely(lazy_node)) {
          stackPtr->ptr = lazy_node;
          stackPtr->dist = neg_inf;
          stackPtr++;
        }
      }
    }
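One detail worth noting in intersect() above: each stack entry stores the entry distance of its subtree, so a popped node whose recorded distance already exceeds the current ray.tfar can be skipped without re-intersecting it. The same culling idea in isolation, as a sketch with plain scalar types (SimpleStackItem is illustrative, not Embree's StackItemT):

    // Sketch of the stack-distance culling used by intersect() (plain scalar types).
    struct SimpleStackItem { const void* node; float dist; };

    inline bool shouldSkip(const SimpleStackItem& item, float ray_tfar)
    {
      /* a closer hit was already found after this node was pushed */
      return item.dist > ray_tfar;
    }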

    template<int N, int types, bool robust, typename PrimitiveIntersector1>
    void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
                                                                             Ray& __restrict__ ray,
                                                                             RayQueryContext* __restrict__ context)
    {
      const BVH* __restrict__ bvh = (const BVH*)This->ptr;

      /* we may traverse an empty BVH in case all geometry was invalid */
      if (bvh->root == BVH::emptyNode)
        return;

      /* early out for already occluded rays */
      if (unlikely(ray.tfar < 0.0f))
        return;

      /* perform per ray precalculations required by the primitive intersector */
      Precalculations pre(ray, bvh);

      /* stack state */
      NodeRef stack[stackSize];    // stack of nodes that still need to get traversed
      NodeRef* stackPtr = stack+1; // current stack pointer
      NodeRef* stackEnd = stack+stackSize;
      stack[0] = bvh->root;

      /* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
      if (!ray.valid()) return;
#endif

      /* verify correct input */
      assert(ray.valid());
      assert(ray.tnear() >= 0.0f);
      assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));

      /* load the ray into SIMD registers */
      TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));

      /* initialize the node traverser */
      BVHNNodeTraverser1Hit<N, types> nodeTraverser;

      /* pop loop */
      while (true) pop:
      {
        /* pop next node */
        if (unlikely(stackPtr == stack)) break;
        stackPtr--;
        NodeRef cur = (NodeRef)*stackPtr;

        /* downtraversal loop */
        while (true)
        {
          /* intersect node */
          size_t mask; vfloat<N> tNear;
          STAT3(shadow.trav_nodes,1,1,1);
          bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
          if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }

          /* if no child is hit, pop next node */
          if (unlikely(mask == 0))
            goto pop;

          /* select next child and push other children */
          nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
        }

        /* this is a leaf node */
        assert(cur != BVH::emptyNode);
        STAT3(shadow.trav_leaves,1,1,1);
        size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
        size_t lazy_node = 0;
        if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
          ray.tfar = neg_inf;
          break;
        }

        /* push lazy node onto stack */
        if (unlikely(lazy_node)) {
          *stackPtr = (NodeRef)lazy_node;
          stackPtr++;
        }
      }
    }
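Unlike intersect(), occluded() stops at the first confirmed hit and encodes the result by setting ray.tfar to negative infinity, which is also how the public occlusion query reports a blocked ray. A user-side sketch of reading that result (kept as comments; shadow-ray and argument setup are assumed to happen elsewhere):

    // Sketch: interpreting the occlusion result written by the kernel above.
    // Assumes 'shadowRay' is an initialized RTCRay and 'scene'/'args' are set up elsewhere.
    //   rtcOccluded1(scene, &shadowRay, &args);
    //   const bool inShadow = (shadowRay.tfar < 0.0f);  /* tfar becomes -inf on a hit */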

    template<int N, int types, bool robust, typename PrimitiveIntersector1>
    struct PointQueryDispatch
    {
      typedef typename PrimitiveIntersector1::Precalculations Precalculations;
      typedef typename PrimitiveIntersector1::Primitive Primitive;
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;
      typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;

      static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store

      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
      {
        const BVH* __restrict__ bvh = (const BVH*)This->ptr;

        /* we may traverse an empty BVH in case all geometry was invalid */
        if (bvh->root == BVH::emptyNode)
          return false;

        /* stack state */
        StackItemT<NodeRef> stack[stackSize];    // stack of nodes
        StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
        StackItemT<NodeRef>* stackEnd = stack+stackSize;
        stack[0].ptr  = bvh->root;
        stack[0].dist = neg_inf;

        /* verify correct input */
        assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));

        /* load the point query into SIMD registers */
        TravPointQuery<N> tquery(query->p, context->query_radius);

        /* initialize the node traverser */
        BVHNNodeTraverser1Hit<N,types> nodeTraverser;

        bool changed = false;
        float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
                          ? query->radius * query->radius
                          : dot(context->query_radius, context->query_radius);

        /* pop loop */
        while (true) pop:
        {
          /* pop next node */
          if (unlikely(stackPtr == stack)) break;
          stackPtr--;
          NodeRef cur = NodeRef(stackPtr->ptr);

          /* if popped node is too far, pop next one */
          if (unlikely(*(float*)&stackPtr->dist > cull_radius))
            continue;

          /* downtraversal loop */
          while (true)
          {
            /* intersect node */
            size_t mask; vfloat<N> tNear;
            STAT3(point_query.trav_nodes,1,1,1);
            bool nodeIntersected;
            if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
              nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
            } else {
              nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
            }
            if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; }

            /* if no child is hit, pop next node */
            if (unlikely(mask == 0))
              goto pop;

            /* select next child and push other children */
            nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
          }

          /* this is a leaf node */
          assert(cur != BVH::emptyNode);
          STAT3(point_query.trav_leaves,1,1,1);
          size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
          size_t lazy_node = 0;
          if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
          {
            changed = true;
            tquery.rad = context->query_radius;
            cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
                        ? query->radius * query->radius
                        : dot(context->query_radius, context->query_radius);
          }

          /* push lazy node onto stack */
          if (unlikely(lazy_node)) {
            stackPtr->ptr = lazy_node;
            stackPtr->dist = neg_inf;
            stackPtr++;
          }
        }
        return changed;
      }
    };

    /* disable point queries for not yet supported geometry types */
    template<int N, int types, bool robust>
    struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1> {
      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
    };

    template<int N, int types, bool robust>
    struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1> {
      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
    };

    template<int N, int types, bool robust>
    struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1> {
      static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
    };

    template<int N, int types, bool robust, typename PrimitiveIntersector1>
    bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
      const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
|
||||
{
|
||||
return PointQueryDispatch<N, types, robust, PrimitiveIntersector1>::pointQuery(This, query, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
34
thirdparty/embree/kernels/bvh/bvh_intersector1.h
vendored
Normal file
34
thirdparty/embree/kernels/bvh/bvh_intersector1.h
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "../common/ray.h"
|
||||
#include "../common/point_query.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! BVH single ray intersector. */
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector1>
|
||||
class BVHNIntersector1
|
||||
{
|
||||
/* shortcuts for frequently used types */
|
||||
typedef typename PrimitiveIntersector1::Precalculations Precalculations;
|
||||
typedef typename PrimitiveIntersector1::Primitive Primitive;
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
|
||||
|
||||
static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
|
||||
|
||||
public:
|
||||
static void intersect (const Accel::Intersectors* This, RayHit& ray, RayQueryContext* context);
|
||||
static void occluded (const Accel::Intersectors* This, Ray& ray, RayQueryContext* context);
|
||||
static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
|
||||
};
|
||||
}
|
||||
}
|
||||
64
thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp
vendored
Normal file
64
thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector1.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
int getISA() {
|
||||
return VerifyMultiTargetLinking::getISA();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4Intersector1 Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));
|
||||
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1> >));
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1MB> >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
|
||||
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
|
||||
|
||||
}
|
||||
}
|
||||
918
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
918
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
@@ -0,0 +1,918 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.h"
|
||||
#include "bvh_traverser1.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "node_intersector_packet.h"
|
||||
|
||||
#include "../geometry/intersector_iterators.h"
|
||||
#include "../geometry/triangle_intersector.h"
|
||||
#include "../geometry/trianglev_intersector.h"
|
||||
#include "../geometry/trianglev_mb_intersector.h"
|
||||
#include "../geometry/trianglei_intersector.h"
|
||||
#include "../geometry/quadv_intersector.h"
|
||||
#include "../geometry/quadi_intersector.h"
|
||||
#include "../geometry/curveNv_intersector.h"
|
||||
#include "../geometry/curveNi_intersector.h"
|
||||
#include "../geometry/curveNi_mb_intersector.h"
|
||||
#include "../geometry/linei_intersector.h"
|
||||
#include "../geometry/subdivpatch1_intersector.h"
|
||||
#include "../geometry/object_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
#include "../geometry/instance_array_intersector.h"
|
||||
#include "../geometry/subgrid_intersector.h"
|
||||
#include "../geometry/subgrid_mb_intersector.h"
|
||||
#include "../geometry/curve_intersector_virtual.h"
|
||||
|
||||
#define SWITCH_DURING_DOWN_TRAVERSAL 1
|
||||
#define FORCE_SINGLE_MODE 0
|
||||
|
||||
#define ENABLE_FAST_COHERENT_CODEPATHS 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayHitK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
|
||||
stack[0].ptr = root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* if popped node is too far, pop next one */
|
||||
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
|
||||
continue;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
|
||||
|
||||
tray1.tfar = ray.tfar[k];
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
RayQueryContext* __restrict__ context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
intersectCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
if (single)
|
||||
{
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
for (; valid_bits!=0; ) {
|
||||
const size_t i = bscf(valid_bits);
|
||||
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
/* test whether we have ray with opposing direction signs in the packet */
|
||||
bool split = false;
|
||||
{
|
||||
size_t bits = valid_bits;
|
||||
vbool<K> vsplit( false );
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
bits &= ~(size_t)movemask(octant_valid);
|
||||
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
|
||||
} while (bits);
|
||||
if (any(vsplit)) split = true;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
|
||||
const vint<K> count_diff_octant = \
|
||||
((diff_octant >> 2) & 1) +
|
||||
((diff_octant >> 1) & 1) +
|
||||
((diff_octant >> 0) & 1);
|
||||
|
||||
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
|
||||
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
|
||||
|
||||
|
||||
octant = select(octant_valid,vint<K>(0xffffffff),octant);
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
intersect1(This, bvh, cur, i, pre, ray, tray, context);
|
||||
}
|
||||
tray.tfar = min(tray.tfar, ray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
size_t num_child_hits = 0;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we choose to continue with that child if it
|
||||
is closer than the current next child, or we push it onto the stack */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
/* push cur node onto stack and continue with hit child */
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
*sptr_node = child; sptr_node++;
|
||||
*sptr_near = childDist; sptr_near++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (any(sptr_near[-2] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-2],sptr_near[-1]);
|
||||
std::swap(sptr_node[-2],sptr_node[-1]);
|
||||
}
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (any(sptr_near[-3] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-1]);
|
||||
std::swap(sptr_node[-3],sptr_node[-1]);
|
||||
}
|
||||
if (any(sptr_near[-3] < sptr_near[-2]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-2]);
|
||||
std::swap(sptr_node[-3],sptr_node[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *(float*)&stackPtr->dist;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active))) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const vfloat<K> childDist = fmin[i];
|
||||
const NodeRef child = node->child(i);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
*(float*)&stackPtr->dist = toScalar(curDist);
|
||||
stackPtr++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = child;
|
||||
*(float*)&stackPtr->dist = toScalar(childDist);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (stackPtr[-2].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-2],stackPtr[-1]);
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (stackPtr[-3].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-1]);
|
||||
if (stackPtr[-3].dist < stackPtr[-2].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
|
||||
/* reduce max distance interval on successful intersection */
|
||||
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
|
||||
{
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
frustum.template updateMaxDist<K>(tray.tfar);
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
|
||||
NodeRef* stackPtr = stack+1; // current stack pointer
|
||||
NodeRef* stackEnd = stack+stackSizeSingle;
|
||||
stack[0] = root;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = (NodeRef)*stackPtr;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(shadow.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(shadow.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
|
||||
ray.tfar[k] = neg_inf;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*stackPtr = lazy_node;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
occludedCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out already occluded and invalid rays */
|
||||
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
const size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
const vfloat<K> inf = vfloat<K>(pos_inf);
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
|
||||
set(terminated, i);
|
||||
}
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
|
||||
/* push 'cur' node onto stack and continue with hit child */
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
}
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid,ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].mask = movemask(octant_valid);
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node of active rays have already been terminated */
|
||||
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
|
||||
|
||||
if (unlikely(m_active == 0)) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
m_active = 0;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
//size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const NodeRef child = node->child(i);
|
||||
assert(child != BVH::emptyNode);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
//num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
stackPtr->mask = m_active;
|
||||
stackPtr++;
|
||||
}
|
||||
cur = child;
|
||||
m_active = movemask(lhit);
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
|
||||
#endif
|
||||
if (unlikely(!m_active)) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
octant_valid &= !terminated;
|
||||
if (unlikely(none(octant_valid))) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->mask = movemask(octant_valid);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
}
|
||||
}
|
||||
58
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h
vendored
Normal file
58
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "../common/ray.h"
|
||||
#include "../common/stack_item.h"
|
||||
#include "node_intersector_frustum.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int K, bool robust>
|
||||
struct TravRayK;
|
||||
|
||||
/*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional). */
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
|
||||
class BVHNIntersectorKHybrid
|
||||
{
|
||||
/* shortcuts for frequently used types */
|
||||
typedef typename PrimitiveIntersectorK::Precalculations Precalculations;
|
||||
typedef typename PrimitiveIntersectorK::Primitive Primitive;
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::BaseNode BaseNode;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
|
||||
static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
|
||||
static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;
|
||||
|
||||
static const size_t switchThresholdIncoherent = \
|
||||
(K==4) ? 3 :
|
||||
(K==8) ? ((N==4) ? 5 : 7) :
|
||||
(K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
|
||||
0;
|
||||
|
||||
private:
|
||||
static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
|
||||
RayHitK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);
|
||||
static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
|
||||
RayK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);
|
||||
|
||||
public:
|
||||
static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
|
||||
static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);
|
||||
|
||||
static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
|
||||
static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);
|
||||
|
||||
};
|
||||
|
||||
/*! BVH packet intersector. */
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
|
||||
class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {};
|
||||
}
|
||||
}
|
||||
62
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp
vendored
Normal file
62
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4Intersector4 Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
|
||||
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorK<4>> >));
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorKMB<4>> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
229
thirdparty/embree/kernels/bvh/bvh_node_aabb.h
vendored
Normal file
229
thirdparty/embree/kernels/bvh/bvh_node_aabb.h
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVHN AABBNode */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNode_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
|
||||
{
|
||||
AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
|
||||
node.getAABBNode()->setRef(i,child);
|
||||
node.getAABBNode()->setBounds(i,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
struct Create2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear();
|
||||
for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds());
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNode_t* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
return ref;
|
||||
}
|
||||
};
|
||||
|
||||
struct Set3
|
||||
{
|
||||
Set3 (FastAllocator* allocator, PrimRef* prims)
|
||||
: allocator(allocator), prims(prims) {}
|
||||
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNode_t* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
|
||||
if (unlikely(precord.alloc_barrier))
|
||||
{
|
||||
PrimRef* begin = &prims[precord.prims.begin()];
|
||||
PrimRef* end = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!!
|
||||
size_t bytes = (size_t)end - (size_t)begin;
|
||||
allocator->addBlock(begin,bytes);
|
||||
}
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
FastAllocator* const allocator;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_x = lower_y = lower_z = pos_inf;
|
||||
upper_x = upper_y = upper_z = neg_inf;
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const BBox3fa& bounds)
|
||||
{
|
||||
assert(i < N);
|
||||
lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
|
||||
upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
|
||||
setBounds(i,bounds);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns bounds of node. */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
const Vec3fa lower(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z));
|
||||
const Vec3fa upper(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]);
|
||||
const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]);
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extend(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
/*! Returns bounds of all children (implemented later as specializations) */
|
||||
__forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;
|
||||
|
||||
/*! swap two children of the node */
|
||||
__forceinline void swap(size_t i, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(children[i],children[j]);
|
||||
std::swap(lower_x[i],lower_x[j]);
|
||||
std::swap(lower_y[i],lower_y[j]);
|
||||
std::swap(lower_z[i],lower_z[j]);
|
||||
std::swap(upper_x[i],upper_x[j]);
|
||||
std::swap(upper_y[i],upper_y[j]);
|
||||
std::swap(upper_z[i],upper_z[j]);
|
||||
}
|
||||
|
||||
/*! swap the children of two nodes */
|
||||
__forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(a->children[i],b->children[j]);
|
||||
std::swap(a->lower_x[i],b->lower_x[j]);
|
||||
std::swap(a->lower_y[i],b->lower_y[j]);
|
||||
std::swap(a->lower_z[i],b->lower_z[j]);
|
||||
std::swap(a->upper_x[i],b->upper_x[j]);
|
||||
std::swap(a->upper_y[i],b->upper_y[j]);
|
||||
std::swap(a->upper_z[i],b->upper_z[j]);
|
||||
}
|
||||
|
||||
/*! compacts a node (moves empty children to the end) */
|
||||
__forceinline static void compact(AABBNode_t* a)
|
||||
{
|
||||
/* find right most filled node */
|
||||
ssize_t j=N;
|
||||
for (j=j-1; j>=0; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
|
||||
/* replace empty nodes with filled nodes */
|
||||
for (ssize_t i=0; i<j; i++) {
|
||||
if (a->child(i) == NodeRef::emptyNode) {
|
||||
a->swap(i,j);
|
||||
for (j=j-1; j>i; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! output operator */
|
||||
friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
|
||||
{
|
||||
o << "AABBNode { " << embree_endl;
|
||||
o << " lower_x " << n.lower_x << embree_endl;
|
||||
o << " upper_x " << n.upper_x << embree_endl;
|
||||
o << " lower_y " << n.lower_y << embree_endl;
|
||||
o << " upper_y " << n.upper_y << embree_endl;
|
||||
o << " lower_z " << n.lower_z << embree_endl;
|
||||
o << " upper_z " << n.upper_z << embree_endl;
|
||||
o << " children = ";
|
||||
for (size_t i=0; i<N; i++) o << n.children[i] << " ";
|
||||
o << embree_endl;
|
||||
o << "}" << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
|
||||
};
|
||||
|
||||
template<>
|
||||
__forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
|
||||
transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
|
||||
transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
|
||||
}
|
||||
}
|
||||
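The AABBNode_t above keeps the per-child boxes in structure-of-arrays form (one vfloat<N> lane per coordinate), which is what lets a traversal kernel test one ray against all N children with a handful of SIMD comparisons. The following is only an illustrative scalar sketch of that layout, assuming N = 4 and using hypothetical toy names in place of the SIMD types:

#include <cassert>
#include <cfloat>
#include <cstdio>

// Illustrative stand-in for AABBNode_t<NodeRef,4>: bounds stored per axis,
// one array entry per child (structure-of-arrays), cleared to an empty box
// (lower = +inf, upper = -inf) just like AABBNode_t::clear().
struct ToyAABBNode4 {
  float lower_x[4], lower_y[4], lower_z[4];
  float upper_x[4], upper_y[4], upper_z[4];

  void clear() {
    for (int i = 0; i < 4; i++) {
      lower_x[i] = lower_y[i] = lower_z[i] = +FLT_MAX;
      upper_x[i] = upper_y[i] = upper_z[i] = -FLT_MAX;
    }
  }

  // Mirrors AABBNode_t::setBounds: scatter one child's box into the SoA lanes.
  void setBounds(int i, const float lo[3], const float hi[3]) {
    assert(i < 4);
    lower_x[i] = lo[0]; lower_y[i] = lo[1]; lower_z[i] = lo[2];
    upper_x[i] = hi[0]; upper_y[i] = hi[1]; upper_z[i] = hi[2];
  }
};

int main() {
  ToyAABBNode4 node;
  node.clear();
  const float lo[3] = {0.f, 0.f, 0.f}, hi[3] = {1.f, 2.f, 3.f};
  node.setBounds(0, lo, hi);
  // A traversal kernel would now compare a ray against lower_*/upper_* of all
  // four children at once, one SIMD comparison per axis.
  std::printf("child0 upper = (%g, %g, %g)\n",
              node.upper_x[0], node.upper_y[0], node.upper_z[0]);
  return 0;
}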
255
thirdparty/embree/kernels/bvh/bvh_node_aabb_mb.h
vendored
Normal file
@@ -0,0 +1,255 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Motion Blur AABBNode */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
struct Create
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i].ref == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNodeMB_t* node = ref.getAABBNodeMB();
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
node->setRef(i,children[i].ref);
|
||||
node->setBounds(i,children[i].lbounds);
|
||||
bounds.extend(children[i].lbounds);
|
||||
}
|
||||
return NodeRecordMB(ref,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
struct SetTimeRange
|
||||
{
|
||||
__forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}
|
||||
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
|
||||
{
|
||||
AABBNodeMB_t* node = ref.getAABBNodeMB();
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
node->setRef(i, children[i].ref);
|
||||
node->setBounds(i, children[i].lbounds, tbounds);
|
||||
bounds.extend(children[i].lbounds);
|
||||
}
|
||||
return NodeRecordMB(ref,bounds);
|
||||
}
|
||||
|
||||
BBox1f tbounds;
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
|
||||
upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
|
||||
lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
|
||||
upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, NodeRef ref) {
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
|
||||
{
|
||||
/*! for empty bounds we have to avoid inf-inf=nan */
|
||||
BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
|
||||
BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));
|
||||
bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
|
||||
bounds1 = bounds1.enlarge_by(4.0f*float(ulp));
|
||||
Vec3fa dlower = bounds1.lower-bounds0.lower;
|
||||
Vec3fa dupper = bounds1.upper-bounds0.upper;
|
||||
|
||||
lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
|
||||
upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
|
||||
|
||||
lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
|
||||
upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
|
||||
setBounds(i, bounds.bounds0, bounds.bounds1);
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
|
||||
setBounds(i, bounds.global(tbounds));
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
|
||||
lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
|
||||
upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRecordMB4D& child)
|
||||
{
|
||||
setRef(i, child.ref);
|
||||
setBounds(i, child.lbounds, child.dt);
|
||||
}
|
||||
|
||||
/*! Return bounding box for time 0 */
|
||||
__forceinline BBox3fa bounds0(size_t i) const {
|
||||
return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
|
||||
Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
|
||||
}
|
||||
|
||||
/*! Return bounding box for time 1 */
|
||||
__forceinline BBox3fa bounds1(size_t i) const {
|
||||
return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
|
||||
Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
|
||||
}
|
||||
|
||||
/*! Returns bounds of node. */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
|
||||
reduce_min(min(lower_y,lower_y+lower_dy)),
|
||||
reduce_min(min(lower_z,lower_z+lower_dz))),
|
||||
Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
|
||||
reduce_max(max(upper_y,upper_y+upper_dy)),
|
||||
reduce_max(max(upper_z,upper_z+upper_dz))));
|
||||
}
|
||||
|
||||
/*! Return bounding box of child i */
|
||||
__forceinline BBox3fa bounds(size_t i) const {
|
||||
return merge(bounds0(i),bounds1(i));
|
||||
}
|
||||
|
||||
/*! Return linear bounding box of child i */
|
||||
__forceinline LBBox3fa lbounds(size_t i) const {
|
||||
return LBBox3fa(bounds0(i),bounds1(i));
|
||||
}
|
||||
|
||||
/*! Return bounding box of child i at specified time */
|
||||
__forceinline BBox3fa bounds(size_t i, float time) const {
|
||||
return lerp(bounds0(i),bounds1(i),time);
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i) const {
|
||||
return lbounds(i).expectedHalfArea();
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
|
||||
return lbounds(i).expectedHalfArea(t0t1);
|
||||
}
|
||||
|
||||
/*! swap two children of the node */
|
||||
__forceinline void swap(size_t i, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(children[i],children[j]);
|
||||
|
||||
std::swap(lower_x[i],lower_x[j]);
|
||||
std::swap(upper_x[i],upper_x[j]);
|
||||
std::swap(lower_y[i],lower_y[j]);
|
||||
std::swap(upper_y[i],upper_y[j]);
|
||||
std::swap(lower_z[i],lower_z[j]);
|
||||
std::swap(upper_z[i],upper_z[j]);
|
||||
|
||||
std::swap(lower_dx[i],lower_dx[j]);
|
||||
std::swap(upper_dx[i],upper_dx[j]);
|
||||
std::swap(lower_dy[i],lower_dy[j]);
|
||||
std::swap(upper_dy[i],upper_dy[j]);
|
||||
std::swap(lower_dz[i],lower_dz[j]);
|
||||
std::swap(upper_dz[i],upper_dz[j]);
|
||||
}
|
||||
|
||||
/*! compacts a node (moves empty children to the end) */
|
||||
__forceinline static void compact(AABBNodeMB_t* a)
|
||||
{
|
||||
/* find right most filled node */
|
||||
ssize_t j=N;
|
||||
for (j=j-1; j>=0; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
|
||||
/* replace empty nodes with filled nodes */
|
||||
for (ssize_t i=0; i<j; i++) {
|
||||
if (a->child(i) == NodeRef::emptyNode) {
|
||||
a->swap(i,j);
|
||||
for (j=j-1; j>i; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! stream output operator */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
|
||||
{
|
||||
cout << "AABBNodeMB {" << embree_endl;
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const BBox3fa b0 = n.bounds0(i);
|
||||
const BBox3fa b1 = n.bounds1(i);
|
||||
cout << " child" << i << " { " << embree_endl;
|
||||
cout << " bounds0 = " << b0 << ", " << embree_endl;
|
||||
cout << " bounds1 = " << b1 << ", " << embree_endl;
|
||||
cout << " }";
|
||||
}
|
||||
cout << "}";
|
||||
return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
|
||||
|
||||
vfloat<N> lower_dx; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dx; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_dy; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dy; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_dz; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dz; //!< Z dimension of upper bounds of all N children.
|
||||
};
|
||||
}
|
||||
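AABBNodeMB_t above stores each child's bounds at time 0 plus per-axis deltas to time 1, and bounds(i, time) reconstructs the box at any shutter time by linear interpolation. A minimal sketch of that reconstruction, reduced to one axis of one child and using hypothetical toy names:

#include <cstdio>

// One axis of one child of a toy motion-blur node: bounds at time 0 plus the
// delta to time 1, the same pairing as lower_x/lower_dx above.
struct ToyMBInterval {
  float lower0, upper0;   // bounds at t = 0
  float dlower, dupper;   // bounds1 - bounds0

  // Mirrors AABBNodeMB_t::bounds(i, time): lerp between the two endpoints.
  void at(float time, float& lower, float& upper) const {
    const float lower1 = lower0 + dlower;
    const float upper1 = upper0 + dupper;
    lower = (1.0f - time) * lower0 + time * lower1;
    upper = (1.0f - time) * upper0 + time * upper1;
  }
};

int main() {
  const ToyMBInterval x = {0.0f, 1.0f, 2.0f, 2.0f}; // box slides by +2 over the shutter
  float lo, hi;
  x.at(0.5f, lo, hi);                               // expect [1, 2] at mid-shutter
  std::printf("x-bounds at t=0.5: [%g, %g]\n", lo, hi);
  return 0;
}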
115
thirdparty/embree/kernels/bvh/bvh_node_aabb_mb4d.h
vendored
Normal file
@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_aabb_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Aligned 4D Motion Blur Node */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
using AABBNodeMB_t<NodeRef,N>::set;
|
||||
|
||||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
struct Create
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
|
||||
{
|
||||
if (hasTimeSplits)
|
||||
{
|
||||
AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
else
|
||||
{
|
||||
AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i].ref == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
if (likely(ref.isAABBNodeMB())) {
|
||||
for (size_t i=0; i<num; i++)
|
||||
ref.getAABBNodeMB()->set(i, children[i]);
|
||||
} else {
|
||||
for (size_t i=0; i<num; i++)
|
||||
ref.getAABBNodeMB4D()->set(i, children[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_t = vfloat<N>(pos_inf);
|
||||
upper_t = vfloat<N>(neg_inf);
|
||||
AABBNodeMB_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
|
||||
{
|
||||
AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
|
||||
lower_t[i] = tbounds.lower;
|
||||
upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRecordMB4D& child) {
|
||||
AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
|
||||
setBounds(i, child.lbounds, child.dt);
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i) const {
|
||||
return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i));
|
||||
}
|
||||
|
||||
/*! returns time range for specified child */
|
||||
__forceinline BBox1f timeRange(size_t i) const {
|
||||
return BBox1f(lower_t[i],upper_t[i]);
|
||||
}
|
||||
|
||||
/*! stream output operator */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
|
||||
{
|
||||
cout << "AABBNodeMB4D {" << embree_endl;
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const BBox3fa b0 = n.bounds0(i);
|
||||
const BBox3fa b1 = n.bounds1(i);
|
||||
cout << " child" << i << " { " << embree_endl;
|
||||
cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl;
|
||||
cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl;
|
||||
cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl;
|
||||
cout << " }";
|
||||
}
|
||||
cout << "}";
|
||||
return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
|
||||
vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
|
||||
};
|
||||
}
|
||||
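The 4D variant above additionally stores a per-child time window [lower_t, upper_t]; a child is only relevant when the ray time falls inside it, and setBounds nudges upper_t slightly past 1.0 so a ray at time exactly 1.0 still reaches the last segment. A hedged sketch of that containment test, with hypothetical names:

#include <cstdio>

// Per-child time window of a toy 4D motion-blur node, matching lower_t/upper_t.
struct ToyTimeRange {
  float lower_t, upper_t;

  // A traversal would skip the child when the ray time lies outside the window.
  bool active(float rayTime) const {
    return lower_t <= rayTime && rayTime < upper_t;
  }
};

int main() {
  // upper_t stored as 1.0 plus a small epsilon so rayTime == 1.0 is still inside,
  // mirroring the correction applied in setBounds above.
  const ToyTimeRange r = {0.5f, 1.0f + 1e-6f};
  std::printf("t=0.25 active: %d, t=1.0 active: %d\n", r.active(0.25f), r.active(1.0f));
  return 0;
}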
43
thirdparty/embree/kernels/bvh/bvh_node_base.h
vendored
Normal file
@@ -0,0 +1,43 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh_node_ref.h"

namespace embree
{

  /*! BVHN Base Node */
  template<typename NodeRef, int N>
  struct BaseNode_t
  {
    /*! Clears the node. */
    __forceinline void clear()
    {
      for (size_t i=0; i<N; i++)
        children[i] = NodeRef::emptyNode;
    }

    /*! Returns reference to specified child */
    __forceinline       NodeRef& child(size_t i)       { assert(i<N); return children[i]; }
    __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

    /*! verifies the node */
    __forceinline bool verify() const
    {
      for (size_t i=0; i<N; i++) {
        if (child(i) == NodeRef::emptyNode) {
          for (; i<N; i++) {
            if (child(i) != NodeRef::emptyNode)
              return false;
          }
          break;
        }
      }
      return true;
    }

    NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf)
  };
}
|
||||
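BaseNode_t::verify() above encodes the invariant that the builder Set functors also assert in DEBUG builds: once an empty child appears, every following slot must be empty too, so traversal and compaction can stop at the first empty slot. A small sketch of the same check on a plain array, using hypothetical names and 0 in place of NodeRef::emptyNode:

#include <cstdio>

// 0 stands in for NodeRef::emptyNode in this toy check.
static bool emptyChildrenTrail(const unsigned children[], int n) {
  bool sawEmpty = false;
  for (int i = 0; i < n; i++) {
    if (children[i] == 0) sawEmpty = true;
    else if (sawEmpty)    return false;  // a filled slot after an empty one
  }
  return true;
}

int main() {
  const unsigned good[4] = {7, 9, 0, 0};
  const unsigned bad[4]  = {7, 0, 9, 0};
  std::printf("good: %d, bad: %d\n", emptyChildrenTrail(good, 4), emptyChildrenTrail(bad, 4));
  return 0;
}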
98
thirdparty/embree/kernels/bvh/bvh_node_obb.h
vendored
Normal file
@@ -0,0 +1,98 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Node with unaligned bounds */
|
||||
template<typename NodeRef, int N>
|
||||
struct OBBNode_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
|
||||
node.ungetAABBNode()->setRef(i,child);
|
||||
node.ungetAABBNode()->setBounds(i,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear()
|
||||
{
|
||||
naabb.l.vx = Vec3fa(nan);
|
||||
naabb.l.vy = Vec3fa(nan);
|
||||
naabb.l.vz = Vec3fa(nan);
|
||||
naabb.p = Vec3fa(nan);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box. */
|
||||
__forceinline void setBounds(size_t i, const OBBox3fa& b)
|
||||
{
|
||||
assert(i < N);
|
||||
|
||||
AffineSpace3fa space = b.space;
|
||||
space.p -= b.bounds.lower;
|
||||
space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space;
|
||||
|
||||
naabb.l.vx.x[i] = space.l.vx.x;
|
||||
naabb.l.vx.y[i] = space.l.vx.y;
|
||||
naabb.l.vx.z[i] = space.l.vx.z;
|
||||
|
||||
naabb.l.vy.x[i] = space.l.vy.x;
|
||||
naabb.l.vy.y[i] = space.l.vy.y;
|
||||
naabb.l.vy.z[i] = space.l.vy.z;
|
||||
|
||||
naabb.l.vz.x[i] = space.l.vz.x;
|
||||
naabb.l.vz.y[i] = space.l.vz.y;
|
||||
naabb.l.vz.z[i] = space.l.vz.z;
|
||||
|
||||
naabb.p.x[i] = space.p.x;
|
||||
naabb.p.y[i] = space.p.y;
|
||||
naabb.p.z[i] = space.p.z;
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns the extent of the bounds of the ith child */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
assert(i<N);
|
||||
const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
|
||||
const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
|
||||
const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
|
||||
return rsqrt(vx*vx + vy*vy + vz*vz);
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! output operator */
|
||||
friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
|
||||
{
|
||||
o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
public:
|
||||
AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
|
||||
};
|
||||
}
|
||||
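OBBNode_t::setBounds above folds each child's oriented box into the stored affine space: the translation is shifted by the box's lower corner and the axes are rescaled by the reciprocal extent, so inside that space the child box becomes the unit cube [0,1]^3 and traversal only ever clips against unit bounds. The following is a one-dimensional, axis-aligned sketch of that normalization with hypothetical names; it is not embree's API:

#include <cstdio>

// Toy 1D version of the normalization in OBBNode_t::setBounds: map the child
// interval [lower, upper] onto [0, 1] so intersection only tests the unit interval.
struct ToyUnitMap {
  float scale, offset;

  static ToyUnitMap fromInterval(float lower, float upper) {
    const float extent = (upper - lower) > 1e-19f ? (upper - lower) : 1e-19f; // guard, like max(Vec3fa(1E-19f), ...)
    return {1.0f / extent, -lower / extent};
  }

  float toUnit(float x) const { return x * scale + offset; }
};

int main() {
  const ToyUnitMap m = ToyUnitMap::fromInterval(2.0f, 6.0f);
  // 2 maps to 0, 6 maps to 1; a point lies inside the child iff its image is in [0,1].
  std::printf("%g %g %g\n", m.toUnit(2.0f), m.toUnit(4.0f), m.toUnit(6.0f));
  return 0;
}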
90
thirdparty/embree/kernels/bvh/bvh_node_obb_mb.h
vendored
Normal file
@@ -0,0 +1,90 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename NodeRef, int N>
|
||||
struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const {
|
||||
node.ungetAABBNodeMB()->setRef(i,child);
|
||||
node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt));
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear()
|
||||
{
|
||||
space0 = one;
|
||||
//b0.lower = b0.upper = Vec3fa(nan);
|
||||
b1.lower = b1.upper = Vec3fa(nan);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets space and bounding boxes. */
|
||||
__forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
|
||||
setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
|
||||
}
|
||||
|
||||
/*! Sets space and bounding boxes. */
|
||||
__forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
|
||||
{
|
||||
assert(i < N);
|
||||
|
||||
AffineSpace3fa space = s0;
|
||||
space.p -= a.lower;
|
||||
Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower);
|
||||
space = AffineSpace3fa::scale(scale)*space;
|
||||
BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale);
|
||||
BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);
|
||||
|
||||
space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
|
||||
space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
|
||||
space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
|
||||
space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z;
|
||||
|
||||
/*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z;
|
||||
b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/
|
||||
|
||||
b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
|
||||
b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns the extent of the bounds of the ith child */
|
||||
__forceinline Vec3fa extent0(size_t i) const {
|
||||
assert(i < N);
|
||||
const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
|
||||
const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
|
||||
const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
|
||||
return rsqrt(vx*vx + vy*vy + vz*vz);
|
||||
}
|
||||
|
||||
public:
|
||||
AffineSpace3vf<N> space0;
|
||||
//BBox3vf<N> b0; // these are the unit bounds
|
||||
BBox3vf<N> b1;
|
||||
};
|
||||
}
|
||||
273
thirdparty/embree/kernels/bvh/bvh_node_qaabb.h
vendored
Normal file
@@ -0,0 +1,273 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVHN Quantized Node */
|
||||
template<int N>
|
||||
struct __aligned(8) QuantizedBaseNode_t
|
||||
{
|
||||
typedef unsigned char T;
|
||||
static const T MIN_QUAN = 0;
|
||||
static const T MAX_QUAN = 255;
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
|
||||
for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
|
||||
madd(scale.y,(float)lower_y[i],start.y),
|
||||
madd(scale.z,(float)lower_z[i],start.z));
|
||||
const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
|
||||
madd(scale.y,(float)upper_y[i],start.y),
|
||||
madd(scale.z,(float)upper_z[i],start.z));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
static __forceinline void init_dim(const vfloat<N> &lower,
|
||||
const vfloat<N> &upper,
|
||||
T lower_quant[N],
|
||||
T upper_quant[N],
|
||||
float &start,
|
||||
float &scale)
|
||||
{
|
||||
/* quantize bounds */
|
||||
const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
|
||||
const float minF = reduce_min(lower);
|
||||
const float maxF = reduce_max(upper);
|
||||
float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
|
||||
float decode_scale = diff / float(MAX_QUAN);
|
||||
if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
|
||||
assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
|
||||
const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
|
||||
vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
|
||||
vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
|
||||
|
||||
/* lower/upper correction */
|
||||
vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
|
||||
vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
|
||||
ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
|
||||
iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
|
||||
|
||||
/* disable invalid lanes */
|
||||
ilower = select(m_valid,ilower,MAX_QUAN);
|
||||
iupper = select(m_valid,iupper,MIN_QUAN);
|
||||
|
||||
/* store as uchar to memory */
|
||||
vint<N>::store(lower_quant,ilower);
|
||||
vint<N>::store(upper_quant,iupper);
|
||||
start = minF;
|
||||
scale = decode_scale;
|
||||
|
||||
#if defined(DEBUG)
|
||||
vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
|
||||
vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
|
||||
vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
|
||||
vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
|
||||
assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
|
||||
assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
|
||||
{
|
||||
init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
|
||||
init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
|
||||
init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
|
||||
}
|
||||
|
||||
__forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
|
||||
|
||||
#if defined(__AVX512F__) // KNL
|
||||
__forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
|
||||
#endif
|
||||
__forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
|
||||
|
||||
template <int M>
|
||||
__forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
__forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
|
||||
__forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
|
||||
__forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
|
||||
#endif
|
||||
|
||||
union {
|
||||
struct {
|
||||
T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
|
||||
T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
|
||||
T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
|
||||
T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
|
||||
T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
|
||||
T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
|
||||
};
|
||||
T all_planes[6*N];
|
||||
};
|
||||
|
||||
Vec3f start;
|
||||
Vec3f scale;
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
|
||||
{
|
||||
o << "QuantizedBaseNode { " << embree_endl;
|
||||
o << " start " << n.start << embree_endl;
|
||||
o << " scale " << n.scale << embree_endl;
|
||||
o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
|
||||
o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
|
||||
o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
|
||||
o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
|
||||
o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
|
||||
o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
|
||||
o << "}" << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename NodeRef, int N>
|
||||
struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
using QuantizedBaseNode_t<N>::lower_x;
|
||||
using QuantizedBaseNode_t<N>::upper_x;
|
||||
using QuantizedBaseNode_t<N>::lower_y;
|
||||
using QuantizedBaseNode_t<N>::upper_y;
|
||||
using QuantizedBaseNode_t<N>::lower_z;
|
||||
using QuantizedBaseNode_t<N>::upper_z;
|
||||
using QuantizedBaseNode_t<N>::start;
|
||||
using QuantizedBaseNode_t<N>::scale;
|
||||
using QuantizedBaseNode_t<N>::init_dim;
|
||||
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
struct Create2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
__aligned(64) AABBNode_t<NodeRef,N> node;
|
||||
node.clear();
|
||||
for (size_t i=0; i<n; i++) {
|
||||
node.setBounds(i,children[i].bounds());
|
||||
}
|
||||
QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
|
||||
qnode->init(node);
|
||||
|
||||
return (size_t)qnode | NodeRef::tyQuantizedNode;
|
||||
}
|
||||
};
|
||||
|
||||
struct Set2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
QuantizedNode_t* node = ref.quantizedNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
return ref;
|
||||
}
|
||||
};
|
||||
|
||||
__forceinline void init(AABBNode_t<NodeRef,N>& node)
|
||||
{
|
||||
for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;
|
||||
init_dim(node);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/*! BVHN Quantized Node */
|
||||
template<int N>
|
||||
struct __aligned(8) QuantizedBaseNodeMB_t
|
||||
{
|
||||
QuantizedBaseNode_t<N> node0;
|
||||
QuantizedBaseNode_t<N> node1;
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
node0.clear();
|
||||
node1.clear();
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
BBox3fa bounds0 = node0.bounds(i);
|
||||
BBox3fa bounds1 = node1.bounds(i);
|
||||
bounds0.extend(bounds1);
|
||||
return bounds0;
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
__forceinline vbool<N> validMask() const { return node0.validMask(); }
|
||||
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
|
||||
|
||||
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
|
||||
|
||||
};
|
||||
}
|
||||
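QuantizedBaseNode_t::init_dim above compresses each axis to 8 bits: it derives a decode scale from the extent covering all children, rounds lower bounds down and upper bounds up so the dequantized box always encloses the original, and dequantize* reverses the mapping with a fused multiply-add (q * scale + start). A scalar sketch of the same conservative round-trip, assuming one axis and hypothetical toy names:

#include <cmath>
#include <cstdio>

// Toy 1D version of the quantization performed by QuantizedBaseNode_t::init_dim.
struct ToyQuantAxis {
  unsigned char qlower, qupper;
  float start, scale;              // decode as q * scale + start

  static ToyQuantAxis encode(float lower, float upper, float minF, float maxF) {
    const float diff  = maxF - minF;
    const float scale = diff > 0.0f ? diff / 255.0f : 1.0f;
    // floor for lower, ceil for upper: the decoded box must stay conservative.
    int lo = (int)std::floor((lower - minF) / scale);
    int hi = (int)std::ceil ((upper - minF) / scale);
    if (lo < 0)   lo = 0;
    if (hi > 255) hi = 255;
    return {(unsigned char)lo, (unsigned char)hi, minF, scale};
  }

  float decodedLower() const { return qlower * scale + start; }
  float decodedUpper() const { return qupper * scale + start; }
};

int main() {
  // One child interval [1.3, 2.7] quantized against the node-wide range [0, 10].
  const ToyQuantAxis q = ToyQuantAxis::encode(1.3f, 2.7f, 0.0f, 10.0f);
  std::printf("decoded: [%g, %g] (contains [1.3, 2.7])\n",
              q.decodedLower(), q.decodedUpper());
  return 0;
}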
242
thirdparty/embree/kernels/bvh/bvh_node_ref.h
vendored
Normal file
@@ -0,0 +1,242 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "../common/alloc.h"
|
||||
#include "../common/accel.h"
|
||||
#include "../common/device.h"
|
||||
#include "../common/scene.h"
|
||||
#include "../geometry/primitive.h"
|
||||
#include "../common/ray.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* BVH node reference with bounds */
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecord
|
||||
{
|
||||
__forceinline BVHNodeRecord() {}
|
||||
__forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds) : ref(ref), bounds((BBox3fx)bounds) {}
|
||||
__forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds) : ref(ref), bounds(bounds) {}
|
||||
|
||||
NodeRef ref;
|
||||
BBox3fx bounds;
|
||||
};
|
||||
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecordMB
|
||||
{
|
||||
__forceinline BVHNodeRecordMB() {}
|
||||
__forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds) : ref(ref), lbounds(lbounds) {}
|
||||
|
||||
NodeRef ref;
|
||||
LBBox3fa lbounds;
|
||||
};
|
||||
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecordMB4D
|
||||
{
|
||||
__forceinline BVHNodeRecordMB4D() {}
|
||||
__forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt) : ref(ref), lbounds(lbounds), dt(dt) {}
|
||||
|
||||
NodeRef ref;
|
||||
LBBox3fa lbounds;
|
||||
BBox1f dt;
|
||||
};
|
||||
|
||||
template<typename NodeRef, int N> struct BaseNode_t;
|
||||
template<typename NodeRef, int N> struct AABBNode_t;
|
||||
template<typename NodeRef, int N> struct AABBNodeMB_t;
|
||||
template<typename NodeRef, int N> struct AABBNodeMB4D_t;
|
||||
template<typename NodeRef, int N> struct OBBNode_t;
|
||||
template<typename NodeRef, int N> struct OBBNodeMB_t;
|
||||
template<typename NodeRef, int N> struct QuantizedNode_t;
|
||||
template<typename NodeRef, int N> struct QuantizedNodeMB_t;
|
||||
|
||||
/*! Pointer that points to a node or a list of primitives */
|
||||
template<int N>
|
||||
struct NodeRefPtr
|
||||
{
|
||||
//template<int NN> friend class BVHN;
|
||||
|
||||
/*! Number of bytes the nodes and primitives are minimally aligned to.*/
|
||||
static const size_t byteAlignment = 16;
|
||||
static const size_t byteNodeAlignment = 4*N;
|
||||
|
||||
/*! highest address bit is used as barrier for some algorithms */
|
||||
static const size_t barrier_mask = (1LL << (8*sizeof(size_t)-1));
|
||||
|
||||
/*! Masks the bits that store the number of items per leaf. */
|
||||
static const size_t align_mask = byteAlignment-1;
|
||||
static const size_t items_mask = byteAlignment-1;
|
||||
|
||||
/*! different supported node types */
|
||||
static const size_t tyAABBNode = 0;
|
||||
static const size_t tyAABBNodeMB = 1;
|
||||
static const size_t tyAABBNodeMB4D = 6;
|
||||
static const size_t tyOBBNode = 2;
|
||||
static const size_t tyOBBNodeMB = 3;
|
||||
static const size_t tyQuantizedNode = 5;
|
||||
static const size_t tyLeaf = 8;
|
||||
|
||||
/*! Empty node */
|
||||
static const size_t emptyNode = tyLeaf;
|
||||
|
||||
/*! Invalid node, used as marker in traversal */
|
||||
static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
|
||||
static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);
|
||||
|
||||
/*! Maximum number of primitive blocks in a leaf. */
|
||||
static const size_t maxLeafBlocks = items_mask-tyLeaf;
|
||||
|
||||
/*! Default constructor */
|
||||
__forceinline NodeRefPtr () {}
|
||||
|
||||
/*! Construction from integer */
|
||||
__forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}
|
||||
|
||||
/*! Cast to size_t */
|
||||
__forceinline operator size_t() const { return ptr; }
|
||||
|
||||
/*! Sets the barrier bit. */
|
||||
__forceinline void setBarrier() {
|
||||
#if defined(__64BIT__)
|
||||
assert(!isBarrier());
|
||||
ptr |= barrier_mask;
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Clears the barrier bit. */
|
||||
__forceinline void clearBarrier() {
|
||||
#if defined(__64BIT__)
|
||||
ptr &= ~barrier_mask;
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Checks if this is a barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
|
||||
__forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }
|
||||
|
||||
/*! checks if this is a leaf */
|
||||
__forceinline size_t isLeaf() const { return ptr & tyLeaf; }
|
||||
|
||||
/*! returns node type */
|
||||
__forceinline int type() const { return ptr & (size_t)align_mask; }
|
||||
|
||||
/*! checks if this is a node */
|
||||
__forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }
|
||||
|
||||
/*! checks if this is a motion blur node */
|
||||
__forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }
|
||||
|
||||
/*! checks if this is a 4D motion blur node */
|
||||
__forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }
|
||||
|
||||
/*! checks if this is a node with unaligned bounding boxes */
|
||||
__forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }
|
||||
|
||||
/*! checks if this is a motion blur node with unaligned bounding boxes */
|
||||
__forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }
|
||||
|
||||
/*! checks if this is a quantized node */
|
||||
__forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }
|
||||
|
||||
/*! Encodes a node */
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node);
|
||||
}
|
||||
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node | tyAABBNodeMB);
|
||||
}
|
||||
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
|
||||
}
|
||||
|
||||
/*! Encodes an unaligned node */
|
||||
static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
|
||||
return NodeRefPtr((size_t) node | tyOBBNode);
|
||||
}
|
||||
|
||||
/*! Encodes an unaligned motion blur node */
|
||||
static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
|
||||
return NodeRefPtr((size_t) node | tyOBBNodeMB);
|
||||
}
|
||||
|
||||
/*! Encodes a leaf */
|
||||
static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
|
||||
assert(!((size_t)tri & align_mask));
|
||||
assert(num <= maxLeafBlocks);
|
||||
return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
|
||||
}
|
||||
|
||||
/*! Encodes a leaf */
|
||||
static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
|
||||
assert(!((size_t)ptr & align_mask));
|
||||
return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
|
||||
}
|
||||
|
||||
/*! returns base node pointer */
|
||||
__forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
|
||||
{
|
||||
assert(!isLeaf());
|
||||
return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
__forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
|
||||
{
|
||||
assert(!isLeaf());
|
||||
return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
|
||||
/*! returns node pointer */
|
||||
__forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; }
|
||||
__forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }
|
||||
|
||||
/*! returns motion blur node pointer */
|
||||
__forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns 4D motion blur node pointer */
|
||||
__forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns unaligned node pointer */
|
||||
__forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns unaligned motion blur node pointer */
|
||||
__forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns quantized node pointer */
|
||||
__forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
|
||||
__forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
|
||||
|
||||
/*! returns leaf pointer */
|
||||
__forceinline char* leaf(size_t& num) const {
|
||||
assert(isLeaf());
|
||||
num = (ptr & (size_t)items_mask)-tyLeaf;
|
||||
return (char*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
|
||||
/*! clear all bit flags */
|
||||
__forceinline void clearFlags() {
|
||||
ptr &= ~(size_t)align_mask;
|
||||
}
|
||||
|
||||
/*! returns the wideness */
|
||||
__forceinline size_t getN() const { return N; }
|
||||
|
||||
public:
|
||||
size_t ptr;
|
||||
};
|
||||
}
|
||||
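NodeRefPtr above is a tagged pointer: nodes are at least 16-byte aligned, so the low four bits are free to carry the node type (or, for leaves, tyLeaf plus the primitive-block count), while the highest address bit doubles as the tree-rotation barrier flag. A minimal sketch of that bit layout on a uintptr_t, with hypothetical names rather than embree's own API:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Toy tagged reference mirroring NodeRefPtr: low 4 bits = type/leaf count,
// highest bit = barrier flag, remaining bits = the 16-byte aligned pointer.
namespace toyref {
  constexpr std::uintptr_t kAlignMask   = 0xF;
  constexpr std::uintptr_t kTyLeaf      = 8;
  constexpr std::uintptr_t kBarrierMask = std::uintptr_t(1) << (8 * sizeof(std::uintptr_t) - 1);

  inline std::uintptr_t encodeLeaf(void* p, std::size_t numBlocks) {
    assert((reinterpret_cast<std::uintptr_t>(p) & kAlignMask) == 0); // low bits must be free
    assert(numBlocks <= kAlignMask - kTyLeaf);                       // count fits next to the leaf tag
    return reinterpret_cast<std::uintptr_t>(p) | (kTyLeaf + numBlocks);
  }
  inline bool  isLeaf(std::uintptr_t r)  { return (r & kTyLeaf) != 0; }
  inline void* pointer(std::uintptr_t r) { return reinterpret_cast<void*>(r & ~kAlignMask & ~kBarrierMask); }
}

int main() {
  void* block = std::aligned_alloc(16, 64);      // 16-byte aligned, so the low bits are zero
  std::uintptr_t ref = toyref::encodeLeaf(block, 3);
  ref |= toyref::kBarrierMask;                   // set the barrier flag in the top bit
  std::printf("isLeaf=%d blocks=%zu\n", toyref::isLeaf(ref),
              (std::size_t)((ref & toyref::kAlignMask) - toyref::kTyLeaf));
  std::free(toyref::pointer(ref));
  return 0;
}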
258
thirdparty/embree/kernels/bvh/bvh_refit.cpp
vendored
Normal file
@@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_refit.h"
|
||||
#include "bvh_statistics.h"
|
||||
|
||||
#include "../geometry/linei.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
|
||||
|
||||
template<int N>
|
||||
__forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
|
||||
{
|
||||
size_t sa = *(size_t*)&a->node()->lower_x;
|
||||
size_t sb = *(size_t*)&b->node()->lower_x;
|
||||
return sa < sb;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
|
||||
: bvh(bvh), leafBounds(leafBounds), numSubTrees(0)
|
||||
{
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNRefitter<N>::refit()
|
||||
{
|
||||
if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) {
|
||||
bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
|
||||
}
|
||||
else
|
||||
{
|
||||
BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
|
||||
numSubTrees = 0;
|
||||
gather_subtree_refs(bvh->root,numSubTrees,0);
|
||||
if (numSubTrees)
|
||||
parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
NodeRef& ref = subTrees[i];
|
||||
subTreeBounds[i] = recurse_bottom(ref);
|
||||
}
|
||||
});
|
||||
|
||||
numSubTrees = 0;
|
||||
bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const size_t depth)
|
||||
{
|
||||
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
|
||||
{
|
||||
assert(subtrees < MAX_NUM_SUB_TREES);
|
||||
subTrees[subtrees++] = ref;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ref.isAABBNode())
|
||||
{
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<N; i++) {
|
||||
NodeRef& child = node->child(i);
|
||||
if (unlikely(child == BVH::emptyNode)) continue;
|
||||
gather_subtree_refs(child,subtrees,depth+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const BBox3fa *const subTreeBounds,
|
||||
const size_t depth)
|
||||
{
|
||||
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
|
||||
{
|
||||
assert(subtrees < MAX_NUM_SUB_TREES);
|
||||
assert(subTrees[subtrees] == ref);
|
||||
return subTreeBounds[subtrees++];
|
||||
}
|
||||
|
||||
if (ref.isAABBNode())
|
||||
{
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
BBox3fa bounds[N];
|
||||
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
NodeRef& child = node->child(i);
|
||||
|
||||
if (unlikely(child == BVH::emptyNode))
|
||||
bounds[i] = BBox3fa(empty);
|
||||
else
|
||||
bounds[i] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
|
||||
}
|
||||
|
||||
BBox3vf<N> boundsT = transpose<N>(bounds);
|
||||
|
||||
/* set new bounds */
|
||||
node->lower_x = boundsT.lower.x;
|
||||
node->lower_y = boundsT.lower.y;
|
||||
node->lower_z = boundsT.lower.z;
|
||||
node->upper_x = boundsT.upper.x;
|
||||
node->upper_y = boundsT.upper.y;
|
||||
node->upper_z = boundsT.upper.z;
|
||||
|
||||
return merge<N>(bounds);
|
||||
}
|
||||
else
|
||||
return leafBounds.leafBounds(ref);
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// =========================================================
|
||||
// =========================================================
|
||||
|
||||
|
||||
template<int N>
|
||||
BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
|
||||
{
|
||||
/* this is a leaf node */
|
||||
if (unlikely(ref.isLeaf()))
|
||||
return leafBounds.leafBounds(ref);
|
||||
|
||||
/* recurse if this is an internal node */
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
|
||||
/* enable exclusive prefetch for >= AVX platforms */
|
||||
#if defined(__AVX__)
|
||||
BVH::prefetchW(ref);
|
||||
#endif
|
||||
BBox3fa bounds[N];
|
||||
|
||||
for (size_t i=0; i<N; i++)
|
||||
if (unlikely(node->child(i) == BVH::emptyNode))
|
||||
{
|
||||
bounds[i] = BBox3fa(empty);
|
||||
}
|
||||
else
|
||||
bounds[i] = recurse_bottom(node->child(i));
|
||||
|
||||
/* AOS to SOA transform */
|
||||
BBox3vf<N> boundsT = transpose<N>(bounds);
|
||||
|
||||
/* set new bounds */
|
||||
node->lower_x = boundsT.lower.x;
|
||||
node->lower_y = boundsT.lower.y;
|
||||
node->lower_z = boundsT.lower.z;
|
||||
node->upper_x = boundsT.upper.x;
|
||||
node->upper_y = boundsT.upper.y;
|
||||
node->upper_z = boundsT.upper.z;
|
||||
|
||||
return merge<N>(bounds);
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
|
||||
: bvh(bvh), builder(builder), refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)), mesh(mesh), topologyVersion(0) {}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNRefitT<N,Mesh,Primitive>::clear()
|
||||
{
|
||||
if (builder)
|
||||
builder->clear();
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNRefitT<N,Mesh,Primitive>::build()
|
||||
{
|
||||
if (mesh->topologyChanged(topologyVersion)) {
|
||||
topologyVersion = mesh->getTopologyVersion();
|
||||
builder->build();
|
||||
}
|
||||
else
|
||||
refitter->refit();
|
||||
}
|
||||
|
||||
template class BVHNRefitter<4>;
|
||||
#if defined(__AVX__)
|
||||
template class BVHNRefitter<8>;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
|
||||
Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
|
||||
Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)accel,BVH4InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)accel,BVH8InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
95
thirdparty/embree/kernels/bvh/bvh_refit.h
vendored
Normal file
@@ -0,0 +1,95 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../bvh/bvh.h"

namespace embree
{
  namespace isa
  {
    template<int N>
    class BVHNRefitter
    {
    public:

      /*! Type shortcuts */
      typedef BVHN<N> BVH;
      typedef typename BVH::AABBNode AABBNode;
      typedef typename BVH::NodeRef NodeRef;

      struct LeafBoundsInterface {
        virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
      };

    public:

      /*! Constructor. */
      BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);

      /*! refits the BVH */
      void refit();

    private:
      /* single-threaded subtree extraction based on BVH depth */
      void gather_subtree_refs(NodeRef& ref,
                               size_t &subtrees,
                               const size_t depth = 0);

      /* single-threaded top-level refit */
      BBox3fa refit_toplevel(NodeRef& ref,
                             size_t &subtrees,
                             const BBox3fa *const subTreeBounds,
                             const size_t depth = 0);

      /* single-threaded subtree refit */
      BBox3fa recurse_bottom(NodeRef& ref);

    public:
      BVH* bvh;                              //!< BVH to refit
      const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves

      static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3;
      static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
      size_t numSubTrees;
      NodeRef subTrees[MAX_NUM_SUB_TREES];
    };

    template<int N, typename Mesh, typename Primitive>
    class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
    {
    public:

      /*! Type shortcuts */
      typedef BVHN<N> BVH;
      typedef typename BVH::AABBNode AABBNode;
      typedef typename BVH::NodeRef NodeRef;

    public:
      BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);

      virtual void build();

      virtual void clear();

      virtual const BBox3fa leafBounds (NodeRef& ref) const
      {
        size_t num; char* prim = ref.leaf(num);
        if (unlikely(ref == BVH::emptyNode)) return empty;

        BBox3fa bounds = empty;
        for (size_t i=0; i<num; i++)
          bounds.extend(((Primitive*)prim)[i].update(mesh));
        return bounds;
      }

    private:
      BVH* bvh;
      std::unique_ptr<Builder> builder;
      std::unique_ptr<BVHNRefitter<N>> refitter;
      Mesh* mesh;
      unsigned int topologyVersion;
    };
  }
}
127
thirdparty/embree/kernels/bvh/bvh_rotate.cpp
vendored
Normal file
@@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh_rotate.h"

namespace embree
{
  namespace isa
  {
    /*! Computes half surface area of box. */
    __forceinline float halfArea3f(const BBox<vfloat4>& box) {
      const vfloat4 d = box.size();
      const vfloat4 a = d*shuffle<1,2,0,3>(d);
      return a[0]+a[1]+a[2];
    }

    size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
    {
      /*! nothing to rotate if we reached a leaf node. */
      if (parentRef.isBarrier()) return 0;
      if (parentRef.isLeaf()) return 0;
      AABBNode* parent = parentRef.getAABBNode();

      /*! rotate all children first */
      vint4 cdepth;
      for (size_t c=0; c<4; c++)
        cdepth[c] = (int)rotate(parent->child(c),depth+1);

      /* compute current areas of all children */
      vfloat4 sizeX = parent->upper_x-parent->lower_x;
      vfloat4 sizeY = parent->upper_y-parent->lower_y;
      vfloat4 sizeZ = parent->upper_z-parent->lower_z;
      vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);

      /*! get node bounds */
      BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
      parent->bounds(child1_0,child1_1,child1_2,child1_3);

      /*! Find best rotation. We pick a first child (child1) and a sub-child
        (child2child) of a different second child (child2), and swap child1
        and child2child. We perform the best such swap. */
      float bestArea = 0;
      size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
      for (size_t c2=0; c2<4; c2++)
      {
        /*! ignore leaf nodes as we cannot descent into them */
        if (parent->child(c2).isBarrier()) continue;
        if (parent->child(c2).isLeaf()) continue;
        AABBNode* child2 = parent->child(c2).getAABBNode();

        /*! transpose child bounds */
        BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
        child2->bounds(child2c0,child2c1,child2c2,child2c3);

        /*! put child1_0 at each child2 position */
        float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
        float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
        float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
        float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
        vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
        vfloat4 min0 = vreduce_min(cost0);
        int pos0 = (int)bsf(movemask(min0 == cost0));

        /*! put child1_1 at each child2 position */
        float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
        float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
        float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
        float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
        vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
        vfloat4 min1 = vreduce_min(cost1);
        int pos1 = (int)bsf(movemask(min1 == cost1));

        /*! put child1_2 at each child2 position */
        float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
        float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
        float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
        float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
        vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
        vfloat4 min2 = vreduce_min(cost2);
        int pos2 = (int)bsf(movemask(min2 == cost2));

        /*! put child1_3 at each child2 position */
        float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
        float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
        float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
        float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
        vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
        vfloat4 min3 = vreduce_min(cost3);
        int pos3 = (int)bsf(movemask(min3 == cost3));

        /*! find best other child */
        vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
        int pos[4] = { pos0,pos1,pos2,pos3 };
        const size_t mbd = BVH4::maxBuildDepth;
        vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
        valid &= vint4(int(c2)) != vint4(step);
        if (none(valid)) continue;
        size_t c1 = select_min(valid,area0123);
        float area = area0123[c1];
        if (c1 == c2) continue; // can happen if bounds are NANs

        /*! accept a swap when it reduces cost and is not swapping a node with itself */
        if (area < bestArea) {
          bestArea = area;
          bestChild1 = c1;
          bestChild2 = c2;
          bestChild2Child = pos[c1];
        }
      }

      /*! if we did not find a swap that improves the SAH then do nothing */
      if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);

      /*! perform the best found tree rotation */
      AABBNode* child2 = parent->child(bestChild2).getAABBNode();
      AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
      parent->setBounds(bestChild2,child2->bounds());
      AABBNode::compact(parent);
      AABBNode::compact(child2);

      /*! This returned depth is conservative as the child that was
       * pulled up in the tree could have been on the critical path. */
      cdepth[bestChild1]++; // bestChild1 was pushed down one level
      return 1+reduce_max(cdepth);
    }
  }
}
37
thirdparty/embree/kernels/bvh/bvh_rotate.h
vendored
Normal file
@@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"

namespace embree
{
  namespace isa
  {
    template<int N>
    class BVHNRotate
    {
      typedef typename BVHN<N>::NodeRef NodeRef;

    public:
      static const bool enabled = false;

      static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; }
      static __forceinline void restructure(NodeRef ref, size_t depth = 1) {}
    };

    /* BVH4 tree rotations */
    template<>
    class BVHNRotate<4>
    {
      typedef BVH4::AABBNode AABBNode;
      typedef BVH4::NodeRef NodeRef;

    public:
      static const bool enabled = true;

      static size_t rotate(NodeRef parentRef, size_t depth = 1);
    };
  }
}
165
thirdparty/embree/kernels/bvh/bvh_statistics.cpp
vendored
Normal file
@@ -0,0 +1,165 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh_statistics.h"
#include "../../common/algorithms/parallel_reduce.h"

namespace embree
{
  template<int N>
  BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
  {
    double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
    stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f));
  }

  template<int N>
  std::string BVHNStatistics<N>::str()
  {
    std::ostringstream stream;
    stream.setf(std::ios::fixed, std::ios::floatfield);
    stream << " primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl;
    size_t totalBytes = stat.bytes(bvh);
    double totalSAH = stat.sah(bvh);
    stream << " total : sah = " << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), ";
    stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), ";
    stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
    stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl;
    if (stat.statAABBNodes.numNodes ) stream << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (stat.statOBBNodes.numNodes ) stream << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (stat.statAABBNodesMB.numNodes ) stream << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (stat.statAABBNodesMB4D.numNodes) stream << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (stat.statOBBNodesMB.numNodes) stream << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (stat.statQuantizedNodes.numNodes ) stream << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (true) stream << " leaves : " << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl;
    if (true) stream << " histogram : " << stat.statLeaf.histToString() << std::endl;
    return stream.str();
  }

  template<int N>
  typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
  {
    Statistics s;
    assert(t0t1.size() > 0.0f);
    double dt = max(0.0f,t0t1.size());
    if (node.isAABBNode())
    {
      AABBNode* n = node.getAABBNode();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const double Ai = max(0.0f,halfArea(n->extend(i)));
          Statistics s = statistics(n->child(i),Ai,t0t1);
          s.statAABBNodes.numChildren++;
          return s;
        }, Statistics::add);
      s.statAABBNodes.numNodes++;
      s.statAABBNodes.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isOBBNode())
    {
      OBBNode* n = node.ungetAABBNode();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const double Ai = max(0.0f,halfArea(n->extent(i)));
          Statistics s = statistics(n->child(i),Ai,t0t1);
          s.statOBBNodes.numChildren++;
          return s;
        }, Statistics::add);
      s.statOBBNodes.numNodes++;
      s.statOBBNodes.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isAABBNodeMB())
    {
      AABBNodeMB* n = node.getAABBNodeMB();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
          Statistics s = statistics(n->child(i),Ai,t0t1);
          s.statAABBNodesMB.numChildren++;
          return s;
        }, Statistics::add);
      s.statAABBNodesMB.numNodes++;
      s.statAABBNodesMB.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isAABBNodeMB4D())
    {
      AABBNodeMB4D* n = node.getAABBNodeMB4D();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
          assert(!t0t1i.empty());
          const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
          Statistics s = statistics(n->child(i),Ai,t0t1i);
          s.statAABBNodesMB4D.numChildren++;
          return s;
        }, Statistics::add);
      s.statAABBNodesMB4D.numNodes++;
      s.statAABBNodesMB4D.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isOBBNodeMB())
    {
      OBBNodeMB* n = node.ungetAABBNodeMB();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const double Ai = max(0.0f,halfArea(n->extent0(i)));
          Statistics s = statistics(n->child(i),Ai,t0t1);
          s.statOBBNodesMB.numChildren++;
          return s;
        }, Statistics::add);
      s.statOBBNodesMB.numNodes++;
      s.statOBBNodesMB.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isQuantizedNode())
    {
      QuantizedNode* n = node.quantizedNode();
      s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
          if (n->child(i) == BVH::emptyNode) return Statistics();
          const double Ai = max(0.0f,halfArea(n->extent(i)));
          Statistics s = statistics(n->child(i),Ai,t0t1);
          s.statQuantizedNodes.numChildren++;
          return s;
        }, Statistics::add);
      s.statQuantizedNodes.numNodes++;
      s.statQuantizedNodes.nodeSAH += dt*A;
      s.depth++;
    }
    else if (node.isLeaf())
    {
      size_t num; const char* tri = node.leaf(num);
      if (num)
      {
        for (size_t i=0; i<num; i++)
        {
          const size_t bytes = bvh->primTy->getBytes(tri);
          s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
          s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
          s.statLeaf.numBytes += bytes;
          tri+=bytes;
        }
        s.statLeaf.numLeaves++;
        s.statLeaf.numPrimBlocks += num;
        s.statLeaf.leafSAH += dt*A*num;
        if (num-1 < Statistics::LeafStat::NHIST) {
          s.statLeaf.numPrimBlocksHistogram[num-1]++;
        }
      }
    }
    else {
      abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
    }
    return s;
  }

#if defined(__AVX__)
  template class BVHNStatistics<8>;
#endif

#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
  template class BVHNStatistics<4>;
#endif
}
285
thirdparty/embree/kernels/bvh/bvh_statistics.h
vendored
Normal file
@@ -0,0 +1,285 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include <sstream>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<int N>
|
||||
class BVHNStatistics
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::OBBNode OBBNode;
|
||||
typedef typename BVH::AABBNodeMB AABBNodeMB;
|
||||
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
|
||||
typedef typename BVH::OBBNodeMB OBBNodeMB;
|
||||
typedef typename BVH::QuantizedNode QuantizedNode;
|
||||
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
struct Statistics
|
||||
{
|
||||
template<typename Node>
|
||||
struct NodeStat
|
||||
{
|
||||
NodeStat ( double nodeSAH = 0,
|
||||
size_t numNodes = 0,
|
||||
size_t numChildren = 0)
|
||||
: nodeSAH(nodeSAH),
|
||||
numNodes(numNodes),
|
||||
numChildren(numChildren) {}
|
||||
|
||||
double sah(BVH* bvh) const {
|
||||
return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
|
||||
}
|
||||
|
||||
size_t bytes() const {
|
||||
return numNodes*sizeof(Node);
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return numNodes;
|
||||
}
|
||||
|
||||
double fillRateNom () const { return double(numChildren); }
|
||||
double fillRateDen () const { return double(numNodes*N); }
|
||||
double fillRate () const { return fillRateNom()/fillRateDen(); }
|
||||
|
||||
__forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
|
||||
{
|
||||
return NodeStat(a.nodeSAH + b.nodeSAH,
|
||||
a.numNodes+b.numNodes,
|
||||
a.numChildren+b.numChildren);
|
||||
}
|
||||
|
||||
std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
|
||||
stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
|
||||
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB ";
|
||||
stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), ";
|
||||
stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
|
||||
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives);
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
public:
|
||||
double nodeSAH;
|
||||
size_t numNodes;
|
||||
size_t numChildren;
|
||||
};
|
||||
|
||||
struct LeafStat
|
||||
{
|
||||
static const int NHIST = 8;
|
||||
|
||||
LeafStat ( double leafSAH = 0.0f,
|
||||
size_t numLeaves = 0,
|
||||
size_t numPrimsActive = 0,
|
||||
size_t numPrimsTotal = 0,
|
||||
size_t numPrimBlocks = 0,
|
||||
size_t numBytes = 0)
|
||||
: leafSAH(leafSAH),
|
||||
numLeaves(numLeaves),
|
||||
numPrimsActive(numPrimsActive),
|
||||
numPrimsTotal(numPrimsTotal),
|
||||
numPrimBlocks(numPrimBlocks),
|
||||
numBytes(numBytes)
|
||||
{
|
||||
for (size_t i=0; i<NHIST; i++)
|
||||
numPrimBlocksHistogram[i] = 0;
|
||||
}
|
||||
|
||||
double sah(BVH* bvh) const {
|
||||
return leafSAH/bvh->getLinearBounds().expectedHalfArea();
|
||||
}
|
||||
|
||||
size_t bytes(BVH* bvh) const {
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return numLeaves;
|
||||
}
|
||||
|
||||
double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
|
||||
double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
|
||||
double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }
|
||||
|
||||
__forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
|
||||
{
|
||||
LeafStat stat(a.leafSAH + b.leafSAH,
|
||||
a.numLeaves+b.numLeaves,
|
||||
a.numPrimsActive+b.numPrimsActive,
|
||||
a.numPrimsTotal+b.numPrimsTotal,
|
||||
a.numPrimBlocks+b.numPrimBlocks,
|
||||
a.numBytes+b.numBytes);
|
||||
for (size_t i=0; i<NHIST; i++) {
|
||||
stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
|
||||
stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
|
||||
}
|
||||
return stat;
|
||||
}
|
||||
|
||||
std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
|
||||
stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
|
||||
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
|
||||
stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
|
||||
stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
|
||||
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
std::string histToString() const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
for (size_t i=0; i<NHIST; i++)
|
||||
stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
public:
|
||||
double leafSAH; //!< SAH of the leaves only
|
||||
size_t numLeaves; //!< Number of leaf nodes.
|
||||
size_t numPrimsActive; //!< Number of active primitives (
|
||||
size_t numPrimsTotal; //!< Number of active and inactive primitives
|
||||
size_t numPrimBlocks; //!< Number of primitive blocks.
|
||||
size_t numBytes; //!< Number of bytes of leaves.
|
||||
size_t numPrimBlocksHistogram[8];
|
||||
};
|
||||
|
||||
public:
|
||||
Statistics (size_t depth = 0,
|
||||
LeafStat statLeaf = LeafStat(),
|
||||
NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
|
||||
NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
|
||||
NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
|
||||
NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
|
||||
NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
|
||||
NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
|
||||
|
||||
: depth(depth),
|
||||
statLeaf(statLeaf),
|
||||
statAABBNodes(statAABBNodes),
|
||||
statOBBNodes(statOBBNodes),
|
||||
statAABBNodesMB(statAABBNodesMB),
|
||||
statAABBNodesMB4D(statAABBNodesMB4D),
|
||||
statOBBNodesMB(statOBBNodesMB),
|
||||
statQuantizedNodes(statQuantizedNodes) {}
|
||||
|
||||
double sah(BVH* bvh) const
|
||||
{
|
||||
return statLeaf.sah(bvh) +
|
||||
statAABBNodes.sah(bvh) +
|
||||
statOBBNodes.sah(bvh) +
|
||||
statAABBNodesMB.sah(bvh) +
|
||||
statAABBNodesMB4D.sah(bvh) +
|
||||
statOBBNodesMB.sah(bvh) +
|
||||
statQuantizedNodes.sah(bvh);
|
||||
}
|
||||
|
||||
size_t bytes(BVH* bvh) const {
|
||||
return statLeaf.bytes(bvh) +
|
||||
statAABBNodes.bytes() +
|
||||
statOBBNodes.bytes() +
|
||||
statAABBNodesMB.bytes() +
|
||||
statAABBNodesMB4D.bytes() +
|
||||
statOBBNodesMB.bytes() +
|
||||
statQuantizedNodes.bytes();
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return statLeaf.size() +
|
||||
statAABBNodes.size() +
|
||||
statOBBNodes.size() +
|
||||
statAABBNodesMB.size() +
|
||||
statAABBNodesMB4D.size() +
|
||||
statOBBNodesMB.size() +
|
||||
statQuantizedNodes.size();
|
||||
}
|
||||
|
||||
double fillRate (BVH* bvh) const
|
||||
{
|
||||
double nom = statLeaf.fillRateNom(bvh) +
|
||||
statAABBNodes.fillRateNom() +
|
||||
statOBBNodes.fillRateNom() +
|
||||
statAABBNodesMB.fillRateNom() +
|
||||
statAABBNodesMB4D.fillRateNom() +
|
||||
statOBBNodesMB.fillRateNom() +
|
||||
statQuantizedNodes.fillRateNom();
|
||||
double den = statLeaf.fillRateDen(bvh) +
|
||||
statAABBNodes.fillRateDen() +
|
||||
statOBBNodes.fillRateDen() +
|
||||
statAABBNodesMB.fillRateDen() +
|
||||
statAABBNodesMB4D.fillRateDen() +
|
||||
statOBBNodesMB.fillRateDen() +
|
||||
statQuantizedNodes.fillRateDen();
|
||||
return nom/den;
|
||||
}
|
||||
|
||||
friend Statistics operator+ ( const Statistics& a, const Statistics& b )
|
||||
{
|
||||
return Statistics(max(a.depth,b.depth),
|
||||
a.statLeaf + b.statLeaf,
|
||||
a.statAABBNodes + b.statAABBNodes,
|
||||
a.statOBBNodes + b.statOBBNodes,
|
||||
a.statAABBNodesMB + b.statAABBNodesMB,
|
||||
a.statAABBNodesMB4D + b.statAABBNodesMB4D,
|
||||
a.statOBBNodesMB + b.statOBBNodesMB,
|
||||
a.statQuantizedNodes + b.statQuantizedNodes);
|
||||
}
|
||||
|
||||
static Statistics add ( const Statistics& a, const Statistics& b ) {
|
||||
return a+b;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t depth;
|
||||
LeafStat statLeaf;
|
||||
NodeStat<AABBNode> statAABBNodes;
|
||||
NodeStat<OBBNode> statOBBNodes;
|
||||
NodeStat<AABBNodeMB> statAABBNodesMB;
|
||||
NodeStat<AABBNodeMB4D> statAABBNodesMB4D;
|
||||
NodeStat<OBBNodeMB> statOBBNodesMB;
|
||||
NodeStat<QuantizedNode> statQuantizedNodes;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/* Constructor gathers statistics. */
|
||||
BVHNStatistics (BVH* bvh);
|
||||
|
||||
/*! Convert statistics into a string */
|
||||
std::string str();
|
||||
|
||||
double sah() const {
|
||||
return stat.sah(bvh);
|
||||
}
|
||||
|
||||
size_t bytesUsed() const {
|
||||
return stat.bytes(bvh);
|
||||
}
|
||||
|
||||
private:
|
||||
Statistics statistics(NodeRef node, const double A, const BBox1f dt);
|
||||
|
||||
private:
|
||||
BVH* bvh;
|
||||
Statistics stat;
|
||||
};
|
||||
|
||||
typedef BVHNStatistics<4> BVH4Statistics;
|
||||
typedef BVHNStatistics<8> BVH8Statistics;
|
||||
}
|
||||
466
thirdparty/embree/kernels/bvh/bvh_traverser1.h
vendored
Normal file
@@ -0,0 +1,466 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "../common/stack_item.h"
|
||||
|
||||
#define NEW_SORTING_CODE 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! BVH regular node traversal for single rays. */
|
||||
template<int N, int types>
|
||||
class BVHNNodeTraverser1Hit;
|
||||
|
||||
#if defined(__AVX512VL__) // SKX
|
||||
|
||||
template<int N>
|
||||
__forceinline void isort_update(vint<N> &dist, const vint<N> &d)
|
||||
{
|
||||
const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
|
||||
const vboolf<N> m_geq = d >= dist;
|
||||
const vboolf<N> m_geq_shift = m_geq << 1;
|
||||
dist = select(m_geq,d,dist);
|
||||
dist = select(m_geq_shift,dist_shift,dist);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
|
||||
dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
|
||||
}
|
||||
|
||||
__forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
|
||||
return toScalar(permutex2var((__m256i)index,n0,n1));
|
||||
}
|
||||
|
||||
__forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
|
||||
return toScalar(permute(n,index));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Specialization for BVH4. */
|
||||
template<int types>
|
||||
class BVHNNodeTraverser1Hit<4, types>
|
||||
{
|
||||
typedef BVH4 BVH;
|
||||
typedef BVH4::NodeRef NodeRef;
|
||||
typedef BVH4::BaseNode BaseNode;
|
||||
|
||||
|
||||
public:
|
||||
/* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */
|
||||
static __forceinline void traverseClosestHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat4& tNear,
|
||||
StackItemT<NodeRef>*& stackPtr,
|
||||
StackItemT<NodeRef>* stackEnd)
|
||||
{
|
||||
assert(mask != 0);
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
if (likely(mask == 0)) {
|
||||
assert(cur != BVH::emptyNode);
|
||||
return;
|
||||
}
|
||||
|
||||
/*! two children are hit, push far child, and continue with closer child */
|
||||
NodeRef c0 = cur;
|
||||
const unsigned int d0 = ((unsigned int*)&tNear)[r];
|
||||
r = bscf(mask);
|
||||
NodeRef c1 = node->child(r);
|
||||
BVH::prefetch(c1,types);
|
||||
const unsigned int d1 = ((unsigned int*)&tNear)[r];
|
||||
assert(c0 != BVH::emptyNode);
|
||||
assert(c1 != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
assert(stackPtr < stackEnd);
|
||||
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
|
||||
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
|
||||
}
|
||||
|
||||
#if NEW_SORTING_CODE == 1
|
||||
vint4 s0((size_t)c0,(size_t)d0);
|
||||
vint4 s1((size_t)c1,(size_t)d1);
|
||||
r = bscf(mask);
|
||||
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s2((size_t)c2,(size_t)d2);
|
||||
/* 3 hits */
|
||||
if (likely(mask == 0)) {
|
||||
StackItemT<NodeRef>::sort3(s0,s1,s2);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
|
||||
cur = toSizeT(s2);
|
||||
stackPtr+=2;
|
||||
return;
|
||||
}
|
||||
r = bscf(mask);
|
||||
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s3((size_t)c3,(size_t)d3);
|
||||
/* 4 hits */
|
||||
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
|
||||
cur = toSizeT(s3);
|
||||
stackPtr+=3;
|
||||
#else
|
||||
/*! Here starts the slow path for 3 or 4 hit children. We push
|
||||
* all nodes onto the stack to sort them there. */
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
|
||||
|
||||
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
return;
|
||||
}
|
||||
|
||||
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */
|
||||
static __forceinline void traverseAnyHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat4& tNear,
|
||||
NodeRef*& stackPtr,
|
||||
NodeRef* stackEnd)
|
||||
{
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
/* simpler in sequence traversal order */
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
|
||||
for (; ;)
|
||||
{
|
||||
r = bscf(mask);
|
||||
cur = node->child(r); BVH::prefetch(cur,types);
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* Specialization for BVH8. */
|
||||
template<int types>
|
||||
class BVHNNodeTraverser1Hit<8, types>
|
||||
{
|
||||
typedef BVH8 BVH;
|
||||
typedef BVH8::NodeRef NodeRef;
|
||||
typedef BVH8::BaseNode BaseNode;
|
||||
|
||||
#if defined(__AVX512VL__)
|
||||
template<class NodeRef, class BaseNode>
|
||||
static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat8& tNear,
|
||||
StackItemT<NodeRef>*& stackPtr,
|
||||
StackItemT<NodeRef>* stackEnd)
|
||||
{
|
||||
assert(mask != 0);
|
||||
const BaseNode* node = cur.baseNode();
|
||||
const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
|
||||
const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
|
||||
vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
|
||||
distance_i = vint8::compact((int)mask,distance_i,distance_i);
|
||||
cur = permuteExtract(distance_i,n0,n1);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
mask &= mask-1;
|
||||
if (likely(mask == 0)) return;
|
||||
|
||||
/* 2 hits: order A0 B0 */
|
||||
const vint8 d0(distance_i);
|
||||
const vint8 d1(shuffle<1>(distance_i));
|
||||
cur = permuteExtract(d1,n0,n1);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
const vint8 dist_A0 = min(d0, d1);
|
||||
const vint8 dist_B0 = max(d0, d1);
|
||||
assert(dist_A0[0] < dist_B0[0]);
|
||||
|
||||
mask &= mask-1;
|
||||
if (likely(mask == 0)) {
|
||||
cur = permuteExtract(dist_A0,n0,n1);
|
||||
stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
|
||||
*(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
|
||||
stackPtr++;
|
||||
return;
|
||||
}
|
||||
|
||||
/* 3 hits: order A1 B1 C1 */
|
||||
|
||||
const vint8 d2(shuffle<2>(distance_i));
|
||||
cur = permuteExtract(d2,n0,n1);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
const vint8 dist_A1 = min(dist_A0,d2);
|
||||
const vint8 dist_tmp_B1 = max(dist_A0,d2);
|
||||
const vint8 dist_B1 = min(dist_B0,dist_tmp_B1);
|
||||
const vint8 dist_C1 = max(dist_B0,dist_tmp_B1);
|
||||
assert(dist_A1[0] < dist_B1[0]);
|
||||
assert(dist_B1[0] < dist_C1[0]);
|
||||
|
||||
mask &= mask-1;
|
||||
if (likely(mask == 0)) {
|
||||
cur = permuteExtract(dist_A1,n0,n1);
|
||||
stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
|
||||
*(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
|
||||
stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
|
||||
*(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
|
||||
stackPtr+=2;
|
||||
return;
|
||||
}
|
||||
|
||||
/* 4 hits: order A2 B2 C2 D2 */
|
||||
|
||||
const vint8 d3(shuffle<3>(distance_i));
|
||||
cur = permuteExtract(d3,n0,n1);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
const vint8 dist_A2 = min(dist_A1,d3);
|
||||
const vint8 dist_tmp_B2 = max(dist_A1,d3);
|
||||
const vint8 dist_B2 = min(dist_B1,dist_tmp_B2);
|
||||
const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
|
||||
const vint8 dist_C2 = min(dist_C1,dist_tmp_C2);
|
||||
const vint8 dist_D2 = max(dist_C1,dist_tmp_C2);
|
||||
assert(dist_A2[0] < dist_B2[0]);
|
||||
assert(dist_B2[0] < dist_C2[0]);
|
||||
assert(dist_C2[0] < dist_D2[0]);
|
||||
|
||||
mask &= mask-1;
|
||||
if (likely(mask == 0)) {
|
||||
cur = permuteExtract(dist_A2,n0,n1);
|
||||
stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
|
||||
*(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
|
||||
stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
|
||||
*(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
|
||||
stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
|
||||
*(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
|
||||
stackPtr+=3;
|
||||
return;
|
||||
}
|
||||
|
||||
/* >=5 hits: reverse to descending order for writing to stack */
|
||||
|
||||
distance_i = align_shift_right<3>(distance_i,distance_i);
|
||||
const size_t hits = 4 + popcnt(mask);
|
||||
vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert
|
||||
|
||||
isort_quick_update<8>(dist,dist_A2);
|
||||
isort_quick_update<8>(dist,dist_B2);
|
||||
isort_quick_update<8>(dist,dist_C2);
|
||||
isort_quick_update<8>(dist,dist_D2);
|
||||
|
||||
do {
|
||||
|
||||
distance_i = align_shift_right<1>(distance_i,distance_i);
|
||||
cur = permuteExtract(distance_i,n0,n1);
|
||||
BVH::prefetch(cur,types);
|
||||
const vint8 new_dist(permute(distance_i,vint8(zero)));
|
||||
mask &= mask-1;
|
||||
isort_update<8>(dist,new_dist);
|
||||
|
||||
} while(mask);
|
||||
|
||||
for (size_t i=0; i<7; i++)
|
||||
assert(dist[i+0]>=dist[i+1]);
|
||||
|
||||
for (size_t i=0;i<hits-1;i++)
|
||||
{
|
||||
stackPtr->ptr = permuteExtract(dist,n0,n1);
|
||||
*(float*)&stackPtr->dist = permuteExtract(dist,tNear);
|
||||
dist = align_shift_right<1>(dist,dist);
|
||||
stackPtr++;
|
||||
}
|
||||
cur = permuteExtract(dist,n0,n1);
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
static __forceinline void traverseClosestHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat8& tNear,
|
||||
StackItemT<NodeRef>*& stackPtr,
|
||||
StackItemT<NodeRef>* stackEnd)
|
||||
{
|
||||
assert(mask != 0);
|
||||
#if defined(__AVX512VL__)
|
||||
traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
|
||||
#else
|
||||
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
if (likely(mask == 0)) {
|
||||
assert(cur != BVH::emptyNode);
|
||||
return;
|
||||
}
|
||||
|
||||
/*! two children are hit, push far child, and continue with closer child */
|
||||
NodeRef c0 = cur;
|
||||
const unsigned int d0 = ((unsigned int*)&tNear)[r];
|
||||
r = bscf(mask);
|
||||
NodeRef c1 = node->child(r);
|
||||
BVH::prefetch(c1,types);
|
||||
const unsigned int d1 = ((unsigned int*)&tNear)[r];
|
||||
|
||||
assert(c0 != BVH::emptyNode);
|
||||
assert(c1 != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
assert(stackPtr < stackEnd);
|
||||
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
|
||||
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
|
||||
}
|
||||
#if NEW_SORTING_CODE == 1
|
||||
vint4 s0((size_t)c0,(size_t)d0);
|
||||
vint4 s1((size_t)c1,(size_t)d1);
|
||||
|
||||
r = bscf(mask);
|
||||
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s2((size_t)c2,(size_t)d2);
|
||||
/* 3 hits */
|
||||
if (likely(mask == 0)) {
|
||||
StackItemT<NodeRef>::sort3(s0,s1,s2);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
|
||||
cur = toSizeT(s2);
|
||||
stackPtr+=2;
|
||||
return;
|
||||
}
|
||||
r = bscf(mask);
|
||||
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s3((size_t)c3,(size_t)d3);
|
||||
/* 4 hits */
|
||||
if (likely(mask == 0)) {
|
||||
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
|
||||
cur = toSizeT(s3);
|
||||
stackPtr+=3;
|
||||
return;
|
||||
}
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
|
||||
/*! fallback case if more than 4 children are hit */
|
||||
StackItemT<NodeRef>* stackFirst = stackPtr;
|
||||
stackPtr+=4;
|
||||
while (1)
|
||||
{
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
|
||||
const vint4 s((size_t)c,(size_t)d);
|
||||
*(vint4*)stackPtr++ = s;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (unlikely(mask == 0)) break;
|
||||
}
|
||||
sort(stackFirst,stackPtr);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
#else
|
||||
/*! Here starts the slow path for 3 or 4 hit children. We push
|
||||
* all nodes onto the stack to sort them there. */
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
|
||||
|
||||
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
return;
|
||||
}
|
||||
|
||||
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
return;
|
||||
}
|
||||
/*! fallback case if more than 4 children are hit */
|
||||
StackItemT<NodeRef>* stackFirst = stackPtr-4;
|
||||
while (1)
|
||||
{
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (unlikely(mask == 0)) break;
|
||||
}
|
||||
sort(stackFirst,stackPtr);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static __forceinline void traverseAnyHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat8& tNear,
|
||||
NodeRef*& stackPtr,
|
||||
NodeRef* stackEnd)
|
||||
{
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
/* simpler in sequence traversal order */
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
|
||||
for (; ;)
|
||||
{
|
||||
r = bscf(mask);
|
||||
cur = node->child(r); BVH::prefetch(cur,types);
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
31
thirdparty/embree/kernels/bvh/node_intersector.h
vendored
Normal file
@@ -0,0 +1,31 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"

namespace embree
{
  namespace isa
  {
    struct NearFarPrecalculations
    {
      size_t nearX, nearY, nearZ;
      size_t farX, farY, farZ;

      __forceinline NearFarPrecalculations() {}

      __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
      {
        const size_t size = sizeof(float)*N;
        nearX = (dir.x < 0.0f) ? 1*size : 0*size;
        nearY = (dir.y < 0.0f) ? 3*size : 2*size;
        nearZ = (dir.z < 0.0f) ? 5*size : 4*size;
        farX = nearX ^ size;
        farY = nearY ^ size;
        farZ = nearZ ^ size;
      }
    };
  }
}
1492
thirdparty/embree/kernels/bvh/node_intersector1.h
vendored
Normal file
File diff suppressed because it is too large
257
thirdparty/embree/kernels/bvh/node_intersector_frustum.h
vendored
Normal file
@@ -0,0 +1,257 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "node_intersector.h"

namespace embree
{
  namespace isa
  {
    //////////////////////////////////////////////////////////////////////////////////////
    // Frustum structure used in hybrid and stream traversal
    //////////////////////////////////////////////////////////////////////////////////////

    /*
       Optimized frustum test. We calculate t=(p-org)/dir in ray/box
       intersection. We assume the rays are split by octant, thus
       dir intervals are either positive or negative in each
       dimension.

       Case 1: dir.min >= 0 && dir.max >= 0:
         t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
         t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max

       Case 2: dir.min < 0 && dir.max < 0:
         t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
         t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
    */

    template<bool robust>
    struct Frustum;

    /* Fast variant */
    template<>
    struct Frustum<false>
    {
      __forceinline Frustum() {}

      template<int K>
      __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
      {
        const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
                                     reduce_min(select(valid, org.y, pos_inf)),
                                     reduce_min(select(valid, org.z, pos_inf)));

        const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
                                     reduce_max(select(valid, org.y, neg_inf)),
                                     reduce_max(select(valid, org.z, neg_inf)));

        const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
                                      reduce_min(select(valid, rdir.y, pos_inf)),
                                      reduce_min(select(valid, rdir.z, pos_inf)));

        const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
                                      reduce_max(select(valid, rdir.y, neg_inf)),
                                      reduce_max(select(valid, rdir.z, neg_inf)));

        const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
        const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));

        init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
      }

      __forceinline void init(const Vec3fa& reduced_min_org,
                              const Vec3fa& reduced_max_org,
                              const Vec3fa& reduced_min_rdir,
                              const Vec3fa& reduced_max_rdir,
                              float reduced_min_dist,
                              float reduced_max_dist,
                              int N)
      {
        const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));

        min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
        max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);

#if defined (__aarch64__)
        neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
        neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
#else
        min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
        max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
#endif
        min_dist = reduced_min_dist;
        max_dist = reduced_max_dist;

        nf = NearFarPrecalculations(min_rdir, N);
      }

      template<int K>
      __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
      {
        max_dist = reduce_max(ray_tfar);
      }

      NearFarPrecalculations nf;

      Vec3fa min_rdir;
      Vec3fa max_rdir;

#if defined (__aarch64__)
      Vec3fa neg_min_org_rdir;
      Vec3fa neg_max_org_rdir;
#else
      Vec3fa min_org_rdir;
      Vec3fa max_org_rdir;
#endif
      float min_dist;
      float max_dist;
    };

    typedef Frustum<false> FrustumFast;

    /* Robust variant */
    template<>
    struct Frustum<true>
    {
      __forceinline Frustum() {}

      template<int K>
      __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
      {
        const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
                                     reduce_min(select(valid, org.y, pos_inf)),
                                     reduce_min(select(valid, org.z, pos_inf)));

        const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
                                     reduce_max(select(valid, org.y, neg_inf)),
                                     reduce_max(select(valid, org.z, neg_inf)));

        const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
                                      reduce_min(select(valid, rdir.y, pos_inf)),
                                      reduce_min(select(valid, rdir.z, pos_inf)));

        const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
                                      reduce_max(select(valid, rdir.y, neg_inf)),
                                      reduce_max(select(valid, rdir.z, neg_inf)));

        const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
        const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));

        init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
      }

      __forceinline void init(const Vec3fa& reduced_min_org,
                              const Vec3fa& reduced_max_org,
                              const Vec3fa& reduced_min_rdir,
                              const Vec3fa& reduced_max_rdir,
                              float reduced_min_dist,
                              float reduced_max_dist,
                              int N)
      {
        const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
        min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
        max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);

        min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
        max_org = select(pos_rdir, reduced_min_org, reduced_max_org);

        min_dist = reduced_min_dist;
        max_dist = reduced_max_dist;

        nf = NearFarPrecalculations(min_rdir, N);
      }

      template<int K>
      __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
      {
        max_dist = reduce_max(ray_tfar);
      }

      NearFarPrecalculations nf;

      Vec3fa min_rdir;
      Vec3fa max_rdir;

      Vec3fa min_org;
      Vec3fa max_org;

      float min_dist;
      float max_dist;
    };

    typedef Frustum<true> FrustumRobust;

    //////////////////////////////////////////////////////////////////////////////////////
    // Fast AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////

    template<int N>
    __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                              const FrustumFast& frustum, vfloat<N>& dist)
    {
      const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
      const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
      const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
      const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
      const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
      const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);

#if defined (__aarch64__)
      const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
      const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
      const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
      const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
      const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
      const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
#else
      const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
      const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
      const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
      const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
      const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
      const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
#endif
      const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
      dist = fmin;
      const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
      const vbool<N> vmask_node_hit = fmin <= fmax;
      size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
      return m_node;
    }

    //////////////////////////////////////////////////////////////////////////////////////
    // Robust AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////

    template<int N>
    __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                              const FrustumRobust& frustum, vfloat<N>& dist)
    {
      const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
      const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
      const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
      const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
      const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
      const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);

      const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
      const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
      const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
      const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
      const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
      const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);

      const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
      const float round_up = 1.0f+2.0f*float(ulp);
      const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
      dist = fmin;
      const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
      const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
      size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
      return m_node;
    }
  }
}
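A scalar illustration of the octant argument in the comment at the top of node_intersector_frustum.h above (Case 1, all x directions positive). The function and names below are hypothetical; only the formula comes from the header:

    // Conservative x-slab interval for a bundle of rays whose x direction is positive,
    // mirroring Case 1: t_min = (p_min - org_max)*rdir_min, t_max = (p_max - org_min)*rdir_max.
    struct Interval { float t_min, t_max; };

    Interval conservativeSlabX(float p_min, float p_max,
                               float org_min, float org_max,
                               float rdir_min, float rdir_max)
    {
      return { (p_min - org_max) * rdir_min,    // smallest entry distance over the bundle
               (p_max - org_min) * rdir_max };  // largest exit distance over the bundle
    }

    // Example: slab x in [2,5], origins in [0.5,1.0], dir.x in [2,4] (so rdir.x in [0.25,0.5])
    // gives [0.25, 2.25]; an individual ray with org=0.75, dir=3 has [~0.417, ~1.417], which
    // lies inside the conservative interval. For boxes in front of the ray origins, a frustum
    // miss therefore implies a miss for every ray in the bundle.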
844
thirdparty/embree/kernels/bvh/node_intersector_packet.h
vendored
Normal file
@@ -0,0 +1,844 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "node_intersector.h"

namespace embree
{
  namespace isa
  {
    //////////////////////////////////////////////////////////////////////////////////////
    // Ray packet structure used in hybrid traversal
    //////////////////////////////////////////////////////////////////////////////////////

    template<int K, bool robust>
    struct TravRayK;

    /* Fast variant */
    template<int K>
    struct TravRayK<K, false>
    {
      __forceinline TravRayK() {}

      __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
      {
        init(ray_org, ray_dir, N);
      }

      __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
      {
        init(ray_org, ray_dir, N);
        tnear = ray_tnear;
        tfar = ray_tfar;
      }

      __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
      {
        org = ray_org;
        dir = ray_dir;
        rdir = rcp_safe(ray_dir);
#if defined(__aarch64__)
        neg_org_rdir = -(org * rdir);
#elif defined(__AVX2__)
        org_rdir = org * rdir;
#endif

        if (N)
        {
          const int size = sizeof(float)*N;
          nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
          nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
          nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
        }
      }

      Vec3vf<K> org;
      Vec3vf<K> dir;
      Vec3vf<K> rdir;
#if defined(__aarch64__)
      Vec3vf<K> neg_org_rdir;
#elif defined(__AVX2__)
      Vec3vf<K> org_rdir;
#endif
      Vec3vi<K> nearXYZ;
      vfloat<K> tnear;
      vfloat<K> tfar;
    };

    template<int K>
    using TravRayKFast = TravRayK<K, false>;

    /* Robust variant */
    template<int K>
    struct TravRayK<K, true>
    {
      __forceinline TravRayK() {}

      __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
      {
        init(ray_org, ray_dir, N);
      }

      __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
      {
        init(ray_org, ray_dir, N);
        tnear = ray_tnear;
        tfar = ray_tfar;
      }

      __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
      {
        org = ray_org;
        dir = ray_dir;
        rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));

        if (N)
        {
          const int size = sizeof(float)*N;
          nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
          nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
          nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
        }
      }

      Vec3vf<K> org;
      Vec3vf<K> dir;
      Vec3vf<K> rdir;
      Vec3vi<K> nearXYZ;
      vfloat<K> tnear;
      vfloat<K> tfar;
    };

    template<int K>
    using TravRayKRobust = TravRayK<K, true>;

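    // Note (illustrative, not Embree code): the fast TravRayK precomputes org*rdir
    // (or its negation on aarch64) once per packet so that each node test below needs
    // only one fused multiply per bound, since (b - org) * rdir == b*rdir - org*rdir.
    // A scalar sketch of the same identity, with hypothetical names:
    //
    //   float org = 1.5f, dir = 4.0f;
    //   float rdir = 1.0f / dir;
    //   float org_rdir = org * rdir;          // AVX2 path: msub(bound, rdir, org_rdir)
    //   float neg_org_rdir = -org_rdir;       // aarch64 path: madd(bound, rdir, neg_org_rdir)
    //   float bound = 7.0f;
    //   float t_plain = (bound - org) * rdir;            // 1.375f
    //   float t_fused = fmaf(bound, rdir, neg_org_rdir); // 1.375f (up to FMA rounding)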
    //////////////////////////////////////////////////////////////////////////////////////
    // Fast AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////

    template<int N, int K>
    __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
                                          const TravRayKFast<K>& ray, vfloat<K>& dist)

    {
#if defined(__aarch64__)
      const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
      const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
      const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
      const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
      const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
      const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
      const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
      const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
      const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
      const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
      const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
      const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
#else
      const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
      const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
      const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
      const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
      const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
      const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
#endif

#if defined(__AVX512F__) // SKX
      if (K == 16)
      {
        /* use mixed float/int min/max */
        const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
        const vfloat<K> lfarP  = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
        const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
        dist = lnearP;
        return lhit;
      }
      else
#endif
      {
        const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
        const vfloat<K> lfarP  = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
#if defined(__AVX512F__) // SKX
        const vbool<K> lhit    = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
#else
        const vbool<K> lhit    = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
#endif
        dist = lnearP;
        return lhit;
      }
    }

    //////////////////////////////////////////////////////////////////////////////////////
    // Robust AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////

    template<int N, int K>
    __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
                                                const TravRayKRobust<K>& ray, vfloat<K>& dist)
    {
      // FIXME: use per instruction rounding for AVX512
      const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
      const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
      const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
      const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
      const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
      const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
      const float round_up   = 1.0f+3.0f*float(ulp);
      const float round_down = 1.0f-3.0f*float(ulp);
      const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
      const vfloat<K> lfarP  = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
      const vbool<K> lhit    = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
      dist = lnearP;
      return lhit;
    }

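    // Note (illustrative, not Embree code): the robust variants above widen the slab
    // interval by a few ulp so that the extra rounding error of the unfused
    // (bound - org) * rdir form can never turn a true hit into a miss; the worst case
    // is an occasional extra node visit. Assuming ulp is the float machine epsilon
    // (FLT_EPSILON), the scale factors are simply:
    //
    //   const float round_up   = 1.0f + 3.0f*FLT_EPSILON; // grows the exit distance lfarP
    //   const float round_down = 1.0f - 3.0f*FLT_EPSILON; // shrinks the entry distance lnearP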
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
/* use mixed float/int min/max */
|
||||
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
#if defined(__AVX512F__) // SKX
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
#else
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
#endif
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNodeMB4D intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
||||
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
||||
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
if (unlikely(ref.isAABBNodeMB4D())) {
|
||||
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
||||
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
||||
}
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNodeMB4D intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
||||
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
||||
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
|
||||
if (unlikely(ref.isAABBNodeMB4D())) {
|
||||
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
||||
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
||||
}
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast OBBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K, bool robust>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
|
||||
const TravRayK<K,robust>& ray, vfloat<K>& dist)
|
||||
{
|
||||
const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
|
||||
Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
|
||||
Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
|
||||
Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
|
||||
|
||||
const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
|
||||
const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
|
||||
const Vec3vf<K> org = xfmPoint(naabb, ray.org);
|
||||
|
||||
const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
|
||||
const vfloat<K> lclipMinY = org.y * nrdir.y;
|
||||
const vfloat<K> lclipMinZ = org.z * nrdir.z;
|
||||
const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
|
||||
const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
|
||||
const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
|
||||
|
||||
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
if (robust) {
|
||||
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
||||
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
||||
}
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast OBBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K, bool robust>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
|
||||
const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
|
||||
Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
|
||||
Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
|
||||
Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
|
||||
|
||||
const Vec3vf<K> b0_lower = zero;
|
||||
const Vec3vf<K> b0_upper = one;
|
||||
const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
|
||||
const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
|
||||
const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
|
||||
const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
|
||||
|
||||
const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
|
||||
const Vec3vf<K> rdir = rcp_safe(dir);
|
||||
const Vec3vf<K> org = xfmPoint(xfm, ray.org);
|
||||
|
||||
const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
|
||||
|
||||
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
if (robust) {
|
||||
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
||||
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
||||
}
|
||||
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// QuantizedBaseNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
||||
const TravRayK<K,false>& ray, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
const vfloat<N> lower_x = node->dequantizeLowerX();
|
||||
const vfloat<N> upper_x = node->dequantizeUpperX();
|
||||
const vfloat<N> lower_y = node->dequantizeLowerY();
|
||||
const vfloat<N> upper_y = node->dequantizeUpperY();
|
||||
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
||||
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
/* use mixed float/int min/max */
|
||||
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
#if defined(__AVX512F__) // SKX
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
#else
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
#endif
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
||||
const TravRayK<K,true>& ray, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
const vfloat<N> lower_x = node->dequantizeLowerX();
|
||||
const vfloat<N> upper_x = node->dequantizeUpperX();
|
||||
const vfloat<N> lower_y = node->dequantizeLowerY();
|
||||
const vfloat<N> upper_y = node->dequantizeUpperY();
|
||||
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
||||
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
||||
|
||||
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
|
||||
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
||||
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
||||
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
||||
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
||||
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
||||
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
|
||||
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
||||
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
||||
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
||||
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
||||
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
||||
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
||||
|
||||
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Node intersectors used in hybrid traversal
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Intersects N nodes with K rays */
|
||||
template<int N, int K, int types, bool robust>
|
||||
struct BVHNNodeIntersectorK;
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
|
||||
{
|
||||
/* vmask is both an input and an output parameter! Its initial value should be the parent node
|
||||
hit mask, which is used for correctly computing the current hit mask. The parent hit mask
|
||||
is actually required only for motion blur node intersections (because different rays may
|
||||
have different times), so for regular nodes vmask is simply overwritten. */
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
|
||||
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
|
||||
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
||||
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
||||
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
|
||||
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
|
||||
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
|
||||
assert(node.isOBBNodeMB());
|
||||
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
|
||||
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
|
||||
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
|
||||
assert(node.isOBBNodeMB());
|
||||
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*! Intersects N nodes with K rays */
|
||||
template<int N, int K, bool robust>
|
||||
struct BVHNQuantizedBaseNodeIntersectorK;
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
|
||||
{
|
||||
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
|
||||
const TravRayK<K,false>& ray, vfloat<K>& dist)
|
||||
{
|
||||
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
|
||||
}
|
||||
|
||||
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<int N, int K>
|
||||
struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
|
||||
{
|
||||
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
|
||||
const TravRayK<K,true>& ray, vfloat<K>& dist)
|
||||
{
|
||||
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
|
||||
}
|
||||
|
||||
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
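The BVHNNodeIntersectorK specializations at the end of this header are consumed by the hybrid packet traversal kernels (defined elsewhere in kernels/bvh, not in this hunk). A rough sketch of the calling pattern, with hypothetical local names and no stack handling; only the intersect() signature is taken from the header above:

    // Visit the children of one inner node with a packet of K rays (illustrative only).
    template<int N, int K, int types, bool robust, typename TravRay>
    __forceinline void visitChildren(const typename BVHN<N>::NodeRef& nodeRef, const TravRay& tray,
                                     const vfloat<K>& time, const vbool<K>& active)
    {
      for (size_t i = 0; i < N; i++)
      {
        vfloat<K> dist;
        vbool<K> vmask = active; // parent hit mask; required for the motion-blur/4D variants
        BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, time, dist, vmask);
        if (none(vmask)) continue;
        // a real traverser would fetch child i here and push (child, dist, vmask) on its stack
      }
    }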