initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

190
thirdparty/embree/kernels/bvh/bvh.cpp vendored Normal file
View File

@@ -0,0 +1,190 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_statistics.h"
namespace embree
{
/*! Constructs an empty BVH over the given primitive type. The accel type tag
 *  is chosen from the branching factor N; the device pointer and the
 *  allocator arguments are read from the scene. */
template<int N>
BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene)
  : AccelData((N==8) ? AccelData::TY_BVH8 : (N==4) ? AccelData::TY_BVH4 : AccelData::TY_UNKNOWN),
    primTy(&primTy),
    device(scene->device),
    scene(scene),
    root(emptyNode),
    alloc(scene->device,scene->isStaticAccel()),
    numPrimitives(0),
    numVertices(0)
{
  /* nothing else to do: the tree starts out as a single empty node */
}
/*! Destroys the BVH and deletes every BVH stored in the objects array. */
template<int N>
BVHN<N>::~BVHN ()
{
  for (BVHN* object : objects)
    delete object;
}
/*! Resets the BVH to the empty state and clears the node allocator. */
template<int N>
void BVHN<N>::clear()
{
  /* detach the tree first: empty root, empty bounds, zero primitives */
  this->set(BVHN::emptyNode,empty,0);
  this->alloc.clear();
}
/*! Stores the result of a build: root reference, bounds and primitive count. */
template<int N>
void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives)
{
  /* the parameters shadow the members of the same name, so the members are
   * addressed explicitly through the object */
  BVHN& self = *this;
  self.root          = root;
  self.bounds        = bounds;
  self.numPrimitives = numPrimitives;
}
/*! Clears the barrier bits of a subtree. The descent stops at the first
 *  barrier found on each path and at leaves. */
template<int N>
void BVHN<N>::clearBarrier(NodeRef& node)
{
  /* a barrier terminates the descent on this path */
  if (node.isBarrier()) {
    node.clearBarrier();
    return;
  }

  /* leaves have no children to visit */
  if (node.isLeaf())
    return;

  BaseNode* inner = node.baseNode(); // FIXME: flags should be stored in BVH
  for (size_t i=0; i<N; i++)
    clearBarrier(inner->child(i));
}
/*! Lays out the largest nodes of the BVH in freshly allocated memory.
 *
 *  Starting at the root, the AABB node with the largest bounding box area is
 *  repeatedly replaced by its non-empty children until roughly num candidate
 *  nodes have been collected (or the largest remaining candidate is not an
 *  AABB node). The collected nodes are marked with a barrier, and
 *  layoutLargeNodesRecursion() then re-allocates all AABB nodes above the
 *  barriers. Does nothing unless __64BIT__ is defined.
 *
 *  \param num number of candidate nodes to collect before stopping */
template<int N>
void BVHN<N>::layoutLargeNodes(size_t num)
{
#if defined(__64BIT__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
  /* heap entry: reference to a node slot plus the area used as priority */
  struct NodeArea
  {
    __forceinline NodeArea() {}

    /* leaves get -inf so that they are only popped when no inner node is left */
    __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
      : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {}

    __forceinline bool operator< (const NodeArea& other) const {
      return this->A < other.A;
    }

    NodeRef* node;
    float A;
  };

  std::vector<NodeArea> lst;
  lst.reserve(num);
  lst.push_back(NodeArea(root,empty));

  /* The emptiness check guards against a node whose children are all empty:
   * expanding it removes the last heap entry, and std::pop_heap / back() on
   * an empty vector would be undefined behaviour. */
  while (!lst.empty() && lst.size() < num)
  {
    std::pop_heap(lst.begin(), lst.end());
    NodeArea n = lst.back(); lst.pop_back();

    /* largest remaining candidate cannot be expanded: stop collecting */
    if (!n.node->isAABBNode()) break;

    AABBNode* node = n.node->getAABBNode();
    for (size_t i=0; i<N; i++) {
      if (node->child(i) == BVHN::emptyNode) continue;
      lst.push_back(NodeArea(node->child(i),node->bounds(i)));
      std::push_heap(lst.begin(), lst.end());
    }
  }

  /* mark the frontier so that the recursion below stops there */
  for (size_t i=0; i<lst.size(); i++)
    lst[i].node->setBarrier();

  root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
#endif
}
/*! Recursive helper of layoutLargeNodes(). Copies every AABB node above the
 *  barrier into memory obtained from the given allocator and returns the
 *  reference to use in place of node. Barrier nodes get their barrier cleared
 *  and are returned as they are, as are all non-AABB nodes. */
template<int N>
typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator)
{
  /* frontier reached: remove the marker and keep the subtree in place */
  if (node.isBarrier()) {
    node.clearBarrier();
    return node;
  }

  /* only AABB nodes are re-allocated */
  if (!node.isAABBNode())
    return node;

  AABBNode* src = node.getAABBNode();
  AABBNode* dst = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
  *dst = *src;

  /* children are read from the old node and written into the copy */
  for (size_t i=0; i<N; i++)
    dst->child(i) = layoutLargeNodesRecursion(src->child(i),allocator);

  return encodeNode(dst);
}
/*! Called by all builders before the build starts. Prints a "building ..."
 *  line at verbosity level 2.
 *
 *  \param builderName name of the builder; an empty name disables reporting
 *  \return inf for an empty builder name, otherwise the current time if
 *          benchmarking or verbosity level 2 is active, otherwise 0 */
template<int N>
double BVHN<N>::preBuild(const std::string& builderName)
{
  if (builderName.empty())
    return inf;

  if (device->verbosity(2))
  {
    Lock<MutexSys> lock(g_printMutex);
    const bool isMBlur = builderName.find("MBlur") != std::string::npos;
    std::cout << "building BVH" << N << (isMBlur ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
  }

  /* the clock is only queried when the timing is going to be reported */
  if (device->benchmark || device->verbosity(2))
    return getSeconds();

  return 0.0;
}
/*! Called by all builders after the build ended. Depending on the device's
 *  verbosity and benchmark settings it prints build time, throughput, tree
 *  statistics and allocator statistics.
 *
 *  \param t0 value returned by preBuild(); inf suppresses all reporting */
template<int N>
void BVHN<N>::postBuild(double t0)
{
  if (t0 == double(inf))
    return;

  /* elapsed time is only measured when one of the reports below needs it */
  const double dt = (device->benchmark || device->verbosity(2)) ? getSeconds()-t0 : 0.0;

  /* tree statistics are created on first use and shared by both reports */
  std::unique_ptr<BVHNStatistics<N>> stat;

  /* print statistics */
  if (device->verbosity(2))
  {
    if (!stat)
      stat.reset(new BVHNStatistics<N>(this));

    const size_t usedBytes = alloc.getUsedBytes();
    Lock<MutexSys> lock(g_printMutex);
    std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;

    if (device->verbosity(2))
      std::cout << stat->str();

    if (device->verbosity(2))
    {
      /* sum the allocator statistics of this BVH and of all object BVHs */
      FastAllocator::AllStatistics allocStats(&alloc);
      for (BVHN* object : objects) {
        if (!object) continue;
        allocStats = allocStats + FastAllocator::AllStatistics(&object->alloc);
      }
      allocStats.print(numPrimitives);
    }

    if (device->verbosity(3))
    {
      alloc.print_blocks();
      for (BVHN* object : objects) {
        if (!object) continue;
        object->alloc.print_blocks();
      }
    }

    std::cout << std::flush;
  }

  /* benchmark mode */
  if (device->benchmark)
  {
    if (!stat)
      stat.reset(new BVHNStatistics<N>(this));

    Lock<MutexSys> lock(g_printMutex);
    std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
  }
}
/* explicit instantiation of the 8-wide BVH, only when compiling with AVX */
#if defined(__AVX__)
template class BVHN<8>;
#endif

/* explicit instantiation of the 4-wide BVH; && binds tighter than ||, the
 * parentheses only make that grouping visible */
#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
template class BVHN<4>;
#endif
}

235
thirdparty/embree/kernels/bvh/bvh.h vendored Normal file
View File

@@ -0,0 +1,235 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
/* include all node types */
#include "bvh_node_aabb.h"
#include "bvh_node_aabb_mb.h"
#include "bvh_node_aabb_mb4d.h"
#include "bvh_node_obb.h"
#include "bvh_node_obb_mb.h"
#include "bvh_node_qaabb.h"
namespace embree
{
/*! flags used to enable specific node types in intersectors */
enum BVHNodeFlags
{
  /* one bit per node type */
  BVH_FLAG_ALIGNED_NODE      = 1 << 0,   // 0x0000001
  BVH_FLAG_ALIGNED_NODE_MB   = 1 << 4,   // 0x0000010
  BVH_FLAG_UNALIGNED_NODE    = 1 << 8,   // 0x0000100
  BVH_FLAG_UNALIGNED_NODE_MB = 1 << 12,  // 0x0001000
  BVH_FLAG_QUANTIZED_NODE    = 1 << 20,  // 0x0100000
  BVH_FLAG_ALIGNED_NODE_MB4D = 1 << 24,  // 0x1000000

  /* short versions */
  BVH_AN1          = BVH_FLAG_ALIGNED_NODE,
  BVH_AN2          = BVH_FLAG_ALIGNED_NODE_MB,
  BVH_AN2_AN4D     = BVH_AN2 | BVH_FLAG_ALIGNED_NODE_MB4D,
  BVH_UN1          = BVH_FLAG_UNALIGNED_NODE,
  BVH_UN2          = BVH_FLAG_UNALIGNED_NODE_MB,
  BVH_MB           = BVH_AN2_AN4D | BVH_UN2,  // all motion blur node types
  BVH_AN1_UN1      = BVH_AN1 | BVH_UN1,
  BVH_AN2_UN2      = BVH_AN2 | BVH_UN2,
  BVH_AN2_AN4D_UN2 = BVH_AN2_AN4D | BVH_UN2,
  BVH_QN1          = BVH_FLAG_QUANTIZED_NODE
};
/*! Multi BVH with N children. Each node stores the bounding box of
 * its N children as well as N child references. */
template<int N>
class BVHN : public AccelData
{
/* NOTE(review): ALIGNED_CLASS_ is a macro defined outside this file; presumably
 * it provides 16 byte aligned allocation for the class - confirm. */
ALIGNED_CLASS_(16);
public:
/*! node reference type and the node types of this BVH, all parameterized by N */
typedef NodeRefPtr<N> NodeRef;
typedef BaseNode_t<NodeRef,N> BaseNode;
typedef AABBNode_t<NodeRef,N> AABBNode;
typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
typedef OBBNode_t<NodeRef,N> OBBNode;
typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
typedef QuantizedNode_t<NodeRef,N> QuantizedNode;
/*! Number of bytes the nodes and primitives are minimally aligned to.*/
static const size_t byteAlignment = 16;
/*! Alignment in bytes requested when nodes are allocated (see layoutLargeNodesRecursion). */
static const size_t byteNodeAlignment = 4*N;
/*! Empty node */
static const size_t emptyNode = NodeRef::emptyNode;
/*! Invalid node, used as marker in traversal */
static const size_t invalidNode = NodeRef::invalidNode;
/*! Forwarded from NodeRef::popRay. */
static const size_t popRay = NodeRef::popRay;
/*! Maximum depth of the BVH. */
static const size_t maxBuildDepth = 32;
static const size_t maxBuildDepthLeaf = maxBuildDepth+8;
static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder
/*! Maximum number of primitive blocks in a leaf. */
static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;
public:
/*! Builder interface to create allocator */
struct CreateAlloc : public FastAllocator::Create {
__forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
};
/*! node record types, parameterized by the node reference type of this BVH */
typedef BVHNodeRecord<NodeRef> NodeRecord;
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
public:
/*! BVHN default constructor. */
BVHN (const PrimitiveType& primTy, Scene* scene);
/*! BVHN destruction */
~BVHN ();
/*! clears the acceleration structure */
void clear();
/*! sets BVH members after build */
void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);
/*! Clears the barrier bits of a subtree. */
void clearBarrier(NodeRef& node);
/*! lays out num large nodes of the BVH */
void layoutLargeNodes(size_t num);
/*! recursive helper of layoutLargeNodes, returns the node reference to use in place of node */
NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);
/*! called by all builders before build starts */
double preBuild(const std::string& builderName);
/*! called by all builders after build ended */
void postBuild(double t0);
/*! allocator class */
struct Allocator {
BVHN* bvh;
Allocator (BVHN* bvh) : bvh(bvh) {}
/*! allocates the requested number of bytes through the thread local allocator of the BVH */
__forceinline void* operator() (size_t bytes) const {
return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
}
};
/*! post build cleanup */
void cleanup() {
alloc.cleanup();
}
public:
/*! Encodes a node */
static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
/*! Encodes a leaf, forwarding to NodeRef */
static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }
public:
/*! Prefetches the node this reference points to. The types argument is
 * compared against BVHNodeFlags values to decide how many 64 byte steps
 * behind ref.ptr are prefetched. */
__forceinline static void prefetch(const NodeRef ref, int types=0)
{
#if defined(__AVX512PF__) // MIC
if (types != BVH_FLAG_QUANTIZED_NODE) {
/* first two 64 byte steps are always prefetched */
prefetchL2(((char*)ref.ptr)+0*64);
prefetchL2(((char*)ref.ptr)+1*64);
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
prefetchL2(((char*)ref.ptr)+2*64);
prefetchL2(((char*)ref.ptr)+3*64);
}
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
/* KNL still needs L2 prefetches for large nodes */
prefetchL2(((char*)ref.ptr)+4*64);
prefetchL2(((char*)ref.ptr)+5*64);
prefetchL2(((char*)ref.ptr)+6*64);
prefetchL2(((char*)ref.ptr)+7*64);
}
}
else
{
/* quantized nodes only: three 64 byte steps */
/* todo: reduce if 32bit offsets are enabled */
prefetchL2(((char*)ref.ptr)+0*64);
prefetchL2(((char*)ref.ptr)+1*64);
prefetchL2(((char*)ref.ptr)+2*64);
}
#else
if (types != BVH_FLAG_QUANTIZED_NODE) {
/* first two 64 byte steps are always prefetched */
prefetchL1(((char*)ref.ptr)+0*64);
prefetchL1(((char*)ref.ptr)+1*64);
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
prefetchL1(((char*)ref.ptr)+2*64);
prefetchL1(((char*)ref.ptr)+3*64);
}
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
/* deactivate for large nodes on Xeon, as it introduces regressions */
//prefetchL1(((char*)ref.ptr)+4*64);
//prefetchL1(((char*)ref.ptr)+5*64);
//prefetchL1(((char*)ref.ptr)+6*64);
//prefetchL1(((char*)ref.ptr)+7*64);
}
}
else
{
/* quantized nodes only: three 64 byte steps */
/* todo: reduce if 32bit offsets are enabled */
prefetchL1(((char*)ref.ptr)+0*64);
prefetchL1(((char*)ref.ptr)+1*64);
prefetchL1(((char*)ref.ptr)+2*64);
}
#endif
}
/*! Same stepping as prefetch() for non-quantized nodes, but issued through
 * embree::prefetchEX and without a special case for quantized nodes. */
__forceinline static void prefetchW(const NodeRef ref, int types=0)
{
embree::prefetchEX(((char*)ref.ptr)+0*64);
embree::prefetchEX(((char*)ref.ptr)+1*64);
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
embree::prefetchEX(((char*)ref.ptr)+2*64);
embree::prefetchEX(((char*)ref.ptr)+3*64);
}
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
embree::prefetchEX(((char*)ref.ptr)+4*64);
embree::prefetchEX(((char*)ref.ptr)+5*64);
embree::prefetchEX(((char*)ref.ptr)+6*64);
embree::prefetchEX(((char*)ref.ptr)+7*64);
}
}
/*! bvh type information */
public:
const PrimitiveType* primTy; //!< primitive type stored in the BVH
/*! bvh data */
public:
Device* device; //!< device pointer
Scene* scene; //!< scene pointer
NodeRef root; //!< root node
FastAllocator alloc; //!< allocator used to allocate nodes
/*! statistics data */
public:
size_t numPrimitives; //!< number of primitives the BVH is built over
size_t numVertices; //!< number of vertices the BVH references
/*! data arrays for special builders */
public:
std::vector<BVHN*> objects; //!< BVHs owned by this BVH, deleted in the destructor
vector_t<char,aligned_allocator<char,32>> subdiv_patches; //!< byte buffer, allocated with aligned_allocator<char,32>
};
/*! shorthand names for the 4-wide and 8-wide BVH */
using BVH4 = BVHN<4>;
using BVH8 = BVHN<8>;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,318 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_factory.h"
namespace embree
{
/*! BVH4 instantiations */
/* Factory that declares, per primitive type, a public function returning an
 * Accel* for a scene, plus the intersector and builder symbols those functions
 * rely on. DEFINE_SYMBOL2 and DEFINE_ISA_FUNCTION are macros defined outside
 * this file. */
class BVH4Factory : public BVHFactory
{
public:
/* NOTE(review): bfeatures / ifeatures are presumably the feature masks handed
 * to selectBuilders() and selectIntersectors() - confirm in the implementation. */
BVH4Factory(int bfeatures, int ifeatures);
public:
/* curve geometry */
Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
/* triangle geometry */
/* NOTE(review): BVH4Triangle4v is the only function here whose ivariant defaults to ROBUST - confirm this is intended. */
Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
/* quad geometry */
Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
/* quantized variants */
Accel* BVH4QuantizedTriangle4i(Scene* scene);
Accel* BVH4QuantizedQuad4i(Scene* scene);
/* subdivision patches */
Accel* BVH4SubdivPatch1(Scene* scene);
Accel* BVH4SubdivPatch1MB(Scene* scene);
/* user geometry */
Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4UserGeometryMB(Scene* scene);
/* instances and instance arrays */
Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceArrayMB(Scene* scene);
/* grids */
Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
private:
void selectBuilders(int features);
void selectIntersectors(int features);
private:
/* helpers returning the Accel::Intersectors set for a BVH4 of the named primitive type */
Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);
Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
private:
/* collider symbol */
DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
/* Accel::Intersector1 symbols */
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
/* Accel::Intersector4 symbols */
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
// ==============
/* Accel::Intersector8 symbols */
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
// ==============
/* Accel::Intersector16 symbols */
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
// SAH scene builders
private:
/* builder functions returning Builder*; the macro arguments after the name list the parameter types */
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
// spatial scene builder
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
// twolevel scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,284 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_factory.h"
namespace embree
{
/*! BVH8 instantiations */
class BVH8Factory : public BVHFactory
{
public:
BVH8Factory(int bfeatures, int ifeatures);
public:
Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8QuantizedTriangle4i(Scene* scene);
Accel* BVH8QuantizedTriangle4(Scene* scene);
Accel* BVH8QuantizedQuad4i(Scene* scene);
Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8UserGeometryMB(Scene* scene);
Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceArrayMB(Scene* scene);
Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
private:
void selectBuilders(int features);
void selectIntersectors(int features);
private:
Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);
Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
private:
DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
// SAH spatial scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
// twolevel scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

View File

@@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_builder.h"
namespace embree
{
namespace isa
{
/*! Runs BVHBuilderBinnedSAH::build over the given primitive references using
 *  AABB nodes (AABBNode::Create2 / AABBNode::Set3). Leaf creation is routed
 *  through the virtual createLeaf() of this object. */
template<int N>
typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
  /* settings is a by-value copy, so these overrides only affect this build */
  settings.maxDepth = BVH::maxBuildDepthLeaf;
  settings.branchingFactor = N;

  /* adapter that lets the templated builder call the virtual leaf creator */
  auto leafCreator = [&] (const PrimRef* leafPrims, const range<size_t>& leafSet, const Allocator& leafAlloc) -> NodeRef {
    return createLeaf(leafPrims,leafSet,leafAlloc);
  };

  return BVHBuilderBinnedSAH::build<NodeRef>(
    FastAllocator::Create(allocator),
    typename BVH::AABBNode::Create2(),
    typename BVH::AABBNode::Set3(allocator,prims),
    leafCreator,
    progressFunc,
    prims,pinfo,settings);
}
/*! Runs BVHBuilderBinnedSAH::build over the given primitive references using
 *  quantized nodes (QuantizedNode::Create2 / QuantizedNode::Set2). Leaf
 *  creation is routed through the virtual createLeaf() of this object. */
template<int N>
typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
  /* settings is a by-value copy, so these overrides only affect this build */
  settings.maxDepth = BVH::maxBuildDepthLeaf;
  settings.branchingFactor = N;

  /* adapter that lets the templated builder call the virtual leaf creator */
  auto leafCreator = [&] (const PrimRef* leafPrims, const range<size_t>& leafSet, const Allocator& leafAlloc) -> NodeRef {
    return createLeaf(leafPrims,leafSet,leafAlloc);
  };

  return BVHBuilderBinnedSAH::build<NodeRef>(
    FastAllocator::Create(allocator),
    typename BVH::QuantizedNode::Create2(),
    typename BVH::QuantizedNode::Set2(),
    leafCreator,
    progressFunc,
    prims,pinfo,settings);
}
/*! Runs BVHBuilderBinnedSAH::build over the given primitive references using
 *  motion blur AABB nodes (AABBNodeMB::Create / AABBNodeMB::SetTimeRange with
 *  the passed time range). Leaf creation is routed through the virtual
 *  createLeaf() of this object. */
template<int N>
typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
{
  /* settings is a by-value copy, so these overrides only affect this build */
  settings.maxDepth = BVH::maxBuildDepthLeaf;
  settings.branchingFactor = N;

  /* adapter that lets the templated builder call the virtual leaf creator */
  auto leafCreator = [&] (const PrimRef* leafPrims, const range<size_t>& leafSet, const Allocator& leafAlloc) -> NodeRecordMB {
    return createLeaf(leafPrims,leafSet,leafAlloc);
  };

  return BVHBuilderBinnedSAH::build<NodeRecordMB>(
    FastAllocator::Create(allocator),
    typename BVH::AABBNodeMB::Create(),
    typename BVH::AABBNodeMB::SetTimeRange(timeRange),
    leafCreator,
    progressFunc,
    prims,pinfo,settings);
}
/* explicit instantiations of the three virtual builders for N = 4 */
template struct BVHNBuilderVirtual<4>;
template struct BVHNBuilderQuantizedVirtual<4>;
template struct BVHNBuilderMblurVirtual<4>;
/* the N = 8 variants are only instantiated when __AVX__ is defined */
#if defined(__AVX__)
template struct BVHNBuilderVirtual<8>;
template struct BVHNBuilderQuantizedVirtual<8>;
template struct BVHNBuilderMblurVirtual<8>;
#endif
}
}

View File

@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_msmblur.h"
namespace embree
{
namespace isa
{
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! BVH builder front end whose leaf creation goes through a virtual call.
 *
 *  The static build() wraps an arbitrary leaf creation callable into a
 *  BVHNBuilderT and invokes BVHNBuilderV::build(), which is only declared
 *  here and defined out of line. */
template<int N>
struct BVHNBuilderVirtual
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef FastAllocator::CachedAllocator Allocator;

  /*! Abstract builder: build() is callable-independent, createLeaf() is the
   *  customization point. It has no virtual destructor; the only use visible
   *  here is the temporary created in the static build() below, which is
   *  never deleted through a base pointer. */
  struct BVHNBuilderV {
    NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
    virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
  };

  /*! Adapter that implements createLeaf() by forwarding to a stored callable. */
  template<typename CreateLeafFunc>
  struct BVHNBuilderT : public BVHNBuilderV
  {
    BVHNBuilderT (CreateLeafFunc createLeafFunc)
      : createLeafFunc(createLeafFunc) {}

    /* 'override' makes the compiler reject any signature drift from the base declaration */
    NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) override {
      return createLeafFunc(prims,set,alloc);
    }

  private:
    CreateLeafFunc createLeafFunc;
  };

  /*! Builds a BVH over prims, calling createLeaf(prims,set,alloc) for each leaf,
   *  and returns the result of BVHNBuilderV::build(). */
  template<typename CreateLeafFunc>
  static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
    return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
  }
};
/*! Quantized-node counterpart of the virtual BVH builder front end.
 *
 *  The static build() wraps an arbitrary leaf creation callable into a
 *  BVHNBuilderT and invokes BVHNBuilderV::build(), which is only declared
 *  here and defined out of line. */
template<int N>
struct BVHNBuilderQuantizedVirtual
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef FastAllocator::CachedAllocator Allocator;

  /*! Abstract builder: build() is callable-independent, createLeaf() is the
   *  customization point. It has no virtual destructor; the only use visible
   *  here is the temporary created in the static build() below, which is
   *  never deleted through a base pointer. */
  struct BVHNBuilderV {
    NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
    virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
  };

  /*! Adapter that implements createLeaf() by forwarding to a stored callable. */
  template<typename CreateLeafFunc>
  struct BVHNBuilderT : public BVHNBuilderV
  {
    BVHNBuilderT (CreateLeafFunc createLeafFunc)
      : createLeafFunc(createLeafFunc) {}

    /* 'override' makes the compiler reject any signature drift from the base declaration */
    NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) override {
      return createLeafFunc(prims,set,alloc);
    }

  private:
    CreateLeafFunc createLeafFunc;
  };

  /*! Builds a BVH over prims, calling createLeaf(prims,set,alloc) for each leaf,
   *  and returns the result of BVHNBuilderV::build(). */
  template<typename CreateLeafFunc>
  static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
    return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
  }
};
/*! Motion blur counterpart of the virtual BVH builder front end. In contrast
 *  to the non-blur variants, build() and createLeaf() return a NodeRecordMB
 *  and build() additionally receives a time range.
 *
 *  The static build() wraps an arbitrary leaf creation callable into a
 *  BVHNBuilderT and invokes BVHNBuilderV::build(), which is only declared
 *  here and defined out of line. */
template<int N>
struct BVHNBuilderMblurVirtual
{
  typedef BVHN<N> BVH;
  typedef typename BVH::AABBNodeMB AABBNodeMB;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecordMB NodeRecordMB;
  typedef FastAllocator::CachedAllocator Allocator;

  /*! Abstract builder: build() is callable-independent, createLeaf() is the
   *  customization point. It has no virtual destructor; the only use visible
   *  here is the temporary created in the static build() below, which is
   *  never deleted through a base pointer. */
  struct BVHNBuilderV {
    NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
    virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
  };

  /*! Adapter that implements createLeaf() by forwarding to a stored callable. */
  template<typename CreateLeafFunc>
  struct BVHNBuilderT : public BVHNBuilderV
  {
    BVHNBuilderT (CreateLeafFunc createLeafFunc)
      : createLeafFunc(createLeafFunc) {}

    /* 'override' makes the compiler reject any signature drift from the base declaration */
    NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) override {
      return createLeafFunc(prims,set,alloc);
    }

  private:
    CreateLeafFunc createLeafFunc;
  };

  /*! Builds a BVH over prims for the given time range, calling
   *  createLeaf(prims,set,alloc) for each leaf, and returns the result of
   *  BVHNBuilderV::build(). */
  template<typename CreateLeafFunc>
  static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
    return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
  }
};
}
}

View File

@@ -0,0 +1,583 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_statistics.h"
#include "bvh_rotate.h"
#include "../common/profile.h"
#include "../../common/algorithms/parallel_prefix_sum.h"
#include "../builders/primrefgen.h"
#include "../builders/bvh_builder_morton.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#if defined(__64BIT__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
#else
# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
#endif
namespace embree
{
namespace isa
{
/*! Functor that fills an inner AABB node with its children and returns the
 *  node reference together with the merged bounds of those children.
 *  When ROTATE_TREE is non-zero and N == 4 it additionally applies tree
 *  rotations and barrier marks based on the counters kept in bounds.lower.a. */
template<int N>
struct SetBVHNBounds
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
typedef typename BVH::AABBNode AABBNode;
/* stored by the constructor; not read by operator() below */
BVH* bvh;
__forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}
/*! Writes reference and bounds of the first num entries of children into the
 *  AABB node referenced by ref; returns NodeRecord(ref, union of the child bounds). */
__forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
{
AABBNode* node = ref.getAABBNode();
BBox3fa res = empty;
/* copy every child into the node while accumulating the union of the bounds */
for (size_t i=0; i<num; i++) {
const BBox3fa b = children[i].bounds;
res.extend(b);
node->setRef(i,children[i].ref);
node->setBounds(i,b);
}
BBox3fx result = (BBox3fx&)res;
#if ROTATE_TREE
if (N == 4)
{
/* sum the per-child counters stored in bounds.lower.a; the Morton leaf
 * creators in this file write the leaf's item count there, so n is
 * presumably the number of primitives below this node */
size_t n = 0;
for (size_t i=0; i<num; i++)
n += children[i].bounds.lower.a;
/* once the sum reaches 4096, every child whose own counter is still below
 * 4096 gets ROTATE_TREE rotation rounds and is then marked with a barrier */
if (n >= 4096) {
for (size_t i=0; i<num; i++) {
if (children[i].bounds.lower.a < 4096) {
for (int j=0; j<ROTATE_TREE; j++)
BVHNRotate<N>::rotate(node->child(i));
node->child(i).setBarrier();
}
}
}
/* hand the accumulated counter on through the returned bounds */
result.lower.a = unsigned(n);
}
#endif
return NodeRecord(ref,result);
}
};
/*! Primary template of the Morton builder leaf creators. It is only declared;
 *  the partial specializations for the individual primitive types follow. */
template<int N, typename Primitive>
struct CreateMortonLeaf;
/*! Morton leaf creator for Triangle4 leaves: packs up to four triangles of
 *  one triangle mesh into a single Triangle4 block. */
template<int N>
struct CreateMortonLeaf<N,Triangle4>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the bounds of the gathered triangles. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    const size_t first = current.begin();
    const size_t count = current.size();
    assert(count<=4);

    /* the whole leaf is one Triangle4 block */
    Triangle4* block = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)block,1);

    /* lanes that are not filled below keep ID -1 and zero vertices */
    vuint4 geomIDs = -1;
    vuint4 primIDs = -1;
    Vec3vf4 a = zero;
    Vec3vf4 b = zero;
    Vec3vf4 c = zero;

    vfloat4 bmin(pos_inf);
    vfloat4 bmax(neg_inf);
    const TriangleMesh* __restrict__ const tmesh = this->mesh;
    for (size_t lane=0; lane!=count; ++lane)
    {
      const unsigned int primID = morton[first+lane].index;
      const TriangleMesh::Triangle& tri = tmesh->triangle(primID);
      const Vec3fa& p0 = tmesh->vertex(tri.v[0]);
      const Vec3fa& p1 = tmesh->vertex(tri.v[1]);
      const Vec3fa& p2 = tmesh->vertex(tri.v[2]);

      /* grow the leaf bounds by the three vertices */
      bmin = min(bmin,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
      bmax = max(bmax,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);

      /* scatter IDs and vertex coordinates into this lane */
      geomIDs[lane] = geomID_;
      primIDs[lane] = primID;
      a.x[lane] = p0.x; a.y[lane] = p0.y; a.z[lane] = p0.z;
      b.x[lane] = p1.x; b.y[lane] = p1.y; b.z[lane] = p1.z;
      c.x[lane] = p2.x; c.y[lane] = p2.y; c.z[lane] = p2.z;
    }
    Triangle4::store_nt(block,Triangle4(a,b,c,geomIDs,primIDs));

    BBox3fx leafBounds = BBox3fx((Vec3fx)bmin,(Vec3fx)bmax);
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      leafBounds.lower.a = unsigned(current.size());
#endif
    return NodeRecord(ref,leafBounds);
  }

private:
  TriangleMesh* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for Triangle4v leaves: packs up to four triangles of
 *  one triangle mesh into a single Triangle4v block. */
template<int N>
struct CreateMortonLeaf<N,Triangle4v>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the bounds of the gathered triangles. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    const size_t first = current.begin();
    const size_t count = current.size();
    assert(count<=4);

    /* the whole leaf is one Triangle4v block */
    Triangle4v* block = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)block,1);

    /* lanes that are not filled below keep ID -1 and zero vertices */
    vuint4 geomIDs = -1;
    vuint4 primIDs = -1;
    Vec3vf4 a = zero;
    Vec3vf4 b = zero;
    Vec3vf4 c = zero;

    vfloat4 bmin(pos_inf);
    vfloat4 bmax(neg_inf);
    const TriangleMesh* __restrict__ tmesh = this->mesh;
    for (size_t lane=0; lane!=count; ++lane)
    {
      const unsigned int primID = morton[first+lane].index;
      const TriangleMesh::Triangle& tri = tmesh->triangle(primID);
      const Vec3fa& p0 = tmesh->vertex(tri.v[0]);
      const Vec3fa& p1 = tmesh->vertex(tri.v[1]);
      const Vec3fa& p2 = tmesh->vertex(tri.v[2]);

      /* grow the leaf bounds by the three vertices */
      bmin = min(bmin,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
      bmax = max(bmax,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);

      /* scatter IDs and vertex coordinates into this lane */
      geomIDs[lane] = geomID_;
      primIDs[lane] = primID;
      a.x[lane] = p0.x; a.y[lane] = p0.y; a.z[lane] = p0.z;
      b.x[lane] = p1.x; b.y[lane] = p1.y; b.z[lane] = p1.z;
      c.x[lane] = p2.x; c.y[lane] = p2.y; c.z[lane] = p2.z;
    }
    Triangle4v::store_nt(block,Triangle4v(a,b,c,geomIDs,primIDs));

    BBox3fx leafBounds = BBox3fx((Vec3fx)bmin,(Vec3fx)bmax);
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      leafBounds.lower.a = current.size();
#endif
    return NodeRecord(ref,leafBounds);
  }

private:
  TriangleMesh* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for Triangle4i leaves: stores scaled vertex indices
 *  (not vertex positions) of up to four triangles of one triangle mesh in a
 *  single Triangle4i block. */
template<int N>
struct CreateMortonLeaf<N,Triangle4i>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the bounds of the referenced triangles. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    vfloat4 lower(pos_inf);
    vfloat4 upper(neg_inf);
    size_t items = current.size();
    size_t start = current.begin();
    assert(items<=4);

    /* allocate leaf node */
    Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)accel,1);

    vuint4 v0 = zero, v1 = zero, v2 = zero;
    vuint4 vgeomID = -1, vprimID = -1;
    const TriangleMesh* __restrict__ const mesh = this->mesh;

    /* index scale factor; it does not depend on the primitive, so it is
     * computed once instead of in every loop iteration
     * (presumably the vertex stride in 32-bit units - confirm) */
    const unsigned int int_stride = mesh->vertices0.getStride()/4;

    for (size_t i=0; i<items; i++)
    {
      const unsigned int primID = morton[start+i].index;
      const TriangleMesh::Triangle& tri = mesh->triangle(primID);
      const Vec3fa& p0 = mesh->vertex(tri.v[0]);
      const Vec3fa& p1 = mesh->vertex(tri.v[1]);
      const Vec3fa& p2 = mesh->vertex(tri.v[2]);
      lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
      upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
      vgeomID[i] = geomID_;
      vprimID[i] = primID;
      v0[i] = tri.v[0] * int_stride;
      v1[i] = tri.v[1] * int_stride;
      v2[i] = tri.v[2] * int_stride;
    }

    /* unused lanes: repeat the geomID of lane 0, mark the primID as -1 and
     * point all vertex indices at offset 0 */
    for (size_t i=items; i<4; i++)
    {
      vgeomID[i] = vgeomID[0];
      vprimID[i] = -1;
      v0[i] = 0;
      v1[i] = 0;
      v2[i] = 0;
    }
    Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));

    BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      box_o.lower.a = current.size();
#endif
    return NodeRecord(ref,box_o);
  }

private:
  TriangleMesh* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for Quad4v leaves: packs up to four quads of one quad
 *  mesh into a single Quad4v block. */
template<int N>
struct CreateMortonLeaf<N,Quad4v>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the bounds of the gathered quads. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    const size_t first = current.begin();
    const size_t count = current.size();
    assert(count<=4);

    /* the whole leaf is one Quad4v block */
    Quad4v* block = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)block,1);

    /* lanes that are not filled below keep ID -1 and zero vertices */
    vuint4 geomIDs = -1;
    vuint4 primIDs = -1;
    Vec3vf4 a = zero;
    Vec3vf4 b = zero;
    Vec3vf4 c = zero;
    Vec3vf4 d = zero;

    vfloat4 bmin(pos_inf);
    vfloat4 bmax(neg_inf);
    const QuadMesh* __restrict__ qmesh = this->mesh;
    for (size_t lane=0; lane!=count; ++lane)
    {
      const unsigned int primID = morton[first+lane].index;
      const QuadMesh::Quad& quad = qmesh->quad(primID);
      const Vec3fa& p0 = qmesh->vertex(quad.v[0]);
      const Vec3fa& p1 = qmesh->vertex(quad.v[1]);
      const Vec3fa& p2 = qmesh->vertex(quad.v[2]);
      const Vec3fa& p3 = qmesh->vertex(quad.v[3]);

      /* grow the leaf bounds by the four vertices */
      bmin = min(bmin,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
      bmax = max(bmax,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);

      /* scatter IDs and vertex coordinates into this lane */
      geomIDs[lane] = geomID_;
      primIDs[lane] = primID;
      a.x[lane] = p0.x; a.y[lane] = p0.y; a.z[lane] = p0.z;
      b.x[lane] = p1.x; b.y[lane] = p1.y; b.z[lane] = p1.z;
      c.x[lane] = p2.x; c.y[lane] = p2.y; c.z[lane] = p2.z;
      d.x[lane] = p3.x; d.y[lane] = p3.y; d.z[lane] = p3.z;
    }
    Quad4v::store_nt(block,Quad4v(a,b,c,d,geomIDs,primIDs));

    BBox3fx leafBounds = BBox3fx((Vec3fx)bmin,(Vec3fx)bmax);
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      leafBounds.lower.a = current.size();
#endif
    return NodeRecord(ref,leafBounds);
  }

private:
  QuadMesh* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for user geometry: writes one Object (geomID, primID)
 *  per primitive of the range into a freshly allocated array. */
template<int N>
struct CreateMortonLeaf<N,Object>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the merged bounds of its primitives. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    size_t items = current.size();
    size_t start = current.begin();

    /* allocate leaf node */
    Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)accel,items);

    const UserGeometry* mesh = this->mesh;
    BBox3fa bounds = empty;
    for (size_t i=0; i<items; i++)
    {
      /* the Morton primitive index is used directly as primID */
      const unsigned int primID = morton[start+i].index;
      bounds.extend(mesh->bounds(primID));
      new (&accel[i]) Object(geomID_,primID);
    }

    BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      box_o.lower.a = current.size();
#endif
    return NodeRecord(ref,box_o);
  }

private:
  UserGeometry* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for instances: writes one InstancePrimitive
 *  (instance pointer, geomID) per primitive of the range; the range is
 *  asserted to hold at most one item. */
template<int N>
struct CreateMortonLeaf<N,InstancePrimitive>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the merged bounds of its primitives. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    size_t items = current.size();
    size_t start = current.begin();
    assert(items <= 1);

    /* allocate leaf node */
    InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)accel,items);

    const Instance* instance = this->mesh;
    BBox3fa bounds = empty;
    for (size_t i=0; i<items; i++)
    {
      /* primID only selects the bounds; the primitive stores the instance itself */
      const unsigned int primID = morton[start+i].index;
      bounds.extend(instance->bounds(primID));
      new (&accel[i]) InstancePrimitive(instance, geomID_);
    }

    BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      box_o.lower.a = current.size();
#endif
    return NodeRecord(ref,box_o);
  }

private:
  Instance* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Morton leaf creator for instance arrays: writes one InstanceArrayPrimitive
 *  (geomID, primID) per primitive of the range; the range is asserted to hold
 *  at most one item. */
template<int N>
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecord NodeRecord;

  __forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
    : mesh(mesh), morton(morton), geomID_(geomID) {}

  /*! Creates the leaf for the Morton primitives in range current and returns
   *  the leaf reference together with the merged bounds of its primitives. */
  __noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
  {
    size_t items = current.size();
    size_t start = current.begin();
    assert(items <= 1);

    /* allocate leaf node */
    InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
    NodeRef ref = BVH::encodeLeaf((char*)accel,items);

    const InstanceArray* instance = this->mesh;
    BBox3fa bounds = empty;
    for (size_t i=0; i<items; i++)
    {
      const unsigned int primID = morton[start+i].index;
      bounds.extend(instance->bounds(primID));
      new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
    }

    BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
    /* for N == 4 the item count of the leaf travels in lower.a of the bounds */
    if (N == 4)
      box_o.lower.a = current.size();
#endif
    return NodeRecord(ref,box_o);
  }

private:
  InstanceArray* mesh;
  BVHBuilderMorton::BuildPrim* morton;
  unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Functor handed to the morton builder: maps a morton build primitive to
 *  the bounds of the mesh primitive it refers to. */
template<typename Mesh>
struct CalculateMeshBounds
{
  __forceinline CalculateMeshBounds (Mesh* mesh)
    : mesh(mesh) {}

  /*! Returns mesh->bounds() of the primitive index stored in the morton entry. */
  __forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton)
  {
    const auto primIndex = morton.index;
    return mesh->bounds(primIndex);
  }

private:
  Mesh* mesh;
};
template<int N, typename Mesh, typename Primitive>
class BVHNMeshBuilderMorton : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
public:
BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
: bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}
/* build function */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
morton.clear();
}
size_t numPrimitives = mesh->size();
numPreviousPrimitives = numPrimitives;
/* skip build for empty scene */
if (numPrimitives == 0) {
bvh->set(BVH::emptyNode,empty,0);
return;
}
/* preallocate arrays */
morton.resize(numPrimitives);
size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);
/* create morton code array */
BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);
/* create BVH */
SetBVHNBounds<N> setBounds(bvh);
CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
CalculateMeshBounds<Mesh> calculateBounds(mesh);
auto root = BVHBuilderMorton::build<NodeRecord>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create(),
setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
morton.data(),dest,numPrimitivesGen,settings);
bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);
#if ROTATE_TREE
if (N == 4)
{
for (int i=0; i<ROTATE_TREE; i++)
BVHNRotate<N>::rotate(bvh->root);
bvh->clearBarrier(bvh->root);
}
#endif
/* clear temporary data for static geometry */
if (bvh->scene->isStaticAccel()) {
morton.clear();
}
bvh->cleanup();
}
void clear() {
morton.clear();
}
private:
BVH* bvh;
Mesh* mesh;
mvector<BVHBuilderMorton::BuildPrim> morton;
BVHBuilderMorton::Settings settings;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
};
#if defined(EMBREE_GEOMETRY_TRIANGLE)
/* Factories for morton builders over a single TriangleMesh. Each forwards
 * minLeafSize = 4 and maxLeafSize = 4 to BVHNMeshBuilderMorton; the `mode`
 * argument is not used. */
Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,4);
}
Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,4);
}
Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,4);
}
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,4,4);
}
Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,4,4);
}
Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,4,4);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
/* Factories for morton builders over a single QuadMesh (minLeafSize = 4,
 * maxLeafSize = 4); the `mode` argument is not used. */
Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,QuadMesh,Quad4v> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,4);
}
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,QuadMesh,Quad4v> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,4,4);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
/* Factories for morton builders over a single UserGeometry (minLeafSize = 1,
 * maxLeafSize = BVH4::maxLeafBlocks); the `mode` argument is not used. */
Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,UserGeometry,Object> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,1,BVH4::maxLeafBlocks);
}
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,UserGeometry,Object> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,1,BVH4::maxLeafBlocks);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* Factories for morton builders over a single Instance geometry.
 *
 * BVHNMeshBuilderMorton's constructor takes
 *   (bvh, mesh, geomID, minLeafSize, maxLeafSize[, singleThreadThreshold]),
 * so `gtype` must not be forwarded: passing it would shift every following
 * argument by one position (gtype stored as geomID, geomID used as
 * minLeafSize, ...). `gtype` and `mode` are accepted for signature
 * compatibility only. The leaf size is fixed to 1 because
 * CreateMortonLeaf<N,InstancePrimitive> asserts items <= 1. */
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,Instance,InstancePrimitive> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,1,1);
}
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,Instance,InstancePrimitive> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,1,1);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/* Factories for morton builders over a single InstanceArray geometry.
 *
 * BVHNMeshBuilderMorton's constructor takes
 *   (bvh, mesh, geomID, minLeafSize, maxLeafSize[, singleThreadThreshold]),
 * so `gtype` must not be forwarded: passing it would shift every following
 * argument by one position (gtype stored as geomID, geomID used as
 * minLeafSize, ...). `gtype` and `mode` are accepted for signature
 * compatibility only. The leaf size is fixed to 1 because
 * CreateMortonLeaf<N,InstanceArrayPrimitive> asserts items <= 1. */
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive> MortonBuilder;
  return new MortonBuilder(static_cast<BVH4*>(bvh),mesh,geomID,1,1);
}
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive> MortonBuilder;
  return new MortonBuilder(static_cast<BVH8*>(bvh),mesh,geomID,1,1);
}
#endif
#endif
}
}

View File

@@ -0,0 +1,565 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/primrefgen.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
#define PROFILE 0
#define PROFILE_RUNS 20
namespace embree
{
namespace isa
{
/*! Leaf creation functor for the SAH builder: allocates Primitive blocks
 *  for a range of primitive references and fills them. */
template<int N, typename Primitive>
struct CreateLeaf
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}

  /*! Creates the leaf for prims[set.begin() .. set.end()) and returns its encoded reference. */
  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    /* number of Primitive blocks required for the range */
    const size_t primCount = set.size();
    const size_t blockCount = Primitive::blocks(primCount);

    Primitive* leafBlocks = (Primitive*) alloc.malloc1(blockCount*sizeof(Primitive),BVH::byteAlignment);
    NodeRef leafRef = BVH::encodeLeaf((char*)leafBlocks,blockCount);

    /* the cursor is handed to fill() as an lvalue; presumably fill()
       advances it past the references it consumed (confirm in Primitive::fill) */
    size_t cursor = set.begin();
    for (size_t block=0; block<blockCount; ++block)
      leafBlocks[block].fill(prims,cursor,set.end(),bvh->scene);

    return leafRef;
  }

  BVH* bvh;
};
/*! Leaf creation functor passed to the quantized SAH builder: allocates
 *  Primitive blocks for a range of primitive references and fills them. */
template<int N, typename Primitive>
struct CreateLeafQuantized
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}

  /*! Creates the leaf for prims[set.begin() .. set.end()) and returns its encoded reference. */
  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    /* number of Primitive blocks required for the range */
    const size_t primCount = set.size();
    const size_t blockCount = Primitive::blocks(primCount);

    Primitive* leafBlocks = (Primitive*) alloc.malloc1(blockCount*sizeof(Primitive),BVH::byteAlignment);
    NodeRef leafRef = BVH::encodeLeaf((char*)leafBlocks,blockCount);

    /* the cursor is handed to fill() as an lvalue; presumably fill()
       advances it past the references it consumed (confirm in Primitive::fill) */
    size_t cursor = set.begin();
    for (size_t block=0; block<blockCount; ++block)
      leafBlocks[block].fill(prims,cursor,set.end(),bvh->scene);

    return leafRef;
  }

  BVH* bvh;
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
template<int N, typename Primitive>
struct BVHNBuilderSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
bool primrefarrayalloc;
unsigned int numPreviousPrimitives = 0;
BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}
BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* if we use the primrefarray for allocations we have to take it back from the BVH */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);
/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
return;
}
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
/* create primref array */
if (primrefarrayalloc) {
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;
}
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
/* initialize allocator */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
prims.resize(numPrimitives);
PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
/* pinfo might has zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
prims.clear();
return;
}
/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
#if PROFILE
});
#endif
/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);
/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
void clear() {
prims.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
template<int N, typename Primitive>
struct BVHNBuilderSAHQuantized : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}
BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
prims.clear();
bvh->clear();
return;
}
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
/* create primref array */
prims.resize(numPrimitives);
PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
/* call BVH builder */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
//bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
#if PROFILE
});
#endif
/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
void clear() {
prims.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Leaf creation functor for grid BVHs. The primitive references of a leaf
 *  are grouped by geomID and one SubGridQBVHN<N> is constructed per distinct
 *  geomID; all of them live in a single allocation. */
template<int N, typename Primitive>
struct CreateLeafGrid
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}

  /*! Creates the leaf for prims[set.begin() .. set.end()) and returns its encoded reference. */
  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    const size_t first = set.begin();
    const size_t count = set.size(); //Primitive::blocks(n);
    assert(count <= N);

    /* collect the distinct geomIDs in order of first appearance */
    unsigned int uniqueGeomIDs[N];
    unsigned int numUnique = 0;
    uniqueGeomIDs[numUnique++] = prims[first].geomID();
    for (size_t i=1; i<count; i++)
    {
      const unsigned int candidate = prims[first+i].geomID();
      unsigned int j = 0;
      while (j < numUnique && uniqueGeomIDs[j] != candidate)
        j++;
      if (j == numUnique) // not seen before
        uniqueGeomIDs[numUnique++] = candidate;
    }

    /* allocate all leaf memory in one single block */
    SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(numUnique*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
    NodeRef node = BVH::encodeLeaf((char*)accel,numUnique);

    /* build one SubGridQBVHN per geomID from the references carrying that ID */
    for (unsigned int g=0; g<numUnique; g++)
    {
      unsigned int x[N];
      unsigned int y[N];
      unsigned int primID[N];
      BBox3fa bounds[N];
      unsigned int pos = 0;

      for (size_t i=first; i<first+count; i++)
      {
        const PrimRef& ref = prims[i];
        if (unlikely(ref.geomID() != uniqueGeomIDs[g])) continue;

        /* the primID of the reference indexes the subgrid build data */
        const SubGridBuildData& sgrid_bd = sgrids[ref.primID()];
        x[pos] = sgrid_bd.sx;
        y[pos] = sgrid_bd.sy;
        primID[pos] = sgrid_bd.primID;
        bounds[pos] = ref.bounds();
        pos++;
      }
      assert(pos <= N);
      new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,uniqueGeomIDs[g],pos);
    }

    return node;
  }

  BVH* bvh;
  const SubGridBuildData * const sgrids;
};
template<int N>
struct BVHNBuilderSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
GridMesh* mesh;
mvector<PrimRef> prims;
mvector<SubGridBuildData> sgrids;
GeneralBVHBuilder::Settings settings;
const unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}
BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* if we use the primrefarray for allocations we have to take it back from the BVH */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);
const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
numPreviousPrimitives = numGridPrimitives;
PrimInfo pinfo = mesh ? createPrimRefArrayGrids(mesh,prims,sgrids) : createPrimRefArrayGrids(scene,prims,sgrids);
const size_t numPrimitives = pinfo.size();
/* no primitives */
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
sgrids.clear();
return;
}
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
/* create primref array */
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
/* initialize allocator */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
/* pinfo might has zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
sgrids.clear();
prims.clear();
return;
}
/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
/* clear temporary array */
sgrids.clear();
/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);
/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
void clear() {
prims.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
#if defined(EMBREE_GEOMETRY_TRIANGLE)
/* SAH builder factories for triangle geometry. The numeric arguments are, in
 * order: sahBlockSize, intCost, minLeafSize, maxLeafSize. A trailing `true`
 * (scene builders only) sets the builder's primrefarrayalloc flag. The `mode`
 * argument is not used. */

/* BVH4, single mesh */
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4v>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4i>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}

/* BVH4, whole scene */
Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4v>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<4,Triangle4i>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type,true);
}
Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<4,Triangle4i>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
#if defined(__AVX__)

/* BVH8, single mesh */
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4>(static_cast<BVH8*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4v>(static_cast<BVH8*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4i>(static_cast<BVH8*>(bvh),mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type);
}

/* BVH8, whole scene */
Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4v>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<8,Triangle4i>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type,true);
}
Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<8,Triangle4i>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<8,Triangle4>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,TriangleMesh::geom_type);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
/* SAH builder factories for quad geometry. The numeric arguments are, in
 * order: sahBlockSize, intCost, minLeafSize, maxLeafSize. A trailing `true`
 * (scene builders only) sets the builder's primrefarrayalloc flag. The `mode`
 * argument is not used. */
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Quad4v>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Quad4i>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<4,Quad4v>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<4,Quad4i>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type,true);
}
Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<4,Quad4v>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<4,Quad4i>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<8,Quad4v>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAH<8,Quad4i>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type,true);
}
Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<8,Quad4v>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  return new BVHNBuilderSAHQuantized<8,Quad4i>(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,QuadMesh::geom_type);
}
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<8,Quad4v>(static_cast<BVH8*>(bvh),mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
/* SAH builder factories for user geometry (Object leaves). The scene
 * builders read their leaf size limits from the device; the mesh builders
 * use minLeafSize = 1 and maxLeafSize = inf. The `mode` argument is not used. */
Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  const int leafSizeMin = scene->device->object_accel_min_leaf_size;
  const int leafSizeMax = scene->device->object_accel_max_leaf_size;
  return new BVHNBuilderSAH<4,Object>(static_cast<BVH4*>(bvh),scene,4,1.0f,leafSizeMin,leafSizeMax,UserGeometry::geom_type);
}
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<4,Object>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
}
#if defined(__AVX__)
Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  const int leafSizeMin = scene->device->object_accel_min_leaf_size;
  const int leafSizeMax = scene->device->object_accel_max_leaf_size;
  return new BVHNBuilderSAH<8,Object>(static_cast<BVH8*>(bvh),scene,8,1.0f,leafSizeMin,leafSizeMax,UserGeometry::geom_type);
}
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAH<8,Object>(static_cast<BVH8*>(bvh),mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* SAH builder factories for instance geometry (InstancePrimitive leaves).
 * The geometry type mask is supplied by the caller and forwarded unchanged;
 * the `mode` argument of the mesh builders is not used. */
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  typedef BVHNBuilderSAH<4,InstancePrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH4*>(bvh),scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNBuilderSAH<4,InstancePrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  typedef BVHNBuilderSAH<8,InstancePrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH8*>(bvh),scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNBuilderSAH<8,InstancePrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH8*>(bvh),mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/* SAH builder factories for instance array geometry (InstanceArrayPrimitive
 * leaves, minLeafSize = maxLeafSize = 1). The geometry type mask is supplied
 * by the caller and forwarded unchanged; the `mode` argument of the mesh
 * builders is not used. */
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  typedef BVHNBuilderSAH<4,InstanceArrayPrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH4*>(bvh),scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNBuilderSAH<4,InstanceArrayPrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  typedef BVHNBuilderSAH<8,InstanceArrayPrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH8*>(bvh),scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  typedef BVHNBuilderSAH<8,InstanceArrayPrimitive> SAHBuilder;
  return new SAHBuilder(static_cast<BVH8*>(bvh),mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_GRID)
/* SAH builder factories for grid geometry. The numeric arguments are, in
 * order: sahBlockSize, intCost, minLeafSize, maxLeafSize; `mode` is passed
 * through to the builder constructor. */
Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAHGrid<4>(static_cast<BVH4*>(bvh),mesh,geomID,4,1.0f,4,4,mode);
}
Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  // FIXME: check whether cost factors are correct
  return new BVHNBuilderSAHGrid<4>(static_cast<BVH4*>(bvh),scene,4,1.0f,4,4,mode);
}
#if defined(__AVX__)
Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode)
{
  return new BVHNBuilderSAHGrid<8>(static_cast<BVH8*>(bvh),mesh,geomID,8,1.0f,8,8,mode);
}
Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  // FIXME: check whether cost factors are correct
  return new BVHNBuilderSAHGrid<8>(static_cast<BVH8*>(bvh),scene,8,1.0f,8,8,mode);
}
#endif
#endif
}
}

View File

@@ -0,0 +1,713 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/bvh_builder_msmblur.h"
#include "../builders/primrefgen.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
namespace embree
{
namespace isa
{
#if 0
/* Leaf creation functor for the (disabled) single-segment motion blur path.
 * Allocates Primitive::blocks(set.size()) primitive blocks, encodes them as a
 * leaf and fills each block through Primitive::fillMB, accumulating the linear
 * bounds returned by the fills.
 * NOTE(review): this definition is enclosed in an `#if 0` region and is
 * therefore not compiled. */
template<int N, typename Primitive>
struct CreateMBlurLeaf
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
__forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}
/* The 'prims' parameter hides the member of the same name inside this call. */
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t items = Primitive::blocks(set.size());
size_t start = set.begin();
/* NOTE(review): 'end' and 'current' are not declared in this functor (its inputs
 * are 'prims' and 'set'); this assert must be rewritten before re-enabling the code. */
for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
NodeRef node = bvh->encodeLeaf((char*)accel,items);
LBBox3fa allBounds = empty;
/* presumably fillMB advances 'start' as it consumes primitive references -- TODO confirm */
for (size_t i=0; i<items; i++)
allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));
return NodeRecordMB(node,allBounds);
}
BVH* bvh;
PrimRef* prims;
size_t time;
};
#endif
/*! Leaf creation functor used by the multi-segment motion blur builder.
 *  For a build record it allocates the primitive blocks of one leaf, encodes
 *  them as a leaf node and fills every block via Primitive::fillMB. The result
 *  carries the merged linear bounds and the time range of the record. */
template<int N, typename Mesh, typename Primitive>
struct CreateMSMBlurLeaf
{
  using BVH = BVHN<N>;
  using NodeRef = typename BVH::NodeRef;
  using NodeRecordMB4D = typename BVH::NodeRecordMB4D;

  __forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}

  __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
  {
    const auto& set = current.prims;
    size_t numBlocks = Primitive::blocks(set.size());
    size_t cursor = set.begin();
    size_t last = set.end();

    /* all primitive references of a leaf have to share one geomID */
    for (size_t k=cursor; k<last; k++)
      assert((*set.prims)[cursor].geomID() == (*set.prims)[k].geomID());

    /* allocate and encode the leaf */
    Primitive* leaf = (Primitive*) alloc.malloc1(numBlocks*sizeof(Primitive),BVH::byteNodeAlignment);
    NodeRef ref = bvh->encodeLeaf((char*)leaf,numBlocks);

    /* fill the blocks one after another and merge their linear bounds */
    LBBox3fa merged = empty;
    for (size_t b=0; b<numBlocks; b++)
      merged.extend(leaf[b].fillMB(set.prims->data(), cursor, set.end(), bvh->scene, set.time_range));

    return NodeRecordMB4D(ref,merged,set.time_range);
  }

  BVH* bvh;
};
/* Motion blur BVH with 4D nodes and internal time splits */
/* Builder front end: gathers PrimRefMB references for all geometries of the
 * scene that match gtype_, runs BVHBuilderMSMBlur::build over them and stores
 * the resulting root in the BVH. Only the multi-segment path is active; the
 * single-segment path is kept below inside `#if 0`. */
template<int N, typename Mesh, typename Primitive>
struct BVHNBuilderMBlurSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
BVH* bvh;
Scene* scene;
const size_t sahBlockSize;
const float intCost;
const size_t minLeafSize;
const size_t maxLeafSize;
const Geometry::GTypeMask gtype_;
/* maxLeafSize is clamped to what a leaf can hold: Primitive::max_size()*BVH::maxLeafBlocks */
BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}
/* Entry point: clears the BVH for an empty scene, otherwise brackets
 * buildMultiSegment() with preBuild()/cleanup()/postBuild(). */
void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
if (numPrimitives == 0) { bvh->clear(); return; }
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");
/* with PROFILE enabled the build call below becomes the body of a lambda handed to profile() */
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
//const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
/*if (numTimeSegments == 1)
buildSingleSegment(numPrimitives);
else*/
buildMultiSegment(numPrimitives);
#if PROFILE
});
#endif
/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}
#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf.
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
prims.data(),pinfo,settings);
bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif
/* Active build path. The primitive reference array is local to this call,
 * so nothing has to be released afterwards. */
void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
/* both estimates are driven by the number of time segments, not the primitive count */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
/* guards against minLeafSize exceeding the clamped maxLeafSize */
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
RecalculatePrimRef<Mesh>(scene),
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
bvh->scene->progressInterface,
settings);
/* the number of time segments is what gets recorded as the BVH's primitive count */
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}
/* intentionally empty: this builder keeps no data between builds */
void clear() {
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Recomputes primitive references of grid sub-grids for a given time range.
 *  The primID stored in a PrimRefMB is an index into the 'sgrids' build data
 *  array, which in turn provides the sub-grid position and the grid primID. */
struct GridRecalculatePrimRef
{
  Scene* scene;
  const SubGridBuildData * const sgrids;

  __forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
    : scene(scene), sgrids(sgrids) {}

  /*! returns a new PrimRefMB whose linear bounds cover 'time_range' */
  __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
  {
    const unsigned int meshID = prim.geomID();
    const unsigned int sgridIndex = prim.primID();
    const GridMesh* mesh = scene->get<GridMesh>(meshID);

    /* bounds are computed by the sibling helper, which performs the same lookups */
    const LBBox3fa lbounds = linearBounds(prim,time_range);

    const unsigned totalSegments = mesh->numTimeSegments();
    const range<int> activeSegments = mesh->timeSegmentRange(time_range);
    return PrimRefMB (lbounds, activeSegments.size(), mesh->time_range, totalSegments, meshID, sgridIndex);
  }

  /*! returns the linear bounds of the sub-grid referenced by 'prim' over 'time_range' */
  __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const
  {
    const unsigned int meshID = prim.geomID();
    const GridMesh* mesh = scene->get<GridMesh>(meshID);
    const unsigned int sgridIndex = prim.primID();
    const SubGridBuildData& sg = sgrids[sgridIndex];
    const unsigned int gridID = sg.primID;
    const size_t gx = sg.x();
    const size_t gy = sg.y();
    return mesh->linearBounds(mesh->grid(gridID),gx,gy,time_range);
  }
};
/*! Leaf creation functor of the multi-segment motion blur grid builder.
 *  The sub-grids of a build record are grouped by geomID; one
 *  SubGridMBQBVHN<N> is constructed per distinct geomID and all of them are
 *  placed in a single allocation that is encoded as one leaf. */
template<int N>
struct CreateMSMBlurLeafGrid
{
  using BVH = BVHN<N>;
  using NodeRef = typename BVH::NodeRef;
  using NodeRecordMB4D = typename BVH::NodeRecordMB4D;

  __forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
    : scene(scene), bvh(bvh), sgrids(sgrids) {}

  __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
  {
    const size_t count = current.prims.size();
    const size_t first = current.prims.begin();
    const PrimRefMB* refs = current.prims.prims->data();

    /* collect the distinct geomIDs in order of first appearance */
    assert(count <= N);
    unsigned int uniqueIDs[N];
    unsigned int numUnique = 0;
    uniqueIDs[numUnique++] = refs[first].geomID();
    for (size_t i=1; i<count; i++)
    {
      const unsigned int id = refs[first+i].geomID();
      size_t slot = 0;
      while (slot < numUnique && uniqueIDs[slot] != id)
        slot++;
      if (slot == numUnique)
        uniqueIDs[numUnique++] = id;
    }

    /* allocate all leaf memory in one single block */
    SubGridMBQBVHN<N>* leaf = (SubGridMBQBVHN<N>*) alloc.malloc1(numUnique*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
    typename BVH::NodeRef ref = bvh->encodeLeaf((char*)leaf,numUnique);

    LBBox3fa merged = empty;
    for (size_t g=0; g<numUnique; g++)
    {
      const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(uniqueIDs[g]);

      /* per-geometry gather buffers handed to the SubGridMBQBVHN constructor */
      unsigned int subX[N];
      unsigned int subY[N];
      unsigned int gridID[N];
      BBox3fa boundsT0[N];
      BBox3fa boundsT1[N];
      unsigned int filled = 0;

      for (size_t i=0; i<count; i++)
      {
        const PrimRefMB& ref_i = refs[first+i];
        if (unlikely(ref_i.geomID() != uniqueIDs[g])) continue;

        const SubGridBuildData& sg = sgrids[ref_i.primID()];
        subX[filled] = sg.sx;
        subY[filled] = sg.sy;
        gridID[filled] = sg.primID;

        const size_t gx = sg.x();
        const size_t gy = sg.y();
        LBBox3fa lb = mesh->linearBounds(mesh->grid(sg.primID),gx,gy,current.prims.time_range);
        merged.extend(lb);
        boundsT0[filled] = lb.bounds0;
        boundsT1[filled] = lb.bounds1;
        filled++;
      }
      assert(filled <= N);
      new (&leaf[g]) SubGridMBQBVHN<N>(subX,subY,gridID,boundsT0,boundsT1,uniqueIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),filled);
    }
    return NodeRecordMB4D(ref,merged,current.prims.time_range);
  }

  Scene *scene;
  BVH* bvh;
  const SubGridBuildData * const sgrids;
};
#if 0
/* Leaf creation functor for the (disabled) single-segment grid motion blur
 * path. Groups the sub-grids of a leaf by geomID and constructs one
 * SubGridMBQBVHN<N> per distinct geomID, using the bounds of time steps 0 and 1.
 * NOTE(review): this definition is enclosed in an `#if 0` region and is
 * therefore not compiled. */
template<int N>
struct CreateLeafGridMB
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
__forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
: scene(scene), bvh(bvh), sgrids(sgrids) {}
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = set.size();
const size_t start = set.begin();
/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();
for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
/* linear search over the IDs collected so far (at most N entries) */
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}
/* allocate all leaf memory in one single block */
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
LBBox3fa allBounds = empty;
for (size_t g=0;g<num_geomIDs;g++)
{
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds0[N];
BBox3fa bounds1[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
/* only gather the sub-grids that belong to the current geometry */
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
/* from here on the scalars x/y shadow the arrays x[]/y[] until the end of this loop body */
const size_t x = sgrid_bd.x();
const size_t y = sgrid_bd.y();
bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
assert(valid0);
assert(valid1);
allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
pos++;
}
/* fixed time offset 0.0f and scale 1.0f are passed for the single segment */
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
}
return NodeRecordMB(node,allBounds);
}
Scene *scene;
BVH* bvh;
const SubGridBuildData * const sgrids;
};
#endif
/* Motion blur BVH with 4D nodes and internal time splits */
/* Grid variant of the motion blur builder: every grid is decomposed into
 * sub-grids, one primitive reference is created per sub-grid, and the primID
 * of that reference indexes the 'sgrids' build data array kept by the builder.
 * Only the multi-segment path is active; the single-segment path is kept
 * below inside `#if 0`. */
template<int N>
struct BVHNBuilderMBlurSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
BVH* bvh;
Scene* scene;
const size_t sahBlockSize;
const float intCost;
const size_t minLeafSize;
const size_t maxLeafSize;
/* per-sub-grid build data, filled by the createPrimRefArray*Grid functions */
mvector<SubGridBuildData> sgrids;
/* maxLeafSize is clamped to BVH::maxLeafBlocks */
BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {}
/* Fills 'prims' and 'sgrids' with one entry per sub-grid, using the bounds of
 * time step 'itime'. Within the visible code this is referenced only from the
 * disabled buildSingleSegment(). */
PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator<GridMesh,true> iter(scene);
pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene */
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j,range<size_t>(0,1))) continue;
/* the empty bounds are a placeholder: this pass only counts sub-grids */
BBox3fa bounds = empty;
const PrimRef prim(bounds,unsigned(geomID),unsigned(j));
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
size_t numPrimitives = pinfo.size();
if (numPrimitives == 0) return pinfo;
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
/* output position of this range starts at the size accumulated in 'base' */
k = base.size();
size_t p_index = k;
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
const GridMesh::Grid &g = mesh->grid(j);
if (!mesh->valid(j,range<size_t>(0,1))) continue;
/* sub-grids are visited with a stride of 2 in both directions */
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid
/* the primID of the reference is the index into sgrids/prims, not the grid index */
const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index));
pinfo.add_center2(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
assert(pinfo.size() == numPrimitives);
return pinfo;
}
/* Fills 'prims' and 'sgrids' with one PrimRefMB per sub-grid of every grid that
 * is valid for the time segment range of 't0t1'. The returned PrimInfoMB has
 * its time_range set to 't0t1'. */
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator<GridMesh,true> iter(scene);
pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene */
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
/* NOTE(review): 'bounds' is not read in this pass */
LBBox3fa bounds(empty);
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
pinfoMB.merge(gridMB);
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
size_t numPrimitives = pinfoMB.size();
if (numPrimitives == 0) return pinfoMB;
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
/* output position of this range starts at the size accumulated in 'base' */
k = base.size();
size_t p_index = k;
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
const GridMesh::Grid &g = mesh->grid(j);
/* sub-grids are visited with a stride of 2 in both directions */
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
/* the primID of the reference is the index into sgrids/prims, not the grid index */
const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
pinfoMB.add_primref(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
assert(pinfoMB.size() == numPrimitives);
pinfoMB.time_range = t0t1;
return pinfoMB;
}
/* Entry point: clears the BVH for an empty scene, otherwise brackets
 * buildMultiSegment() with preBuild()/cleanup()/postBuild(). */
void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
if (numPrimitives == 0) { bvh->clear(); return; }
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");
//const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
//if (numTimeSegments == 1)
// buildSingleSegment(numPrimitives);
//else
buildMultiSegment(numPrimitives);
/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}
#if 0
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
//TODO: check leaf_bytes
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB::Create(),
typename BVH::AABBNodeMB::Set(),
CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
prims.data(),pinfo,settings);
bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif
/* Active build path. 'prims' is local to this call, while 'sgrids' is a
 * member that both the recalculation functor and the leaf creator read. */
void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* created after the array fill above, which resizes sgrids, so that sgrids.data() is taken from the resized array */
GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
//FIXME: check leaf_bytes
//const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>));
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
/* guards against minLeafSize exceeding the clamped maxLeafSize */
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = false;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
recalculatePrimRef,
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
settings);
/* the number of time segments is what gets recorded as the BVH's primitive count */
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}
/* intentionally empty: nothing is released here (sgrids keeps its contents) */
void clear() {
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
#if defined(EMBREE_GEOMETRY_TRIANGLE)
/*! Factory: motion blur SAH builder for a BVH4 with Triangle4i leaves over all
 *  triangle meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4,
 *  unbounded maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>(accel,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
}
/*! Factory: motion blur SAH builder for a BVH4 with Triangle4vMB leaves over all
 *  triangle meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4,
 *  unbounded maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>(accel,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH builder for a BVH8 with Triangle4i leaves over all
 *  triangle meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4,
 *  unbounded maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>(accel,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
}
/*! Factory: motion blur SAH builder for a BVH8 with Triangle4vMB leaves over all
 *  triangle meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4,
 *  unbounded maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>(accel,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
/*! Factory: motion blur SAH builder for a BVH4 with Quad4i leaves over all quad
 *  meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4, unbounded
 *  maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>(accel,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH builder for a BVH8 with Quad4i leaves over all quad
 *  meshes of the scene (sahBlockSize 4, intCost 1.0f, minLeafSize 4, unbounded
 *  maxLeafSize). The 'mode' argument is not used here. */
Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>(accel,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
/*! Factory: motion blur SAH builder for a BVH4 with Object leaves over the user
 *  geometries of the scene. The leaf size limits are read from the device
 *  settings. The 'mode' argument is not used here. */
Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  const int leafMin = scene->device->object_accel_mb_min_leaf_size;
  const int leafMax = scene->device->object_accel_mb_max_leaf_size;
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>(accel,scene,4,1.0f,leafMin,leafMax,Geometry::MTY_USER_GEOMETRY);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH builder for a BVH8 with Object leaves over the user
 *  geometries of the scene. The leaf size limits are read from the device
 *  settings. The 'mode' argument is not used here. */
Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  const int leafMin = scene->device->object_accel_mb_min_leaf_size;
  const int leafMax = scene->device->object_accel_mb_max_leaf_size;
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>(accel,scene,8,1.0f,leafMin,leafMax,Geometry::MTY_USER_GEOMETRY);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
/*! Factory: motion blur SAH builder for a BVH4 with InstancePrimitive leaves;
 *  the geometry type mask is supplied by the caller and leaves hold exactly
 *  one primitive (minLeafSize == maxLeafSize == 1). */
Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>(accel,scene,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH builder for a BVH8 with InstancePrimitive leaves;
 *  the geometry type mask is supplied by the caller and leaves hold exactly
 *  one primitive (minLeafSize == maxLeafSize == 1). */
Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>(accel,scene,8,1.0f,1,1,gtype);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/*! Factory: motion blur SAH builder for a BVH4 with InstanceArrayPrimitive
 *  leaves; the geometry type mask is supplied by the caller and leaves hold
 *  exactly one primitive (minLeafSize == maxLeafSize == 1). */
Builder* BVH4InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAH<4,InstanceArray,InstanceArrayPrimitive>(accel,scene,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH builder for a BVH8 with InstanceArrayPrimitive
 *  leaves; the geometry type mask is supplied by the caller and leaves hold
 *  exactly one primitive (minLeafSize == maxLeafSize == 1). */
Builder* BVH8InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAH<8,InstanceArray,InstanceArrayPrimitive>(accel,scene,8,1.0f,1,1,gtype);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_GRID)
/*! Factory: motion blur SAH grid builder for a BVH4 (sahBlockSize 4, intCost
 *  1.0f, minLeafSize 4, maxLeafSize 4). The 'mode' argument is not used here. */
Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH4* const accel = static_cast<BVH4*>(bvh);
  return new BVHNBuilderMBlurSAHGrid<4>(accel,scene,4,1.0f,4,4);
}
#if defined(__AVX__)
/*! Factory: motion blur SAH grid builder for a BVH8 (sahBlockSize 8, intCost
 *  1.0f, minLeafSize 8, maxLeafSize 8). The 'mode' argument is not used here. */
Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode)
{
  BVH8* const accel = static_cast<BVH8*>(bvh);
  return new BVHNBuilderMBlurSAHGrid<8>(accel,scene,8,1.0f,8,8);
}
#endif
#endif
}
}

View File

@@ -0,0 +1,201 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/primrefgen.h"
#include "../builders/primrefgen_presplit.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
namespace embree
{
namespace isa
{
/*! Leaf creation functor of the spatial split builder: allocates the primitive
 *  blocks needed for a range of primitive references, encodes them as a leaf
 *  and fills the blocks one after another. */
template<int N, typename Primitive>
struct CreateLeafSpatial
{
  using BVH = BVHN<N>;
  using NodeRef = typename BVH::NodeRef;

  __forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}

  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    const size_t numRefs = set.size();
    const size_t numBlocks = Primitive::blocks(numRefs);
    size_t cursor = set.begin();

    Primitive* leaf = (Primitive*) alloc.malloc1(numBlocks*sizeof(Primitive),BVH::byteAlignment);
    typename BVH::NodeRef ref = BVH::encodeLeaf((char*)leaf,numBlocks);

    /* every block consumes references starting at 'cursor' */
    for (size_t b=0; b<numBlocks; b++)
      leaf[b].fill(prims,cursor,set.end(),bvh->scene);

    return ref;
  }

  BVH* bvh;
};
template<int N, typename Mesh, typename Primitive, typename Splitter>
struct BVHNBuilderFastSpatialSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Mesh* mesh;
mvector<PrimRef> prims0;
GeneralBVHBuilder::Settings settings;
const float splitFactor;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(scene->device->max_spatial_split_replications) {}
BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(scene->device->max_spatial_split_replications), geomID_(geomID) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* skip build for empty scene */
const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
numPreviousPrimitives = numOriginalPrimitives;
if (numOriginalPrimitives == 0) {
prims0.clear();
bvh->clear();
return;
}
const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
const bool usePreSplits = scene->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));
/* create primref array */
const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
prims0.resize(numSplitPrimitives);
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
NodeRef root(0);
PrimInfo pinfo;
if (likely(usePreSplits))
{
/* spatial presplit SAH BVH builder */
pinfo = mesh ?
createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
/* call BVH builder */
root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
}
else
{
/* standard spatial split SAH BVH builder */
pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numSplitPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray(scene,Mesh::geom_type,false,numSplitPrimitives,prims0,bvh->scene->progressInterface);
Splitter splitter(scene);
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
/* call BVH builder */
root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
CreateLeafSpatial<N,Primitive>(bvh),
splitter,
bvh->scene->progressInterface,
prims0.data(),
numSplitPrimitives,
pinfo,settings);
/* ==================== */
}
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims0.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
/*! Releases the primitive reference buffer (prims0) held by this builder. */
void clear()
{
  prims0.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
#if defined(EMBREE_GEOMETRY_TRIANGLE)
/* Scene-level spatial-split SAH builder factories for triangle meshes.
 * Each factory names its concrete builder type and forwards the opaque
 * `bvh` pointer (cast to the matching BVH width) plus the build mode. */
Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode);
}
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode);
}
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode);
}
#if defined(__AVX__)
/* 8-wide variants; note these pass `inf` where the BVH4 factories pass the device limit */
Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,mode);
}
Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,mode);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
/* Scene-level spatial-split SAH builder factories for quad meshes. */
Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,4,1.0f,4,inf,mode);
}
#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode)
{
  typedef BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,4,1.0f,4,inf,mode);
}
#endif
#endif
}
}

View File

@@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "bvh_builder_twolevel.h"
#include "bvh_statistics.h"
#include "../builders/bvh_builder_sah.h"
#include "../common/scene_line_segments.h"
#include "../common/scene_triangle_mesh.h"
#include "../common/scene_quad_mesh.h"
#define PROFILE 0
namespace embree
{
namespace isa
{
/*! Stores the target BVH, the scene and the build options. The build
 *  reference and primitive reference arrays start out empty and are
 *  allocated through the scene's device. */
template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
  : bvh(bvh),
    scene(scene),
    refs(scene->device,0),
    prims(scene->device,0),
    singleThreadThreshold(singleThreadThreshold),
    gtype(gtype),
    useMortonBuilder_(useMortonBuilder)
{
}
/*! Destructor; performs no work beyond destroying the members. */
template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () = default;
// ===========================================================================
// ===========================================================================
// ===========================================================================
/*! Builder entry point.
 *
 *  Steps, in order:
 *   1. drop per-geometry builders/objects whose index is >= scene->size()
 *   2. reset the allocator and early-out when the scene has no primitives of type gtype
 *   3. give every supported mesh a "small" or "large" per-geometry builder
 *   4. let every enabled mesh append its build reference(s) to `refs`
 *   5. either use the single build reference directly as the root, or build a
 *      top-level hierarchy over all build references
 *
 *  Which top-level builder is used is selected at compile time through
 *  ENABLE_DIRECT_SAH_MERGE_BUILDER / ENABLE_OPEN_SEQUENTIAL. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
{
/* delete some objects */
/* (all slots at or beyond the current scene size; builders and objects are indexed alike) */
size_t num = scene->size();
if (num < bvh->objects.size()) {
parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
builders[i].reset();
delete bvh->objects[i]; bvh->objects[i] = nullptr;
}
});
}
/* with PROFILE enabled the whole build below is wrapped in an endless loop */
#if PROFILE
while(1)
#endif
{
/* reset memory allocator */
bvh->alloc.reset();
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype,false);
if (numPrimitives == 0) {
prims.resize(0);
bvh->set(BVH::emptyNode,empty,0);
return;
}
/* calculate the size of the entire BVH */
const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");
/* resize object array if scene got larger */
if (bvh->objects.size() < num) bvh->objects.resize(num);
if (builders.size() < num) builders.resize(num);
resizeRefsList ();
/* nextRef is the shared write cursor into refs used by attachBuildRefs */
nextRef.store(0);
/* create acceleration structures */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
Mesh* mesh = scene->getSafe<Mesh>(objectID);
/* ignore meshes we do not support */
if (mesh == nullptr || mesh->numTimeSteps != 1)
continue;
if (isSmallGeometry(mesh)) {
setupSmallBuildRefBuilder (objectID, mesh);
} else {
setupLargeBuildRefBuilder (objectID, mesh);
}
}
});
/* parallel build of acceleration structures */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
/* ignore if not a supported mesh or not enabled */
Mesh* mesh = scene->getSafe<Mesh>(objectID);
if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
continue;
builders[objectID]->attachBuildRefs (this);
}
});
#if PROFILE
double d0 = getSeconds();
#endif
/* fast path for single geometry scenes */
if (nextRef == 1) {
bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
}
else
{
/* open all large nodes */
/* (shrink refs to the number of build references actually written) */
refs.resize(nextRef);
/* this probably needs some more tuning */
const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
#if ENABLE_OPEN_SEQUENTIAL
open_sequential(extSize);
#endif
/* compute PrimRefs */
prims.resize(refs.size());
#endif
{
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
/* accumulate the PrimInfo over all build references */
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t i=r.begin(); i<r.end(); i++) {
pinfo.add_center2(refs[i]);
}
return pinfo;
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
#else
/* accumulate the PrimInfo and convert each build reference into a PrimRef whose ID is the node reference */
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t i=r.begin(); i<r.end(); i++) {
pinfo.add_center2(refs[i]);
prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
}
return pinfo;
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
#endif
/* skip if all objects were empty */
if (pinfo.size() == 0)
bvh->set(BVH::emptyNode,empty,0);
/* otherwise build toplevel hierarchy */
else
{
/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(N);
settings.minLeafSize = 1;
settings.maxLeafSize = 1;
settings.travCost = 1.0f;
settings.intCost = 1.0f;
settings.singleThreadThreshold = singleThreadThreshold;
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
/* grow refs to extSize so the open/merge builder has room for opened nodes */
refs.resize(extSize);
NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
/* leaf creation: a top-level leaf is the already built sub-tree root of its single build reference */
[&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
assert(range.size() == 1);
return (NodeRef) refs[range.begin()].node;
},
/* node opener: replaces a build reference by references to its children */
[&] (BuildRef &bref, BuildRef *refs) -> size_t {
return openBuildRef(bref,refs);
},
/* progress callback: dn is ignored, the monitor is always invoked with 0 */
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
refs.data(),extSize,pinfo,settings);
#else
NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
/* leaf creation: the PrimRef ID stores the node reference written above */
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
assert(range.size() == 1);
return (NodeRef) prims[range.begin()].ID();
},
/* progress callback: dn is ignored, the monitor is always invoked with 0 */
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
prims.data(),pinfo,settings);
#endif
bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
}
}
}
bvh->alloc.cleanup();
bvh->postBuild(t0);
#if PROFILE
double d1 = getSeconds();
std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
#endif
}
}
/*! Destroys the per-geometry builder and the per-geometry BVH stored for
 *  geomID. IDs beyond the end of the object array are ignored. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
{
  if (geomID < bvh->objects.size())
  {
    /* resetting an already empty unique_ptr is a no-op */
    builders[geomID].reset();

    delete bvh->objects[geomID];
    bvh->objects[geomID] = nullptr;
  }
}
/*! Clears every existing per-geometry BVH (the objects themselves are kept),
 *  destroys all per-geometry builders and empties the build reference list. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
{
  for (size_t objectID=0; objectID<bvh->objects.size(); objectID++)
  {
    BVH* const object = bvh->objects[objectID];
    if (object != nullptr)
      object->clear();
  }

  /* resetting an already empty unique_ptr is a no-op */
  for (auto& builder : builders)
    builder.reset();

  refs.clear();
}
/*! Repeatedly replaces the build reference with the largest bounds_area by
 *  references to its non-empty children, for as long as a full set of N
 *  children still fits into extSize entries. Stops early once the largest
 *  reference is a leaf (leaves carry bounds_area 0, see BuildRef). */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
{
  if (refs.size() == 0)
    return;

  refs.reserve(extSize);

#if 1
  /* issue prefetches for all inner nodes that may get opened */
  for (size_t k=0; k<refs.size(); ++k)
  {
    NodeRef candidate = refs[k].node;
    if (candidate.isAABBNode())
      BVH::prefetch(candidate);
  }
#endif

  /* heap ordered by BuildRef::operator<, i.e. by bounds_area */
  std::make_heap(refs.begin(),refs.end());

  while (refs.size()+N-1 <= extSize)
  {
    /* move the largest reference to the back and inspect it */
    std::pop_heap (refs.begin(),refs.end());
    NodeRef largest = refs.back().node;
    if (largest.isLeaf()) break;
    refs.pop_back();

    /* push all non-empty children back onto the heap */
    AABBNode* inner = largest.getAABBNode();
    for (size_t c=0; c<N; ++c)
    {
      if (inner->child(c) == BVH::emptyNode) continue;
      refs.push_back(BuildRef(inner->bounds(c),inner->child(c)));

#if 1
      NodeRef childRef = inner->child(c);
      if (childRef.isAABBNode())
        childRef.prefetch();
#endif

      std::push_heap (refs.begin(),refs.end());
    }
  }
}
/*! Makes sure the builder slot of objectID holds a RefBuilderSmall. A new one
 *  is installed when the slot is empty (new mesh) or currently holds a
 *  different builder kind (size change resulted in a large->small change). */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
{
  RefBuilderBase* const current = builders[objectID].get();

  /* keep the existing builder if it already is a small one */
  const bool isSmallBuilder = current != nullptr && dynamic_cast<RefBuilderSmall*>(current) != nullptr;
  if (isSmallBuilder)
    return;

  builders[objectID].reset (new RefBuilderSmall(objectID));
}
/*! Makes sure objectID has its own per-geometry BVH and a RefBuilderLarge
 *  that drives its builder.
 *
 *  The acceleration structure and the builder are (re)created when
 *   - no BVH object exists yet for this geometry (new mesh),
 *   - the builder slot is empty (e.g. clear() resets all builders but keeps
 *     the BVH objects),
 *   - the mesh build quality differs from the one the builder was created with,
 *   - the slot holds a different builder kind (small->large size change).
 *
 *  \param objectID  index of the geometry inside the scene / object arrays
 *  \param mesh      the geometry; only its build quality is read here */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
{
  RefBuilderBase* const current = builders[objectID].get();

  if (bvh->objects[objectID] == nullptr ||                   // new mesh
      current == nullptr ||                                  // builder was reset; must not be dereferenced below
      current->meshQualityChanged (mesh->quality) ||         // changed build quality
      dynamic_cast<RefBuilderLarge*>(current) == nullptr)    // size change resulted in small->large change
  {
    Builder* builder = nullptr;

    /* null the slot right away so it never dangles should createMeshAccel fail */
    delete bvh->objects[objectID];
    bvh->objects[objectID] = nullptr;

    createMeshAccel(objectID, builder);
    builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
  }
}
/* Factory functions creating 4-wide two-level builders. `bvh` is an opaque
 * pointer that is interpreted as BVH4*; the remaining constructor arguments
 * keep their defaults. */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,QuadMesh,Quad4v> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,QuadMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,UserGeometry,Object> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,UserGeometry::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* instance builders receive the geometry type mask from the caller */
Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,Instance,InstancePrimitive> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,gtype,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<4,InstanceArray,InstanceArrayPrimitive> BuilderTy;
  return new BuilderTy(static_cast<BVH4*>(bvh),scene,gtype,useMortonBuilder);
}
#endif
#if defined(__AVX__)
/* Factory functions creating 8-wide two-level builders (AVX builds only).
 * `bvh` is an opaque pointer that is interpreted as BVH8*. */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,TriangleMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,QuadMesh,Quad4v> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,QuadMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,UserGeometry,Object> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,UserGeometry::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* instance builders receive the geometry type mask from the caller */
Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,Instance,InstancePrimitive> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,gtype,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH8BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder)
{
  typedef BVHNBuilderTwoLevel<8,InstanceArray,InstanceArrayPrimitive> BuilderTy;
  return new BuilderTy(static_cast<BVH8*>(bvh),scene,gtype,useMortonBuilder);
}
#endif
#endif
}
}

View File

@@ -0,0 +1,262 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <type_traits>
#include "bvh_builder_twolevel_internal.h"
#include "bvh.h"
#include "../builders/priminfo.h"
#include "../builders/primrefgen.h"
/* new open/merge builder */
#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
#define ENABLE_OPEN_SEQUENTIAL 0
#define SPLIT_MEMORY_RESERVE_FACTOR 1000
#define SPLIT_MEMORY_RESERVE_SCALE 2
#define SPLIT_MIN_EXT_SPACE 1000
namespace embree
{
namespace isa
{
/*! Two-level BVH builder.
 *
 *  Every geometry of type Mesh gets its own build reference(s): small
 *  geometries are turned directly into leaves, large geometries get a
 *  separate BVH built by their own builder. A top-level hierarchy is then
 *  built over all collected build references (see build()).
 *
 *  \tparam N          branching factor of the BVH
 *  \tparam Mesh       geometry type handled by this builder
 *  \tparam Primitive  leaf primitive type */
template<int N, typename Mesh, typename Primitive>
class BVHNBuilderTwoLevel : public Builder
{
  typedef BVHN<N> BVH;
  typedef typename BVH::AABBNode AABBNode;
  typedef typename BVH::NodeRef NodeRef;

  /*! geometries with at most 4 primitives take the "small" path */
  __forceinline static bool isSmallGeometry(Mesh* mesh) {
    return mesh->size() <= 4;
  }

public:

  typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);

  /*! Build primitive of the top-level build: a PrimRef that additionally
   *  remembers the sub-tree root it stands for and the area of its bounds. */
  struct BuildRef : public PrimRef
  {
  public:
    __forceinline BuildRef () {}

    /*! stores the node reference itself as ID of the PrimRef */
    __forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
      : PrimRef(bounds,(size_t)node), node(node)
    {
      /* leaves get area 0, so they order below every inner node */
      if (node.isLeaf())
        bounds_area = 0.0f;
      else
        bounds_area = area(this->bounds());
    }

    /* used by the open/merge bvh builder */
    __forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
      : PrimRef(bounds,geomID,numPrimitives), node(node)
    {
      /* important for relative buildref ordering */
      if (node.isLeaf())
        bounds_area = 0.0f;
      else
        bounds_area = area(this->bounds());
    }

    /*! same value as numPrimitives(): both read the primID slot of the PrimRef */
    __forceinline size_t size() const {
      return primID();
    }

    /*! orders build references by the area of their bounds */
    friend bool operator< (const BuildRef& a, const BuildRef& b) {
      return a.bounds_area < b.bounds_area;
    }

    friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
      return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
    }

    __forceinline unsigned int numPrimitives() const { return primID(); }

  public:
    NodeRef node;        //!< root of the sub-tree this reference stands for
    float bounds_area;   //!< area of the bounds, 0 for leaves
  };

  /*! Opens a build reference: writes one build reference per non-empty child
   *  of bref.node to refs and returns how many were written. A leaf cannot be
   *  opened and is copied unchanged (returns 1).
   *
   *  The results are stored densely in refs[0..n-1], so the returned count
   *  always describes exactly the entries that were written, also when an
   *  empty child slot precedes a non-empty one.
   *
   *  \param bref  the build reference to open
   *  \param refs  output array; must provide room for N entries
   *  \return      number of entries written to refs */
  __forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
    if (bref.node.isLeaf())
    {
      refs[0] = bref;
      return 1;
    }
    NodeRef ref = bref.node;
    unsigned int geomID = bref.geomID();
    /* every child is credited an N-th of the parent's primitives, but at least one */
    unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
    AABBNode* node = ref.getAABBNode();
    size_t n = 0;
    for (size_t i=0; i<N; i++) {
      if (node->child(i) == BVH::emptyNode) continue;
      refs[n++] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
    }
    assert(n > 1);
    return n;
  }

  /*! Constructor. */
  BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);

  /*! Destructor */
  ~BVHNBuilderTwoLevel ();

  /*! builder entry point */
  void build();
  void deleteGeometry(size_t geomID);
  void clear();

  void open_sequential(const size_t extSize);

private:

  /*! Interface of the per-geometry helpers that feed build references into the top-level build. */
  class RefBuilderBase {
  public:
    virtual ~RefBuilderBase () {}
    virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
    virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
  };

  /*! Helper for small geometries: packs the primitives directly into leaf
   *  blocks allocated from the top-level BVH and emits one build reference per block. */
  class RefBuilderSmall : public RefBuilderBase {
  public:

    RefBuilderSmall (size_t objectID)
      : objectID_ (objectID) {}

    void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) override {

      Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
      size_t meshSize = mesh->size();
      assert(isSmallGeometry(mesh));

      mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
      auto pinfo = createPrimRefArray(mesh,objectID_,meshSize,prefs,topBuilder->bvh->scene->progressInterface);

      size_t begin=0;
      while (begin < pinfo.size())
      {
        Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
        typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
        /* NOTE(review): fill() presumably advances `begin` through a reference
           parameter, which is what terminates this loop - confirm against Primitive::fill */
        accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);

        /* create build primitive */
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
        topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
#else
        topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
#endif
      }
      assert(begin == pinfo.size());
    }

    /*! small geometries are rebuilt on every attach, so a quality change never requires a new builder */
    bool meshQualityChanged (RTCBuildQuality /*currQuality*/) override {
      return false;
    }

    size_t objectID_;
  };

  /*! Helper for large geometries: owns the builder of the per-geometry BVH
   *  and emits a single build reference pointing at that BVH's root. */
  class RefBuilderLarge : public RefBuilderBase {
  public:

    RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
      : objectID_ (objectID), builder_ (builder), quality_ (quality) {}

    void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) override
    {
      BVH* object = topBuilder->getBVH(objectID_); assert(object);

      /* build object if it got modified */
      if (topBuilder->isGeometryModified(objectID_))
        builder_->build();

      /* create build primitive */
      if (!object->getBounds().empty())
      {
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
        Mesh* mesh = topBuilder->getMesh(objectID_);
        topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
#else
        topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
#endif
      }
    }

    /*! true if the mesh quality differs from the one this builder was created with */
    bool meshQualityChanged (RTCBuildQuality currQuality) override {
      return currQuality != quality_;
    }

  private:
    size_t objectID_;
    Ref<Builder> builder_;
    RTCBuildQuality quality_;
  };

  void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
  void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);

  BVH* getBVH (size_t objectID) {
    return this->bvh->objects[objectID];
  }
  Mesh* getMesh (size_t objectID) {
    return this->scene->template getSafe<Mesh>(objectID);
  }
  bool isGeometryModified (size_t objectID) {
    return this->scene->isGeometryModified(objectID);
  }

  /*! Grows refs so it can take every build reference the per-geometry
   *  helpers may write: one per leaf block for small geometries and one per
   *  large geometry. refs is never shrunk here. */
  void resizeRefsList ()
  {
    size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
      [this](const range<size_t>& r)->size_t {
        size_t c = 0;
        for (auto i=r.begin(); i<r.end(); ++i) {
          Mesh* mesh = scene->getSafe<Mesh>(i);
          if (mesh == nullptr || mesh->numTimeSteps != 1)
            continue;
          size_t meshSize = mesh->size();
          c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
        }
        return c;
      },
      std::plus<size_t>()
    );

    if (refs.size() < num) {
      refs.resize(num);
    }
  }

  /*! Creates a fresh per-geometry BVH in bvh->objects[geomID] and the builder
   *  for it (returned through `builder`). Raises RTC_ERROR_INVALID_ARGUMENT
   *  if geomID does not refer to a geometry of type Mesh. */
  void createMeshAccel (size_t geomID, Builder*& builder)
  {
    bvh->objects[geomID] = new BVH(Primitive::type,scene);
    BVH* accel = bvh->objects[geomID];
    auto mesh = scene->getSafe<Mesh>(geomID);
    if (nullptr == mesh) {
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
      return;
    }

    __internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
  }

  using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;

  BuilderList builders;      //!< per-geometry helpers, indexed by geometry ID
  BVH* bvh;                  //!< top-level BVH; also holds the per-geometry BVHs in bvh->objects
  Scene* scene;
  mvector<BuildRef> refs;    //!< build references collected for the top-level build
  mvector<PrimRef> prims;    //!< PrimRefs derived from refs (only filled when the direct merge builder is disabled)
  std::atomic<int> nextRef;  //!< next free slot in refs, advanced by attachBuildRefs
  const size_t singleThreadThreshold;
  Geometry::GTypeMask gtype;
  bool useMortonBuilder_ = false;
};
}
}

View File

@@ -0,0 +1,304 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
namespace embree
{
/* Declarations of the per-geometry builder factories used by the two-level
 * builder: for every BVH width / geometry type there is a Morton, a SAH and a
 * refit variant. The instance and instance-array variants take an additional
 * Geometry::GTypeMask argument. */

/* BVH4 */
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);

/* BVH8 */
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
namespace isa
{
namespace __internal_two_level_builder__ {
/*! Maps (N, Mesh, Primitive) to the matching Morton mesh builder factory.
 *  The primary template is empty; only the specializations below are usable.
 *  All of them pass 0 as the last factory argument, and only the instance
 *  and instance-array variants forward gtype. */
template<int N, typename Mesh, typename Primitive>
struct MortonBuilder {};

/* ---- BVH4 ---- */

template<>
struct MortonBuilder<4,TriangleMesh,Triangle4>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,TriangleMesh,Triangle4v>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,TriangleMesh,Triangle4i>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,QuadMesh,Quad4v>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,UserGeometry,Object>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,Instance,InstancePrimitive>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<4,InstanceArray,InstanceArrayPrimitive>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    return BVH4InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,static_cast<unsigned int>(geomID),0);
  }
};

/* ---- BVH8 ---- */

template<>
struct MortonBuilder<8,TriangleMesh,Triangle4>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,TriangleMesh,Triangle4v>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,TriangleMesh,Triangle4i>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,QuadMesh,Quad4v>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,UserGeometry,Object>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,Instance,InstancePrimitive>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,static_cast<unsigned int>(geomID),0);
  }
};

template<>
struct MortonBuilder<8,InstanceArray,InstanceArrayPrimitive>
{
  MortonBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    return BVH8InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,static_cast<unsigned int>(geomID),0);
  }
};
template<int N, typename Mesh, typename Primitive>
struct SAHBuilder {};
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4> {
SAHBuilder () {}
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4v> {
SAHBuilder () {}
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4i> {
SAHBuilder () {}
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,QuadMesh,Quad4v> {
SAHBuilder () {}
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,UserGeometry,Object> {
SAHBuilder () {}
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
};
/*! SAH builder selection for BVH4 over Instance with InstancePrimitive primitives. */
template<>
struct SAHBuilder<4,Instance,InstancePrimitive>
{
  SAHBuilder () {}

  /* delegates to BVH4InstanceMeshBuilderSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH4 over InstanceArray with InstanceArrayPrimitive primitives. */
template<>
struct SAHBuilder<4,InstanceArray,InstanceArrayPrimitive>
{
  SAHBuilder () {}

  /* delegates to BVH4InstanceArrayMeshBuilderSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH4InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over TriangleMesh with Triangle4 primitives. */
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4>
{
  SAHBuilder () {}

  /* delegates to BVH8Triangle4MeshBuilderSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over TriangleMesh with Triangle4v primitives. */
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4v>
{
  SAHBuilder () {}

  /* delegates to BVH8Triangle4vMeshBuilderSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over TriangleMesh with Triangle4i primitives. */
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4i>
{
  SAHBuilder () {}

  /* delegates to BVH8Triangle4iMeshBuilderSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over QuadMesh with Quad4v primitives. */
template<>
struct SAHBuilder<8,QuadMesh,Quad4v>
{
  SAHBuilder () {}

  /* delegates to BVH8Quad4vMeshBuilderSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over UserGeometry with Object primitives. */
template<>
struct SAHBuilder<8,UserGeometry,Object>
{
  SAHBuilder () {}

  /* delegates to BVH8VirtualMeshBuilderSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over Instance with InstancePrimitive primitives. */
template<>
struct SAHBuilder<8,Instance,InstancePrimitive>
{
  SAHBuilder () {}

  /* delegates to BVH8InstanceMeshBuilderSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! SAH builder selection for BVH8 over InstanceArray with InstanceArrayPrimitive primitives. */
template<>
struct SAHBuilder<8,InstanceArray,InstanceArrayPrimitive>
{
  SAHBuilder () {}

  /* delegates to BVH8InstanceArrayMeshBuilderSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH8InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/* Primary RefitBuilder template: deliberately empty, so only the explicit
 * <N, Mesh, Primitive> specializations provide an operator(). */
template<int N, typename Mesh, typename Primitive>
struct RefitBuilder {};
/*! Refit builder selection for BVH4 over TriangleMesh with Triangle4 primitives. */
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4>
{
  RefitBuilder () {}

  /* delegates to BVH4Triangle4MeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over TriangleMesh with Triangle4v primitives. */
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4v>
{
  RefitBuilder () {}

  /* delegates to BVH4Triangle4vMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over TriangleMesh with Triangle4i primitives. */
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4i>
{
  RefitBuilder () {}

  /* delegates to BVH4Triangle4iMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over QuadMesh with Quad4v primitives. */
template<>
struct RefitBuilder<4,QuadMesh,Quad4v>
{
  RefitBuilder () {}

  /* delegates to BVH4Quad4vMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over UserGeometry with Object primitives. */
template<>
struct RefitBuilder<4,UserGeometry,Object>
{
  RefitBuilder () {}

  /* delegates to BVH4VirtualMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over Instance with InstancePrimitive primitives. */
template<>
struct RefitBuilder<4,Instance,InstancePrimitive>
{
  RefitBuilder () {}

  /* delegates to BVH4InstanceMeshRefitSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH4 over InstanceArray with InstanceArrayPrimitive primitives. */
template<>
struct RefitBuilder<4,InstanceArray,InstanceArrayPrimitive>
{
  RefitBuilder () {}

  /* delegates to BVH4InstanceArrayMeshRefitSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH4InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over TriangleMesh with Triangle4 primitives. */
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4>
{
  RefitBuilder () {}

  /* delegates to BVH8Triangle4MeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over TriangleMesh with Triangle4v primitives. */
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4v>
{
  RefitBuilder () {}

  /* delegates to BVH8Triangle4vMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over TriangleMesh with Triangle4i primitives. */
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4i>
{
  RefitBuilder () {}

  /* delegates to BVH8Triangle4iMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over QuadMesh with Quad4v primitives. */
template<>
struct RefitBuilder<8,QuadMesh,Quad4v>
{
  RefitBuilder () {}

  /* delegates to BVH8Quad4vMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over UserGeometry with Object primitives. */
template<>
struct RefitBuilder<8,UserGeometry,Object>
{
  RefitBuilder () {}

  /* delegates to BVH8VirtualMeshRefitSAH; the gtype argument is not used */
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/)
  {
    Builder* const result = BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over Instance with InstancePrimitive primitives. */
template<>
struct RefitBuilder<8,Instance,InstancePrimitive>
{
  RefitBuilder () {}

  /* delegates to BVH8InstanceMeshRefitSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! Refit builder selection for BVH8 over InstanceArray with InstanceArrayPrimitive primitives. */
template<>
struct RefitBuilder<8,InstanceArray,InstanceArrayPrimitive>
{
  RefitBuilder () {}

  /* delegates to BVH8InstanceArrayMeshRefitSAH, forwarding gtype ahead of geomID */
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype)
  {
    Builder* const result = BVH8InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);
    return result;
  }
};
/*! Chooses between the Morton, SAH and Refit builder for one mesh and stores
 *  the created builder in the 'builder' output reference. */
template<int N, typename Mesh, typename Primitive>
struct MeshBuilder
{
  MeshBuilder () {}

  /*! \param useMortonBuilder when true the Morton builder is used without looking at mesh->quality
   *  \param builder          receives the created builder
   *  Raises RTC_ERROR_UNKNOWN via throw_RTCError for an unrecognized build quality. */
  void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder)
  {
    /* a forced Morton build and low build quality both select the Morton builder */
    if (useMortonBuilder || mesh->quality == RTC_BUILD_QUALITY_LOW) {
      builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
      return;
    }

    /* medium and high quality share the SAH builder */
    if (mesh->quality == RTC_BUILD_QUALITY_MEDIUM || mesh->quality == RTC_BUILD_QUALITY_HIGH) {
      builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
      return;
    }

    if (mesh->quality == RTC_BUILD_QUALITY_REFIT) {
      builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
      return;
    }

    throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
  }
};
}
}
}

View File

@@ -0,0 +1,377 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_collider.h"
#include "../geometry/triangle_triangle_intersector.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
namespace isa
{
/* CSTAT(x) expands to nothing here, so every statistics statement wrapped in
 * CSTAT(...) in this file (including the counter definitions below) is compiled out. */
#define CSTAT(x)
/* depth limit referenced only by the disabled (#if 0) parallel paths of collide_recurse */
size_t parallel_depth_threshold = 3;
CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
/*! One candidate pair of primitives, identified by geometry and primitive ID on each side. */
struct Collision
{
  /* NOTE: processLeaf passes arrays of Collision to the user callback cast to
   * RTCCollision*, so the member order below must not be changed. */
  unsigned geomID0;
  unsigned primID0;
  unsigned geomID1;
  unsigned primID1;

  /*! leaves all members uninitialized */
  __forceinline Collision() {}

  /*! stores the two (geomID, primID) pairs */
  __forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
    : geomID0(geomID0),
      primID0(primID0),
      geomID1(geomID1),
      primID1(primID1)
  {}
};
/*! Tests a single box against the N child boxes stored in an AABB node.
 *  \return bit mask from movemask over the lanes whose box overlaps box0 on all three axes */
template<int N>
__forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
{
  /* x axis: intersection interval [lo,hi] per lane */
  const vfloat<N> lo_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
  const vfloat<N> hi_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
  const auto hit_x = lo_x <= hi_x;

  /* y axis */
  const vfloat<N> lo_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
  const vfloat<N> hi_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
  const auto hit_y = lo_y <= hi_y;

  /* z axis */
  const vfloat<N> lo_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
  const vfloat<N> hi_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
  const auto hit_z = lo_z <= hi_z;

  /* a lane overlaps only if the interval is non-empty on every axis */
  return movemask(hit_x & hit_y & hit_z);
}
/*! Tests a single box against N boxes stored in SIMD (structure-of-arrays) form.
 *  \return bit mask from movemask over the lanes whose box overlaps box0 on all three axes */
template<int N>
__forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
{
  /* x axis: intersection interval [lo,hi] per lane */
  const vfloat<N> lo_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
  const vfloat<N> hi_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
  const auto hit_x = lo_x <= hi_x;

  /* y axis */
  const vfloat<N> lo_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
  const vfloat<N> hi_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
  const auto hit_y = lo_y <= hi_y;

  /* z axis */
  const vfloat<N> lo_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
  const vfloat<N> hi_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
  const auto hit_z = lo_z <= hi_z;

  /* a lane overlaps only if the interval is non-empty on every axis */
  return movemask(hit_x & hit_y & hit_z);
}
/*! Tests lane i of box0 against all N boxes of box1.
 *  \return bit mask from movemask over the lanes of box1 that overlap box0[i] on all three axes */
template<int N>
__forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
{
  /* x axis: broadcast lane i of box0 and intersect with every lane of box1 */
  const vfloat<N> lo_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
  const vfloat<N> hi_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
  const auto hit_x = lo_x <= hi_x;

  /* y axis */
  const vfloat<N> lo_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
  const vfloat<N> hi_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
  const auto hit_y = lo_y <= hi_y;

  /* z axis */
  const vfloat<N> lo_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
  const vfloat<N> hi_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
  const auto hit_z = lo_z <= hi_z;

  /* a lane overlaps only if the interval is non-empty on every axis */
  return movemask(hit_x & hit_y & hit_z);
}
/*! Triangle/triangle test between primitive primID0 of mesh geomID0 in scene0 and
 *  primitive primID1 of mesh geomID1 in scene1.
 *  When both triangles come from the same geometry of the same scene, a triangle is never
 *  reported against itself nor against a triangle that shares a vertex index with it.
 *  \return result of TriangleTriangleIntersector::intersect_triangle_triangle, or false when culled */
bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
{
  CSTAT(bvh_collide_prim_intersections1++);
  const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
  const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);
  const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
  const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);

  /* special culling applies only when a geometry is intersected with itself */
  const bool sameGeometry = (scene0 == scene1) && (geomID0 == geomID1);

  /* ignore self intersections */
  if (sameGeometry && primID0 == primID1)
    return false;
  CSTAT(bvh_collide_prim_intersections2++);

  /* ignore intersection with topological neighbors */
  if (sameGeometry)
  {
    const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
    for (size_t k=0; k<3; k++) {
      if (any(vint4(tri1.v[k]) == t0))
        return false;
    }
  }
  CSTAT(bvh_collide_prim_intersections3++);

  /* gather the vertex positions of both triangles */
  const Vec3fa p0 = mesh0->vertex(tri0.v[0]);
  const Vec3fa p1 = mesh0->vertex(tri0.v[1]);
  const Vec3fa p2 = mesh0->vertex(tri0.v[2]);
  const Vec3fa q0 = mesh1->vertex(tri1.v[0]);
  const Vec3fa q1 = mesh1->vertex(tri1.v[1]);
  const Vec3fa q2 = mesh1->vertex(tri1.v[2]);
  return TriangleTriangleIntersector::intersect_triangle_triangle(p0,p1,p2,q0,q1,q2);
}
/*! Reports every pair of objects from two leaves to the user callback.
 *  Pairs are buffered and delivered in batches of at most 16; when both scenes are the
 *  same, a primitive is never paired with itself. */
template<int N>
__forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
{
  const size_t batchSize = 16;
  Collision batch[batchSize];
  size_t pending = 0;

  size_t count0; Object* objs0 = (Object*) node0.leaf(count0);
  size_t count1; Object* objs1 = (Object*) node1.leaf(count1);

  for (size_t a=0; a<count0; a++)
  {
    for (size_t b=0; b<count1; b++)
    {
      const unsigned geomID0 = objs0[a].geomID();
      const unsigned primID0 = objs0[a].primID();
      const unsigned geomID1 = objs1[b].geomID();
      const unsigned primID1 = objs1[b].primID();

      /* skip the pairing of a primitive with itself */
      const bool samePrimitive = this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1;
      if (samePrimitive)
        continue;

      batch[pending] = Collision(geomID0,primID0,geomID1,primID1);
      pending++;

      /* buffer full: hand the batch to the callback and start over */
      if (pending == batchSize) {
        this->callback(this->userPtr,(RTCCollision*)&batch,pending);
        pending = 0;
      }
    }
  }

  /* deliver whatever is left in the buffer */
  if (pending)
    this->callback(this->userPtr,(RTCCollision*)&batch,pending);
}
/* Recursively traverses a pair of BVH subtrees and passes every reached
 * leaf/leaf pair to processLeaf().
 * At each step exactly one side is opened: the inner node when the other side
 * is a leaf; otherwise ref0 when area(bounds0) > area(bounds1), else ref1.
 * Only the children selected by the overlap mask against the other side's
 * bounds are visited. depth0/depth1 are incremented for the opened side and
 * are read only by the disabled (#if 0) parallel variants. */
template<int N>
void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
{
CSTAT(bvh_collide_traversal_steps++);
/* decide which of the two nodes to open (or process the leaf pair) */
if (unlikely(ref0.isLeaf())) {
if (unlikely(ref1.isLeaf())) {
CSTAT(bvh_collide_leaf_pairs++);
processLeaf(ref0,ref1);
return;
} else goto recurse_node1;
} else {
if (unlikely(ref1.isLeaf())) {
goto recurse_node0;
} else {
/* both are inner nodes: open the one with the larger bounds area */
if (area(bounds0) > area(bounds1)) {
goto recurse_node0;
}
else {
goto recurse_node1;
}
}
}
{
/* open ref0: test its children against bounds1 */
recurse_node0:
AABBNode* node0 = ref0.getAABBNode();
size_t mask = overlap<N>(bounds1,*node0);
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
//for (size_t i=0; i<N; i++) {
#if 0
if (depth0 < parallel_depth_threshold)
{
parallel_for(size_t(N), [&] ( size_t i ) {
if (mask & ( 1 << i)) {
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
}
});
}
else
#endif
{
/* iterate over the set bits of the overlap mask */
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
}
}
return;
}
{
/* open ref1: test its children against bounds0 */
recurse_node1:
AABBNode* node1 = ref1.getAABBNode();
size_t mask = overlap<N>(bounds0,*node1);
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
//for (size_t i=0; i<N; i++) {
#if 0
if (depth1 < parallel_depth_threshold)
{
parallel_for(size_t(N), [&] ( size_t i ) {
if (mask & ( 1 << i)) {
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
}
});
}
else
#endif
{
/* iterate over the set bits of the overlap mask */
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
}
}
return;
}
}
/*! Performs one subdivision step of a collide job and appends the resulting jobs to 'jobs'.
 *  A leaf/leaf job is appended unchanged. Otherwise one node is opened (the inner node if the
 *  other is a leaf, else ref0 when area(bounds0) > area(bounds1), else ref1) and one job is
 *  appended per child selected by the overlap mask against the other side's bounds. */
template<int N>
void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
{
  const bool leaf0 = job.ref0.isLeaf();
  const bool leaf1 = job.ref1.isLeaf();

  /* leaf pairs cannot be subdivided any further */
  if (leaf0 && leaf1) {
    jobs.push_back(job);
    return;
  }

  /* ref0 is opened when ref1 is a leaf, or when both are inner nodes and bounds0 has the larger area */
  const bool openNode0 = !leaf0 && (leaf1 || area(job.bounds0) > area(job.bounds1));

  if (openNode0)
  {
    const AABBNode* node0 = job.ref0.getAABBNode();
    size_t remaining = overlap<N>(job.bounds1,*node0);
    while (remaining != 0) {
      const size_t i = bsf(remaining);
      jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
      remaining = btc(remaining,i);
    }
  }
  else
  {
    const AABBNode* node1 = job.ref1.getAABBNode();
    size_t remaining = overlap<N>(job.bounds0,*node1);
    while (remaining != 0) {
      const size_t i = bsf(remaining);
      jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
      remaining = btc(remaining,i);
    }
  }
}
/*! Entry point of the collision traversal for two BVH roots.
 *  The root pair is first subdivided breadth-first with split() into a bounded list of
 *  independent jobs; the jobs are then traversed with collide_recurse() inside a parallel_for. */
template<int N>
void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
{
  CSTAT(bvh_collide_traversal_steps = 0);
  CSTAT(bvh_collide_leaf_pairs = 0);
  CSTAT(bvh_collide_leaf_iterations = 0);
  CSTAT(bvh_collide_prim_intersections1 = 0);
  CSTAT(bvh_collide_prim_intersections2 = 0);
  CSTAT(bvh_collide_prim_intersections3 = 0);
  CSTAT(bvh_collide_prim_intersections4 = 0);
  CSTAT(bvh_collide_prim_intersections5 = 0);
  CSTAT(bvh_collide_prim_intersections = 0);
#if 0
  collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
#else
  /* capacity of each job list; size_t so that the comparisons with size() below
   * do not mix signed and unsigned operands */
  const size_t M = 2048;

  /* two job lists used in ping-pong fashion: read from 'source', write to 'target' */
  jobvector jobs[2];
  jobs[0].reserve(M);
  jobs[1].reserve(M);
  jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
  int source = 0;
  int target = 1;

  /* try to split job until job list is full */
  /* NOTE(review): the headroom of 8 is presumably the maximum number of jobs a
   * single split() can append (N <= 8) -- confirm before changing */
  while (jobs[source].size()+8 <= M)
  {
    for (size_t i=0; i<jobs[source].size(); i++)
    {
      const CollideJob& job = jobs[source][i];
      size_t remaining = jobs[source].size()-i;

      /* copy the job unsplit once splitting could overflow the target list */
      if (jobs[target].size()+remaining+8 > M) {
        jobs[target].push_back(job);
      } else {
        split(job,jobs[target]);
      }
    }

    /* stop splitting jobs if we reached only leaves and cannot make progress anymore */
    if (jobs[target].size() == jobs[source].size())
      break;

    jobs[source].resize(0);
    std::swap(source,target);
  }

  /* parallel processing of all jobs */
  parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
    CollideJob& j = jobs[source][i];
    collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
  });
#endif
  CSTAT(PRINT(bvh_collide_traversal_steps));
  CSTAT(PRINT(bvh_collide_leaf_pairs));
  CSTAT(PRINT(bvh_collide_leaf_iterations));
  CSTAT(PRINT(bvh_collide_prim_intersections1));
  CSTAT(PRINT(bvh_collide_prim_intersections2));
  CSTAT(PRINT(bvh_collide_prim_intersections3));
  CSTAT(PRINT(bvh_collide_prim_intersections4));
  CSTAT(PRINT(bvh_collide_prim_intersections5));
  CSTAT(PRINT(bvh_collide_prim_intersections));
}
/*! Collides two BVHs of user geometry: builds a collider for the scenes of both BVHs and
 *  starts the traversal at their roots, using the bounds() of each BVH's stored bounds. */
template<int N>
void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
{
  BVHNColliderUserGeom<N> collider(bvh0->scene,bvh1->scene,callback,userPtr);
  collider.collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),
                                 bvh1->root,bvh1->bounds.bounds());
}
/* Regression test for TriangleTriangleIntersector::intersect_triangle_triangle.
 * Only compiled when EMBREE_LOWEST_ISA is defined; the global instance defined
 * after the struct registers the test from its constructor. */
#if defined (EMBREE_LOWEST_ISA)
struct collision_regression_test : public RegressionTest
{
collision_regression_test(const char* name) : RegressionTest(name) {
registerRegressionTest(this);
}
/* returns true only if every call below yields its expected result */
bool run ()
{
bool passed = true;
/* each line compares the intersector result with the expected value and accumulates it into 'passed' */
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
/* NOTE(review): the next case repeats the identical-triangles case above with the same arguments */
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
/* small triangles in the z=0 plane, translated by (-1,1,0), (2,0.5,0) and (0.5,-2,0); each is expected not to intersect */
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
return passed;
}
};
collision_regression_test collision_regression("collision_regression_test");
#endif
////////////////////////////////////////////////////////////////////////////////
/// Collider Definitions
////////////////////////////////////////////////////////////////////////////////
/* the 4-wide user-geometry collider is always defined */
DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);
/* the 8-wide variant is only defined when __AVX__ is defined */
#if defined(__AVX__)
DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
#endif
}
}

View File

@@ -0,0 +1,72 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../geometry/trianglev.h"
#include "../geometry/object.h"
namespace embree
{
namespace isa
{
template<int N>
class BVHNCollider
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::AABBNode AABBNode;
struct CollideJob
{
CollideJob () {}
CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
: ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}
NodeRef ref0;
BBox3fa bounds0;
size_t depth0;
NodeRef ref1;
BBox3fa bounds1;
size_t depth1;
};
typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;
void split(const CollideJob& job, jobvector& jobs);
public:
__forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
: scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}
public:
virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);
protected:
Scene* scene0;
Scene* scene1;
RTCCollideFunc callback;
void* userPtr;
};
/*! Collider for BVHs whose leaves are handled by the user callback.
 *  Instances are only created internally (private constructor); clients use the
 *  static collide() entry point. */
template<int N>
class BVHNColliderUserGeom : public BVHNCollider<N>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::AABBNode AABBNode;

  __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
    : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}

  /* 'override' makes the compiler verify that this matches the pure virtual in BVHNCollider<N> */
  void processLeaf(NodeRef leaf0, NodeRef leaf1) override;

public:
  /*! collides bvh0 against bvh1, reporting results through 'callback' with 'userPtr' */
  static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
};
}
}

View File

@@ -0,0 +1,21 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../common/isa.h"
#include "../common/accel.h"
#include "../common/scene.h"
#include "../geometry/curve_intersector_virtual.h"
namespace embree
{
/*! BVH instantiations: enumerations naming the available build and intersect variants. */
class BVHFactory
{
public:
  /*! build variants */
  enum class BuildVariant {
    STATIC,
    DYNAMIC,
    HIGH_QUALITY
  };

  /*! intersect variants */
  enum class IntersectVariant {
    FAST,
    ROBUST
  };
};
}

View File

@@ -0,0 +1,322 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector1.h"
#include "node_intersector1.h"
#include "bvh_traverser1.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
namespace embree
{
namespace isa
{
/*! Closest-hit traversal of the BVH for a single ray.
 *  Returns immediately for an empty BVH and, when EMBREE_IGNORE_INVALID_RAYS is defined,
 *  for invalid rays. Nodes are kept on an explicit stack together with their entry
 *  distance; popped nodes whose distance exceeds the current ray.tfar are skipped.
 *  Leaves are handed to PrimitiveIntersector1::intersect, after which the traversal
 *  ray's tfar is refreshed from ray.tfar. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
                                                                          RayHit& __restrict__ ray,
                                                                          RayQueryContext* __restrict__ context)
{
  const BVH* __restrict__ bvh = (const BVH*)This->ptr;

  /* we may traverse an empty BVH in case all geometry was invalid */
  if (bvh->root == BVH::emptyNode)
    return;

  /* perform per ray precalculations required by the primitive intersector */
  Precalculations pre(ray, bvh);

  /* stack state (the root is known to be non-empty here, see the check above) */
  StackItemT<NodeRef> stack[stackSize]; // stack of nodes
  StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
  StackItemT<NodeRef>* stackEnd = stack+stackSize;
  stack[0].ptr = bvh->root;
  stack[0].dist = neg_inf;

  /* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  if (!ray.valid()) return;
#endif

  /* verify correct input */
  assert(ray.valid());
  assert(ray.tnear() >= 0.0f);
  assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));

  /* load the ray into SIMD registers */
  TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));

  /* initialize the node traverser */
  BVHNNodeTraverser1Hit<N, types> nodeTraverser;

  /* pop loop; 'goto pop' restarts the loop body from the inner loop */
  while (true) pop:
  {
    /* pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = NodeRef(stackPtr->ptr);

    /* if popped node is too far, pop next one */
    if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
      continue;

    /* downtraversal loop; left via 'break' once 'cur' is no longer an inner node */
    while (true)
    {
      /* intersect node */
      size_t mask; vfloat<N> tNear;
      STAT3(normal.trav_nodes,1,1,1);
      bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
      if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }

      /* if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /* select next child and push other children */
      nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
    }

    /* this is a leaf node */
    assert(cur != BVH::emptyNode);
    STAT3(normal.trav_leaves,1,1,1);
    size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
    size_t lazy_node = 0;
    PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);

    /* the primitive intersector may have shortened the ray: keep the traversal ray in sync */
    tray.tfar = ray.tfar;

    /* push lazy node onto stack */
    if (unlikely(lazy_node)) {
      stackPtr->ptr = lazy_node;
      stackPtr->dist = neg_inf;
      stackPtr++;
    }
  }
}
/* Any-hit (occlusion) traversal of the BVH for a single ray.
 * Returns immediately for an empty BVH, for rays with tfar < 0 and, when
 * EMBREE_IGNORE_INVALID_RAYS is defined, for invalid rays. As soon as
 * PrimitiveIntersector1::occluded reports a hit, ray.tfar is set to neg_inf
 * and the traversal stops. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
Ray& __restrict__ ray,
RayQueryContext* __restrict__ context)
{
const BVH* __restrict__ bvh = (const BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
/* early out for already occluded rays */
if (unlikely(ray.tfar < 0.0f))
return;
/* perform per ray precalculations required by the primitive intersector */
Precalculations pre(ray, bvh);
/* stack state */
NodeRef stack[stackSize]; // stack of nodes that still need to get traversed
NodeRef* stackPtr = stack+1; // current stack pointer
NodeRef* stackEnd = stack+stackSize;
stack[0] = bvh->root;
/* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
if (!ray.valid()) return;
#endif
/* verify correct input */
assert(ray.valid());
assert(ray.tnear() >= 0.0f);
assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
/* load the ray into SIMD registers */
TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
/* initialize the node traverser */
BVHNNodeTraverser1Hit<N, types> nodeTraverser;
/* pop loop; 'goto pop' from the inner loop restarts the loop body */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = (NodeRef)*stackPtr;
/* downtraversal loop; left via 'break' once 'cur' is no longer an inner node */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(shadow.trav_nodes,1,1,1);
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(shadow.trav_leaves,1,1,1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
/* first occluder found: mark the ray as occluded and terminate the traversal */
if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
ray.tfar = neg_inf;
break;
}
/* push lazy node onto stack */
if (unlikely(lazy_node)) {
*stackPtr = (NodeRef)lazy_node;
stackPtr++;
}
}
}
/* Generic point query traversal for a BVH with primitive intersector
 * PrimitiveIntersector1. Partial specializations for this template can replace
 * the traversal for individual intersector types. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
struct PointQueryDispatch
{
typedef typename PrimitiveIntersector1::Precalculations Precalculations;
typedef typename PrimitiveIntersector1::Primitive Primitive;
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
/* Traverses the BVH for the given point query.
 * Returns true if at least one PrimitiveIntersector1::pointQuery call returned true,
 * and false otherwise (including for an empty BVH). */
static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
{
const BVH* __restrict__ bvh = (const BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return false;
/* stack state */
StackItemT<NodeRef> stack[stackSize]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
StackItemT<NodeRef>* stackEnd = stack+stackSize;
stack[0].ptr = bvh->root;
stack[0].dist = neg_inf;
/* verify correct input */
assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));
/* load the point query into SIMD registers */
TravPointQuery<N> tquery(query->p, context->query_radius);
/* initialize the node traverser */
BVHNNodeTraverser1Hit<N,types> nodeTraverser;
bool changed = false;
/* culling threshold compared against the distance stored with each stack entry:
 * radius squared for sphere queries, dot(query_radius, query_radius) otherwise */
float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
? query->radius * query->radius
: dot(context->query_radius, context->query_radius);
/* pop loop; 'goto pop' from the inner loop restarts the loop body */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* if popped node is too far, pop next one */
if (unlikely(*(float*)&stackPtr->dist > cull_radius))
continue;
/* downtraversal loop; left via 'break' once 'cur' is no longer an inner node */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(point_query.trav_nodes,1,1,1);
bool nodeIntersected;
/* the node test depends on the query type: sphere or AABB */
if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
} else {
nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
}
if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(point_query.trav_leaves,1,1,1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
/* a true result marks the query as changed: reload the traversal radius and the culling threshold */
if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
{
changed = true;
tquery.rad = context->query_radius;
cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
? query->radius * query->radius
: dot(context->query_radius, context->query_radius);
}
/* push lazy node onto stack */
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
return changed;
}
};
/* disable point queries for not yet supported geometry types */

/*! VirtualCurveIntersector1: point queries are not supported, the query always reports false. */
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1>
{
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
  {
    /* no traversal is performed */
    return false;
  }
};
/*! SubdivPatch1Intersector1: point queries are not supported, the query always reports false. */
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1>
{
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
  {
    /* no traversal is performed */
    return false;
  }
};
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1>
{
  /* Stub for motion-blurred subdivision patch primitives: performs no
     traversal and always reports that the query was not changed. */
  static __forceinline bool pointQuery(const Accel::Intersectors* This,
                                       PointQuery* query,
                                       PointQueryContext* context)
  {
    return false;
  }
};
/*! Point query entry point of the single-ray intersector.
 *  Forwards all arguments to the PointQueryDispatch specialization selected by
 *  the primitive intersector type and returns its result unchanged. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
  const Accel::Intersectors* This,
  PointQuery* query,
  PointQueryContext* context)
{
  typedef PointQueryDispatch<N, types, robust, PrimitiveIntersector1> Dispatch;
  const bool changed = Dispatch::pointQuery(This, query, context);
  return changed;
}
}
}

View File

@@ -0,0 +1,34 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../common/ray.h"
#include "../common/point_query.h"
namespace embree
{
namespace isa
{
/*! BVH single ray intersector.
 *
 *  Declares the static entry points for closest-hit intersection, occlusion
 *  testing and point queries against a BVH with branching factor N.
 *  Template parameters: N = branching factor, types = node-type mask,
 *  robust = robust traversal flag, PrimitiveIntersector1 = leaf intersector. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
class BVHNIntersector1
{
  /* type aliases shared by the member definitions */
  using Precalculations = typename PrimitiveIntersector1::Precalculations;
  using Primitive       = typename PrimitiveIntersector1::Primitive;
  using BVH             = BVHN<N>;
  using NodeRef         = typename BVH::NodeRef;
  using AABBNode        = typename BVH::AABBNode;
  using AABBNodeMB4D    = typename BVH::AABBNodeMB4D;

  /* traversal stack capacity; the trailing +3 is slack for a 16-wide store */
  static const size_t stackSize = 1 + (N-1)*BVH::maxDepth + 3;

public:
  /* closest-hit query for one ray */
  static void intersect (const Accel::Intersectors* This, RayHit& ray, RayQueryContext* context);

  /* occlusion (any-hit) query for one ray */
  static void occluded  (const Accel::Intersectors* This, Ray& ray, RayQueryContext* context);

  /* point query; the returned flag is produced by PointQueryDispatch */
  static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
};
}
}

View File

@@ -0,0 +1,64 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector1.cpp"
namespace embree
{
namespace isa
{
/* Forwards to VerifyMultiTargetLinking::getISA() and returns its result. */
int getISA()
{
  const int isa = VerifyMultiTargetLinking::getISA();
  return isa;
}
////////////////////////////////////////////////////////////////////////////////
/// BVH4Intersector1 Definitions
////////////////////////////////////////////////////////////////////////////////
/* Each line below defines one named single-ray BVH4 intersector through
   DEFINE_INTERSECTOR1. The BVHNIntersector1 template arguments are, in order:
   branching factor N, node-type mask, robust flag, primitive intersector.
   COMMA is used in place of ',' inside the macro arguments.
   NOTE(review): the IF_ENABLED_* wrappers presumably drop a line when the
   corresponding geometry type is compiled out - confirm against their definitions. */
/* curves and points (oriented-bounds BVH), plain and motion blur, fast and robust */
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));
/* triangles: Moeller variants use robust=false, Pluecker variants use robust=true */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
/* motion blur triangles */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<4 COMMA true> > >));
/* quads, plain and motion blur */
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));
/* subdivision patches (always robust=true) */
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));
/* user geometry; the ObjectIntersector1 template flag is true for the MB variant */
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));
/* instances and instance arrays */
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1MB> >));
/* variants using the BVH_QN1 node-type mask */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
/* grids */
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
/* NOTE(review): the next definition is named "...Moeller" but is instantiated with robust=true and
   SubGridMBIntersector1Pluecker, while the Pluecker-named MB variant further down is commented out -
   confirm this is intentional. */
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
}
}

View File

@@ -0,0 +1,918 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.h"
#include "bvh_traverser1.h"
#include "node_intersector1.h"
#include "node_intersector_packet.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
/* Compile-time switches for the hybrid packet traversal in this file. */
/* 1: in hybrid mode, re-check packet utilization after each inner node and fall back to the pop loop when few lanes remain */
#define SWITCH_DURING_DOWN_TRAVERSAL 1
/* 0: the switch to single-ray traversal is guarded by 'single' and the lane-count threshold; any other value removes both guards */
#define FORCE_SINGLE_MODE 0
/* 1: intersect()/occluded() forward to the *Coherent variants for BVH_AN1 trees when the context is flagged coherent */
#define ENABLE_FAST_COHERENT_CODEPATHS 1
namespace embree
{
namespace isa
{
/*! Closest-hit traversal of a single ray (lane k of a packet), starting at node 'root'.
 *
 *  \param This    accelerator handle, forwarded to the primitive intersector
 *  \param bvh     BVH being traversed (not referenced in this function body)
 *  \param root    node at which traversal starts; the packet traversal also passes inner nodes here
 *  \param k       lane of the packet that is traced
 *  \param pre     precalculations, forwarded to the primitive intersector
 *  \param ray     ray/hit packet; ray.tfar[k] is re-read after every leaf to shorten the traversal ray
 *  \param tray    packet traversal ray from which lane k is extracted
 *  \param context query context, forwarded to the primitive intersector
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
const BVH* bvh,
NodeRef root,
size_t k,
Precalculations& pre,
RayHitK<K>& ray,
const TravRayK<K, robust>& tray,
RayQueryContext* context)
{
/* stack state */
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
/* the start node is pushed with distance neg_inf so that the distance test below never culls it */
stack[0].ptr = root;
stack[0].dist = neg_inf;
/* load the ray into SIMD registers */
TravRay<N,robust> tray1;
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* if popped node is too far, pop next one */
/* dist is reinterpreted as a float here; NOTE(review): presumably stored as float bits by traverseClosestHit - confirm */
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
continue;
/* downtraversal loop */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(normal.trav_nodes, 1, 1, 1);
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
/* a false result ends the descent and falls through to leaf handling; the node statistic is undone */
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(normal.trav_leaves, 1, 1, 1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
/* refresh the traversal far distance from the ray after the leaf was processed */
tray1.tfar = ray.tfar[k];
/* a non-zero lazy_node reported by the primitive intersector is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
}
/*! Closest-hit intersection of a packet of K rays with the BVH.
 *
 *  Control flow, in order:
 *   - returns immediately for an empty BVH or when no lane is valid;
 *   - forwards to intersectCoherent() for BVH_AN1 trees when the context is flagged coherent;
 *   - when 'single' is true, traces every valid lane on its own with intersect1() and returns;
 *   - otherwise traverses the packet in chunk mode with a node stack and a per-lane distance stack.
 *
 *  \param valid_i lane mask; lanes whose value equals -1 are traced
 *  \param This    accelerator handle; This->ptr is cast to the BVH
 *  \param ray     ray/hit packet; ray.tfar is read back after each leaf to shorten the traversal rays
 *  \param context query context; context->user and isCoherent() select the coherent path and the switch threshold
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
RayQueryContext* __restrict__ context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
intersectCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
#if defined(__AVX__)
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
#endif
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp tnear/tfar to be non-negative */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
/* single mode: trace each valid lane separately from the root, then return */
if (single)
{
/* inactive lanes get the empty interval [pos_inf, neg_inf] */
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
for (; valid_bits!=0; ) {
const size_t i = bscf(valid_bits);
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
}
return;
}
/* NOTE(review): the 'single' case returned above, so 'single' is false for all code below;
   the 'if (single)' tests further down are therefore not taken in this function */
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
vint<K> octant = ray.octant();
/* invalid lanes are tagged with the octant value 0xffffffff */
octant = select(valid, octant, vint<K>(0xffffffff));
/* test whether we have ray with opposing direction signs in the packet */
bool split = false;
{
size_t bits = valid_bits;
vbool<K> vsplit( false );
do
{
const size_t valid_index = bsf(bits);
vbool<K> octant_valid = octant[valid_index] == octant;
bits &= ~(size_t)movemask(octant_valid);
/* a lane whose octant equals this octant with all three sign bits flipped points the opposite way */
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
} while (bits);
if (any(vsplit)) split = true;
}
/* outer loop: each iteration traverses one group of lanes and removes it from valid_bits */
do
{
const size_t valid_index = bsf(valid_bits);
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
/* number of direction sign bits in which each lane differs from the reference lane */
const vint<K> count_diff_octant = \
((diff_octant >> 2) & 1) +
((diff_octant >> 1) & 1) +
((diff_octant >> 0) & 1);
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
/* mark the lanes of this group as handled */
octant = select(octant_valid,vint<K>(0xffffffff),octant);
valid_bits &= ~(size_t)movemask(octant_valid);
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
/* allocate stack and push root node */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
/* slot 0 holds a sentinel (invalidNode) that terminates the pop loop */
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
/* trace every remaining active lane individually below the popped node */
for (; bits!=0; ) {
const size_t i = bscf(bits);
intersect1(This, bvh, cur, i, pre, ray, tray, context);
}
tray.tfar = min(tray.tfar, ray.tfar);
continue;
}
}
#endif
/* chunk down-traversal: descend until a leaf is reached */
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
/* counts the children pushed onto the stack for this node (the child kept in 'cur' is not counted) */
size_t num_child_hits = 0;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
/* the first empty child ends the scan of this node */
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we choose to continue with that child if it
is closer than the current next child, or we push it onto the stack */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
/* lanes that missed the child get distance inf */
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push cur node onto stack and continue with hit child */
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
*sptr_node = child; sptr_node++;
*sptr_near = childDist; sptr_near++;
}
}
}
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
#endif
/* no child was hit: pop the next node */
if (unlikely(cur == BVH::emptyNode))
goto pop;
/* improved distance sorting for 3 or more hits */
/* (num_child_hits counts pushed children only, so >= 2 means at least three children were hit) */
if (unlikely(num_child_hits >= 2))
{
if (any(sptr_near[-2] < sptr_near[-1]))
{
std::swap(sptr_near[-2],sptr_near[-1]);
std::swap(sptr_node[-2],sptr_node[-1]);
}
if (unlikely(num_child_hits >= 3))
{
if (any(sptr_near[-3] < sptr_near[-1]))
{
std::swap(sptr_near[-3],sptr_near[-1]);
std::swap(sptr_node[-3],sptr_node[-1]);
}
if (any(sptr_near[-3] < sptr_near[-2]))
{
std::swap(sptr_near[-3],sptr_near[-2]);
std::swap(sptr_node[-3],sptr_node[-2]);
}
}
}
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
/* re-push the current node so that the pop loop can decide how to continue with it */
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
/* take over the ray's far distance for the lanes that took part in the leaf test */
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
/* a non-zero lazy_node reported by the primitive intersector is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
} while(valid_bits);
}
/*! Closest-hit intersection of a coherent packet of K rays.
 *
 *  Valid lanes are grouped by identical octant. For each group a frustum is built and
 *  every inner node is first tested against the frustum; children that pass are then
 *  tested against the packet. The stack stores one scalar distance per entry.
 *  Inner nodes are accessed through getAABBNode(); intersect() only forwards here
 *  when types == BVH_AN1.
 *
 *  \param valid_i lane mask; lanes whose value equals -1 are traced
 *  \param This    accelerator handle; This->ptr is cast to the BVH
 *  \param ray     ray/hit packet; ray.tfar is read back after each leaf
 *  \param context query context, forwarded to the primitive intersector
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
RayQueryContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp tnear/tfar to be non-negative */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
vint<K> octant = ray.octant();
/* invalid lanes are tagged with the octant value 0xffffffff */
octant = select(valid, octant, vint<K>(0xffffffff));
/* outer loop: each iteration traverses all lanes that share the octant of the first remaining lane */
do
{
const size_t valid_index = bsf(valid_bits);
const vbool<K> octant_valid = octant[valid_index] == octant;
valid_bits &= ~(size_t)movemask(octant_valid);
/* lanes outside the group get the empty interval [pos_inf, neg_inf] */
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
Frustum<robust> frustum;
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].dist = neg_inf;
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (1) pop:
{
/* pop next node from stack */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* cull node if behind closest hit point */
/* the stored scalar distance is read back as a float and broadcast to all lanes */
vfloat<K> curDist = *(float*)&stackPtr->dist;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active))) continue;
while (likely(!cur.isLeaf()))
{
/* process nodes */
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
vfloat<N> fmin;
/* frustum test: bit i of the result is set for each child that passes */
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
if (unlikely(!m_frustum_node)) goto pop;
cur = BVH::emptyNode;
curDist = pos_inf;
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
/* counts the children pushed onto the stack for this node (the child kept in 'cur' is not counted) */
size_t num_child_hits = 0;
do {
const size_t i = bscf(m_frustum_node);
vfloat<K> lnearP;
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
STAT3(normal.trav_nodes, 1, 1, 1);
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
if (likely(any(lhit)))
{
/* the frustum entry distance of child i is used as the distance for all lanes */
const vfloat<K> childDist = fmin[i];
const NodeRef child = node->child(i);
BVHN<N>::prefetch(child);
/* continue with the closer child and push the previous candidate */
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
stackPtr->ptr = cur;
*(float*)&stackPtr->dist = toScalar(curDist);
stackPtr++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
stackPtr->ptr = child;
*(float*)&stackPtr->dist = toScalar(childDist);
stackPtr++;
}
}
} while(m_frustum_node);
/* no child was hit by the packet: pop the next node */
if (unlikely(cur == BVH::emptyNode)) goto pop;
/* improved distance sorting for 3 or more hits */
/* (num_child_hits counts pushed children only, so >= 2 means at least three children were hit) */
/* NOTE(review): the comparisons below use the raw 'dist' member rather than the float view used above - confirm the ordering is as intended */
if (unlikely(num_child_hits >= 2))
{
if (stackPtr[-2].dist < stackPtr[-1].dist)
std::swap(stackPtr[-2],stackPtr[-1]);
if (unlikely(num_child_hits >= 3))
{
if (stackPtr[-3].dist < stackPtr[-1].dist)
std::swap(stackPtr[-3],stackPtr[-1]);
if (stackPtr[-3].dist < stackPtr[-2].dist)
std::swap(stackPtr[-3],stackPtr[-2]);
}
}
}
/* intersect leaf */
assert(cur != BVH::invalidNode);
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
/* reduce max distance interval on successful intersection */
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
{
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
frustum.template updateMaxDist<K>(tray.tfar);
}
/* a non-zero lazy_node reported by the primitive intersector is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
} while(valid_bits);
}
// ===================================================================================================================================================================
// ===================================================================================================================================================================
// ===================================================================================================================================================================
/*! Occlusion (any-hit) traversal of a single ray (lane k of a packet), starting at node 'root'.
 *
 *  \param This    accelerator handle, forwarded to the primitive intersector
 *  \param bvh     BVH being traversed (not referenced in this function body)
 *  \param root    node at which traversal starts; the packet traversal also passes inner nodes here
 *  \param k       lane of the packet that is traced
 *  \param pre     precalculations, forwarded to the primitive intersector
 *  \param ray     ray packet; ray.tfar[k] is set to neg_inf when an occluder is found
 *  \param tray    packet traversal ray from which lane k is extracted
 *  \param context query context, forwarded to the primitive intersector
 *  \return true as soon as the primitive intersector reports an occluder, false if the stack runs empty
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
const BVH* bvh,
NodeRef root,
size_t k,
Precalculations& pre,
RayK<K>& ray,
const TravRayK<K, robust>& tray,
RayQueryContext* context)
{
/* stack state */
/* unlike intersect1(), the stack stores node references only, without distances */
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
NodeRef* stackPtr = stack+1; // current stack pointer
NodeRef* stackEnd = stack+stackSizeSingle;
stack[0] = root;
/* load the ray into SIMD registers */
TravRay<N,robust> tray1;
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = (NodeRef)*stackPtr;
/* downtraversal loop */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(shadow.trav_nodes, 1, 1, 1);
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
/* a false result ends the descent and falls through to leaf handling; the node statistic is undone */
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(shadow.trav_leaves, 1, 1, 1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
/* first occluder found: mark the lane by setting its tfar to neg_inf and stop */
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
ray.tfar[k] = neg_inf;
return true;
}
/* a non-zero lazy_node reported by the primitive intersector is pushed for later traversal */
if (unlikely(lazy_node)) {
*stackPtr = lazy_node;
stackPtr++;
}
}
return false;
}
/*! Occlusion (any-hit) test of a packet of K rays against the BVH.
 *
 *  Control flow, in order:
 *   - returns immediately for an empty BVH or when no lane is valid;
 *   - forwards to occludedCoherent() for BVH_AN1 trees when the context is flagged coherent;
 *   - otherwise traverses the packet in chunk mode and, when 'single' is set and few lanes
 *     remain active, continues those lanes one at a time with occluded1().
 *  On exit ray.tfar is set to neg_inf for every valid lane that was found occluded.
 *
 *  \param valid_i lane mask; lanes whose value equals -1 (and whose ray.tfar is >= 0) are traced
 *  \param This    accelerator handle; This->ptr is cast to the BVH
 *  \param ray     ray packet
 *  \param context query context; context->user and isCoherent() select the coherent path and the switch threshold
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayK<K>& __restrict__ ray,
RayQueryContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
occludedCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out already occluded and invalid rays */
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
const size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp tnear/tfar to be non-negative; inactive lanes get the empty interval [pos_inf, neg_inf] */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
/* lanes that are not valid count as terminated from the start */
vbool<K> terminated = !valid;
const vfloat<K> inf = vfloat<K>(pos_inf);
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
/* allocate stack and push root node */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
/* slot 0 holds a sentinel (invalidNode) that terminates the pop loop */
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
/* trace every remaining active lane individually below the popped node */
for (; bits!=0; ) {
const size_t i = bscf(bits);
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
set(terminated, i);
}
if (all(terminated)) break;
/* terminated lanes get tfar = neg_inf so that they no longer pass the distance tests */
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
continue;
}
}
#endif
/* chunk down-traversal: descend until a leaf is reached */
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
/* the first empty child ends the scan of this node */
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
/* lanes that missed the child get distance inf */
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push 'cur' node onto stack and continue with hit child */
if (likely(cur != BVH::emptyNode)) {
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
}
/* no child was hit: pop the next node */
if (unlikely(cur == BVH::emptyNode))
goto pop;
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
/* re-push the current node so that the pop loop can switch to single ray traversal for it */
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
size_t lazy_node = 0;
/* the leaf test is run for all lanes that are not terminated yet */
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
if (all(terminated)) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
/* a non-zero lazy_node reported by the primitive intersector is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
/* report occlusion: write neg_inf into ray.tfar for valid lanes that terminated */
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
/*! Occlusion (any-hit) test of a coherent packet of K rays.
 *
 *  Valid lanes are grouped by identical octant. For each group a frustum is built and
 *  every inner node is first tested against the frustum; children that pass are then
 *  tested against the packet. Each stack entry carries a bit mask of the lanes that
 *  hit the node. Inner nodes are accessed through getAABBNode(); occluded() only
 *  forwards here when types == BVH_AN1.
 *  On exit ray.tfar is set to neg_inf for every valid lane that was found occluded.
 *
 *  \param valid_i lane mask; lanes whose value equals -1 are traced
 *  \param This    accelerator handle; This->ptr is cast to the BVH
 *  \param ray     ray packet
 *  \param context query context, forwarded to the primitive intersector
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayK<K>& __restrict__ ray,
RayQueryContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid,ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp tnear/tfar to be non-negative */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
/* lanes that are not valid count as terminated from the start */
vbool<K> terminated = !valid;
vint<K> octant = ray.octant();
/* invalid lanes are tagged with the octant value 0xffffffff */
octant = select(valid, octant, vint<K>(0xffffffff));
/* outer loop: each iteration traverses all lanes that share the octant of the first remaining lane */
do
{
const size_t valid_index = bsf(valid_bits);
vbool<K> octant_valid = octant[valid_index] == octant;
valid_bits &= ~(size_t)movemask(octant_valid);
/* lanes outside the group get the empty interval [pos_inf, neg_inf] */
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
Frustum<robust> frustum;
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].mask = movemask(octant_valid);
/* pop loop ('pop' labels the loop body so that inner code can jump straight to the next pop) */
while (1) pop:
{
/* pop next node from stack */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* cull node if active rays have already been terminated */
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
if (unlikely(m_active == 0)) continue;
while (likely(!cur.isLeaf()))
{
/* process nodes */
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
vfloat<N> fmin;
/* frustum test: bit i of the result is set for each child that passes */
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
if (unlikely(!m_frustum_node)) goto pop;
cur = BVH::emptyNode;
m_active = 0;
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
//size_t num_child_hits = 0;
do {
const size_t i = bscf(m_frustum_node);
vfloat<K> lnearP;
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
STAT3(normal.trav_nodes, 1, 1, 1);
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
if (likely(any(lhit)))
{
const NodeRef child = node->child(i);
assert(child != BVH::emptyNode);
BVHN<N>::prefetch(child);
/* push the previously hit child together with its lane mask and continue with the new one */
if (likely(cur != BVH::emptyNode)) {
//num_child_hits++;
stackPtr->ptr = cur;
stackPtr->mask = m_active;
stackPtr++;
}
cur = child;
m_active = movemask(lhit);
}
} while(m_frustum_node);
/* no child was hit by the packet: pop the next node */
if (unlikely(cur == BVH::emptyNode)) goto pop;
}
/* intersect leaf */
assert(cur != BVH::invalidNode);
assert(cur != BVH::emptyNode);
#if defined(__AVX__)
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
#endif
if (unlikely(!m_active)) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
/* the leaf test is run for all lanes that are not terminated yet */
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
/* stop traversing this octant group once all of its lanes are terminated */
octant_valid &= !terminated;
if (unlikely(none(octant_valid))) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
/* a non-zero lazy_node reported by the primitive intersector is pushed with the mask of the still-active lanes */
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->mask = movemask(octant_valid);
stackPtr++;
}
}
} while(valid_bits);
/* report occlusion: write neg_inf into ray.tfar for valid lanes that terminated */
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
}
}

View File

@@ -0,0 +1,58 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../common/ray.h"
#include "../common/stack_item.h"
#include "node_intersector_frustum.h"
namespace embree
{
namespace isa
{
template<int K, bool robust>
struct TravRayK;
/*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional).
 *
 *  Template parameters:
 *   - N: branching factor of the traversed BVH (the class works on BVHN<N>).
 *   - K: number of rays per packet (entry points take vint<K> masks and RayK<K> / RayHitK<K> packets).
 *   - types: node type mask of the BVH (for example the BVH_AN1 or BVH_AN2_AN4D values passed at the instantiation sites).
 *   - robust: forwarded to TravRayK<K, robust>. NOTE(review): presumably selects a conservative node test -- confirm in the node intersector.
 *   - PrimitiveIntersectorK: leaf intersector; has to provide the nested types Precalculations and Primitive.
 *   - single: defaults to true; BVHNIntersectorKChunk instantiates this class with single = false. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
class BVHNIntersectorKHybrid
{
/* shortcuts for frequently used types */
typedef typename PrimitiveIntersectorK::Precalculations Precalculations;
typedef typename PrimitiveIntersectorK::Primitive Primitive;
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::BaseNode BaseNode;
typedef typename BVH::AABBNode AABBNode;
/* capacities of the traversal stacks, derived from the branching factor and BVH::maxDepth */
static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;
/* threshold selected by packet width K (and by N for K==8); 0 for any other K.
   NOTE(review): presumably the number of active rays at or below which traversal
   switches from packet to single ray mode -- confirm against the implementation file. */
static const size_t switchThresholdIncoherent = \
(K==4) ? 3 :
(K==8) ? ((N==4) ? 5 : 7) :
(K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
0;
private:
/* helpers operating on a single ray of the packet, selected by index k, starting at the given root.
   NOTE(review): the bool returned by occluded1 presumably reports whether ray k is occluded -- confirm. */
static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
RayHitK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);
static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
RayK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);
public:
/* packet entry points; valid points to the per-ray activity mask of the packet */
static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);
/* variants of the entry points above intended for coherent packets */
static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);
};
/*! BVH packet intersector.
 *  Adds no members of its own: it is BVHNIntersectorKHybrid with the trailing
 *  `single` template argument fixed to false. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {};
}
}

View File

@@ -0,0 +1,62 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.cpp"
namespace embree
{
namespace isa
{
////////////////////////////////////////////////////////////////////////////////
/// BVH4Intersector4 Definitions
////////////////////////////////////////////////////////////////////////////////
// Every line below registers one packet intersector under the given symbol name.
// All of them use N=4 and K=4 (the first two template arguments).
// NOTE(review): COMMA is presumably a macro expanding to ',' so that template
// argument lists can be passed as a single macro argument, and the IF_ENABLED_*
// wrappers presumably drop the line when that geometry type is compiled out -- confirm.

// triangles
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
// quads
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
// curves and points
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
// subdivision patches
// NOTE: the disabled line directly below repeats the active registration that follows it.
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
// NOTE: the disabled line directly below repeats the active registration above it.
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
// user geometries, instances and instance arrays use the chunk (packet only) intersector
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorK<4>> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorKMB<4>> >));
// grids
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
// NOTE: disabled alternative of the registration above that uses BVHNIntersectorKChunk instead of the hybrid intersector.
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
// NOTE(review): the next symbol is named ...HybridMoeller but is instantiated with robust = true
// and SubGridMBIntersectorKPluecker -- confirm that this pairing is intended.
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
}
}

View File

@@ -0,0 +1,229 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! BVHN AABBNode: inner BVH node that keeps one axis-aligned bounding box per
 *  child. The boxes are stored component-wise, one vfloat<N> per bound
 *  coordinate; the child references themselves live in the BaseNode_t base. */
template<typename NodeRef, int N>
struct AABBNode_t : public BaseNode_t<NodeRef, N>
{
  using BaseNode_t<NodeRef,N>::children;

  /*! Allocates a cleared node from the given allocator and returns its encoded
   *  reference. The numChildren argument is accepted but not used. */
  struct Create
  {
    __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
    {
      AABBNode_t* const fresh = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment);
      fresh->clear();
      return NodeRef::encodeNode(fresh);
    }
  };

  /*! Stores reference and bounds of child i in the node referenced by `node`. */
  struct Set
  {
    __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
      AABBNode_t* const target = node.getAABBNode();
      target->setRef(i,child);
      target->setBounds(i,bounds);
    }
  };

  /*! Allocates a cleared node and fills in the bounds of the first `num`
   *  children from the build records; child references are set later. */
  struct Create2
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
    {
      AABBNode_t* const fresh = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment);
      fresh->clear();
      for (size_t c=0; c!=num; ++c)
        fresh->setBounds(c,children[c].bounds());
      return NodeRef::encodeNode(fresh);
    }
  };

  /*! Writes the first `num` child references into the node and returns `ref`. */
  struct Set2
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
    {
#if defined(DEBUG)
      // once an empty child was seen, all following children have to be empty as well
      bool sawEmpty = false;
      for (size_t c=0; c<num; c++) {
        const bool isEmpty = (children[c] == NodeRef::emptyNode);
        sawEmpty |= isEmpty;
        assert(sawEmpty == isEmpty);
      }
#endif
      AABBNode_t* const target = ref.getAABBNode();
      for (size_t c=0; c!=num; ++c)
        target->setRef(c,children[c]);
      return ref;
    }
  };

  /*! Same as Set2, but additionally hands the primitive range of the parent
   *  record to the allocator when the record is flagged with alloc_barrier. */
  struct Set3
  {
    Set3 (FastAllocator* allocator, PrimRef* prims)
      : allocator(allocator), prims(prims) {}

    template<typename BuildRecord>
    __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
    {
#if defined(DEBUG)
      // once an empty child was seen, all following children have to be empty as well
      bool sawEmpty = false;
      for (size_t c=0; c<num; c++) {
        const bool isEmpty = (children[c] == NodeRef::emptyNode);
        sawEmpty |= isEmpty;
        assert(sawEmpty == isEmpty);
      }
#endif
      AABBNode_t* const target = ref.getAABBNode();
      for (size_t c=0; c!=num; ++c)
        target->setRef(c,children[c]);

      if (unlikely(precord.alloc_barrier))
      {
        // byte range covered by the primitives of the parent record
        PrimRef* const first = prims + precord.prims.begin();
        PrimRef* const last  = prims + precord.prims.end(); // FIXME: extended end for spatial split builder!!!!!
        allocator->addBlock(first,(size_t)last - (size_t)first);
      }
      return ref;
    }

    FastAllocator* const allocator;
    PrimRef* const prims;
  };

  /*! Resets all boxes to the empty box (lower = +inf, upper = -inf) and all
   *  child references to the empty node. */
  __forceinline void clear() {
    lower_x = vfloat<N>(pos_inf); lower_y = vfloat<N>(pos_inf); lower_z = vfloat<N>(pos_inf);
    upper_x = vfloat<N>(neg_inf); upper_y = vfloat<N>(neg_inf); upper_z = vfloat<N>(neg_inf);
    BaseNode_t<NodeRef,N>::clear();
  }

  /*! Sets the reference of child i (the bounds are left untouched). */
  __forceinline void setRef(size_t i, const NodeRef& ref) {
    assert(i < N);
    children[i] = ref;
  }

  /*! Sets the bounding box of child i. */
  __forceinline void setBounds(size_t i, const BBox3fa& bounds)
  {
    assert(i < N);
    lower_x[i] = bounds.lower.x;
    lower_y[i] = bounds.lower.y;
    lower_z[i] = bounds.lower.z;
    upper_x[i] = bounds.upper.x;
    upper_y[i] = bounds.upper.y;
    upper_z[i] = bounds.upper.z;
  }

  /*! Sets both the bounding box and the reference of child i. */
  __forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
    setBounds(i,bounds);
    setRef(i,ref);
  }

  /*! Returns the box enclosing the boxes of all N child slots. */
  __forceinline BBox3fa bounds() const {
    return BBox3fa(Vec3fa(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z)),
                   Vec3fa(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z)));
  }

  /*! Returns the box of child i. */
  __forceinline BBox3fa bounds(size_t i) const
  {
    assert(i < N);
    return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
                   Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
  }

  /*! Returns the size (upper minus lower extent) of the box of child i. */
  __forceinline Vec3fa extend(size_t i) const {
    return bounds(i).size();
  }

  /*! Returns bounds of all children (implemented later as specializations) */
  __forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;

  /*! Exchanges children i and j of this node (reference and box). */
  __forceinline void swap(size_t i, size_t j)
  {
    assert(i<N && j<N);
    swap(this,i,this,j);
  }

  /*! Exchanges child i of node a with child j of node b (reference and box). */
  __forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
  {
    assert(i<N && j<N);
    std::swap(a->children[i],b->children[j]);
    std::swap(a->lower_x[i],b->lower_x[j]);
    std::swap(a->lower_y[i],b->lower_y[j]);
    std::swap(a->lower_z[i],b->lower_z[j]);
    std::swap(a->upper_x[i],b->upper_x[j]);
    std::swap(a->upper_y[i],b->upper_y[j]);
    std::swap(a->upper_z[i],b->upper_z[j]);
  }

  /*! Compacts a node: empty child slots are filled with children taken from
   *  the back, so that all empty slots end up at the end of the child list. */
  __forceinline static void compact(AABBNode_t* a)
  {
    /* index of the right most filled slot (-1 if there is none) */
    ssize_t last = ssize_t(N)-1;
    while (last >= 0 && a->child(last) == NodeRef::emptyNode)
      last--;

    /* move the right most filled slot into every hole found left of it */
    for (ssize_t first=0; first<last; first++)
    {
      if (a->child(first) != NodeRef::emptyNode)
        continue;

      a->swap(first,last);
      last--;
      while (last > first && a->child(last) == NodeRef::emptyNode)
        last--;
    }
  }

  /*! Returns reference to specified child */
  __forceinline       NodeRef& child(size_t i)       { assert(i<N); return children[i]; }
  __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

  /*! Prints the six bound vectors followed by the child references. */
  friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
  {
    o << "AABBNode { " << embree_endl;
    o << " lower_x " << n.lower_x << embree_endl;
    o << " upper_x " << n.upper_x << embree_endl;
    o << " lower_y " << n.lower_y << embree_endl;
    o << " upper_y " << n.upper_y << embree_endl;
    o << " lower_z " << n.lower_z << embree_endl;
    o << " upper_z " << n.upper_z << embree_endl;
    o << " children = ";
    for (size_t c=0; c<N; c++)
      o << n.children[c] << " ";
    o << embree_endl;
    o << "}" << embree_endl;
    return o;
  }

public:
  vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
  vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
  vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
  vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
  vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
  vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
};
/*! Specialization for 4-wide nodes: converts the component-wise bound vectors
 *  into one box per child. Both calls pass the x, y and z vectors plus a zero
 *  vector as fourth input and write into the lower (resp. upper) corners of
 *  bounds0..bounds3.
 *  NOTE(review): transpose is presumably a 4x4 transpose, so that boundsK
 *  receives (x,y,z,0) of child K -- confirm against its definition. */
template<>
__forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
}
}

View File

@@ -0,0 +1,255 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! Motion Blur AABBNode: inner BVH node that stores a linearly moving box per
 *  child. For every child the box at time 0 is kept in lower_x/y/z and
 *  upper_x/y/z, and the difference between the box at time 1 and the box at
 *  time 0 is kept in lower_dx/dy/dz and upper_dx/dy/dz. */
template<typename NodeRef, int N>
struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
{
  using BaseNode_t<NodeRef,N>::children;
  typedef BVHNodeRecord<NodeRef> NodeRecord;
  typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
  typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

  /*! Allocates a cleared node from the given allocator and returns its encoded
   *  reference. The build records and their count are not used. */
  struct Create
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
    {
      AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
      return NodeRef::encodeNode(node);
    }
  };

  /*! Stores references and linear bounds of the first `num` children and
   *  returns a record holding `ref` together with the merged linear bounds. */
  struct Set
  {
    template<typename BuildRecord>
    __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
    {
#if defined(DEBUG)
      // check that empty children are only at the end of the child list
      bool emptyChild = false;
      for (size_t i=0; i<num; i++) {
        emptyChild |= (children[i].ref == NodeRef::emptyNode);
        assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
      }
#endif
      AABBNodeMB_t* node = ref.getAABBNodeMB();

      LBBox3fa bounds = empty;
      for (size_t i=0; i<num; i++) {
        node->setRef(i,children[i].ref);
        node->setBounds(i,children[i].lbounds);
        bounds.extend(children[i].lbounds);
      }
      return NodeRecordMB(ref,bounds);
    }
  };

  /*! Like Set, but the child bounds are given for the time range `tbounds`
   *  and are converted with LBBox3fa::global(tbounds) before being stored.
   *  The returned record still merges the unconverted child bounds. */
  struct SetTimeRange
  {
    __forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}

    template<typename BuildRecord>
    __forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
    {
      AABBNodeMB_t* node = ref.getAABBNodeMB();

      LBBox3fa bounds = empty;
      for (size_t i=0; i<num; i++) {
        node->setRef(i, children[i].ref);
        node->setBounds(i, children[i].lbounds, tbounds);
        bounds.extend(children[i].lbounds);
      }
      return NodeRecordMB(ref,bounds);
    }

    BBox1f tbounds;
  };

  /*! Clears the node: empty time 0 boxes, zero deltas, empty child references. */
  __forceinline void clear() {
    lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
    upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
    lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
    upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
    BaseNode_t<NodeRef,N>::clear();
  }

  /*! Sets ID of child. */
  __forceinline void setRef(size_t i, NodeRef ref) {
    assert(i < N); // same slot check as the setters of the other node types
    children[i] = ref;
  }

  /*! Sets the moving bounding box of child i from its box at time 0
   *  (bounds0_i) and its box at time 1 (bounds1_i). */
  __forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
  {
    assert(i < N);

    /*! for empty bounds we have to avoid inf-inf=nan */
    BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
    BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));

    /* both boxes are enlarged by 4 ulp before they are stored */
    bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
    bounds1 = bounds1.enlarge_by(4.0f*float(ulp));

    /* the box at time 1 is stored as a difference to the box at time 0 */
    Vec3fa dlower = bounds1.lower-bounds0.lower;
    Vec3fa dupper = bounds1.upper-bounds0.upper;

    lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
    upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
    lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
    upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
  }

  /*! Sets the moving bounding box of child i from linear bounds. */
  __forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
    setBounds(i, bounds.bounds0, bounds.bounds1);
  }

  /*! Sets the moving bounding box of child i from linear bounds given for the
   *  time range tbounds; the bounds are converted with global(tbounds) first. */
  __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
    setBounds(i, bounds.global(tbounds));
  }

  /*! Sets the time 0 bounding box and ID of child i. The delta components of
   *  the slot are not written (they are zero after clear()). */
  __forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
    assert(i < N);
    lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
    upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
    children[i] = ref;
  }

  /*! Sets bounding box and ID of child. */
  __forceinline void set(size_t i, const NodeRecordMB4D& child)
  {
    setRef(i, child.ref);
    setBounds(i, child.lbounds, child.dt);
  }

  /*! Return bounding box for time 0 */
  __forceinline BBox3fa bounds0(size_t i) const {
    assert(i < N);
    return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
                   Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
  }

  /*! Return bounding box for time 1 */
  __forceinline BBox3fa bounds1(size_t i) const {
    assert(i < N);
    return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
                   Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
  }

  /*! Returns bounds of node: the box enclosing the time 0 and time 1 boxes of all child slots. */
  __forceinline BBox3fa bounds() const {
    return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
                          reduce_min(min(lower_y,lower_y+lower_dy)),
                          reduce_min(min(lower_z,lower_z+lower_dz))),
                   Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
                          reduce_max(max(upper_y,upper_y+upper_dy)),
                          reduce_max(max(upper_z,upper_z+upper_dz))));
  }

  /*! Return bounding box of child i (merge of its time 0 and time 1 boxes) */
  __forceinline BBox3fa bounds(size_t i) const {
    return merge(bounds0(i),bounds1(i));
  }

  /*! Return linear bounding box of child i */
  __forceinline LBBox3fa lbounds(size_t i) const {
    return LBBox3fa(bounds0(i),bounds1(i));
  }

  /*! Return bounding box of child i at specified time */
  __forceinline BBox3fa bounds(size_t i, float time) const {
    return lerp(bounds0(i),bounds1(i),time);
  }

  /*! Returns the expected surface area when randomly sampling the time. */
  __forceinline float expectedHalfArea(size_t i) const {
    return lbounds(i).expectedHalfArea();
  }

  /*! Returns the expected surface area when randomly sampling the time. */
  __forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
    return lbounds(i).expectedHalfArea(t0t1);
  }

  /*! swap two children of the node */
  __forceinline void swap(size_t i, size_t j)
  {
    assert(i<N && j<N);
    std::swap(children[i],children[j]);
    std::swap(lower_x[i],lower_x[j]);
    std::swap(upper_x[i],upper_x[j]);
    std::swap(lower_y[i],lower_y[j]);
    std::swap(upper_y[i],upper_y[j]);
    std::swap(lower_z[i],lower_z[j]);
    std::swap(upper_z[i],upper_z[j]);
    std::swap(lower_dx[i],lower_dx[j]);
    std::swap(upper_dx[i],upper_dx[j]);
    std::swap(lower_dy[i],lower_dy[j]);
    std::swap(upper_dy[i],upper_dy[j]);
    std::swap(lower_dz[i],lower_dz[j]);
    std::swap(upper_dz[i],upper_dz[j]);
  }

  /*! compacts a node (moves empty children to the end) */
  __forceinline static void compact(AABBNodeMB_t* a)
  {
    /* find right most filled node */
    ssize_t j=N;
    for (j=j-1; j>=0; j--)
      if (a->child(j) != NodeRef::emptyNode)
        break;

    /* replace empty nodes with filled nodes */
    for (ssize_t i=0; i<j; i++) {
      if (a->child(i) == NodeRef::emptyNode) {
        a->swap(i,j);
        for (j=j-1; j>i; j--)
          if (a->child(j) != NodeRef::emptyNode)
            break;
      }
    }
  }

  /*! Returns reference to specified child */
  __forceinline       NodeRef& child(size_t i)       { assert(i<N); return children[i]; }
  __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

  /*! stream output operator: prints the time 0 and time 1 box of every child slot */
  friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
  {
    cout << "AABBNodeMB {" << embree_endl;
    for (size_t i=0; i<N; i++)
    {
      const BBox3fa b0 = n.bounds0(i);
      const BBox3fa b1 = n.bounds1(i);
      cout << " child" << i << " { " << embree_endl;
      cout << " bounds0 = " << b0 << ", " << embree_endl;
      cout << " bounds1 = " << b1 << ", " << embree_endl;
      cout << " }";
    }
    cout << "}";
    return cout;
  }

public:
  vfloat<N> lower_x;  //!< X dimension of lower bounds of all N children at time 0.
  vfloat<N> upper_x;  //!< X dimension of upper bounds of all N children at time 0.
  vfloat<N> lower_y;  //!< Y dimension of lower bounds of all N children at time 0.
  vfloat<N> upper_y;  //!< Y dimension of upper bounds of all N children at time 0.
  vfloat<N> lower_z;  //!< Z dimension of lower bounds of all N children at time 0.
  vfloat<N> upper_z;  //!< Z dimension of upper bounds of all N children at time 0.
  vfloat<N> lower_dx; //!< X delta (time 1 minus time 0) of lower bounds of all N children.
  vfloat<N> upper_dx; //!< X delta (time 1 minus time 0) of upper bounds of all N children.
  vfloat<N> lower_dy; //!< Y delta (time 1 minus time 0) of lower bounds of all N children.
  vfloat<N> upper_dy; //!< Y delta (time 1 minus time 0) of upper bounds of all N children.
  vfloat<N> lower_dz; //!< Z delta (time 1 minus time 0) of lower bounds of all N children.
  vfloat<N> upper_dz; //!< Z delta (time 1 minus time 0) of upper bounds of all N children.
};
}

View File

@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_aabb_mb.h"
namespace embree
{
/*! Aligned 4D Motion Blur Node: extends the motion blur node by a time range
 *  (lower_t, upper_t) per child. */
template<typename NodeRef, int N>
struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
{
  using BaseNode_t<NodeRef,N>::children;
  using AABBNodeMB_t<NodeRef,N>::set;
  typedef BVHNodeRecord<NodeRef> NodeRecord;
  typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
  typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

  /*! Allocates a cleared node and returns its encoded reference. A full 4D
   *  node is created when hasTimeSplits is set, otherwise a plain motion blur
   *  node without per-child time ranges. */
  struct Create
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
    {
      if (!hasTimeSplits)
      {
        typedef AABBNodeMB_t<NodeRef,N> PlainNode;
        PlainNode* const plain = (PlainNode*) alloc.malloc0(sizeof(PlainNode),NodeRef::byteNodeAlignment);
        plain->clear();
        return NodeRef::encodeNode(plain);
      }

      AABBNodeMB4D_t* const full = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment);
      full->clear();
      return NodeRef::encodeNode(full);
    }
  };

  /*! Stores the first `num` child records in the node referenced by `ref`,
   *  which may be either of the two node kinds produced by Create. */
  struct Set
  {
    template<typename BuildRecord>
    __forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
    {
#if defined(DEBUG)
      // once an empty child was seen, all following children have to be empty as well
      bool sawEmpty = false;
      for (size_t c=0; c<num; c++) {
        const bool isEmpty = (children[c].ref == NodeRef::emptyNode);
        sawEmpty |= isEmpty;
        assert(sawEmpty == isEmpty);
      }
#endif
      /* the node kind is queried once, each child is then stored through the matching accessor */
      const bool plainMB = ref.isAABBNodeMB();
      for (size_t c=0; c<num; c++)
      {
        if (likely(plainMB))
          ref.getAABBNodeMB()->set(c, children[c]);
        else
          ref.getAABBNodeMB4D()->set(c, children[c]);
      }
    }
  };

  /*! Clears the node: empty time ranges plus everything the base node clears. */
  __forceinline void clear() {
    lower_t = vfloat<N>(pos_inf);
    upper_t = vfloat<N>(neg_inf);
    AABBNodeMB_t<NodeRef,N>::clear();
  }

  /*! Sets the bounding box and the time range of child i. */
  __forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
  {
    AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
    lower_t[i] = tbounds.lower;

    /* an upper time of exactly 1 is stored as 1+ulp */
    if (tbounds.upper == 1.0f)
      upper_t[i] = 1.0f+float(ulp);
    else
      upper_t[i] = tbounds.upper;
  }

  /*! Sets reference, bounding box and time range of child i from a record. */
  __forceinline void set(size_t i, const NodeRecordMB4D& child) {
    AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
    setBounds(i, child.lbounds, child.dt);
  }

  /*! Returns the expected surface area of child i when randomly sampling the
   *  time inside the time range of that child. */
  __forceinline float expectedHalfArea(size_t i) const {
    const BBox1f range = timeRange(i);
    return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(range);
  }

  /*! Returns the time range stored for child i. */
  __forceinline BBox1f timeRange(size_t i) const {
    return BBox1f(lower_t[i],upper_t[i]);
  }

  /*! stream output operator: per child slot, prints the boxes interpolated at
   *  the lower and upper time of the slot, followed by the time range */
  friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
  {
    cout << "AABBNodeMB4D {" << embree_endl;
    for (size_t c=0; c<N; c++)
    {
      const BBox3fa box0 = n.bounds0(c);
      const BBox3fa box1 = n.bounds1(c);
      cout << " child" << c << " { " << embree_endl;
      cout << " bounds0 = " << lerp(box0,box1,n.lower_t[c]) << ", " << embree_endl;
      cout << " bounds1 = " << lerp(box0,box1,n.upper_t[c]) << ", " << embree_endl;
      cout << " time_bounds = " << n.lower_t[c] << ", " << n.upper_t[c] << embree_endl;
      cout << " }";
    }
    cout << "}";
    return cout;
  }

public:
  vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
  vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
};
}

View File

@@ -0,0 +1,43 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_ref.h"
namespace embree
{
/*! BVHN Base Node: holds the N child references shared by all node types. */
template<typename NodeRef, int N>
struct BaseNode_t
{
  /*! Clears the node by marking every child slot as empty. */
  __forceinline void clear()
  {
    for (NodeRef& slot : children)
      slot = NodeRef::emptyNode;
  }

  /*! Returns reference to specified child */
  __forceinline       NodeRef& child(size_t i)       { assert(i<N); return children[i]; }
  __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

  /*! Verifies the node: returns false as soon as a filled child slot follows
   *  an empty one, true otherwise. */
  __forceinline bool verify() const
  {
    bool emptySeen = false;
    for (size_t i=0; i<N; i++)
    {
      const bool slotEmpty = (child(i) == NodeRef::emptyNode);
      if (emptySeen && !slotEmpty)
        return false;
      emptySeen = emptySeen || slotEmpty;
    }
    return true;
  }

  NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf)
};
}

View File

@@ -0,0 +1,98 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! Node with unaligned bounds: every child box is described by an affine
 *  space in which the bounds of that child are the unit box [0,1]. */
template<typename NodeRef, int N>
struct OBBNode_t : public BaseNode_t<NodeRef, N>
{
  using BaseNode_t<NodeRef,N>::children;

  /*! Allocates a cleared node from the given allocator and returns its encoded reference. */
  struct Create
  {
    __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
    {
      OBBNode_t* const fresh = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment);
      fresh->clear();
      return NodeRef::encodeNode(fresh);
    }
  };

  /*! Stores reference and oriented bounds of child i in the node referenced by `node`. */
  struct Set
  {
    __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
      auto* const target = node.ungetAABBNode();
      target->setRef(i,child);
      target->setBounds(i,bounds);
    }
  };

  /*! Clears the node: all transformation entries become nan and all child
   *  references become the empty node. */
  __forceinline void clear()
  {
    const Vec3fa invalid(nan);
    naabb.l.vx = invalid;
    naabb.l.vy = invalid;
    naabb.l.vz = invalid;
    naabb.p    = invalid;
    BaseNode_t<NodeRef,N>::clear();
  }

  /*! Sets the bounding box of child i. The space of the box is shifted by the
   *  lower corner and scaled by the reciprocal box size (clamped from below
   *  at 1E-19f) before it is stored. */
  __forceinline void setBounds(size_t i, const OBBox3fa& b)
  {
    assert(i < N);

    AffineSpace3fa xfm = b.space;
    xfm.p -= b.bounds.lower;
    xfm = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*xfm;

    /* scatter the transformation into lane i of the node */
    naabb.l.vx.x[i] = xfm.l.vx.x; naabb.l.vx.y[i] = xfm.l.vx.y; naabb.l.vx.z[i] = xfm.l.vx.z;
    naabb.l.vy.x[i] = xfm.l.vy.x; naabb.l.vy.y[i] = xfm.l.vy.y; naabb.l.vy.z[i] = xfm.l.vy.z;
    naabb.l.vz.x[i] = xfm.l.vz.x; naabb.l.vz.y[i] = xfm.l.vz.y; naabb.l.vz.z[i] = xfm.l.vz.z;
    naabb.p.x[i]    = xfm.p.x;    naabb.p.y[i]    = xfm.p.y;    naabb.p.z[i]    = xfm.p.z;
  }

  /*! Sets ID of child. */
  __forceinline void setRef(size_t i, const NodeRef& ref) {
    assert(i < N);
    children[i] = ref;
  }

  /*! Returns the extent of the bounds of the ith child, computed as the
   *  reciprocal square root of the component-wise sum of the squared
   *  vx, vy and vz vectors of that child. */
  __forceinline Vec3fa extent(size_t i) const {
    assert(i<N);
    const Vec3fa ax(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
    const Vec3fa ay(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
    const Vec3fa az(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
    return rsqrt(ax*ax + ay*ay + az*az);
  }

  /*! Returns reference to specified child */
  __forceinline       NodeRef& child(size_t i)       { assert(i<N); return children[i]; }
  __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

  /*! output operator */
  friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
  {
    o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
    return o;
  }

public:
  AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
};
}

View File

@@ -0,0 +1,90 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! N-wide motion blur node with oriented bounds. Lane i of space0 holds the
 *  affine space of child i, b1 holds the second bounding box of child i
 *  expressed in that space. */
template<typename NodeRef, int N>
struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
{
  using BaseNode_t<NodeRef,N>::children;

  /*! Functor: allocates a node, clears it and returns its encoded reference. */
  struct Create
  {
    __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
    {
      OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment);
      node->clear();
      return NodeRef::encodeNode(node);
    }
  };

  /*! Functor: stores a child reference and its bounds in slot i of a node.
   *  The linear bounds are passed through lbounds.global(dt) before storing. */
  struct Set
  {
    __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const
    {
      auto* const target = node.ungetAABBNodeMB();
      target->setRef(i,child);
      target->setBounds(i,space,lbounds.global(dt));
    }
  };

  /*! Clears the node: identity space, NaN bounds, empty child slots. */
  __forceinline void clear()
  {
    space0 = one;
    b1.lower = b1.upper = Vec3fa(nan);
    BaseNode_t<NodeRef,N>::clear();
  }

  /*! Sets space and bounding boxes of child i from linear bounds. */
  __forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
    setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
  }

  /*! Sets space and bounding boxes of child i.
   *  \param s0 space the boxes a and c are given in
   *  \param a  first box; the stored space is shifted by a.lower and scaled
   *            by the reciprocal size of a (clamped to at least 1E-19)
   *  \param c  second box; stored in b1 relative to a */
  __forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
  {
    assert(i < N);

    AffineSpace3fa space = s0;
    space.p -= a.lower;
    const Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower);
    space = AffineSpace3fa::scale(scale)*space;

    /* Only box c is re-expressed relative to a. The image of a itself is
     * not computed: the node has no b0 member to hold it (it would be the
     * unit bounds). */
    const BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);

    /* scatter the scalar values into lane i of the SOA layout */
    space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
    space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
    space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
    space0.p   .x[i] = space.p   .x; space0.p   .y[i] = space.p   .y; space0.p   .z[i] = space.p   .z;

    b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
    b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
  }

  /*! Sets ID of child. */
  __forceinline void setRef(size_t i, const NodeRef& ref) {
    assert(i < N);
    children[i] = ref;
  }

  /*! Returns the extent of the bounds of the ith child, computed as the
   *  reciprocal square root of the summed squared basis vectors of lane i. */
  __forceinline Vec3fa extent0(size_t i) const {
    assert(i < N);
    const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
    const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
    const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
    return rsqrt(vx*vx + vy*vy + vz*vz);
  }

public:
  AffineSpace3vf<N> space0; //!< per-child affine space
  //BBox3vf<N> b0; // these are the unit bounds
  BBox3vf<N> b1;            //!< per-child second bounding box, relative to the first
};
}

View File

@@ -0,0 +1,273 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! BVHN Quantized Node */
/* Stores the bounds of N children as 8-bit values per dimension together with
 * a per-dimension float 'start' and 'scale'. A quantized value q decodes to
 * start + scale*q (see bounds() and the dequantize*() helpers). */
template<int N>
struct __aligned(8) QuantizedBaseNode_t
{
/* storage type of one quantized coordinate and its value range */
typedef unsigned char T;
static const T MIN_QUAN = 0;
static const T MAX_QUAN = 255;
/*! Clears the node. */
/* Every child gets lower = MAX_QUAN and upper = MIN_QUAN, i.e. lower > upper,
 * which validMask() reports as an invalid lane. start and scale are not touched. */
__forceinline void clear() {
for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
}
/*! Returns bounds of specified child. */
/* Decodes all six quantized planes of child i as start + scale*q. */
__forceinline BBox3fa bounds(size_t i) const
{
assert(i < N);
const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
madd(scale.y,(float)lower_y[i],start.y),
madd(scale.z,(float)lower_z[i],start.z));
const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
madd(scale.y,(float)upper_y[i],start.y),
madd(scale.z,(float)upper_z[i],start.z));
return BBox3fa(lower,upper);
}
/*! Returns extent of bounds of specified child. */
__forceinline Vec3fa extent(size_t i) const {
return bounds(i).size();
}
/* Quantizes one dimension of N child bounds.
 *   lower, upper             : float bounds of the N children in this dimension
 *   lower_quant, upper_quant : receive the 8-bit quantized bounds
 *   start, scale             : receive the decode parameters (value = start + scale*q)
 * Lanes whose lower bound equals +inf are treated as invalid and are written
 * as lower = MAX_QUAN, upper = MIN_QUAN. */
static __forceinline void init_dim(const vfloat<N> &lower,
const vfloat<N> &upper,
T lower_quant[N],
T upper_quant[N],
float &start,
float &scale)
{
/* quantize bounds */
const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
const float minF = reduce_min(lower);
const float maxF = reduce_max(upper);
/* the covered range is enlarged by a factor of (1 + 2*ulp) */
float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
float decode_scale = diff / float(MAX_QUAN);
if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
/* the largest quantized value has to decode to at least maxF */
assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
/* lower bounds are rounded down, upper bounds are rounded up, both clamped to the 8-bit range */
vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
/* lower/upper correction */
/* if the decoded value does not enclose the input bound, move one quantization step outwards */
vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
/* disable invalid lanes */
ilower = select(m_valid,ilower,MAX_QUAN);
iupper = select(m_valid,iupper,MIN_QUAN);
/* store as uchar to memory */
vint<N>::store(lower_quant,ilower);
vint<N>::store(upper_quant,iupper);
start = minF;
scale = decode_scale;
#if defined(DEBUG)
/* debug builds re-decode the stored values and check that all valid lanes are enclosed */
vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
#endif
}
/* Quantizes all three dimensions of the given node into this node. */
__forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
{
init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
}
/* A lane is valid when its quantized lower_x is <= its quantized upper_x. */
__forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
#if defined(__AVX512F__) // KNL
/* 16-wide variant of validMask(); the comparison is called with the mask 0xff */
__forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
#endif
/* Decode one plane of all N children: quantized value * scale + start. */
__forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
__forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
__forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
__forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
__forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
__forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
/* Loads M quantized values starting at byte 'offset' of all_planes and converts
 * them to float. Note that start and scale are NOT applied here. */
template <int M>
__forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
#if defined(__AVX512F__)
/* Load 16 quantized values starting at the lower_* array of one dimension,
 * permute them with p and decode them with that dimension's scale and start. */
__forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
__forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
__forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
#endif
/* The six quantized planes can be accessed by name or as one flat byte array. */
union {
struct {
T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
};
T all_planes[6*N];
};
/* decode parameters per dimension: value = start + scale * quantized */
Vec3f start;
Vec3f scale;
/* Prints the decode parameters and all quantized planes. */
friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
{
o << "QuantizedBaseNode { " << embree_endl;
o << " start " << n.start << embree_endl;
o << " scale " << n.scale << embree_endl;
o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
o << "}" << embree_endl;
return o;
}
};
/*! Quantized BVH node: the child references of BaseNode_t combined with the
 *  8-bit quantized bounds of QuantizedBaseNode_t. */
template<typename NodeRef, int N>
struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
{
  using BaseNode_t<NodeRef,N>::children;
  using QuantizedBaseNode_t<N>::lower_x;
  using QuantizedBaseNode_t<N>::upper_x;
  using QuantizedBaseNode_t<N>::lower_y;
  using QuantizedBaseNode_t<N>::upper_y;
  using QuantizedBaseNode_t<N>::lower_z;
  using QuantizedBaseNode_t<N>::upper_z;
  using QuantizedBaseNode_t<N>::start;
  using QuantizedBaseNode_t<N>::scale;
  using QuantizedBaseNode_t<N>::init_dim;

  /*! Stores the reference of child i. */
  __forceinline void setRef(size_t i, const NodeRef& ref)
  {
    assert(i < N);
    children[i] = ref;
  }

  /*! Functor: gathers the bounds of n build records in a temporary
   *  full-precision node, quantizes them into a newly allocated node and
   *  returns that node's pointer tagged as quantized node. */
  struct Create2
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
    {
      /* collect the child bounds at full precision first */
      __aligned(64) AABBNode_t<NodeRef,N> fullNode;
      fullNode.clear();
      for (size_t k=0; k<n; k++)
        fullNode.setBounds(k,children[k].bounds());

      /* then allocate the quantized node and initialize it from them */
      QuantizedNode_t* const qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
      qnode->init(fullNode);
      return (size_t)qnode | NodeRef::tyQuantizedNode;
    }
  };

  /*! Functor: writes the first num child references into the quantized node
   *  behind ref and returns ref. The two record arguments are not used. */
  struct Set2
  {
    template<typename BuildRecord>
    __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
    {
#if defined(DEBUG)
      // check that empty children are only at the end of the child list
      bool seenEmpty = false;
      for (size_t k=0; k<num; k++) {
        const bool isEmpty = (children[k] == NodeRef::emptyNode);
        seenEmpty |= isEmpty;
        assert(seenEmpty == isEmpty);
      }
#endif
      QuantizedNode_t* const qnode = ref.quantizedNode();
      for (size_t k=0; k<num; k++)
        qnode->setRef(k,children[k]);
      return ref;
    }
  };

  /*! Empties all child slots and quantizes the bounds of the given node. */
  __forceinline void init(AABBNode_t<NodeRef,N>& node)
  {
    for (NodeRef& slot : children)
      slot = NodeRef::emptyNode;
    init_dim(node);
  }
};
/*! Quantized motion blur node: two quantized nodes (node0, node1) whose
 *  decoded bounds are linearly interpolated by a parameter t. */
template<int N>
struct __aligned(8) QuantizedBaseNodeMB_t
{
  QuantizedBaseNode_t<N> node0;
  QuantizedBaseNode_t<N> node1;

  /*! Clears both quantized nodes. */
  __forceinline void clear()
  {
    node0.clear();
    node1.clear();
  }

  /*! Returns the bounds of child i: the box of node0 extended by the box of node1. */
  __forceinline BBox3fa bounds(size_t i) const
  {
    assert(i < N);
    BBox3fa merged = node0.bounds(i);
    const BBox3fa other = node1.bounds(i);
    merged.extend(other);
    return merged;
  }

  /*! Returns the extent of the bounds of child i. */
  __forceinline Vec3fa extent(size_t i) const
  {
    return bounds(i).size();
  }

  /*! Valid lanes are taken from node0 only. */
  __forceinline vbool<N> validMask() const
  {
    return node0.validMask();
  }

  /* ---- all N children, interpolated between node0 and node1 by t ---- */

  template<typename T>
  __forceinline vfloat<N> dequantizeLowerX(const T t) const
  {
    return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t);
  }

  template<typename T>
  __forceinline vfloat<N> dequantizeUpperX(const T t) const
  {
    return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t);
  }

  template<typename T>
  __forceinline vfloat<N> dequantizeLowerY(const T t) const
  {
    return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t);
  }

  template<typename T>
  __forceinline vfloat<N> dequantizeUpperY(const T t) const
  {
    return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t);
  }

  template<typename T>
  __forceinline vfloat<N> dequantizeLowerZ(const T t) const
  {
    return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t);
  }

  template<typename T>
  __forceinline vfloat<N> dequantizeUpperZ(const T t) const
  {
    return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t);
  }

  /* ---- single child i, broadcast to M lanes and interpolated by the M values in t ---- */

  template<int M>
  __forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t);
  }

  template<int M>
  __forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t);
  }

  template<int M>
  __forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t);
  }

  template<int M>
  __forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t);
  }

  template<int M>
  __forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t);
  }

  template<int M>
  __forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const
  {
    return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t);
  }
};
}

View File

@@ -0,0 +1,242 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/default.h"
#include "../common/alloc.h"
#include "../common/accel.h"
#include "../common/device.h"
#include "../common/scene.h"
#include "../geometry/primitive.h"
#include "../common/ray.h"
namespace embree
{
/*! Pairs a BVH node reference with a bounding box. */
template<typename NodeRef>
struct BVHNodeRecord
{
  /*! Default constructor; performs no explicit member initialization. */
  __forceinline BVHNodeRecord() {}

  /*! Builds a record from a BBox3fa, which is converted to the stored BBox3fx. */
  __forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds)
    : ref(ref), bounds(BBox3fx(bounds))
  {}

  /*! Builds a record from a BBox3fx, which is stored as is. */
  __forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds)
    : ref(ref), bounds(bounds)
  {}

  NodeRef ref;    //!< the referenced node
  BBox3fx bounds; //!< the bounding box stored with the reference
};
/*! Pairs a BVH node reference with linear bounds (LBBox3fa). */
template<typename NodeRef>
struct BVHNodeRecordMB
{
  /*! Default constructor; performs no explicit member initialization. */
  __forceinline BVHNodeRecordMB() {}

  /*! Builds a record from a node reference and its linear bounds. */
  __forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds)
    : ref(ref), lbounds(lbounds)
  {}

  NodeRef ref;      //!< the referenced node
  LBBox3fa lbounds; //!< the linear bounds stored with the reference
};
/*! Pairs a BVH node reference with linear bounds and a BBox1f range dt. */
template<typename NodeRef>
struct BVHNodeRecordMB4D
{
  /*! Default constructor; performs no explicit member initialization. */
  __forceinline BVHNodeRecordMB4D() {}

  /*! Builds a record from a node reference, its linear bounds and the range dt. */
  __forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt)
    : ref(ref), lbounds(lbounds), dt(dt)
  {}

  NodeRef ref;      //!< the referenced node
  LBBox3fa lbounds; //!< the linear bounds stored with the reference
  BBox1f dt;        //!< the range stored with the reference
};
template<typename NodeRef, int N> struct BaseNode_t;
template<typename NodeRef, int N> struct AABBNode_t;
template<typename NodeRef, int N> struct AABBNodeMB_t;
template<typename NodeRef, int N> struct AABBNodeMB4D_t;
template<typename NodeRef, int N> struct OBBNode_t;
template<typename NodeRef, int N> struct OBBNodeMB_t;
template<typename NodeRef, int N> struct QuantizedNode_t;
template<typename NodeRef, int N> struct QuantizedNodeMB_t;
/*! Pointer that points to a node or a list of primitives.
 *
 *  The value is an address whose low bits (align_mask) carry a tag:
 *  for inner nodes the tag is one of the ty* node types below, for leaves
 *  bit tyLeaf is set and the remaining tag bits hold the item count (or a
 *  leaf type, see encodeTypedLeaf). The highest address bit can additionally
 *  be used as a barrier flag. */
template<int N>
struct NodeRefPtr
{
  //template<int NN> friend class BVHN;

  /*! Number of bytes the nodes and primitives are minimally aligned to.*/
  static const size_t byteAlignment = 16;
  static const size_t byteNodeAlignment = 4*N;

  /*! highest address bit is used as barrier for some algorithms.
   *  The shift is done in size_t so that no bit is shifted into the sign
   *  bit of a signed type. */
  static const size_t barrier_mask = size_t(1) << (8*sizeof(size_t)-1);

  /*! Masks the bits that store the number of items per leaf. */
  static const size_t align_mask = byteAlignment-1;
  static const size_t items_mask = byteAlignment-1;

  /*! different supported node types */
  static const size_t tyAABBNode = 0;
  static const size_t tyAABBNodeMB = 1;
  static const size_t tyAABBNodeMB4D = 6;
  static const size_t tyOBBNode = 2;
  static const size_t tyOBBNodeMB = 3;
  static const size_t tyQuantizedNode = 5;
  static const size_t tyLeaf = 8;

  /*! Empty node */
  static const size_t emptyNode = tyLeaf;

  /*! Invalid node, used as marker in traversal */
  static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
  static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);

  /*! Maximum number of primitive blocks in a leaf. */
  static const size_t maxLeafBlocks = items_mask-tyLeaf;

  /*! Default constructor; leaves ptr uninitialized. */
  __forceinline NodeRefPtr () {}

  /*! Construction from integer */
  __forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}

  /*! Cast to size_t */
  __forceinline operator size_t() const { return ptr; }

  /*! Sets the barrier bit (64 bit builds only, asserts otherwise). */
  __forceinline void setBarrier() {
#if defined(__64BIT__)
    assert(!isBarrier());
    ptr |= barrier_mask;
#else
    assert(false);
#endif
  }

  /*! Clears the barrier bit (64 bit builds only, asserts otherwise). */
  __forceinline void clearBarrier() {
#if defined(__64BIT__)
    ptr &= ~barrier_mask;
#else
    assert(false);
#endif
  }

  /*! Checks if this is an barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
  __forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }

  /*! checks if this is a leaf (non-zero if the tyLeaf bit is set) */
  __forceinline size_t isLeaf() const { return ptr & tyLeaf; }

  /*! returns node type (the tag stored in the low bits) */
  __forceinline int type() const { return (int)(ptr & (size_t)align_mask); }

  /*! checks if this is a node */
  __forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }

  /*! checks if this is a motion blur node */
  __forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }

  /*! checks if this is a 4D motion blur node */
  __forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }

  /*! checks if this is a node with unaligned bounding boxes */
  __forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }

  /*! checks if this is a motion blur node with unaligned bounding boxes */
  __forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }

  /*! checks if this is a quantized node */
  __forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }

  /*! Encodes a node. The address must have all tag bits clear, because the
   *  node type is OR-ed into them. */
  static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
    assert(!((size_t)node & align_mask));
    return NodeRefPtr((size_t) node);
  }

  static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
    assert(!((size_t)node & align_mask));
    return NodeRefPtr((size_t) node | tyAABBNodeMB);
  }

  static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
    assert(!((size_t)node & align_mask));
    return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
  }

  /*! Encodes an unaligned node */
  static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
    assert(!((size_t)node & align_mask));
    return NodeRefPtr((size_t) node | tyOBBNode);
  }

  /*! Encodes an unaligned motion blur node */
  static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
    assert(!((size_t)node & align_mask));
    return NodeRefPtr((size_t) node | tyOBBNodeMB);
  }

  /*! Encodes a leaf holding num primitive blocks (num <= maxLeafBlocks). */
  static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
    assert(!((size_t)tri & align_mask));
    assert(num <= maxLeafBlocks);
    return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
  }

  /*! Encodes a leaf whose tag stores the value ty instead of an item count. */
  static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
    assert(!((size_t)ptr & align_mask));
    return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
  }

  /*! returns base node pointer (tag bits removed) */
  __forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
  {
    assert(!isLeaf());
    return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
  }
  __forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
  {
    assert(!isLeaf());
    return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
  }

  /*! returns node pointer (tyAABBNode is 0, so ptr is used without masking) */
  __forceinline       AABBNode_t<NodeRefPtr,N>* getAABBNode()       { assert(isAABBNode()); return (      AABBNode_t<NodeRefPtr,N>*)ptr; }
  __forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }

  /*! returns motion blur node pointer (also accepts 4D motion blur nodes) */
  __forceinline       AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB()       { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (      AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
  __forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }

  /*! returns 4D motion blur node pointer */
  __forceinline       AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D()       { assert(isAABBNodeMB4D()); return (      AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
  __forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }

  /*! returns unaligned node pointer */
  __forceinline       OBBNode_t<NodeRefPtr,N>* ungetAABBNode()       { assert(isOBBNode()); return (      OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
  __forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }

  /*! returns unaligned motion blur node pointer */
  __forceinline       OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB()       { assert(isOBBNodeMB()); return (      OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
  __forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }

  /*! returns quantized node pointer */
  __forceinline       QuantizedNode_t<NodeRefPtr,N>* quantizedNode()       { assert(isQuantizedNode()); return (      QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
  __forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }

  /*! returns leaf pointer and writes the tag value minus tyLeaf to num */
  __forceinline char* leaf(size_t& num) const {
    assert(isLeaf());
    num = (ptr & (size_t)items_mask)-tyLeaf;
    return (char*)(ptr & ~(size_t)align_mask);
  }

  /*! clear all bit flags (the low tag bits; the barrier bit is not affected) */
  __forceinline void clearFlags() {
    ptr &= ~(size_t)align_mask;
  }

  /*! returns the wideness */
  __forceinline size_t getN() const { return N; }

public:
  size_t ptr; //!< tagged address
};
}

View File

@@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_refit.h"
#include "bvh_statistics.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
namespace isa
{
/* BVHs with at most this many primitives are refitted by a single serial pass (see BVHNRefitter::refit) */
static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
/*! Orders two node references by the first size_t-sized word read from the
 *  lower_x member of the nodes they refer to. */
template<int N>
__forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
{
  const size_t keyA = *(size_t*)&a->node()->lower_x;
  const size_t keyB = *(size_t*)&b->node()->lower_x;
  return keyA < keyB;
}
/*! Creates a refitter for the given BVH. Leaf bounds are obtained through
 *  the leafBounds interface; the subtree counter starts at zero. */
template<int N>
BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
  : bvh(bvh),
    leafBounds(leafBounds),
    numSubTrees(0)
{}
/*! Recomputes all node bounds of the BVH and stores the new root bounds.
 *  Small BVHs are refitted serially; larger ones are split into subtrees that
 *  are refitted in parallel, followed by a refit of the nodes above them. */
template<int N>
void BVHNRefitter<N>::refit()
{
  /* small BVH: one serial bottom-up pass */
  if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) {
    bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
    return;
  }

  /* phase 1: collect the subtree roots */
  BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
  numSubTrees = 0;
  gather_subtree_refs(bvh->root,numSubTrees,0);

  /* phase 2: refit every collected subtree, in parallel */
  if (numSubTrees)
  {
    parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
      for (size_t k=r.begin(); k<r.end(); k++) {
        NodeRef& subTreeRoot = subTrees[k];
        subTreeBounds[k] = recurse_bottom(subTreeRoot);
      }
    });
  }

  /* phase 3: refit the top of the tree, consuming the subtree bounds in gathering order */
  numSubTrees = 0;
  bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
}
/*! Walks the tree from ref and appends every reference found at depth
 *  MAX_SUB_TREE_EXTRACTION_DEPTH to subTrees, advancing the counter subtrees.
 *  References above that depth which are not AABB nodes are not recorded. */
template<int N>
void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
                                          size_t &subtrees,
                                          const size_t depth)
{
  /* extraction depth reached: record this reference as a subtree root */
  if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
  {
    assert(subtrees < MAX_NUM_SUB_TREES);
    subTrees[subtrees++] = ref;
    return;
  }

  /* only AABB nodes are descended into */
  if (!ref.isAABBNode())
    return;

  AABBNode* const node = ref.getAABBNode();
  for (size_t c=0; c<N; c++)
  {
    NodeRef& child = node->child(c);
    if (!unlikely(child == BVH::emptyNode))
      gather_subtree_refs(child,subtrees,depth+1);
  }
}
/*! Refits the nodes above the extracted subtrees. At the extraction depth
 *  the next entry of subTreeBounds is returned (advancing subtrees); above
 *  it, AABB nodes get the bounds of their children written back and the
 *  merged box is returned. Any other reference is answered by leafBounds. */
template<int N>
BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
                                        size_t &subtrees,
                                        const BBox3fa *const subTreeBounds,
                                        const size_t depth)
{
  /* subtree root: its bounds were already computed */
  if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
  {
    assert(subtrees < MAX_NUM_SUB_TREES);
    assert(subTrees[subtrees] == ref);
    return subTreeBounds[subtrees++];
  }

  if (!ref.isAABBNode())
    return leafBounds.leafBounds(ref);

  /* recurse into the children; empty slots contribute an empty box */
  AABBNode* const node = ref.getAABBNode();
  BBox3fa childBounds[N];
  for (size_t c=0; c<N; c++)
  {
    NodeRef& child = node->child(c);
    if (unlikely(child == BVH::emptyNode)) {
      childBounds[c] = BBox3fa(empty);
      continue;
    }
    childBounds[c] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
  }

  /* AOS to SOA transform, then set new bounds */
  BBox3vf<N> soa = transpose<N>(childBounds);
  node->lower_x = soa.lower.x;
  node->lower_y = soa.lower.y;
  node->lower_z = soa.lower.z;
  node->upper_x = soa.upper.x;
  node->upper_y = soa.upper.y;
  node->upper_z = soa.upper.z;

  return merge<N>(childBounds);
}
// =========================================================
// =========================================================
// =========================================================
/*! Refits the subtree below ref bottom-up and returns its bounds.
 *  Leaves are answered by leafBounds; inner nodes get the recomputed bounds
 *  of their children written back. */
template<int N>
BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
{
  /* leaves end the recursion */
  if (unlikely(ref.isLeaf()))
    return leafBounds.leafBounds(ref);

  /* everything else is handled as an AABB node */
  AABBNode* const node = ref.getAABBNode();

  /* enable exclusive prefetch for >= AVX platforms */
#if defined(__AVX__)
  BVH::prefetchW(ref);
#endif

  /* child bounds; empty slots contribute an empty box */
  BBox3fa childBounds[N];
  for (size_t c=0; c<N; c++)
  {
    if (unlikely(node->child(c) == BVH::emptyNode)) {
      childBounds[c] = BBox3fa(empty);
      continue;
    }
    childBounds[c] = recurse_bottom(node->child(c));
  }

  /* AOS to SOA transform, then set new bounds */
  BBox3vf<N> soa = transpose<N>(childBounds);
  node->lower_x = soa.lower.x;
  node->lower_y = soa.lower.y;
  node->lower_z = soa.lower.z;
  node->upper_x = soa.upper.x;
  node->upper_y = soa.upper.y;
  node->upper_z = soa.upper.z;

  return merge<N>(childBounds);
}
/*! Combines a full builder with a refitter for the same BVH and mesh.
 *  This object itself is handed to the refitter as its leaf bounds
 *  interface. The mode argument is not used by this constructor. */
template<int N, typename Mesh, typename Primitive>
BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
  : bvh(bvh),
    builder(builder),
    refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)),
    mesh(mesh),
    topologyVersion(0)
{}
/*! Forwards clear() to the wrapped builder; does nothing without a builder. */
template<int N, typename Mesh, typename Primitive>
void BVHNRefitT<N,Mesh,Primitive>::clear()
{
  if (!builder)
    return;

  builder->clear();
}
/*! Updates the BVH: a refit is sufficient as long as the mesh reports no
 *  topology change; otherwise the new topology version is remembered and the
 *  wrapped builder performs a full build. */
template<int N, typename Mesh, typename Primitive>
void BVHNRefitT<N,Mesh,Primitive>::build()
{
  /* unchanged topology: only the bounds have to be recomputed */
  if (!mesh->topologyChanged(topologyVersion)) {
    refitter->refit();
    return;
  }

  /* changed topology: full rebuild */
  topologyVersion = mesh->getTopologyVersion();
  builder->build();
}
/* explicit instantiations: the 4-wide refitter always, the 8-wide one only when __AVX__ is defined */
template class BVHNRefitter<4>;
#if defined(__AVX__)
template class BVHNRefitter<8>;
#endif
#if defined(EMBREE_GEOMETRY_TRIANGLE)

/* Triangle mesh factories. Each *RefitSAH function creates the matching SAH
 * builder (declared here, defined elsewhere) and wraps it, together with the
 * BVH and the mesh, in a BVHNRefitT. */
Builder* BVH4Triangle4MeshBuilderSAH  (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);

Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH4Triangle4MeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<4,TriangleMesh,Triangle4>((BVH4*)accel,rebuild,mesh,mode);
}

Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,rebuild,mesh,mode);
}

Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,rebuild,mesh,mode);
}

#if defined(__AVX__)
/* 8-wide variants */
Builder* BVH8Triangle4MeshBuilderSAH  (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);

Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH8Triangle4MeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<8,TriangleMesh,Triangle4>((BVH8*)accel,rebuild,mesh,mode);
}

Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,rebuild,mesh,mode);
}

Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,rebuild,mesh,mode);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)

/* Quad mesh factories: create the SAH builder (declared here, defined
 * elsewhere) and wrap it in a BVHNRefitT. */
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);

Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,rebuild,mesh,mode);
}

#if defined(__AVX__)
/* 8-wide variant */
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);

Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,rebuild,mesh,mode);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)

/* User geometry factories: create the SAH builder (declared here, defined
 * elsewhere) and wrap it in a BVHNRefitT. */
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);

Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,rebuild,mesh,mode);
}

#if defined(__AVX__)
/* 8-wide variant */
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);

Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode)
{
  Builder* const rebuild = BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode);
  return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,rebuild,mesh,mode);
}
#endif
#endif
/* Instance refit factories; compiled only when instance geometry support is enabled.
 * Unlike the mesh factories these take an additional geometry type mask (gtype),
 * which is forwarded unchanged to the SAH builder. */
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* SAH builder for instances in a BVH4; declared only, defined outside the visible code. */
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
/* Heap-allocates a BVHNRefitT<4,Instance,InstancePrimitive> from the BVH4 (accel), a newly
 * created SAH builder, the instance and the mode flags; returned as a raw Builder*. */
Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
/* 8-wide variants are only available when compiling with AVX. */
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
/* Same as the BVH4 factory above, but for a BVH8. */
Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#endif
#endif
/* Instance array refit factories; compiled only when instance array support is enabled.
 * The geometry type mask (gtype) is forwarded unchanged to the SAH builder. */
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/* SAH builder for instance arrays in a BVH4; declared only, defined outside the visible code. */
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
/* Heap-allocates a BVHNRefitT<4,InstanceArray,InstanceArrayPrimitive> from the BVH4 (accel),
 * a newly created SAH builder, the instance array and the mode flags; returned as a raw Builder*. */
Builder* BVH4InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)accel,BVH4InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
/* 8-wide variants are only available when compiling with AVX. */
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
/* Same as the BVH4 factory above, but for a BVH8. */
Builder* BVH8InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)accel,BVH8InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#endif
#endif
}
}

View File

@@ -0,0 +1,95 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
namespace embree
{
namespace isa
{
template<int N>
class BVHNRefitter
{
public:
/*! Type shortcuts */
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
struct LeafBoundsInterface {
virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
};
public:
/*! Constructor. */
BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);
/*! refits the BVH */
void refit();
private:
/* single-threaded subtree extraction based on BVH depth */
void gather_subtree_refs(NodeRef& ref,
size_t &subtrees,
const size_t depth = 0);
/* single-threaded top-level refit */
BBox3fa refit_toplevel(NodeRef& ref,
size_t &subtrees,
const BBox3fa *const subTreeBounds,
const size_t depth = 0);
/* single-threaded subtree refit */
BBox3fa recurse_bottom(NodeRef& ref);
public:
BVH* bvh; //!< BVH to refit
const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves
static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3;
static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
size_t numSubTrees;
NodeRef subTrees[MAX_NUM_SUB_TREES];
};
/*! Builder that pairs a regular builder with a BVHNRefitter<N> and acts as the
 *  refitter's LeafBoundsInterface for leaves holding Primitive blocks of a Mesh. */
template<int N, typename Mesh, typename Primitive>
class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
{
public:

  /*! Convenience aliases for the BVH types used below. */
  using BVH = BVHN<N>;
  using AABBNode = typename BVH::AABBNode;
  using NodeRef = typename BVH::NodeRef;

public:
  BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);

  virtual void build();

  virtual void clear();

  /*! Returns the merged bounds of all primitive blocks stored in the leaf ref.
   *  Each block is refreshed through Primitive::update(mesh) and its result is
   *  accumulated; the empty node yields an empty box. */
  virtual const BBox3fa leafBounds (NodeRef& ref) const
  {
    size_t blockCount;
    char* const leafData = ref.leaf(blockCount);
    if (unlikely(ref == BVH::emptyNode))
      return empty;

    Primitive* const blocks = (Primitive*)leafData;
    BBox3fa merged = empty;
    for (size_t b=0; b!=blockCount; ++b)
      merged.extend(blocks[b].update(mesh));
    return merged;
  }

private:
  BVH* bvh;                                  //!< BVH this builder operates on
  std::unique_ptr<Builder> builder;          //!< wrapped builder
  std::unique_ptr<BVHNRefitter<N>> refitter; //!< refitter
  Mesh* mesh;                                //!< geometry passed to Primitive::update
  unsigned int topologyVersion;
};
}
}

View File

@@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_rotate.h"
namespace embree
{
namespace isa
{
/*! Half surface area of a box: with extent (x,y,z) this evaluates x*y + y*z + z*x.
 *  The fourth SIMD lane is ignored. */
__forceinline float halfArea3f(const BBox<vfloat4>& box) {
  const vfloat4 extent = box.size();
  /* lanes become (x*y, y*z, z*x, w*w) */
  const vfloat4 pairwise = extent*shuffle<1,2,0,3>(extent);
  return pairwise[0]+pairwise[1]+pairwise[2];
}
/*! Applies tree rotations to the BVH4 subtree rooted at parentRef.
 *
 *  All four children are processed recursively first. Afterwards at most one
 *  swap is performed at this node: a direct child (child1) is exchanged with a
 *  grandchild (child2child) that lives below a different direct child (child2).
 *  Among all candidates, the swap that shrinks the half area of child2 the most
 *  is chosen; if no candidate shrinks it, the node is left unchanged.
 *
 *  \param parentRef node to process; barrier nodes and leaves are not touched
 *  \param depth     depth of parentRef, used to honor BVH4::maxBuildDepth
 *  \return 0 for barrier and leaf nodes, otherwise 1 plus the largest depth
 *          value tracked for the four children */
size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
{
/*! nothing to rotate if we reached a barrier or a leaf node. */
if (parentRef.isBarrier()) return 0;
if (parentRef.isLeaf()) return 0;
AABBNode* parent = parentRef.getAABBNode();
/*! rotate all children first; cdepth[c] holds the value returned for child c */
vint4 cdepth;
for (size_t c=0; c<4; c++)
cdepth[c] = (int)rotate(parent->child(c),depth+1);
/* compute current half areas of all children: sx*(sy+sz) + sy*sz per lane */
vfloat4 sizeX = parent->upper_x-parent->lower_x;
vfloat4 sizeY = parent->upper_y-parent->lower_y;
vfloat4 sizeZ = parent->upper_z-parent->lower_z;
vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
/*! get node bounds (one box per child of parent) */
BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
parent->bounds(child1_0,child1_1,child1_2,child1_3);
/*! Find best rotation. We pick a first child (child1) and a sub-child
(child2child) of a different second child (child2), and swap child1
and child2child. We perform the best such swap. */
/* bestArea starts at 0, so only swaps with a negative area change are accepted */
float bestArea = 0;
/* size_t(-1) marks "no swap found" */
size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
for (size_t c2=0; c2<4; c2++)
{
/*! ignore barrier and leaf nodes as we cannot descend into them */
if (parent->child(c2).isBarrier()) continue;
if (parent->child(c2).isLeaf()) continue;
AABBNode* child2 = parent->child(c2).getAABBNode();
/*! transpose child bounds */
BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
child2->bounds(child2c0,child2c1,child2c2,child2c3);
/*! put child1_0 at each child2 position; pos0 is the position with the smallest resulting half area */
float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
vfloat4 min0 = vreduce_min(cost0);
int pos0 = (int)bsf(movemask(min0 == cost0));
/*! put child1_1 at each child2 position */
float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
vfloat4 min1 = vreduce_min(cost1);
int pos1 = (int)bsf(movemask(min1 == cost1));
/*! put child1_2 at each child2 position */
float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
vfloat4 min2 = vreduce_min(cost2);
int pos2 = (int)bsf(movemask(min2 == cost2));
/*! put child1_3 at each child2 position */
float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
vfloat4 min3 = vreduce_min(cost3);
int pos3 = (int)bsf(movemask(min3 == cost3));
/*! find best other child: lane i holds the change of child2's half area if child1 == i is swapped in */
vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
int pos[4] = { pos0,pos1,pos2,pos3 };
const size_t mbd = BVH4::maxBuildDepth;
vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
/* a child cannot be swapped into itself: mask out lane c2 */
valid &= vint4(int(c2)) != vint4(step);
if (none(valid)) continue;
size_t c1 = select_min(valid,area0123);
float area = area0123[c1];
if (c1 == c2) continue; // can happen if bounds are NANs
/*! accept a swap when it reduces cost and is not swapping a node with itself */
if (area < bestArea) {
bestArea = area;
bestChild1 = c1;
bestChild2 = c2;
bestChild2Child = pos[c1];
}
}
/*! if we did not find a swap that improves the SAH then do nothing */
if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
/*! perform the best found tree rotation */
AABBNode* child2 = parent->child(bestChild2).getAABBNode();
AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
/* child2 now contains different children, so refresh its bounds stored in parent */
parent->setBounds(bestChild2,child2->bounds());
AABBNode::compact(parent);
AABBNode::compact(child2);
/*! This returned depth is conservative as the child that was
* pulled up in the tree could have been on the critical path. */
cdepth[bestChild1]++; // bestChild1 was pushed down one level
return 1+reduce_max(cdepth);
}
}
}

View File

@@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
namespace embree
{
namespace isa
{
/*! Generic tree rotation interface. For an arbitrary branching factor N
 *  rotations are switched off: enabled is false, rotate() returns 0 and
 *  restructure() does nothing. */
template<int N>
class BVHNRotate
{
  using NodeRef = typename BVHN<N>::NodeRef;

public:
  static const bool enabled = false;

  static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) {
    return 0;
  }

  static __forceinline void restructure(NodeRef ref, size_t depth = 1) {
  }
};
/*! Tree rotations for BVH4: the only branching factor with rotations enabled. */
template<>
class BVHNRotate<4>
{
  using AABBNode = BVH4::AABBNode;
  using NodeRef = BVH4::NodeRef;

public:
  static const bool enabled = true;

  /*! Rotates the subtree rooted at parentRef; depth is the depth of that node. */
  static size_t rotate(NodeRef parentRef, size_t depth = 1);
};
}
}

View File

@@ -0,0 +1,165 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_statistics.h"
#include "../../common/algorithms/parallel_reduce.h"
namespace embree
{
/*! Gathers the statistics of the whole BVH at construction time, starting at
 *  the root with the (non-negative) expected half area of the BVH bounds and
 *  the time range [0,1]. */
template<int N>
BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
{
  const double rootHalfArea = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
  const BBox1f fullTimeRange(0.0f,1.0f);
  stat = statistics(bvh->root,rootHalfArea,fullTimeRange);
}
/*! Formats the gathered statistics as a multi-line report: a header line, a
 *  totals line, one line per inner node type that occurs in the BVH, and
 *  finally the leaf statistics and the leaf histogram. */
template<int N>
std::string BVHNStatistics<N>::str()
{
  std::ostringstream out;
  out.setf(std::ios::fixed, std::ios::floatfield);

  /* header: primitive and vertex counts, tree depth */
  out << " primitives = " << bvh->numPrimitives;
  out << ", vertices = " << bvh->numVertices;
  out << ", depth = " << stat.depth << std::endl;

  /* totals; also passed to the per-type rows as their 100% reference */
  size_t bytesAll = stat.bytes(bvh);
  double sahAll = stat.sah(bvh);
  out << " total : sah = " << std::setw(7) << std::setprecision(3) << sahAll << " (100.00%), ";
  out << "#bytes = " << std::setw(7) << std::setprecision(2) << bytesAll/1E6 << " MB (100.00%), ";
  out << "#nodes = " << std::setw(7) << stat.size();
  out << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
  out << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytesAll)/double(bvh->numPrimitives) << std::endl;

  /* inner node rows: only node types that actually occur are listed */
  if (stat.statAABBNodes.numNodes != 0)
    out << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,sahAll,bytesAll) << std::endl;
  if (stat.statOBBNodes.numNodes != 0)
    out << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,sahAll,bytesAll) << std::endl;
  if (stat.statAABBNodesMB.numNodes != 0)
    out << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,sahAll,bytesAll) << std::endl;
  if (stat.statAABBNodesMB4D.numNodes != 0)
    out << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,sahAll,bytesAll) << std::endl;
  if (stat.statOBBNodesMB.numNodes != 0)
    out << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,sahAll,bytesAll) << std::endl;
  if (stat.statQuantizedNodes.numNodes != 0)
    out << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,sahAll,bytesAll) << std::endl;

  /* leaf rows are always printed */
  out << " leaves : " << stat.statLeaf.toString(bvh,sahAll,bytesAll) << std::endl;
  out << " histogram : " << stat.statLeaf.histToString() << std::endl;

  return out.str();
}
/*! Recursively gathers statistics for the subtree rooted at node.
 *
 *  For every inner node type the N children are visited with parallel_reduce
 *  and their results are combined with Statistics::add; empty children
 *  contribute a default-constructed record. Each visited child increments
 *  numChildren of the parent's node type. The node itself then adds one to
 *  numNodes, dt*A to nodeSAH and one to depth (the combined depth of the
 *  children is their maximum, see Statistics::operator+).
 *
 *  \param node  root of the subtree to analyze
 *  \param A     half area attributed to node by the caller
 *  \param t0t1  time range of node; its size (clamped to >= 0) scales the SAH terms
 *  \return statistics of the subtree; aborts the process for unknown node types */
template<int N>
typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
{
Statistics s;
assert(t0t1.size() > 0.0f);
double dt = max(0.0f,t0t1.size());
if (node.isAABBNode())
{
AABBNode* n = node.getAABBNode();
/* note: the lambda-local 's' shadows the outer 's'; it holds one child's result */
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extend(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statAABBNodes.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodes.numNodes++;
s.statAABBNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isOBBNode())
{
OBBNode* n = node.ungetAABBNode();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statOBBNodes.numChildren++;
return s;
}, Statistics::add);
s.statOBBNodes.numNodes++;
s.statOBBNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isAABBNodeMB())
{
AABBNodeMB* n = node.getAABBNodeMB();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
/* motion blur node: the child area is the expected half area over the time range */
const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statAABBNodesMB.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodesMB.numNodes++;
s.statAABBNodesMB.nodeSAH += dt*A;
s.depth++;
}
else if (node.isAABBNodeMB4D())
{
AABBNodeMB4D* n = node.getAABBNodeMB4D();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
/* 4D node: restrict the time range to the child's own time range before recursing */
const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
assert(!t0t1i.empty());
/* unlike the other branches, this area is not clamped with max(0.0f,...) */
const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
Statistics s = statistics(n->child(i),Ai,t0t1i);
s.statAABBNodesMB4D.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodesMB4D.numNodes++;
s.statAABBNodesMB4D.nodeSAH += dt*A;
s.depth++;
}
else if (node.isOBBNodeMB())
{
OBBNodeMB* n = node.ungetAABBNodeMB();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent0(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statOBBNodesMB.numChildren++;
return s;
}, Statistics::add);
s.statOBBNodesMB.numNodes++;
s.statOBBNodesMB.nodeSAH += dt*A;
s.depth++;
}
else if (node.isQuantizedNode())
{
QuantizedNode* n = node.quantizedNode();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statQuantizedNodes.numChildren++;
return s;
}, Statistics::add);
s.statQuantizedNodes.numNodes++;
s.statQuantizedNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isLeaf())
{
/* num is the number of primitive blocks stored in the leaf; leaves with num == 0 are not counted */
size_t num; const char* tri = node.leaf(num);
if (num)
{
for (size_t i=0; i<num; i++)
{
/* the block size is queried per block and used to step to the next block */
const size_t bytes = bvh->primTy->getBytes(tri);
s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
s.statLeaf.numBytes += bytes;
tri+=bytes;
}
s.statLeaf.numLeaves++;
s.statLeaf.numPrimBlocks += num;
s.statLeaf.leafSAH += dt*A*num;
/* histogram bin num-1 counts leaves with num blocks; larger leaves are not recorded */
if (num-1 < Statistics::LeafStat::NHIST) {
s.statLeaf.numPrimBlocksHistogram[num-1]++;
}
}
}
else {
/* unknown node type: terminate instead of throwing */
abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
}
return s;
}
/* Explicit template instantiations.
 * BVHNStatistics<8> is instantiated whenever AVX is available. */
#if defined(__AVX__)
template class BVHNStatistics<8>;
#endif
/* BVHNStatistics<4> is instantiated when AVX is not available, when neither the
 * SSE2 nor the SSE4.2 target is enabled, or when compiling for aarch64. */
#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
template class BVHNStatistics<4>;
#endif
}

View File

@@ -0,0 +1,285 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include <sstream>
namespace embree
{
template<int N>
class BVHNStatistics
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::OBBNode OBBNode;
typedef typename BVH::AABBNodeMB AABBNodeMB;
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
typedef typename BVH::OBBNodeMB OBBNodeMB;
typedef typename BVH::QuantizedNode QuantizedNode;
typedef typename BVH::NodeRef NodeRef;
struct Statistics
{
/*! Statistics for all inner nodes of one node type (Node). */
template<typename Node>
struct NodeStat
{
  NodeStat ( double nodeSAH = 0,
             size_t numNodes = 0,
             size_t numChildren = 0)
    : nodeSAH(nodeSAH),
      numNodes(numNodes),
      numChildren(numChildren) {}

  /*! Accumulated SAH divided by the expected half area of the BVH bounds. */
  double sah(BVH* bvh) const {
    return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
  }

  /*! Memory taken by the counted nodes: numNodes * sizeof(Node). */
  size_t bytes() const {
    return numNodes*sizeof(Node);
  }

  /*! Number of counted nodes. */
  size_t size() const {
    return numNodes;
  }

  /*! Fill rate = used child slots / available child slots (N per node). */
  double fillRateNom () const { return double(numChildren); }
  double fillRateDen () const { return double(numNodes*N); }
  double fillRate () const { return fillRateNom()/fillRateDen(); }

  /*! Member-wise sum of two records. */
  __forceinline friend NodeStat operator+ ( const NodeStat& lhs, const NodeStat& rhs)
  {
    return NodeStat(lhs.nodeSAH + rhs.nodeSAH,
                    lhs.numNodes+rhs.numNodes,
                    lhs.numChildren+rhs.numChildren);
  }

  /*! Formats one report row; sahTotal and bytesTotal are the 100% references
   *  for the percentages. */
  std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
  {
    const double ownSAH = sah(bvh);
    const size_t ownBytes = bytes();

    std::ostringstream row;
    row.setf(std::ios::fixed, std::ios::floatfield);
    row << "sah = " << std::setw(7) << std::setprecision(3) << ownSAH;
    row << " (" << std::setw(6) << std::setprecision(2) << 100.0*ownSAH/sahTotal << "%), ";
    row << "#bytes = " << std::setw(7) << std::setprecision(2) << ownBytes/1E6 << " MB ";
    row << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(ownBytes)/double(bytesTotal) << "%), ";
    row << "#nodes = " << std::setw(7) << numNodes;
    row << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
    row << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(ownBytes)/double(bvh->numPrimitives);
    return row.str();
  }

public:
  double nodeSAH;     //!< accumulated SAH contribution of the counted nodes
  size_t numNodes;    //!< number of counted nodes
  size_t numChildren; //!< number of non-empty children of the counted nodes
};
/*! Statistics for the leaves of the BVH. */
struct LeafStat
{
  /*! Number of histogram bins. Bin i counts leaves that hold i+1 primitive
   *  blocks; the histogram array below is sized from this constant. */
  static const int NHIST = 8;

  LeafStat ( double leafSAH = 0.0f,
             size_t numLeaves = 0,
             size_t numPrimsActive = 0,
             size_t numPrimsTotal = 0,
             size_t numPrimBlocks = 0,
             size_t numBytes = 0)
    : leafSAH(leafSAH),
      numLeaves(numLeaves),
      numPrimsActive(numPrimsActive),
      numPrimsTotal(numPrimsTotal),
      numPrimBlocks(numPrimBlocks),
      numBytes(numBytes)
  {
    /* the histogram always starts out empty */
    for (int i=0; i<NHIST; i++)
      numPrimBlocksHistogram[i] = 0;
  }

  /*! Accumulated leaf SAH divided by the expected half area of the BVH bounds. */
  double sah(BVH* bvh) const {
    return leafSAH/bvh->getLinearBounds().expectedHalfArea();
  }

  /*! Bytes used by the leaves (bvh is unused; kept for a uniform interface). */
  size_t bytes(BVH* bvh) const {
    return numBytes;
  }

  /*! Number of leaves. */
  size_t size() const {
    return numLeaves;
  }

  /*! Fill rate = active primitives / total (active and inactive) primitives. */
  double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
  double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
  double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }

  /*! Member-wise sum of two records, including the histograms. */
  __forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
  {
    LeafStat stat(a.leafSAH + b.leafSAH,
                  a.numLeaves+b.numLeaves,
                  a.numPrimsActive+b.numPrimsActive,
                  a.numPrimsTotal+b.numPrimsTotal,
                  a.numPrimBlocks+b.numPrimBlocks,
                  a.numBytes+b.numBytes);
    /* the constructor zeroed the histogram, so both inputs are simply added */
    for (int i=0; i<NHIST; i++) {
      stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
      stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
    }
    return stat;
  }

  /*! Formats one report row; sahTotal and bytesTotal are the 100% references
   *  for the percentages. */
  std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
  {
    std::ostringstream stream;
    stream.setf(std::ios::fixed, std::ios::floatfield);
    stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
    stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
    stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
    stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
    stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
    stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
    return stream.str();
  }

  /*! Formats the histogram as percentages of the number of leaves, one entry per bin. */
  std::string histToString() const
  {
    std::ostringstream stream;
    stream.setf(std::ios::fixed, std::ios::floatfield);
    for (int i=0; i<NHIST; i++)
      stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
    return stream.str();
  }

public:
  double leafSAH;        //!< SAH of the leaves only
  size_t numLeaves;      //!< Number of leaf nodes.
  size_t numPrimsActive; //!< Number of active primitives.
  size_t numPrimsTotal;  //!< Number of active and inactive primitives
  size_t numPrimBlocks;  //!< Number of primitive blocks.
  size_t numBytes;       //!< Number of bytes of leaves.
  size_t numPrimBlocksHistogram[NHIST]; //!< Bin i: number of leaves with i+1 primitive blocks.
};
public:
/*! Builds a statistics record from a depth, the leaf statistics and one
 *  NodeStat per inner node type; every argument defaults to an empty record. */
Statistics (size_t depth = 0,
LeafStat statLeaf = LeafStat(),
NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
: depth(depth),
statLeaf(statLeaf),
statAABBNodes(statAABBNodes),
statOBBNodes(statOBBNodes),
statAABBNodesMB(statAABBNodesMB),
statAABBNodesMB4D(statAABBNodesMB4D),
statOBBNodesMB(statOBBNodesMB),
statQuantizedNodes(statQuantizedNodes) {}
/*! Total SAH: sum of the leaf SAH and the SAH of every inner node type. */
double sah(BVH* bvh) const
{
return statLeaf.sah(bvh) +
statAABBNodes.sah(bvh) +
statOBBNodes.sah(bvh) +
statAABBNodesMB.sah(bvh) +
statAABBNodesMB4D.sah(bvh) +
statOBBNodesMB.sah(bvh) +
statQuantizedNodes.sah(bvh);
}
/*! Total number of bytes used by leaves and all inner node types. */
size_t bytes(BVH* bvh) const {
return statLeaf.bytes(bvh) +
statAABBNodes.bytes() +
statOBBNodes.bytes() +
statAABBNodesMB.bytes() +
statAABBNodesMB4D.bytes() +
statOBBNodesMB.bytes() +
statQuantizedNodes.bytes();
}
/*! Total number of leaves plus inner nodes of all types. */
size_t size() const
{
return statLeaf.size() +
statAABBNodes.size() +
statOBBNodes.size() +
statAABBNodesMB.size() +
statAABBNodesMB4D.size() +
statOBBNodesMB.size() +
statQuantizedNodes.size();
}
/*! Overall fill rate: sum of all fill rate numerators divided by the sum of
 *  all fill rate denominators (leaves and inner nodes combined). */
double fillRate (BVH* bvh) const
{
double nom = statLeaf.fillRateNom(bvh) +
statAABBNodes.fillRateNom() +
statOBBNodes.fillRateNom() +
statAABBNodesMB.fillRateNom() +
statAABBNodesMB4D.fillRateNom() +
statOBBNodesMB.fillRateNom() +
statQuantizedNodes.fillRateNom();
double den = statLeaf.fillRateDen(bvh) +
statAABBNodes.fillRateDen() +
statOBBNodes.fillRateDen() +
statAABBNodesMB.fillRateDen() +
statAABBNodesMB4D.fillRateDen() +
statOBBNodesMB.fillRateDen() +
statQuantizedNodes.fillRateDen();
return nom/den;
}
/*! Combines two records: the depth is the maximum of both depths, all other
 *  members are added member-wise. */
friend Statistics operator+ ( const Statistics& a, const Statistics& b )
{
return Statistics(max(a.depth,b.depth),
a.statLeaf + b.statLeaf,
a.statAABBNodes + b.statAABBNodes,
a.statOBBNodes + b.statOBBNodes,
a.statAABBNodesMB + b.statAABBNodesMB,
a.statAABBNodesMB4D + b.statAABBNodesMB4D,
a.statOBBNodesMB + b.statOBBNodesMB,
a.statQuantizedNodes + b.statQuantizedNodes);
}
/*! Named function form of operator+, usable as a reduction callback. */
static Statistics add ( const Statistics& a, const Statistics& b ) {
return a+b;
}
public:
size_t depth; //!< depth of the analyzed (sub)tree
LeafStat statLeaf; //!< statistics of the leaves
NodeStat<AABBNode> statAABBNodes; //!< statistics per inner node type
NodeStat<OBBNode> statOBBNodes;
NodeStat<AABBNodeMB> statAABBNodesMB;
NodeStat<AABBNodeMB4D> statAABBNodesMB4D;
NodeStat<OBBNodeMB> statOBBNodesMB;
NodeStat<QuantizedNode> statQuantizedNodes;
};
public:
/* Constructor gathers statistics. */
BVHNStatistics (BVH* bvh);
/*! Convert statistics into a string */
std::string str();
/*! Total SAH of the gathered statistics. */
double sah() const {
return stat.sah(bvh);
}
/*! Total number of bytes of the gathered statistics (leaves plus inner nodes). */
size_t bytesUsed() const {
return stat.bytes(bvh);
}
private:
/*! Gathers statistics for the subtree rooted at node; A is the half area
 *  attributed to node and dt its time range. */
Statistics statistics(NodeRef node, const double A, const BBox1f dt);
private:
BVH* bvh; //!< BVH the statistics refer to
Statistics stat; //!< statistics gathered by the constructor
};
typedef BVHNStatistics<4> BVH4Statistics;
typedef BVHNStatistics<8> BVH8Statistics;
}

View File

@@ -0,0 +1,466 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "node_intersector1.h"
#include "../common/stack_item.h"
#define NEW_SORTING_CODE 1
namespace embree
{
namespace isa
{
/*! BVH regular node traversal for single rays. */
template<int N, int types>
class BVHNNodeTraverser1Hit;
#if defined(__AVX512VL__) // SKX
/*! Inserts the key d into the key vector dist (one insertion-sort step).
 *  Lanes where d >= dist take d; lanes selected by that comparison mask
 *  shifted left by one bit take the corresponding lane of dist_shift instead.
 *  NOTE(review): dist_shift is presumably dist moved up by one lane, and d is
 *  presumably the same key broadcast to all lanes, so that dist stays sorted in
 *  descending order - confirm against align_shift_right and the callers. */
template<int N>
__forceinline void isort_update(vint<N> &dist, const vint<N> &d)
{
const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
const vboolf<N> m_geq = d >= dist;
const vboolf<N> m_geq_shift = m_geq << 1;
/* the order matters: the second select overrides lanes written by the first */
dist = select(m_geq,d,dist);
dist = select(m_geq_shift,dist_shift,dist);
}
/*! Inserts a key into dist without any comparison: d is permuted with an
 *  all-zero index vector and combined with dist via align_shift_right<N-1>.
 *  NOTE(review): presumably this pushes lane 0 of d in at the front and moves
 *  the existing keys up by one lane, which is only valid if keys arrive in
 *  ascending order - confirm against align_shift_right. */
template<int N>
__forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
}
/*! Applies a two-source permute with index to the vectors n0 and n1 and
 *  returns the first element of the result as a scalar.
 *  NOTE(review): presumably lane 0 of index selects one of the eight 64-bit
 *  values held in n0/n1 - confirm against permutex2var. */
__forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
return toScalar(permutex2var((__m256i)index,n0,n1));
}
/*! Permutes n with index and returns the first element of the result as a scalar. */
__forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
return toScalar(permute(n,index));
}
#endif
/* Specialization for BVH4. */
template<int types>
class BVHNNodeTraverser1Hit<4, types>
{
typedef BVH4 BVH;
typedef BVH4::NodeRef NodeRef;
typedef BVH4::BaseNode BaseNode;
public:
/* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection).
 *
 * cur      in: the node whose children were tested; out: the child to continue with
 * mask     bit mask of the hit children; must not be 0
 * tNear    per-child entry distances; their raw bit patterns are compared as unsigned integers
 * stackPtr traversal stack top; all other hit children are pushed here together with their distance
 * stackEnd end of the stack, only used in assertions */
static __forceinline void traverseClosestHit(NodeRef& cur,
size_t mask,
const vfloat4& tNear,
StackItemT<NodeRef>*& stackPtr,
StackItemT<NodeRef>* stackEnd)
{
assert(mask != 0);
const BaseNode* node = cur.baseNode();
/*! one child is hit, continue with that child */
/* bscf yields the index of a set bit and clears it in mask */
size_t r = bscf(mask);
cur = node->child(r);
BVH::prefetch(cur,types);
if (likely(mask == 0)) {
assert(cur != BVH::emptyNode);
return;
}
/*! two children are hit, push far child, and continue with closer child */
NodeRef c0 = cur;
const unsigned int d0 = ((unsigned int*)&tNear)[r];
r = bscf(mask);
NodeRef c1 = node->child(r);
BVH::prefetch(c1,types);
const unsigned int d1 = ((unsigned int*)&tNear)[r];
assert(c0 != BVH::emptyNode);
assert(c1 != BVH::emptyNode);
if (likely(mask == 0)) {
assert(stackPtr < stackEnd);
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
}
#if NEW_SORTING_CODE == 1
/* pack (node, distance) pairs into SIMD registers so they can be sorted and
 * written to the stack as whole items */
vint4 s0((size_t)c0,(size_t)d0);
vint4 s1((size_t)c1,(size_t)d1);
r = bscf(mask);
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
vint4 s2((size_t)c2,(size_t)d2);
/* 3 hits: after sorting, s0 and s1 are pushed and traversal continues with s2 */
if (likely(mask == 0)) {
StackItemT<NodeRef>::sort3(s0,s1,s2);
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
cur = toSizeT(s2);
stackPtr+=2;
return;
}
r = bscf(mask);
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
vint4 s3((size_t)c3,(size_t)d3);
/* 4 hits: after sorting, s0..s2 are pushed and traversal continues with s3 */
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
cur = toSizeT(s3);
stackPtr+=3;
#else
/*! Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there. */
assert(stackPtr < stackEnd);
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
assert(stackPtr < stackEnd);
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
assert(stackPtr < stackEnd);
r = bscf(mask);
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
assert(c != BVH::emptyNode);
if (likely(mask == 0)) {
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
return;
}
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
assert(stackPtr < stackEnd);
r = bscf(mask);
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
assert(c != BVH::emptyNode);
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#endif
}
/* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion).
 * Hit children are not sorted by distance: they are visited in the order in which
 * bscf returns them. All but the last one are pushed onto the stack and traversal
 * continues with the last one.
 *
 * cur      in: the node whose children were tested; out: the child to continue with
 * mask     bit mask of the hit children
 * tNear    per-child entry distances (unused here)
 * stackPtr traversal stack top
 * stackEnd end of the stack, only used in assertions */
static __forceinline void traverseAnyHit(NodeRef& cur,
size_t mask,
const vfloat4& tNear,
NodeRef*& stackPtr,
NodeRef* stackEnd)
{
const BaseNode* node = cur.baseNode();
/*! one child is hit, continue with that child */
size_t r = bscf(mask);
cur = node->child(r);
BVH::prefetch(cur,types);
/* simpler in sequence traversal order */
assert(cur != BVH::emptyNode);
if (likely(mask == 0)) return;
assert(stackPtr < stackEnd);
*stackPtr = cur; stackPtr++;
/* the loop ends via the return below once the last hit child was fetched */
for (; ;)
{
r = bscf(mask);
cur = node->child(r); BVH::prefetch(cur,types);
assert(cur != BVH::emptyNode);
if (likely(mask == 0)) return;
assert(stackPtr < stackEnd);
*stackPtr = cur; stackPtr++;
}
}
};
/* Specialization for BVH8. */
template<int types>
class BVHNNodeTraverser1Hit<8, types>
{
typedef BVH8 BVH;
typedef BVH8::NodeRef NodeRef;
typedef BVH8::BaseNode BaseNode;
#if defined(__AVX512VL__)
/*! AVX-512VL closest-hit traversal step for 8-wide nodes.
 *
 *  Each hit child gets an integer key: the bit pattern of its tNear value with
 *  the low three bits replaced by the child index. Keys are ordered with
 *  min/max networks (up to 4 hits) or insertion sort (5 or more hits). The
 *  child with the smallest key becomes cur; all others are written to the
 *  stack in descending key order.
 *
 *  NOTE(review): this presumably relies on the permutes using only the low
 *  bits of a key as the lane index, so a key can be used directly to look up
 *  its child and its distance - confirm against the permute helpers.
 *
 *  cur      in: the node whose children were tested; out: the child to continue with
 *  mask     bit mask of the hit children; must not be 0
 *  tNear    per-child entry distances
 *  stackPtr traversal stack top
 *  stackEnd end of the stack (unused here) */
template<class NodeRef, class BaseNode>
static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
size_t mask,
const vfloat8& tNear,
StackItemT<NodeRef>*& stackPtr,
StackItemT<NodeRef>* stackEnd)
{
assert(mask != 0);
const BaseNode* node = cur.baseNode();
/* load all eight child references: n0 = children 0..3, n1 = children 4..7 */
const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
/* build the keys: distance bits with the low 3 bits replaced by the lane index */
vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
/* presumably moves the keys of the hit lanes to the front - TODO confirm vint8::compact */
distance_i = vint8::compact((int)mask,distance_i,distance_i);
cur = permuteExtract(distance_i,n0,n1);
BVH::prefetch(cur,types);
/* clear the lowest set bit: one hit has been consumed */
mask &= mask-1;
if (likely(mask == 0)) return;
/* 2 hits: order A0 B0 */
const vint8 d0(distance_i);
const vint8 d1(shuffle<1>(distance_i));
cur = permuteExtract(d1,n0,n1);
BVH::prefetch(cur,types);
const vint8 dist_A0 = min(d0, d1);
const vint8 dist_B0 = max(d0, d1);
assert(dist_A0[0] < dist_B0[0]);
mask &= mask-1;
if (likely(mask == 0)) {
cur = permuteExtract(dist_A0,n0,n1);
stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
*(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
stackPtr++;
return;
}
/* 3 hits: order A1 B1 C1 */
const vint8 d2(shuffle<2>(distance_i));
cur = permuteExtract(d2,n0,n1);
BVH::prefetch(cur,types);
const vint8 dist_A1 = min(dist_A0,d2);
const vint8 dist_tmp_B1 = max(dist_A0,d2);
const vint8 dist_B1 = min(dist_B0,dist_tmp_B1);
const vint8 dist_C1 = max(dist_B0,dist_tmp_B1);
assert(dist_A1[0] < dist_B1[0]);
assert(dist_B1[0] < dist_C1[0]);
mask &= mask-1;
if (likely(mask == 0)) {
cur = permuteExtract(dist_A1,n0,n1);
stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
*(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
*(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
stackPtr+=2;
return;
}
/* 4 hits: order A2 B2 C2 D2 */
const vint8 d3(shuffle<3>(distance_i));
cur = permuteExtract(d3,n0,n1);
BVH::prefetch(cur,types);
const vint8 dist_A2 = min(dist_A1,d3);
const vint8 dist_tmp_B2 = max(dist_A1,d3);
const vint8 dist_B2 = min(dist_B1,dist_tmp_B2);
const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
const vint8 dist_C2 = min(dist_C1,dist_tmp_C2);
const vint8 dist_D2 = max(dist_C1,dist_tmp_C2);
assert(dist_A2[0] < dist_B2[0]);
assert(dist_B2[0] < dist_C2[0]);
assert(dist_C2[0] < dist_D2[0]);
mask &= mask-1;
if (likely(mask == 0)) {
cur = permuteExtract(dist_A2,n0,n1);
stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
*(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
*(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
*(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
stackPtr+=3;
return;
}
/* >=5 hits: reverse to descending order for writing to stack */
distance_i = align_shift_right<3>(distance_i,distance_i);
/* four hits are already sorted; mask still has one bit per remaining hit */
const size_t hits = 4 + popcnt(mask);
vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert
/* the four sorted keys are inserted in ascending order without comparisons */
isort_quick_update<8>(dist,dist_A2);
isort_quick_update<8>(dist,dist_B2);
isort_quick_update<8>(dist,dist_C2);
isort_quick_update<8>(dist,dist_D2);
/* insert the remaining keys one by one with a compare-based insertion step */
do {
distance_i = align_shift_right<1>(distance_i,distance_i);
cur = permuteExtract(distance_i,n0,n1);
BVH::prefetch(cur,types);
const vint8 new_dist(permute(distance_i,vint8(zero)));
mask &= mask-1;
isort_update<8>(dist,new_dist);
} while(mask);
/* sanity check: keys are now in descending order */
for (size_t i=0; i<7; i++)
assert(dist[i+0]>=dist[i+1]);
/* push all hits except the closest one; dist is shifted by one lane after each push */
for (size_t i=0;i<hits-1;i++)
{
stackPtr->ptr = permuteExtract(dist,n0,n1);
*(float*)&stackPtr->dist = permuteExtract(dist,tNear);
dist = align_shift_right<1>(dist,dist);
stackPtr++;
}
/* the remaining front key belongs to the closest child */
cur = permuteExtract(dist,n0,n1);
}
#endif
public:
/*! Closest-hit traversal step for a node tested with 8-wide distances
 *  (tNear is a vfloat8). Selects the hit child to continue with and pushes
 *  the remaining hit children onto the traversal stack.
 *
 *  \param cur      in: the node whose children were tested; out: the child
 *                  that traversal continues with.
 *  \param mask     hit mask of the children; must be non-zero (asserted).
 *  \param tNear    per-child entry distances, indexed by child slot.
 *  \param stackPtr top of the traversal stack; advanced once per pushed child.
 *  \param stackEnd end of the stack; in the non-AVX512VL paths it is only used
 *                  by overflow asserts, otherwise it is forwarded unchanged.
 *
 *  Distances are read and compared through their raw 32-bit patterns
 *  (unsigned int) rather than as floats.
 *  NOTE(review): presumably this relies on tNear being non-negative so that
 *  integer order matches float order -- confirm.
 *  NOTE(review): bscf() appears to return the index of a set bit of mask and
 *  clear that bit (mask is re-tested against 0 after every call) -- confirm
 *  against its definition. */
static __forceinline void traverseClosestHit(NodeRef& cur,
size_t mask,
const vfloat8& tNear,
StackItemT<NodeRef>*& stackPtr,
StackItemT<NodeRef>* stackEnd)
{
assert(mask != 0);
#if defined(__AVX512VL__)
/* the whole step is delegated to the AVX512VL implementation */
traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
#else
/* node must be fetched before cur is overwritten with a child below */
const BaseNode* node = cur.baseNode();
/*! one child is hit, continue with that child */
size_t r = bscf(mask);
cur = node->child(r);
BVH::prefetch(cur,types);
if (likely(mask == 0)) {
assert(cur != BVH::emptyNode);
return;
}
/*! two children are hit, push far child, and continue with closer child */
NodeRef c0 = cur;
const unsigned int d0 = ((unsigned int*)&tNear)[r];
r = bscf(mask);
NodeRef c1 = node->child(r);
BVH::prefetch(c1,types);
const unsigned int d1 = ((unsigned int*)&tNear)[r];
assert(c0 != BVH::emptyNode);
assert(c1 != BVH::emptyNode);
if (likely(mask == 0)) {
assert(stackPtr < stackEnd);
/* the child with the larger distance key is pushed, the other becomes cur */
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
}
#if NEW_SORTING_CODE == 1
/* each hit child is packed as a (pointer, distance) vint4 so that a whole
 * stack item can be sorted and stored with one vector operation */
vint4 s0((size_t)c0,(size_t)d0);
vint4 s1((size_t)c1,(size_t)d1);
r = bscf(mask);
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
vint4 s2((size_t)c2,(size_t)d2);
/* 3 hits */
if (likely(mask == 0)) {
/* after sorting, s0 and s1 are written to the stack and s2 becomes cur */
StackItemT<NodeRef>::sort3(s0,s1,s2);
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
cur = toSizeT(s2);
stackPtr+=2;
return;
}
r = bscf(mask);
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
vint4 s3((size_t)c3,(size_t)d3);
/* 4 hits */
if (likely(mask == 0)) {
/* after sorting, s0..s2 are written to the stack and s3 becomes cur */
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
cur = toSizeT(s3);
stackPtr+=3;
return;
}
/* more than 4 hits: the first four items are stored unsorted; the remaining
 * children are appended in the loop and the whole range is sorted afterwards */
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
/*! fallback case if more than 4 children are hit */
StackItemT<NodeRef>* stackFirst = stackPtr;
stackPtr+=4;
while (1)
{
assert(stackPtr < stackEnd);
r = bscf(mask);
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
const vint4 s((size_t)c,(size_t)d);
*(vint4*)stackPtr++ = s;
assert(c != BVH::emptyNode);
if (unlikely(mask == 0)) break;
}
/* sort every item pushed for this node, then pop the last one as cur */
sort(stackFirst,stackPtr);
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#else
/*! Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there. */
assert(stackPtr < stackEnd);
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
assert(stackPtr < stackEnd);
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
assert(stackPtr < stackEnd);
r = bscf(mask);
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
assert(c != BVH::emptyNode);
if (likely(mask == 0)) {
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
/* the last stack item is popped and becomes the node to continue with */
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
return;
}
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
assert(stackPtr < stackEnd);
r = bscf(mask);
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
assert(c != BVH::emptyNode);
if (likely(mask == 0)) {
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
return;
}
/*! fallback case if more than 4 children are hit */
/* four items have already been pushed for this node, so the range to sort starts 4 back */
StackItemT<NodeRef>* stackFirst = stackPtr-4;
while (1)
{
assert(stackPtr < stackEnd);
r = bscf(mask);
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
assert(c != BVH::emptyNode);
if (unlikely(mask == 0)) break;
}
sort(stackFirst,stackPtr);
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#endif
#endif
}
/*! Any-hit traversal step: the hit children are visited in the order bscf()
 *  extracts them from the mask, without any distance sorting (tNear is not
 *  read). Every extracted child except the last one is pushed onto the
 *  stack; the last one is left in cur and traversal continues with it.
 *
 *  \param cur      in: the node whose children were tested; out: the last
 *                  child extracted from mask.
 *  \param mask     hit mask of the children.
 *  \param tNear    unused by this function.
 *  \param stackPtr top of the node stack; advanced once per pushed child.
 *  \param stackEnd end of the stack; only used by the overflow assert. */
static __forceinline void traverseAnyHit(NodeRef& cur,
                                         size_t mask,
                                         const vfloat8& tNear,
                                         NodeRef*& stackPtr,
                                         NodeRef* stackEnd)
{
  /* fetch the parent before cur gets overwritten with one of its children */
  const BaseNode* const parent = cur.baseNode();

  while (true)
  {
    const size_t childIndex = bscf(mask);
    cur = parent->child(childIndex);
    BVH::prefetch(cur,types);
    assert(cur != BVH::emptyNode);

    /* no further hits: continue traversal with this child */
    if (likely(mask == 0))
      return;

    /* more hits follow: park this child on the stack */
    assert(stackPtr < stackEnd);
    *stackPtr++ = cur;
  }
}
};
}
}

View File

@@ -0,0 +1,31 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
namespace embree
{
namespace isa
{
/*! Precomputed byte offsets that select, per axis, a "near" and a "far"
 *  array of floats depending on the sign of a direction. Offsets are
 *  multiples of sizeof(float)*N: slots 0/1 for x, 2/3 for y and 4/5 for z,
 *  where the odd slot is chosen as "near" when the direction component is
 *  negative. The "far" offset is the near offset with the stride bit toggled.
 *  NOTE(review): the XOR toggle presumably assumes sizeof(float)*N is a
 *  power of two -- confirm for all N in use. */
struct NearFarPrecalculations
{
  size_t nearX, nearY, nearZ;
  size_t farX, farY, farZ;

  /*! leaves all offsets uninitialized */
  __forceinline NearFarPrecalculations() {}

  /*! \param dir direction whose component signs pick the near slots
   *  \param N   number of floats per array */
  __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
  {
    const size_t stride = N*sizeof(float);

    const bool negX = dir.x < 0.0f;
    const bool negY = dir.y < 0.0f;
    const bool negZ = dir.z < 0.0f;

    nearX = (negX ? 1 : 0)*stride;
    nearY = (negY ? 3 : 2)*stride;
    nearZ = (negZ ? 5 : 4)*stride;

    farX = nearX ^ stride;
    farY = nearY ^ stride;
    farZ = nearZ ^ stride;
  }
};
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,257 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "node_intersector.h"
namespace embree
{
namespace isa
{
//////////////////////////////////////////////////////////////////////////////////////
// Frustum structure used in hybrid and stream traversal
//////////////////////////////////////////////////////////////////////////////////////
/*
Optimized frustum test. We calculate t=(p-org)/dir in ray/box
intersection. We assume the rays are split by octant, thus
dir intervals are either positive or negative in each
dimension.
Case 1: dir.min >= 0 && dir.max >= 0:
t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
Case 2: dir.min < 0 && dir.max < 0:
t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
*/
template<bool robust>
struct Frustum;
/* Fast variant */
/*! Fast frustum: stores, per axis, the reciprocal-direction bounds and the
 *  precomputed products org*rdir of a ray packet, so that a slab distance
 *  can be evaluated as plane*rdir - org_rdir with a single fused operation
 *  (see intersectNodeFrustum for FrustumFast). */
template<>
struct Frustum<false>
{
/* leaves all members uninitialized; init() must be called before use */
__forceinline Frustum() {}
/*! Builds the frustum from a packet of K rays. Only lanes set in valid
 *  contribute: inactive lanes are replaced by +inf for minima and -inf for
 *  maxima before the horizontal reduction. N is forwarded to the
 *  NearFarPrecalculations constructor. */
template<int K>
__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
{
const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
reduce_min(select(valid, org.y, pos_inf)),
reduce_min(select(valid, org.z, pos_inf)));
const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
reduce_max(select(valid, org.y, neg_inf)),
reduce_max(select(valid, org.z, neg_inf)));
const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
reduce_min(select(valid, rdir.y, pos_inf)),
reduce_min(select(valid, rdir.z, pos_inf)));
const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
reduce_max(select(valid, rdir.y, neg_inf)),
reduce_max(select(valid, rdir.z, neg_inf)));
const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
}
/*! Builds the frustum from already reduced per-axis bounds of the ray
 *  origins and reciprocal directions, plus the distance interval. */
__forceinline void init(const Vec3fa& reduced_min_org,
const Vec3fa& reduced_max_org,
const Vec3fa& reduced_min_rdir,
const Vec3fa& reduced_max_rdir,
float reduced_min_dist,
float reduced_max_dist,
int N)
{
/* per-axis mask: true where the smallest reciprocal direction is >= 0 */
const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
/* on axes where the mask is false the min/max roles are swapped; the same
 * swap is applied to the origin bounds that get paired with each rdir */
min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
#if defined (__aarch64__)
/* stored negated: the aarch64 intersection path adds this term (madd)
 * where the other path subtracts it (msub) */
neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
#else
min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
#endif
min_dist = reduced_min_dist;
max_dist = reduced_max_dist;
/* near/far byte offsets are derived from the signs of min_rdir */
nf = NearFarPrecalculations(min_rdir, N);
}
/*! Replaces max_dist by the maximum of ray_tfar over all K lanes; note that
 *  no valid mask is applied here, unlike in init(). */
template<int K>
__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
{
max_dist = reduce_max(ray_tfar);
}
NearFarPrecalculations nf; // byte offsets of the near/far plane arrays
Vec3fa min_rdir; // rdir used for the entry (near) planes
Vec3fa max_rdir; // rdir used for the exit (far) planes
#if defined (__aarch64__)
Vec3fa neg_min_org_rdir; // -(min_rdir * paired origin bound)
Vec3fa neg_max_org_rdir; // -(max_rdir * paired origin bound)
#else
Vec3fa min_org_rdir; // min_rdir * paired origin bound
Vec3fa max_org_rdir; // max_rdir * paired origin bound
#endif
float min_dist; // lower end of the distance interval
float max_dist; // upper end of the distance interval
};
typedef Frustum<false> FrustumFast;
/* Robust variant */
/*! Robust frustum: like the fast variant it stores per-axis bounds of the
 *  reciprocal directions, but it keeps the origin bounds themselves instead
 *  of a precomputed org*rdir product, so the intersection can evaluate
 *  (plane - org) * rdir (see intersectNodeFrustum for FrustumRobust). */
template<>
struct Frustum<true>
{
/* leaves all members uninitialized; init() must be called before use */
__forceinline Frustum() {}
/*! Builds the frustum from a packet of K rays. Only lanes set in valid
 *  contribute: inactive lanes are replaced by +inf for minima and -inf for
 *  maxima before the horizontal reduction. N is forwarded to the
 *  NearFarPrecalculations constructor. */
template<int K>
__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
{
const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
reduce_min(select(valid, org.y, pos_inf)),
reduce_min(select(valid, org.z, pos_inf)));
const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
reduce_max(select(valid, org.y, neg_inf)),
reduce_max(select(valid, org.z, neg_inf)));
const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
reduce_min(select(valid, rdir.y, pos_inf)),
reduce_min(select(valid, rdir.z, pos_inf)));
const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
reduce_max(select(valid, rdir.y, neg_inf)),
reduce_max(select(valid, rdir.z, neg_inf)));
const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
}
/*! Builds the frustum from already reduced per-axis bounds of the ray
 *  origins and reciprocal directions, plus the distance interval. */
__forceinline void init(const Vec3fa& reduced_min_org,
const Vec3fa& reduced_max_org,
const Vec3fa& reduced_min_rdir,
const Vec3fa& reduced_max_rdir,
float reduced_min_dist,
float reduced_max_dist,
int N)
{
/* per-axis mask: true where the smallest reciprocal direction is >= 0 */
const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
/* on axes where the mask is false the min/max roles are swapped */
min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
/* min_org is the origin bound paired with min_rdir (the maximum origin on
 * axes where the mask is true), max_org the one paired with max_rdir */
min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
min_dist = reduced_min_dist;
max_dist = reduced_max_dist;
/* near/far byte offsets are derived from the signs of min_rdir */
nf = NearFarPrecalculations(min_rdir, N);
}
/*! Replaces max_dist by the maximum of ray_tfar over all K lanes; note that
 *  no valid mask is applied here, unlike in init(). */
template<int K>
__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
{
max_dist = reduce_max(ray_tfar);
}
NearFarPrecalculations nf; // byte offsets of the near/far plane arrays
Vec3fa min_rdir; // rdir used for the entry (near) planes
Vec3fa max_rdir; // rdir used for the exit (far) planes
Vec3fa min_org; // origin bound subtracted from the near planes
Vec3fa max_org; // origin bound subtracted from the far planes
float min_dist; // lower end of the distance interval
float max_dist; // upper end of the distance interval
};
typedef Frustum<true> FrustumRobust;
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Tests all N children of an AABBNode against a fast frustum.
 *
 *  The near and far plane arrays are loaded through the byte offsets stored
 *  in frustum.nf, taken relative to the address of node->lower_x. Each slab
 *  distance is evaluated as plane*rdir - org_rdir (on aarch64 as
 *  plane*rdir + neg_org_rdir).
 *
 *  \param node    node whose child bounds are tested
 *  \param frustum precomputed frustum data
 *  \param dist    receives the per-child entry distance, already clamped
 *                 from below by frustum.min_dist
 *  \return bit mask with bit i set if child i passes entry <= exit */
template<int N>
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                          const FrustumFast& frustum, vfloat<N>& dist)
{
  const char* const planes = (const char*)&node->lower_x;
  const auto& nf = frustum.nf;

  const vfloat<N> nearPlaneX = *(const vfloat<N>*)(planes + nf.nearX);
  const vfloat<N> nearPlaneY = *(const vfloat<N>*)(planes + nf.nearY);
  const vfloat<N> nearPlaneZ = *(const vfloat<N>*)(planes + nf.nearZ);
  const vfloat<N> farPlaneX  = *(const vfloat<N>*)(planes + nf.farX);
  const vfloat<N> farPlaneY  = *(const vfloat<N>*)(planes + nf.farY);
  const vfloat<N> farPlaneZ  = *(const vfloat<N>*)(planes + nf.farZ);

#if defined (__aarch64__)
  const vfloat<N> tEntryX = madd(nearPlaneX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
  const vfloat<N> tEntryY = madd(nearPlaneY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
  const vfloat<N> tEntryZ = madd(nearPlaneZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
  const vfloat<N> tExitX  = madd(farPlaneX,  vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
  const vfloat<N> tExitY  = madd(farPlaneY,  vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
  const vfloat<N> tExitZ  = madd(farPlaneZ,  vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
#else
  const vfloat<N> tEntryX = msub(nearPlaneX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
  const vfloat<N> tEntryY = msub(nearPlaneY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
  const vfloat<N> tEntryZ = msub(nearPlaneZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
  const vfloat<N> tExitX  = msub(farPlaneX,  vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
  const vfloat<N> tExitY  = msub(farPlaneY,  vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
  const vfloat<N> tExitZ  = msub(farPlaneZ,  vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
#endif

  const vfloat<N> tEntry = maxi(tEntryX, tEntryY, tEntryZ, vfloat<N>(frustum.min_dist));
  dist = tEntry;
  const vfloat<N> tExit = mini(tExitX, tExitY, tExitZ, vfloat<N>(frustum.max_dist));

  /* keep only the low N bits of the comparison mask */
  const vbool<N> childHit = tEntry <= tExit;
  return movemask(childHit) & (((size_t)1 << N)-1);
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Tests all N children of an AABBNode against a robust frustum.
 *
 *  Slab distances are evaluated as (plane - org) * rdir. For the final
 *  comparison the entry distance is scaled by 1-2*ulp and the exit distance
 *  by 1+2*ulp.
 *
 *  \param node    node whose child bounds are tested
 *  \param frustum precomputed frustum data
 *  \param dist    receives the per-child entry distance (clamped from below
 *                 by frustum.min_dist, before the ulp scaling)
 *  \return bit mask with bit i set if child i passes the scaled test */
template<int N>
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                          const FrustumRobust& frustum, vfloat<N>& dist)
{
  const char* const planes = (const char*)&node->lower_x;
  const auto& nf = frustum.nf;

  const vfloat<N> nearPlaneX = *(const vfloat<N>*)(planes + nf.nearX);
  const vfloat<N> nearPlaneY = *(const vfloat<N>*)(planes + nf.nearY);
  const vfloat<N> nearPlaneZ = *(const vfloat<N>*)(planes + nf.nearZ);
  const vfloat<N> farPlaneX  = *(const vfloat<N>*)(planes + nf.farX);
  const vfloat<N> farPlaneY  = *(const vfloat<N>*)(planes + nf.farY);
  const vfloat<N> farPlaneZ  = *(const vfloat<N>*)(planes + nf.farZ);

  const vfloat<N> tEntryX = (nearPlaneX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
  const vfloat<N> tEntryY = (nearPlaneY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
  const vfloat<N> tEntryZ = (nearPlaneZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
  const vfloat<N> tExitX  = (farPlaneX  - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
  const vfloat<N> tExitY  = (farPlaneY  - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
  const vfloat<N> tExitZ  = (farPlaneZ  - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);

  // FIXME: use per instruction rounding for AVX512
  const float shrink = 1.0f-2.0f*float(ulp);
  const float grow   = 1.0f+2.0f*float(ulp);

  const vfloat<N> tEntry = max(tEntryX, tEntryY, tEntryZ, vfloat<N>(frustum.min_dist));
  dist = tEntry;
  const vfloat<N> tExit = min(tExitX, tExitY, tExitZ, vfloat<N>(frustum.max_dist));

  /* keep only the low N bits of the comparison mask */
  const vbool<N> childHit = (shrink*tEntry <= grow*tExit);
  return movemask(childHit) & (((size_t)1 << N)-1);
}
}
}

View File

@@ -0,0 +1,844 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "node_intersector.h"
namespace embree
{
namespace isa
{
//////////////////////////////////////////////////////////////////////////////////////
// Ray packet structure used in hybrid traversal
//////////////////////////////////////////////////////////////////////////////////////
template<int K, bool robust>
struct TravRayK;
/* Fast variant */
/*! Fast packet ray for hybrid traversal: K origins, directions, reciprocal
 *  directions (via rcp_safe) and, where the target has a fused multiply
 *  variant selected below, the precomputed product org*rdir (stored negated
 *  on aarch64). */
template<int K>
struct TravRayK<K, false>
{
  /*! leaves all members uninitialized */
  __forceinline TravRayK() {}

  /*! initializes everything except tnear and tfar */
  __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  {
    init(ray_org, ray_dir, N);
  }

  /*! initializes all members including the distance interval */
  __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
    : tnear(ray_tnear), tfar(ray_tfar)
  {
    init(ray_org, ray_dir, N);
  }

  /*! Sets origin, direction and the derived reciprocal data. The per-lane
   *  near offsets in nearXYZ are only written when N is non-zero; they are
   *  multiples of sizeof(float)*N (slots 0/1 for x, 2/3 for y, 4/5 for z,
   *  the even slot being chosen where rdir >= 0). tnear/tfar are untouched. */
  __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  {
    org  = ray_org;
    dir  = ray_dir;
    rdir = rcp_safe(ray_dir);
#if defined(__aarch64__)
    neg_org_rdir = -(org * rdir);
#elif defined(__AVX2__)
    org_rdir = org * rdir;
#endif

    if (!N)
      return;

    const int stride = sizeof(float)*N;
    nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*stride), vint<K>(1*stride));
    nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*stride), vint<K>(3*stride));
    nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*stride), vint<K>(5*stride));
  }

  Vec3vf<K> org;
  Vec3vf<K> dir;
  Vec3vf<K> rdir;
#if defined(__aarch64__)
  Vec3vf<K> neg_org_rdir;
#elif defined(__AVX2__)
  Vec3vf<K> org_rdir;
#endif
  Vec3vi<K> nearXYZ;
  vfloat<K> tnear;
  vfloat<K> tfar;
};
template<int K>
using TravRayKFast = TravRayK<K, false>;
/* Robust variant */
/*! Robust packet ray for hybrid traversal: K origins, directions and
 *  reciprocal directions computed as 1/zero_fix(dir). No org*rdir product
 *  is stored in this variant. */
template<int K>
struct TravRayK<K, true>
{
  /*! leaves all members uninitialized */
  __forceinline TravRayK() {}

  /*! initializes everything except tnear and tfar */
  __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  {
    init(ray_org, ray_dir, N);
  }

  /*! initializes all members including the distance interval */
  __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
    : tnear(ray_tnear), tfar(ray_tfar)
  {
    init(ray_org, ray_dir, N);
  }

  /*! Sets origin, direction and reciprocal direction. The per-lane near
   *  offsets in nearXYZ are only written when N is non-zero; they are
   *  multiples of sizeof(float)*N (slots 0/1 for x, 2/3 for y, 4/5 for z,
   *  the even slot being chosen where rdir >= 0). tnear/tfar are untouched. */
  __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  {
    org  = ray_org;
    dir  = ray_dir;
    rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));

    if (!N)
      return;

    const int stride = sizeof(float)*N;
    nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*stride), vint<K>(1*stride));
    nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*stride), vint<K>(3*stride));
    nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*stride), vint<K>(5*stride));
  }

  Vec3vf<K> org;
  Vec3vf<K> dir;
  Vec3vf<K> rdir;
  Vec3vi<K> nearXYZ;
  vfloat<K> tnear;
  vfloat<K> tfar;
};
template<int K>
using TravRayKRobust = TravRayK<K, true>;
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with the bounds of child i of an AABBNode
 *  (fast variant).
 *
 *  \param node node holding the child bounds
 *  \param i    child slot to test
 *  \param ray  packet ray with precomputed reciprocal data
 *  \param dist receives the per-ray entry distance of the box (not clamped
 *              by ray.tnear)
 *  \return per-ray mask, set where max(entry,tnear) <= min(exit,tfar) */
template<int N, int K>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
                                      const TravRayKFast<K>& ray, vfloat<K>& dist)
{
  /* slab distances for the lower and upper plane of each axis */
#if defined(__aarch64__)
  const vfloat<K> tLowerX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tLowerY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tLowerZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
  const vfloat<K> tUpperX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tUpperY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tUpperZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
  const vfloat<K> tLowerX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tLowerY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tLowerZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
  const vfloat<K> tUpperX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tUpperY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tUpperZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
#else
  const vfloat<K> tLowerX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
#endif

#if defined(__AVX512F__) // SKX
  if (K == 16)
  {
    /* use mixed float/int min/max */
    const vfloat<K> tEntry = maxi(min(tLowerX, tUpperX), min(tLowerY, tUpperY), min(tLowerZ, tUpperZ));
    const vfloat<K> tExit  = mini(max(tLowerX, tUpperX), max(tLowerY, tUpperY), max(tLowerZ, tUpperZ));
    const vbool<K> hitMask = asInt(maxi(tEntry, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
    dist = tEntry;
    return hitMask;
  }
#endif

  /* generic path: maxi/mini throughout */
  const vfloat<K> tEntry = maxi(mini(tLowerX, tUpperX), mini(tLowerY, tUpperY), mini(tLowerZ, tUpperZ));
  const vfloat<K> tExit  = mini(maxi(tLowerX, tUpperX), maxi(tLowerY, tUpperY), maxi(tLowerZ, tUpperZ));
#if defined(__AVX512F__) // SKX
  const vbool<K> hitMask = asInt(maxi(tEntry, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
#else
  const vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);
#endif
  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with the bounds of child i of an AABBNode
 *  (robust variant). The entry distance is scaled by 1-3*ulp and the exit
 *  distance by 1+3*ulp before the interval test.
 *
 *  \param node node holding the child bounds
 *  \param i    child slot to test
 *  \param ray  robust packet ray
 *  \param dist receives the scaled per-ray entry distance
 *  \return per-ray mask, set where max(entry,tnear) <= min(exit,tfar) */
template<int N, int K>
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
                                            const TravRayKRobust<K>& ray, vfloat<K>& dist)
{
  // FIXME: use per instruction rounding for AVX512
  const vfloat<K> tLowerX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;

  const float grow   = 1.0f+3.0f*float(ulp);
  const float shrink = 1.0f-3.0f*float(ulp);

  /* per-axis entry/exit of the slab, independent of the ray direction sign */
  const vfloat<K> axisNearX = min(tLowerX, tUpperX);
  const vfloat<K> axisNearY = min(tLowerY, tUpperY);
  const vfloat<K> axisNearZ = min(tLowerZ, tUpperZ);
  const vfloat<K> axisFarX  = max(tLowerX, tUpperX);
  const vfloat<K> axisFarY  = max(tLowerY, tUpperY);
  const vfloat<K> axisFarZ  = max(tLowerZ, tUpperZ);

  const vfloat<K> tEntry = shrink*max(max(axisNearX, axisNearY), axisNearZ);
  const vfloat<K> tExit  = grow  *min(min(axisFarX,  axisFarY),  axisFarZ);

  const vbool<K> hitMask = max(tEntry, ray.tnear) <= min(tExit, ray.tfar);
  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNodeMB intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with the motion-blurred bounds of child i
 *  of an AABBNodeMB (fast variant). The box used for each ray is
 *  lower/upper + time * lower_d/upper_d.
 *
 *  \param node node holding the child bounds and their per-time deltas
 *  \param i    child slot to test
 *  \param ray  packet ray with precomputed reciprocal data
 *  \param time per-ray time used to interpolate the bounds
 *  \param dist receives the per-ray entry distance of the box (not clamped
 *              by ray.tnear)
 *  \return per-ray mask, set where max(entry,tnear) <= min(exit,tfar) */
template<int N, int K>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  /* bounds of child i evaluated at each ray's time */
  const vfloat<K> boxLowerX = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  const vfloat<K> boxLowerY = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  const vfloat<K> boxLowerZ = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  const vfloat<K> boxUpperX = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  const vfloat<K> boxUpperY = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  const vfloat<K> boxUpperZ = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));

  /* slab distances for the lower and upper plane of each axis */
#if defined(__aarch64__)
  const vfloat<K> tLowerX = madd(boxLowerX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tLowerY = madd(boxLowerY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tLowerZ = madd(boxLowerZ, ray.rdir.z, ray.neg_org_rdir.z);
  const vfloat<K> tUpperX = madd(boxUpperX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tUpperY = madd(boxUpperY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tUpperZ = madd(boxUpperZ, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
  const vfloat<K> tLowerX = msub(boxLowerX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tLowerY = msub(boxLowerY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tLowerZ = msub(boxLowerZ, ray.rdir.z, ray.org_rdir.z);
  const vfloat<K> tUpperX = msub(boxUpperX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tUpperY = msub(boxUpperY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tUpperZ = msub(boxUpperZ, ray.rdir.z, ray.org_rdir.z);
#else
  const vfloat<K> tLowerX = (boxLowerX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (boxLowerY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (boxLowerZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (boxUpperX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (boxUpperY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (boxUpperZ - ray.org.z) * ray.rdir.z;
#endif

#if defined(__AVX512F__) // SKX
  if (K == 16)
  {
    /* use mixed float/int min/max */
    const vfloat<K> tEntry = maxi(min(tLowerX, tUpperX), min(tLowerY, tUpperY), min(tLowerZ, tUpperZ));
    const vfloat<K> tExit  = mini(max(tLowerX, tUpperX), max(tLowerY, tUpperY), max(tLowerZ, tUpperZ));
    const vbool<K> hitMask = asInt(maxi(tEntry, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
    dist = tEntry;
    return hitMask;
  }
#endif

  /* generic path: maxi/mini throughout */
  const vfloat<K> tEntry = maxi(mini(tLowerX, tUpperX), mini(tLowerY, tUpperY), mini(tLowerZ, tUpperZ));
  const vfloat<K> tExit  = mini(maxi(tLowerX, tUpperX), maxi(tLowerY, tUpperY), maxi(tLowerZ, tUpperZ));
#if defined(__AVX512F__) // SKX
  const vbool<K> hitMask = asInt(maxi(tEntry, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
#else
  const vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);
#endif
  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNodeMB intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with the motion-blurred bounds of child i
 *  of an AABBNodeMB (robust variant). The box used for each ray is
 *  lower/upper + time * lower_d/upper_d; the entry distance is scaled by
 *  1-3*ulp and the exit distance by 1+3*ulp before the interval test.
 *
 *  \param node node holding the child bounds and their per-time deltas
 *  \param i    child slot to test
 *  \param ray  robust packet ray
 *  \param time per-ray time used to interpolate the bounds
 *  \param dist receives the scaled per-ray entry distance
 *  \return per-ray mask, set where max(entry,tnear) <= min(exit,tfar) */
template<int N, int K>
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
                                            const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  /* bounds of child i evaluated at each ray's time */
  const vfloat<K> boxLowerX = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  const vfloat<K> boxLowerY = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  const vfloat<K> boxLowerZ = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  const vfloat<K> boxUpperX = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  const vfloat<K> boxUpperY = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  const vfloat<K> boxUpperZ = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));

  const vfloat<K> tLowerX = (boxLowerX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (boxLowerY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (boxLowerZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (boxUpperX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (boxUpperY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (boxUpperZ - ray.org.z) * ray.rdir.z;

  const float grow   = 1.0f+3.0f*float(ulp);
  const float shrink = 1.0f-3.0f*float(ulp);

#if defined(__AVX512F__) // SKX
  if (K == 16)
  {
    const vfloat<K> tEntry = shrink*maxi(min(tLowerX, tUpperX), min(tLowerY, tUpperY), min(tLowerZ, tUpperZ));
    const vfloat<K> tExit  = grow  *mini(max(tLowerX, tUpperX), max(tLowerY, tUpperY), max(tLowerZ, tUpperZ));
    const vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);
    dist = tEntry;
    return hitMask;
  }
#endif

  const vfloat<K> tEntry = shrink*maxi(mini(tLowerX, tUpperX), mini(tLowerY, tUpperY), mini(tLowerZ, tUpperZ));
  const vfloat<K> tExit  = grow  *mini(maxi(tLowerX, tUpperX), maxi(tLowerY, tUpperY), maxi(tLowerZ, tUpperZ));
  const vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);
  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNodeMB4D intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with child i of a motion-blur node given
 *  by reference (fast variant). The spatial test uses the bounds
 *  interpolated at each ray's time; if ref is an AABBNodeMB4D the hit mask
 *  is additionally restricted to rays with lower_t[i] <= time < upper_t[i].
 *
 *  \param ref  node reference; read through getAABBNodeMB()
 *  \param i    child slot to test
 *  \param ray  packet ray with precomputed reciprocal data
 *  \param time per-ray time
 *  \param dist receives the per-ray entry distance of the box (not clamped
 *              by ray.tnear)
 *  \return per-ray hit mask */
template<int N, int K>
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();

  /* bounds of child i evaluated at each ray's time */
  const vfloat<K> boxLowerX = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  const vfloat<K> boxLowerY = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  const vfloat<K> boxLowerZ = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  const vfloat<K> boxUpperX = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  const vfloat<K> boxUpperY = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  const vfloat<K> boxUpperZ = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));

  /* slab distances for the lower and upper plane of each axis */
#if defined(__aarch64__)
  const vfloat<K> tLowerX = madd(boxLowerX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tLowerY = madd(boxLowerY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tLowerZ = madd(boxLowerZ, ray.rdir.z, ray.neg_org_rdir.z);
  const vfloat<K> tUpperX = madd(boxUpperX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tUpperY = madd(boxUpperY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tUpperZ = madd(boxUpperZ, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
  const vfloat<K> tLowerX = msub(boxLowerX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tLowerY = msub(boxLowerY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tLowerZ = msub(boxLowerZ, ray.rdir.z, ray.org_rdir.z);
  const vfloat<K> tUpperX = msub(boxUpperX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tUpperY = msub(boxUpperY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tUpperZ = msub(boxUpperZ, ray.rdir.z, ray.org_rdir.z);
#else
  const vfloat<K> tLowerX = (boxLowerX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (boxLowerY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (boxLowerZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (boxUpperX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (boxUpperY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (boxUpperZ - ray.org.z) * ray.rdir.z;
#endif

  const vfloat<K> tEntry = maxi(maxi(mini(tLowerX, tUpperX), mini(tLowerY, tUpperY)), mini(tLowerZ, tUpperZ));
  const vfloat<K> tExit  = mini(mini(maxi(tLowerX, tUpperX), maxi(tLowerY, tUpperY)), maxi(tLowerZ, tUpperZ));
  vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);

  /* 4D nodes additionally carry a half-open time interval per child */
  if (unlikely(ref.isAABBNodeMB4D()))
  {
    const typename BVHN<N>::AABBNodeMB4D* node4D = (const typename BVHN<N>::AABBNodeMB4D*) node;
    hitMask = hitMask & (vfloat<K>(node4D->lower_t[i]) <= time) & (time < vfloat<K>(node4D->upper_t[i]));
  }

  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNodeMB4D intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects a packet of K rays with child i of a motion-blur node given
 *  by reference (robust variant). The spatial test uses the bounds
 *  interpolated at each ray's time, with the entry distance scaled by
 *  1-3*ulp and the exit distance by 1+3*ulp; if ref is an AABBNodeMB4D the
 *  hit mask is additionally restricted to rays with
 *  lower_t[i] <= time < upper_t[i].
 *
 *  \param ref  node reference; read through getAABBNodeMB()
 *  \param i    child slot to test
 *  \param ray  robust packet ray
 *  \param time per-ray time
 *  \param dist receives the scaled per-ray entry distance
 *  \return per-ray hit mask */
template<int N, int K>
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
                                                const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();

  /* bounds of child i evaluated at each ray's time */
  const vfloat<K> boxLowerX = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  const vfloat<K> boxLowerY = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  const vfloat<K> boxLowerZ = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  const vfloat<K> boxUpperX = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  const vfloat<K> boxUpperY = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  const vfloat<K> boxUpperZ = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));

  const vfloat<K> tLowerX = (boxLowerX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLowerY = (boxLowerY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLowerZ = (boxLowerZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tUpperX = (boxUpperX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tUpperY = (boxUpperY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tUpperZ = (boxUpperZ - ray.org.z) * ray.rdir.z;

  const float grow   = 1.0f+3.0f*float(ulp);
  const float shrink = 1.0f-3.0f*float(ulp);

  const vfloat<K> tEntry = shrink*maxi(maxi(mini(tLowerX, tUpperX), mini(tLowerY, tUpperY)), mini(tLowerZ, tUpperZ));
  const vfloat<K> tExit  = grow  *mini(mini(maxi(tLowerX, tUpperX), maxi(tLowerY, tUpperY)), maxi(tLowerZ, tUpperZ));
  vbool<K> hitMask = maxi(tEntry, ray.tnear) <= mini(tExit, ray.tfar);

  /* 4D nodes additionally carry a half-open time interval per child */
  if (unlikely(ref.isAABBNodeMB4D()))
  {
    const typename BVHN<N>::AABBNodeMB4D* node4D = (const typename BVHN<N>::AABBNodeMB4D*) node;
    hitMask = hitMask & (vfloat<K>(node4D->lower_t[i]) <= time) & (time < vfloat<K>(node4D->upper_t[i]));
  }

  dist = tEntry;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// OBBNode intersection (fast or robust, selected by the 'robust' template parameter)
//////////////////////////////////////////////////////////////////////////////////////
/*! Slab test of child i of an OBBNode against K rays.
 *
 *  The ray origin and direction are transformed by the child's affine space
 *  (node->naabb). The slab formulas below then test against the range 0..1 on
 *  each axis: clipMin = (0 - org) * rdir and clipMax = (1 - org) * rdir.
 *  When 'robust' is true the entry distance is scaled by (1 - 3*ulp) and the
 *  exit distance by (1 + 3*ulp).
 *
 *  \param node  oriented-box node
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays
 *  \param dist  out: entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K, bool robust>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
                                      const TravRayK<K,robust>& ray, vfloat<K>& dist)
{
  /* gather the affine transform of child i */
  const Vec3f axisX (node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]);
  const Vec3f axisY (node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]);
  const Vec3f axisZ (node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]);
  const Vec3f offset(node->naabb.p   .x[i], node->naabb.p   .y[i], node->naabb.p   .z[i]);
  const AffineSpace3vf<K> toLocal(axisX, axisY, axisZ, offset);

  /* ray in the child's local space; negRcpDir is the negated safe reciprocal direction */
  const Vec3vf<K> localDir  = xfmVector(toLocal, ray.dir);
  const Vec3vf<K> negRcpDir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(localDir); // FIXME: negate instead of mul with -1?
  const Vec3vf<K> localOrg  = xfmPoint(toLocal, ray.org);

  /* (Vec3fa(zero) - org) * rdir */
  const vfloat<K> tLoX = localOrg.x * negRcpDir.x;
  const vfloat<K> tLoY = localOrg.y * negRcpDir.y;
  const vfloat<K> tLoZ = localOrg.z * negRcpDir.z;

  /* (Vec3fa(one) - org) * rdir */
  const vfloat<K> tHiX = tLoX - negRcpDir.x;
  const vfloat<K> tHiY = tLoY - negRcpDir.y;
  const vfloat<K> tHiZ = tLoZ - negRcpDir.z;

  vfloat<K> tEnter = maxi(mini(tLoX, tHiX), mini(tLoY, tHiY), mini(tLoZ, tHiZ));
  vfloat<K> tExit  = mini(maxi(tLoX, tHiX), maxi(tLoY, tHiY), maxi(tLoZ, tHiZ));

  if (robust) {
    const vfloat<K> shrink(1.0f-3.0f*float(ulp));
    const vfloat<K> grow  (1.0f+3.0f*float(ulp));
    tEnter = tEnter*shrink;
    tExit  = tExit *grow;
  }

  const vbool<K> hitMask = maxi(tEnter, ray.tnear) <= mini(tExit, ray.tfar);
  dist = tEnter;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// OBBNodeMB intersection (fast or robust, selected by the 'robust' template parameter)
//////////////////////////////////////////////////////////////////////////////////////
/*! Slab test of child i of a motion blur OBBNodeMB against K rays.
 *
 *  The ray is transformed by the child's affine space (node->space0). The box
 *  tested in that space is lerp(zero, b1.lower, time) .. lerp(one, b1.upper, time),
 *  evaluated per ray. When 'robust' is true the entry distance is scaled by
 *  (1 - 3*ulp) and the exit distance by (1 + 3*ulp).
 *
 *  \param node  oriented-box motion blur node
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays
 *  \param time  per-ray time
 *  \param dist  out: entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K, bool robust>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
                                      const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  /* gather the affine transform of child i */
  const Vec3f axisX (node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]);
  const Vec3f axisY (node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]);
  const Vec3f axisZ (node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]);
  const Vec3f offset(node->space0.p   .x[i], node->space0.p   .y[i], node->space0.p   .z[i]);
  const AffineSpace3vf<K> toLocal(axisX, axisY, axisZ, offset);

  /* the two key-frame boxes: the first is fixed to zero..one, the second comes from node->b1 */
  const Vec3vf<K> startLo = zero;
  const Vec3vf<K> startHi = one;
  const Vec3vf<K> endLo(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
  const Vec3vf<K> endHi(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);

  /* box at each ray's time */
  const Vec3vf<K> boxLo = lerp(startLo, endLo, time);
  const Vec3vf<K> boxHi = lerp(startHi, endHi, time);

  /* ray in the child's local space */
  const Vec3vf<K> localDir = xfmVector(toLocal, ray.dir);
  const Vec3vf<K> rcpDir   = rcp_safe(localDir);
  const Vec3vf<K> localOrg = xfmPoint(toLocal, ray.org);

  const vfloat<K> tLoX = (boxLo.x - localOrg.x) * rcpDir.x;
  const vfloat<K> tLoY = (boxLo.y - localOrg.y) * rcpDir.y;
  const vfloat<K> tLoZ = (boxLo.z - localOrg.z) * rcpDir.z;
  const vfloat<K> tHiX = (boxHi.x - localOrg.x) * rcpDir.x;
  const vfloat<K> tHiY = (boxHi.y - localOrg.y) * rcpDir.y;
  const vfloat<K> tHiZ = (boxHi.z - localOrg.z) * rcpDir.z;

  vfloat<K> tEnter = maxi(mini(tLoX, tHiX), mini(tLoY, tHiY), mini(tLoZ, tHiZ));
  vfloat<K> tExit  = mini(maxi(tLoX, tHiX), maxi(tLoY, tHiY), maxi(tLoZ, tHiZ));

  if (robust) {
    const vfloat<K> shrink(1.0f-3.0f*float(ulp));
    const vfloat<K> grow  (1.0f+3.0f*float(ulp));
    tEnter = tEnter*shrink;
    tExit  = tExit *grow;
  }

  const vbool<K> hitMask = maxi(tEnter, ray.tnear) <= mini(tExit, ray.tfar);
  dist = tEnter;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// QuantizedBaseNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Non-robust slab test of child i of a quantized node against K rays.
 *
 *  All N children are dequantized and lane i is used. The plane distances are
 *  computed with madd (aarch64), msub (AVX2) or plain (bound - org) * rdir,
 *  depending on the target. On AVX-512 builds the final interval comparison is
 *  carried out on the integer reinterpretation (asInt) of both sides.
 *
 *  \param node  quantized node; child i must be valid (asserted)
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays (fast layout)
 *  \param dist  out: entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
                                               const TravRayK<K,false>& ray, vfloat<K>& dist)
{
  assert(movemask(node->validMask()) & ((size_t)1 << i));

  /* dequantized bounds of all N children */
  const vfloat<N> loX = node->dequantizeLowerX();
  const vfloat<N> hiX = node->dequantizeUpperX();
  const vfloat<N> loY = node->dequantizeLowerY();
  const vfloat<N> hiY = node->dequantizeUpperY();
  const vfloat<N> loZ = node->dequantizeLowerZ();
  const vfloat<N> hiZ = node->dequantizeUpperZ();

  /* distances to the six bounding planes of child i */
#if defined(__aarch64__)
  const vfloat<K> tLoX = madd(loX[i], ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tLoY = madd(loY[i], ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tLoZ = madd(loZ[i], ray.rdir.z, ray.neg_org_rdir.z);
  const vfloat<K> tHiX = madd(hiX[i], ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tHiY = madd(hiY[i], ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tHiZ = madd(hiZ[i], ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
  const vfloat<K> tLoX = msub(loX[i], ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tLoY = msub(loY[i], ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tLoZ = msub(loZ[i], ray.rdir.z, ray.org_rdir.z);
  const vfloat<K> tHiX = msub(hiX[i], ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tHiY = msub(hiY[i], ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tHiZ = msub(hiZ[i], ray.rdir.z, ray.org_rdir.z);
#else
  const vfloat<K> tLoX = (loX[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLoY = (loY[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLoZ = (loZ[i] - ray.org.z) * ray.rdir.z;
  const vfloat<K> tHiX = (hiX[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tHiY = (hiY[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tHiZ = (hiZ[i] - ray.org.z) * ray.rdir.z;
#endif

#if defined(__AVX512F__) // SKX
  if (K == 16)
  {
    /* use mixed float/int min/max */
    const vfloat<K> tEnter = maxi(min(tLoX, tHiX), min(tLoY, tHiY), min(tLoZ, tHiZ));
    const vfloat<K> tExit  = mini(max(tLoX, tHiX), max(tLoY, tHiY), max(tLoZ, tHiZ));
    const vbool<K> hitMask = asInt(maxi(tEnter, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
    dist = tEnter;
    return hitMask;
  }
  else
#endif
  {
    const vfloat<K> tEnter = maxi(mini(tLoX, tHiX), mini(tLoY, tHiY), mini(tLoZ, tHiZ));
    const vfloat<K> tExit  = mini(maxi(tLoX, tHiX), maxi(tLoY, tHiY), maxi(tLoZ, tHiZ));
#if defined(__AVX512F__) // SKX
    const vbool<K> hitMask = asInt(maxi(tEnter, ray.tnear)) <= asInt(mini(tExit, ray.tfar));
#else
    const vbool<K> hitMask = maxi(tEnter, ray.tnear) <= mini(tExit, ray.tfar);
#endif
    dist = tEnter;
    return hitMask;
  }
}
/*! Robust slab test of child i of a quantized node against K rays.
 *
 *  All N children are dequantized and lane i is used. Plane distances are
 *  computed as (bound - org) * rdir and combined with float min/max. The entry
 *  distance is scaled by (1 - 3*ulp) and the exit distance by (1 + 3*ulp)
 *  before the comparison against the ray's [tnear, tfar] range.
 *
 *  \param node  quantized node; child i must be valid (asserted)
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays (robust layout)
 *  \param dist  out: the scaled entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
                                               const TravRayK<K,true>& ray, vfloat<K>& dist)
{
  assert(movemask(node->validMask()) & ((size_t)1 << i));

  /* dequantized bounds of all N children */
  const vfloat<N> loX = node->dequantizeLowerX();
  const vfloat<N> hiX = node->dequantizeUpperX();
  const vfloat<N> loY = node->dequantizeLowerY();
  const vfloat<N> hiY = node->dequantizeUpperY();
  const vfloat<N> loZ = node->dequantizeLowerZ();
  const vfloat<N> hiZ = node->dequantizeUpperZ();

  /* distances to the six bounding planes of child i */
  const vfloat<K> tLoX = (loX[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLoY = (loY[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLoZ = (loZ[i] - ray.org.z) * ray.rdir.z;
  const vfloat<K> tHiX = (hiX[i] - ray.org.x) * ray.rdir.x;
  const vfloat<K> tHiY = (hiY[i] - ray.org.y) * ray.rdir.y;
  const vfloat<K> tHiZ = (hiZ[i] - ray.org.z) * ray.rdir.z;

  /* scale factors applied to the combined entry / exit distances */
  const float grow   = 1.0f+3.0f*float(ulp);
  const float shrink = 1.0f-3.0f*float(ulp);
  const vfloat<K> tEnter = shrink*max(min(tLoX, tHiX), min(tLoY, tHiY), min(tLoZ, tHiZ));
  const vfloat<K> tExit  = grow  *min(max(tLoX, tHiX), max(tLoY, tHiY), max(tLoZ, tHiZ));

  const vbool<K> hitMask = max(tEnter, ray.tnear) <= min(tExit, ray.tfar);
  dist = tEnter;
  return hitMask;
}
/*! Non-robust slab test of child i of a quantized motion blur node against K rays.
 *
 *  The bounds of child i are dequantized per ray for the given time. The plane
 *  distances are computed with madd (aarch64), msub (AVX2) or plain
 *  (bound - org) * rdir, depending on the target, and combined with float
 *  min/max.
 *
 *  \param node  quantized motion blur node; child i must be valid (asserted)
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays (fast layout)
 *  \param time  per-ray time
 *  \param dist  out: entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                                 const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  assert(movemask(node->validMask()) & ((size_t)1 << i));

  /* bounds of child i at each ray's time */
  const vfloat<K> loX = node->template dequantizeLowerX<K>(i,time);
  const vfloat<K> hiX = node->template dequantizeUpperX<K>(i,time);
  const vfloat<K> loY = node->template dequantizeLowerY<K>(i,time);
  const vfloat<K> hiY = node->template dequantizeUpperY<K>(i,time);
  const vfloat<K> loZ = node->template dequantizeLowerZ<K>(i,time);
  const vfloat<K> hiZ = node->template dequantizeUpperZ<K>(i,time);

  /* distances to the six bounding planes */
#if defined(__aarch64__)
  const vfloat<K> tLoX = madd(loX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tLoY = madd(loY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tLoZ = madd(loZ, ray.rdir.z, ray.neg_org_rdir.z);
  const vfloat<K> tHiX = madd(hiX, ray.rdir.x, ray.neg_org_rdir.x);
  const vfloat<K> tHiY = madd(hiY, ray.rdir.y, ray.neg_org_rdir.y);
  const vfloat<K> tHiZ = madd(hiZ, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
  const vfloat<K> tLoX = msub(loX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tLoY = msub(loY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tLoZ = msub(loZ, ray.rdir.z, ray.org_rdir.z);
  const vfloat<K> tHiX = msub(hiX, ray.rdir.x, ray.org_rdir.x);
  const vfloat<K> tHiY = msub(hiY, ray.rdir.y, ray.org_rdir.y);
  const vfloat<K> tHiZ = msub(hiZ, ray.rdir.z, ray.org_rdir.z);
#else
  const vfloat<K> tLoX = (loX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLoY = (loY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLoZ = (loZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tHiX = (hiX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tHiY = (hiY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tHiZ = (hiZ - ray.org.z) * ray.rdir.z;
#endif

  const vfloat<K> tEnter = max(min(tLoX, tHiX), min(tLoY, tHiY), min(tLoZ, tHiZ));
  const vfloat<K> tExit  = min(max(tLoX, tHiX), max(tLoY, tHiY), max(tLoZ, tHiZ));

  const vbool<K> hitMask = max(tEnter, ray.tnear) <= min(tExit, ray.tfar);
  dist = tEnter;
  return hitMask;
}
/*! Robust slab test of child i of a quantized motion blur node against K rays.
 *
 *  The bounds of child i are dequantized per ray for the given time. Plane
 *  distances are computed as (bound - org) * rdir and combined with float
 *  min/max. The entry distance is scaled by (1 - 3*ulp) and the exit distance
 *  by (1 + 3*ulp) before the comparison against the ray's [tnear, tfar] range.
 *
 *  \param node  quantized motion blur node; child i must be valid (asserted)
 *  \param i     index of the child inside the node
 *  \param ray   packet of K traversal rays (robust layout)
 *  \param time  per-ray time
 *  \param dist  out: the scaled entry distance for every ray
 *  \return      per-ray hit mask
 */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                                 const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
  assert(movemask(node->validMask()) & ((size_t)1 << i));

  /* bounds of child i at each ray's time */
  const vfloat<K> loX = node->template dequantizeLowerX<K>(i,time);
  const vfloat<K> hiX = node->template dequantizeUpperX<K>(i,time);
  const vfloat<K> loY = node->template dequantizeLowerY<K>(i,time);
  const vfloat<K> hiY = node->template dequantizeUpperY<K>(i,time);
  const vfloat<K> loZ = node->template dequantizeLowerZ<K>(i,time);
  const vfloat<K> hiZ = node->template dequantizeUpperZ<K>(i,time);

  /* distances to the six bounding planes */
  const vfloat<K> tLoX = (loX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tLoY = (loY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tLoZ = (loZ - ray.org.z) * ray.rdir.z;
  const vfloat<K> tHiX = (hiX - ray.org.x) * ray.rdir.x;
  const vfloat<K> tHiY = (hiY - ray.org.y) * ray.rdir.y;
  const vfloat<K> tHiZ = (hiZ - ray.org.z) * ray.rdir.z;

  /* scale factors applied to the combined entry / exit distances */
  const float grow   = 1.0f+3.0f*float(ulp);
  const float shrink = 1.0f-3.0f*float(ulp);
  const vfloat<K> tEnter = shrink*max(min(tLoX, tHiX), min(tLoY, tHiY), min(tLoZ, tHiZ));
  const vfloat<K> tExit  = grow  *min(max(tLoX, tHiX), max(tLoY, tHiY), max(tLoZ, tHiZ));

  const vbool<K> hitMask = max(tEnter, ray.tnear) <= min(tExit, ray.tfar);
  dist = tEnter;
  return hitMask;
}
//////////////////////////////////////////////////////////////////////////////////////
// Node intersectors used in hybrid traversal
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects N nodes with K rays.
 *
 *  Primary template, declared only. The specializations that follow are
 *  selected by the node type mask 'types' (BVH_AN1, BVH_AN2, BVH_AN1_UN1, ...)
 *  and by the 'robust' flag. Each of them provides a static intersect() that
 *  tests child i of a node against a packet of K rays. */
template<int N, int K, int types, bool robust>
struct BVHNNodeIntersectorK;
/*! BVH_AN1, non-robust: the node is read as an AABBNode. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
{
  /*! Tests child i of 'node' against K rays.
   *
   *  vmask is both an input and an output parameter. Callers are expected to
   *  pass the hit mask of the parent node, which is what allows the current
   *  hit mask to be computed correctly. That parent mask only matters for
   *  motion blur node intersections (different rays may have different times),
   *  so for regular nodes, as here, vmask is simply overwritten. The 'time'
   *  argument is not used by this variant.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
    vmask = childHit;
    return true;
  }
};
/*! BVH_AN1, robust: the node is read as an AABBNode. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
{
  /*! Tests child i of 'node' against K rays using the robust AABB test.
   *  vmask is overwritten with the result; 'time' is not used.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
    vmask = childHit;
    return true;
  }
};
/*! BVH_AN2, non-robust: the node is read as an AABBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  vmask is overwritten with the result.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
    vmask = childHit;
    return true;
  }
};
/*! BVH_AN2, robust: the node is read as an AABBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time using
   *  the robust motion blur AABB test. vmask is overwritten with the result.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
    vmask = childHit;
    return true;
  }
};
/*! BVH_AN1_UN1, non-robust: the node is either an AABBNode or an OBBNode. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
{
  /*! Tests child i of 'node' against K rays. Anything that is not an AABBNode
   *  is handled as an OBBNode without a further type check. vmask is
   *  overwritten with the result; 'time' is not used.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNode())) {
      vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
      return true;
    }

    /* remaining case: OBBNode (not verified here) */
    vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
    return true;
  }
};
/*! BVH_AN1_UN1, robust: the node is either an AABBNode or an OBBNode. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
{
  /*! Tests child i of 'node' against K rays. AABBNodes go through the robust
   *  AABB test; anything else is handled as an OBBNode without a further type
   *  check. vmask is overwritten with the result; 'time' is not used.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNode())) {
      vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
      return true;
    }

    /* remaining case: OBBNode (not verified here) */
    vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
    return true;
  }
};
/*! BVH_AN2_UN2, non-robust: the node is either an AABBNodeMB or an OBBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  Anything that is not an AABBNodeMB is handled as an OBBNodeMB without a
   *  further type check. vmask is overwritten with the result.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB())) {
      vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
      return true;
    }

    /* remaining case: OBBNodeMB (not verified here) */
    vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    return true;
  }
};
/*! BVH_AN2_UN2, robust: the node is either an AABBNodeMB or an OBBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  AABBNodeMBs go through the robust motion blur AABB test; anything else is
   *  handled as an OBBNodeMB without a further type check. vmask is
   *  overwritten with the result.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB())) {
      vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
      return true;
    }

    /* remaining case: OBBNodeMB (not verified here) */
    vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    return true;
  }
};
/*! BVH_AN2_AN4D, non-robust: the node is an AABBNodeMB or an AABBNodeMB4D. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  Unlike the variants that overwrite vmask, the result is ANDed into the
   *  incoming vmask, so the caller's mask restricts the outcome.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
    vmask &= childHit;
    return true;
  }
};
/*! BVH_AN2_AN4D, robust: the node is an AABBNodeMB or an AABBNodeMB4D. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time using
   *  the robust 4D motion blur test. The result is ANDed into the incoming
   *  vmask, so the caller's mask restricts the outcome.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    const vbool<K> childHit = intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
    vmask &= childHit;
    return true;
  }
};
/*! BVH_AN2_AN4D_UN2, non-robust: the node is an AABBNodeMB, an AABBNodeMB4D
 *  or an OBBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  Axis-aligned motion blur nodes (with or without a time range) use the 4D
   *  test; every other node must be an OBBNodeMB (asserted). In both cases the
   *  result is ANDed into the incoming vmask.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
      const vbool<K> childHit = intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
      vmask &= childHit;
      return true;
    }

    /* remaining case: OBBNodeMB */
    assert(node.isOBBNodeMB());
    const vbool<K> childHit = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    vmask &= childHit;
    return true;
  }
};
/*! BVH_AN2_AN4D_UN2, robust: the node is an AABBNodeMB, an AABBNodeMB4D or an
 *  OBBNodeMB. */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
{
  /*! Tests child i of 'node' against K rays at the given per-ray time.
   *  Axis-aligned motion blur nodes (with or without a time range) use the
   *  robust 4D test; every other node must be an OBBNodeMB (asserted). In both
   *  cases the result is ANDed into the incoming vmask.
   *
   *  \param dist  out: entry distance for every ray
   *  \return      always true
   */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
      const vbool<K> childHit = intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
      vmask &= childHit;
      return true;
    }

    /* remaining case: OBBNodeMB */
    assert(node.isOBBNodeMB());
    const vbool<K> childHit = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    vmask &= childHit;
    return true;
  }
};
/*! Intersects N nodes with K rays.
 *
 *  Primary template, declared only. It is specialized below for robust ==
 *  false and robust == true. Each specialization provides intersectK()
 *  overloads for QuantizedBaseNode and QuantizedBaseNodeMB that test child i
 *  of the node against a packet of K rays. */
template<int N, int K, bool robust>
struct BVHNQuantizedBaseNodeIntersectorK;
/*! Non-robust quantized node intersector: forwards to the TravRayK<K,false>
 *  overloads of intersectQuantizedNodeK / intersectQuantizedNodeMBK. */
template<int N, int K>
struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
{
  /*! Tests child i of a quantized node against K rays.
   *  \param dist  out: entry distance for every ray
   *  \return      per-ray hit mask */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                           const TravRayK<K,false>& ray, vfloat<K>& dist)
  {
    const vbool<K> hitMask = intersectQuantizedNodeK<N,K>(node,i,ray,dist);
    return hitMask;
  }

  /*! Tests child i of a quantized motion blur node against K rays at the
   *  given per-ray time.
   *  \param dist  out: entry distance for every ray
   *  \return      per-ray hit mask */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                           const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  {
    const vbool<K> hitMask = intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
    return hitMask;
  }
};
/*! Robust quantized node intersector: forwards to the TravRayK<K,true>
 *  overloads of intersectQuantizedNodeK / intersectQuantizedNodeMBK. */
template<int N, int K>
struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
{
  /*! Tests child i of a quantized node against K rays.
   *  \param dist  out: entry distance for every ray
   *  \return      per-ray hit mask */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                           const TravRayK<K,true>& ray, vfloat<K>& dist)
  {
    const vbool<K> hitMask = intersectQuantizedNodeK<N,K>(node,i,ray,dist);
    return hitMask;
  }

  /*! Tests child i of a quantized motion blur node against K rays at the
   *  given per-ray time.
   *  \param dist  out: entry distance for every ray
   *  \return      per-ray hit mask */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                           const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  {
    const vbool<K> hitMask = intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
    return hitMask;
  }
};
}
}