initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

View File

@@ -0,0 +1,321 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Cone described by two axis end points and the radius at each of them.
 *  The intersection routine below never clips the result against the two
 *  end points, so it effectively works on the unbounded cone that passes
 *  through both circles. */
struct Cone
{
const Vec3fa p0; //!< start position of cone
const Vec3fa p1; //!< end position of cone
const float r0; //!< start radius of cone
const float r1; //!< end radius of cone
/*! Stores the two end points and their radii (note the p0,r0,p1,r1 argument order). */
__forceinline Cone(const Vec3fa& p0, const float r0, const Vec3fa& p1, const float r1)
: p0(p0), p1(p1), r0(r0), r1(r1) {}
/*! Intersects the ray org + t*dir with the cone.
 *
 *  \param org   ray origin
 *  \param dir   ray direction (not required to be normalized; dot(dir,dir) is used explicitly)
 *  \param t_o   receives the parametric interval [lower,upper] of the ray inside the cone;
 *               either bound may be +/- infinity
 *  \param u0_o  axis parameter of the near hit (0 at p0, 1 at p1)
 *  \param Ng0_o unnormalized surface normal at the near hit
 *  \param u1_o  axis parameter of the far hit
 *  \param Ng1_o unnormalized surface normal at the far hit
 *  \return true if the ray hits the cone
 *
 *  The early returns in the parallel/cylinder branch and the miss returns
 *  happen before the u/Ng outputs are written, so those outputs are only
 *  meaningful when the function reaches the end. */
__forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir,
BBox1f& t_o,
float& u0_o, Vec3fa& Ng0_o,
float& u1_o, Vec3fa& Ng1_o) const
{
/* calculate quadratic equation to solve */
/* everything is expressed relative to the ray origin; dP is the unit axis
 * direction and dr the radius change per unit length along the axis */
const Vec3fa v0 = p0-org;
const Vec3fa v1 = p1-org;
const float rl = rcp_length(v1-v0);
const Vec3fa P0 = v0, dP = (v1-v0)*rl;
const float dr = (r1-r0)*rl;
const Vec3fa O = -P0, dO = dir;
const float dOdO = dot(dO,dO);
const float OdO = dot(dO,O);
const float OO = dot(O,O);
/* dOz and Oz are the components of the direction and origin along the axis */
const float dOz = dot(dP,dO);
const float Oz = dot(dP,O);
/* cone radius at the axis position of the ray origin */
const float R = r0 + Oz*dr;
const float A = dOdO - sqr(dOz) * (1.0f+sqr(dr));
const float B = 2.0f * (OdO - dOz*(Oz + R*dr));
const float C = OO - (sqr(Oz) + sqr(R));
/* we miss the cone if determinant is smaller than zero */
const float D = B*B - 4.0f*A*C;
if (D < 0.0f) return false;
/* special case for rays that are "parallel" to the cone */
/* A is treated as zero when it is small relative to the terms it was computed from */
const float eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
if (unlikely(abs(A) < eps))
{
/* cylinder case */
/* with a (nearly) constant radius the ray runs along the cylinder:
 * C <= 0 means the origin lies inside, giving an unbounded interval */
if (abs(dr) < 16.0f*float(ulp)) {
if (C <= 0.0f) { t_o = BBox1f(neg_inf,pos_inf); return true; }
else { t_o = BBox1f(pos_inf,neg_inf); return false; }
}
/* cone case */
else
{
/* if we hit the negative cone there cannot be a hit */
/* with A ~ 0 the equation degenerates to B*t + C = 0, which has one root */
const float t = -C/B;
const float z0 = Oz+t*dOz;
const float z0r = r0+z0*dr;
if (z0r < 0.0f) return false;
/* test if we start inside or outside the cone */
if (dOz*dr > 0.0f) t_o = BBox1f(t,pos_inf);
else t_o = BBox1f(neg_inf,t);
}
}
/* standard case for "non-parallel" rays */
else
{
const float Q = sqrt(D);
const float rcp_2A = rcp(2.0f*A);
t_o.lower = (-B-Q)*rcp_2A;
t_o.upper = (-B+Q)*rcp_2A;
/* standard case where both hits are on same cone */
if (likely(A > 0.0f)) {
/* a negative radius at the near hit means both hits lie on the mirrored cone */
const float z0 = Oz+t_o.lower*dOz;
const float z0r = r0+z0*dr;
if (z0r < 0.0f) return false;
}
/* special case where the hits are on the positive and negative cone */
else
{
/* depending on the ray direction and the open direction
* of the cone we have a hit from inside or outside the
* cone */
if (dOz*dr > 0) t_o.upper = pos_inf;
else t_o.lower = neg_inf;
}
}
/* calculates u and Ng for near hit */
{
u0_o = (Oz+t_o.lower*dOz)*rl;
/* Pr is the hit point and Pl the matching axis point, both relative to org */
const Vec3fa Pr = t_o.lower*dir;
const Vec3fa Pl = v0 + u0_o*(v1-v0);
/* R is the unit radial direction; U and V span the tangent plane */
const Vec3fa R = normalize(Pr-Pl);
const Vec3fa U = (p1-p0)+(r1-r0)*R;
const Vec3fa V = cross(p1-p0,R);
Ng0_o = cross(V,U);
}
/* calculates u and Ng for far hit */
{
u1_o = (Oz+t_o.upper*dOz)*rl;
const Vec3fa Pr = t_o.upper*dir;
const Vec3fa Pl = v0 + u1_o*(v1-v0);
const Vec3fa R = normalize(Pr-Pl);
const Vec3fa U = (p1-p0)+(r1-r0)*R;
const Vec3fa V = cross(p1-p0,R);
Ng1_o = cross(V,U);
}
return true;
}
/*! Convenience overload that only reports the hit interval and discards u and Ng. */
__forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, BBox1f& t_o) const
{
float u0_o; Vec3fa Ng0_o; float u1_o; Vec3fa Ng1_o;
return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
}
/*! Runs a single test case and prints a diagnostic on failure.
 *
 *  \param id        test number used in the failure message
 *  \param shouldhit expected return value of Cone::intersect
 *  \param t0,t1     expected interval bounds, only checked when shouldhit is true.
 *                   An infinite expectation must match exactly; the sentinels
 *                   -1E6 / +1E6 only require the result to be at least that far
 *                   out; any other value is compared with an absolute tolerance.
 *  \return true if the test passed */
static bool verify(const size_t id, const Cone& cone, const Ray& ray, bool shouldhit, const float t0, const float t1)
{
float eps = 0.001f;
BBox1f t; bool hit;
hit = cone.intersect(ray.org,ray.dir,t);
bool failed = hit != shouldhit;
if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : (t0 == -1E6) ? t.lower > -1E6f : abs(t0-t.lower) > eps;
if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : (t1 == +1E6) ? t.upper < +1E6f : abs(t1-t.upper) > eps;
if (!failed) return true;
embree_cout << "Cone test " << id << " failed: cone = " << cone << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
return false;
}
/* verify cone class */
/*! Runs the built-in test cases (a widening cone, a narrowing cone and a
 *  cylinder) and returns true only if all of them pass. */
static bool verify()
{
bool passed = true;
/* cone0: radius grows from 0 at the origin to 1 at x=1 */
const Cone cone0(Vec3fa(0.0f,0.0f,0.0f),0.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
passed &= verify(0,cone0,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,3.0f,pos_inf);
passed &= verify(1,cone0,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
passed &= verify(2,cone0,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),false,0.0f,0.0f);
passed &= verify(3,cone0,Ray(Vec3fa(+1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,3.0f);
passed &= verify(4,cone0,Ray(Vec3fa(-1.0f,0.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,1.0f,pos_inf);
passed &= verify(5,cone0,Ray(Vec3fa(+1.0f,0.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
passed &= verify(6,cone0,Ray(Vec3fa(+0.0f,0.0f,1.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,1.0f);
passed &= verify(7,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
passed &= verify(8,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(+1.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.5f,+1E6);
passed &= verify(9,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,+1.0f,+0.0f),0.0f,float(inf)),true,-1E6,-0.5f);
/* cone1: radius shrinks from 1 at the origin to 0 at x=1 */
const Cone cone1(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),0.0f);
passed &= verify(10,cone1,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,2.0f);
passed &= verify(11,cone1,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,0.0f,4.0f);
/* cylinder: constant radius 1, exercises the dr ~ 0 branch for axis-parallel rays */
const Cone cylinder(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
passed &= verify(12,cylinder,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
passed &= verify(13,cylinder,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
passed &= verify(14,cylinder,Ray(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
passed &= verify(15,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
passed &= verify(16,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
passed &= verify(17,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
passed &= verify(18,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
return passed;
}
/*! output operator */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cone& c) {
return cout << "Cone { p0 = " << c.p0 << ", r0 = " << c.r0 << ", p1 = " << c.p1 << ", r1 = " << c.r1 << "}";
}
};
/*! N-wide (structure-of-arrays) counterpart of Cone: every SIMD lane holds
 *  one cone given by two axis end points and the radius at each of them.
 *  As in the scalar version, hits are not clipped against the end points. */
template<int N>
struct ConeN
{
  typedef Vec3<vfloat<N>> Vec3vfN;

  const Vec3vfN p0;   //!< start position of cone
  const Vec3vfN p1;   //!< end position of cone
  const vfloat<N> r0; //!< start radius of cone
  const vfloat<N> r1; //!< end radius of cone

  /*! Stores the end points and radii (note the p0,r0,p1,r1 argument order). */
  __forceinline ConeN(const Vec3vfN& p0, const vfloat<N>& r0, const Vec3vfN& p1, const vfloat<N>& r1)
    : p0(p0), p1(p1), r0(r0), r1(r1) {}

  /*! Extracts the cone stored in lane i as a scalar Cone. */
  __forceinline Cone operator[] (const size_t i) const
  {
    assert(i<N);
    return Cone(Vec3fa(p0.x[i],p0.y[i],p0.z[i]),r0[i],Vec3fa(p1.x[i],p1.y[i],p1.z[i]),r1[i]);
  }

  /*! Intersects the single ray org + t*dir with all N cones.
   *
   *  \param org   ray origin
   *  \param dir   ray direction
   *  \param t_o   per-lane interval [lower,upper] of the ray inside the cone;
   *               only lanes that take one of the branches below are written
   *  \param u0_o  per-lane axis parameter of the near hit (0 at p0, 1 at p1)
   *  \param Ng0_o per-lane unnormalized normal at the near hit
   *  \param u1_o  per-lane axis parameter of the far hit
   *  \param Ng1_o per-lane unnormalized normal at the far hit
   *  \return mask of the lanes that hit their cone; when no lane has a
   *          non-negative discriminant the function returns early and none
   *          of the outputs are written */
  __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
                                   BBox<vfloat<N>>& t_o,
                                   vfloat<N>& u0_o, Vec3vfN& Ng0_o,
                                   vfloat<N>& u1_o, Vec3vfN& Ng1_o) const
  {
    /* calculate quadratic equation to solve; everything is expressed
     * relative to the ray origin, dP is the unit axis direction and dr the
     * radius change per unit length along the axis */
    const Vec3vfN v0 = p0-Vec3vfN(org);
    const Vec3vfN v1 = p1-Vec3vfN(org);

    const vfloat<N> rl = rcp_length(v1-v0);
    const Vec3vfN P0 = v0, dP = (v1-v0)*rl;
    const vfloat<N> dr = (r1-r0)*rl;
    const Vec3vfN O = -P0, dO = dir;

    const vfloat<N> dOdO = dot(dO,dO);
    const vfloat<N> OdO = dot(dO,O);
    const vfloat<N> OO = dot(O,O);
    const vfloat<N> dOz = dot(dP,dO);
    const vfloat<N> Oz = dot(dP,O);

    const vfloat<N> R = r0 + Oz*dr;
    const vfloat<N> A = dOdO - sqr(dOz) * (vfloat<N>(1.0f)+sqr(dr));
    const vfloat<N> B = 2.0f * (OdO - dOz*(Oz + R*dr));
    const vfloat<N> C = OO - (sqr(Oz) + sqr(R));

    /* we miss the cone if determinant is smaller than zero */
    const vfloat<N> D = B*B - 4.0f*A*C;
    vbool<N> valid = D >= 0.0f;
    if (none(valid)) return valid;

    /* special case for rays that are "parallel" to the cone */
    const vfloat<N> eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
    const vbool<N> validt = valid & (abs(A) < eps);
    const vbool<N> validf = valid & !(abs(A) < eps);
    if (unlikely(any(validt)))
    {
      /* these masks are N lanes wide like every other mask in this function,
       * so they must be vbool<N> and not the native-width vboolx */
      const vbool<N> validtt = validt & (abs(dr) < 16.0f*float(ulp));
      const vbool<N> validtf = validt & (abs(dr) >= 16.0f*float(ulp));

      /* cylinder case */
      if (unlikely(any(validtt)))
      {
        t_o.lower = select(validtt, select(C <= 0.0f, vfloat<N>(neg_inf), vfloat<N>(pos_inf)), t_o.lower);
        t_o.upper = select(validtt, select(C <= 0.0f, vfloat<N>(pos_inf), vfloat<N>(neg_inf)), t_o.upper);
        valid &= !validtt | (C <= 0.0f);
      }

      /* cone case */
      if (any(validtf))
      {
        /* if we hit the negative cone there cannot be a hit */
        const vfloat<N> t = -C/B;
        const vfloat<N> z0 = Oz+t*dOz;
        const vfloat<N> z0r = r0+z0*dr;
        valid &= !validtf | (z0r >= 0.0f);

        /* test if we start inside or outside the cone */
        t_o.lower = select(validtf, select(dOz*dr > 0.0f, t, vfloat<N>(neg_inf)), t_o.lower);
        t_o.upper = select(validtf, select(dOz*dr > 0.0f, vfloat<N>(pos_inf), t), t_o.upper);
      }
    }

    /* standard case for "non-parallel" rays */
    if (likely(any(validf)))
    {
      const vfloat<N> Q = sqrt(D);
      const vfloat<N> rcp_2A = 0.5f*rcp(A);
      t_o.lower = select(validf, (-B-Q)*rcp_2A, t_o.lower);
      t_o.upper = select(validf, (-B+Q)*rcp_2A, t_o.upper);

      /* standard case where both hits are on same cone */
      const vbool<N> validft = validf & (A > 0.0f);
      const vbool<N> validff = validf & !(A > 0.0f);
      if (any(validft)) {
        const vfloat<N> z0 = Oz+t_o.lower*dOz;
        const vfloat<N> z0r = r0+z0*dr;
        valid &= !validft | (z0r >= 0.0f);
      }

      /* special case where the hits are on the positive and negative cone */
      if (any(validff)) {
        /* depending on the ray direction and the open direction
         * of the cone we have a hit from inside or outside the
         * cone */
        t_o.lower = select(validff, select(dOz*dr > 0.0f, t_o.lower, float(neg_inf)), t_o.lower);
        t_o.upper = select(validff, select(dOz*dr > 0.0f, float(pos_inf), t_o.upper), t_o.upper);
      }
    }

    /* calculates u and Ng for near hit */
    {
      u0_o = (Oz+t_o.lower*dOz)*rl;
      const Vec3vfN Pr = t_o.lower*Vec3vfN(dir);
      const Vec3vfN Pl = v0 + u0_o*(v1-v0);
      const Vec3vfN R = normalize(Pr-Pl);
      const Vec3vfN U = (p1-p0)+(r1-r0)*R;
      const Vec3vfN V = cross(p1-p0,R);
      Ng0_o = cross(V,U);
    }

    /* calculates u and Ng for far hit */
    {
      u1_o = (Oz+t_o.upper*dOz)*rl;
      /* the far hit point lies at t_o.upper, matching the scalar Cone
       * implementation (using t_o.lower here gave the wrong normal) */
      const Vec3vfN Pr = t_o.upper*Vec3vfN(dir);
      const Vec3vfN Pl = v0 + u1_o*(v1-v0);
      const Vec3vfN R = normalize(Pr-Pl);
      const Vec3vfN U = (p1-p0)+(r1-r0)*R;
      const Vec3vfN V = cross(p1-p0,R);
      Ng1_o = cross(V,U);
    }
    return valid;
  }

  /*! Convenience overload that only reports the hit interval and discards u and Ng. */
  __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
  {
    vfloat<N> u0_o; Vec3vfN Ng0_o; vfloat<N> u1_o; Vec3vfN Ng1_o;
    return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
  }
};
}
}

View File

@@ -0,0 +1,209 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
namespace __coneline_internal
{
/*! Intersects one ray (broadcast over M lanes) with M cone segments and
 *  reports up to two hits per call through the epilog.
 *
 *  \param valid_i    lanes that hold a segment to test
 *  \param ray_org_in ray origin
 *  \param ray_dir    ray direction
 *  \param ray_tnear  lower bound of the accepted hit distance
 *  \param ray_tfar   functor returning the current upper bound; it is
 *                    re-evaluated after the first epilog call
 *  \param v0,v1      segment end points; the w component is used as radius
 *  \param cL,cR      lanes for which the end disk at v0 / v1 is tested
 *  \param epilog     callback invoked with the mask of candidate lanes and
 *                    the hit record
 *  \return true if the epilog accepted the first or the second hit */
template<int M, typename Epilog, typename ray_tfar_func>
static __forceinline bool intersectCone(const vbool<M>& valid_i,
                                        const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
                                        const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar,
                                        const Vec4vf<M>& v0, const Vec4vf<M>& v1,
                                        const vbool<M>& cL, const vbool<M>& cR,
                                        const Epilog& epilog)
{
  vbool<M> valid = valid_i;

  /* move ray origin closer to make calculations numerically stable */
  const vfloat<M> dOdO = sqr(ray_dir);
  const vfloat<M> rcp_dOdO = rcp(dOdO);
  const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
  const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO;
  const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir;

  const Vec3vf<M> dP = v1.xyz() - v0.xyz();
  const Vec3vf<M> p0 = ray_org - v0.xyz();
  const Vec3vf<M> p1 = ray_org - v1.xyz();

  const vfloat<M> dPdP = sqr(dP);
  const vfloat<M> dP0 = dot(p0,dP);
  const vfloat<M> dP1 = dot(p1,dP);
  const vfloat<M> dOdP = dot(ray_dir,dP);

  // intersect cone body
  const vfloat<M> dr = v0.w - v1.w;
  const vfloat<M> hy = dPdP + sqr(dr);
  const vfloat<M> dO0 = dot(ray_dir,p0);
  const vfloat<M> OO = sqr(p0);
  const vfloat<M> dPdP2 = sqr(dPdP);
  const vfloat<M> dPdPr0 = dPdP*v0.w;

  /* quadratic A*t^2 + 2*B*t + C = 0, hence the reduced discriminant B^2 - A*C */
  const vfloat<M> A = dPdP2 - sqr(dOdP)*hy;
  const vfloat<M> B = dPdP2*dO0 - dP0*dOdP*hy + dPdPr0*(dr*dOdP);
  const vfloat<M> C = dPdP2*OO - sqr(dP0)*hy + dPdPr0*(2.0f*dr*dP0 - dPdPr0);

  const vfloat<M> D = B*B - A*C;
  valid &= D >= 0.0f;
  if (unlikely(none(valid))) {
    return false;
  }

  /* standard case for "non-parallel" rays */
  const vfloat<M> Q = sqrt(D);
  const vfloat<M> rcp_A = rcp(A);

  /* special case for rays that are "parallel" to the cone - assume miss */
  const vbool<M> isParallel = abs(A) <= min_rcp_input;

  vfloat<M> t_cone_lower = select (isParallel, neg_inf, (-B-Q)*rcp_A);
  vfloat<M> t_cone_upper = select (isParallel, pos_inf, (-B+Q)*rcp_A);

  /* y is the hit position along the axis scaled by |dP|^2; only hits
   * strictly between the two end points belong to the cone body */
  const vfloat<M> y_lower = dP0 + t_cone_lower*dOdP;
  const vfloat<M> y_upper = dP0 + t_cone_upper*dOdP;
  t_cone_lower = select(valid & (y_lower > 0.0f) & (y_lower < dPdP), t_cone_lower, pos_inf);
  t_cone_upper = select(valid & (y_upper > 0.0f) & (y_upper < dPdP), t_cone_upper, neg_inf);

  /* intersect the end disks; a disk that is not hit is encoded as pos_inf */
  const vbool<M> hitDisk0 = valid & cL;
  const vbool<M> hitDisk1 = valid & cR;
  const vfloat<M> rcp_dOdP = rcp(dOdP);
  const vfloat<M> t_disk0 = select (hitDisk0, select (sqr(p0*dOdP-ray_dir*dP0)<(sqr(v0.w)*sqr(dOdP)), -dP0*rcp_dOdP, pos_inf), pos_inf);
  const vfloat<M> t_disk1 = select (hitDisk1, select (sqr(p1*dOdP-ray_dir*dP1)<(sqr(v1.w)*sqr(dOdP)), -dP1*rcp_dOdP, pos_inf), pos_inf);
  const vfloat<M> t_disk_lower = min(t_disk0, t_disk1);
  const vfloat<M> t_disk_upper = max(t_disk0, t_disk1);

  /* merge body and disk hits into the nearest and the farthest hit */
  const vfloat<M> t_lower = min(t_cone_lower, t_disk_lower);
  const vfloat<M> t_upper = max(t_cone_upper, select(t_lower==t_disk_lower,
                                select(t_disk_upper==vfloat<M>(pos_inf),neg_inf,t_disk_upper),
                                select(t_disk_lower==vfloat<M>(pos_inf),neg_inf,t_disk_lower)));

  /* hit distances are relative to the moved origin, so dt is added back
   * before they are compared against the ray interval */
  const vbool<M> valid_lower = valid & (ray_tnear <= dt+t_lower) & (dt+t_lower <= ray_tfar()) & (t_lower != vfloat<M>(pos_inf));
  const vbool<M> valid_upper = valid & (ray_tnear <= dt+t_upper) & (dt+t_upper <= ray_tfar()) & (t_upper != vfloat<M>(neg_inf));

  const vbool<M> valid_first = valid_lower | valid_upper;
  if (unlikely(none(valid_first)))
    return false;

  const vfloat<M> t_first = select(valid_lower, t_lower, t_upper);
  const vfloat<M> y_first = select(valid_lower, y_lower, y_upper);

  const vfloat<M> rcp_dPdP = rcp(dPdP);
  const Vec3vf<M> dP2drr0dP = dPdP*dr*v0.w*dP;
  const Vec3vf<M> dPhy = dP*hy;
  const vbool<M> cone_hit_first = valid & ((t_first == t_cone_lower) | (t_first == t_cone_upper));
  const vbool<M> disk0_hit_first = valid & (t_first == t_disk0);
  const Vec3vf<M> Ng_first = select(cone_hit_first, dPdP2*(p0+t_first*ray_dir)+dP2drr0dP-dPhy*y_first, select(disk0_hit_first, -dP, dP));
  const vfloat<M> u_first = select(cone_hit_first, y_first*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one)));

  /* invoke intersection filter for first hit */
  RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first);
  const bool is_hit_first = epilog(valid_first, hit);

  /* check for possible second hits before potentially accepted hit */
  const vfloat<M> t_second = t_upper;
  const vfloat<M> y_second = y_upper;
  const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_upper <= ray_tfar());
  if (unlikely(none(valid_second)))
    return is_hit_first;

  /* invoke intersection filter for second hit */
  const vbool<M> cone_hit_second = (t_second == t_cone_lower) | (t_second == t_cone_upper);
  const vbool<M> disk0_hit_second = t_second == t_disk0;
  const Vec3vf<M> Ng_second = select(cone_hit_second, dPdP2*(p0+t_second*ray_dir)+dP2drr0dP-dPhy*y_second, select(disk0_hit_second, -dP, dP));
  /* u of a disk hit is 0 on disk 0 and 1 on disk 1; it has to be chosen from
   * the mask of the second hit, consistent with Ng_second above */
  const vfloat<M> u_second = select(cone_hit_second, y_second*rcp_dPdP, select(disk0_hit_second, vfloat<M>(zero), vfloat<M>(one)));

  hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
  const bool is_hit_second = epilog(valid_second, hit);

  return is_hit_first | is_hit_second;
}
}
/*! Hit record for M cone-line intersections, stored one lane per segment. */
template<int M>
struct ConeLineIntersectorHitM
{
  vfloat<M> vu;  //!< per-lane u coordinate
  vfloat<M> vv;  //!< per-lane v coordinate
  vfloat<M> vt;  //!< per-lane hit distance
  Vec3vf<M> vNg; //!< per-lane geometry normal

  /*! Default constructor; sets no member explicitly. */
  __forceinline ConeLineIntersectorHitM() {}

  /*! Builds the record from already computed per-lane values. */
  __forceinline ConeLineIntersectorHitM(const vfloat<M>& u,
                                        const vfloat<M>& v,
                                        const vfloat<M>& t,
                                        const Vec3vf<M>& Ng)
    : vu(u), vv(v), vt(t), vNg(Ng)
  {
  }

  /*! Nothing to post-process for this hit type. */
  __forceinline void finalize()
  {
  }

  /*! (u,v) pair of lane i. */
  __forceinline Vec2f uv(const size_t i) const
  {
    return Vec2f(vu[i], vv[i]);
  }

  /*! Hit distance of lane i. */
  __forceinline float t(const size_t i) const
  {
    return vt[i];
  }

  /*! Geometry normal of lane i. */
  __forceinline Vec3fa Ng(const size_t i) const
  {
    return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]);
  }
};
/*! Single-ray front end of the cone segment intersector: broadcasts the ray
 *  over M lanes and forwards to __coneline_internal::intersectCone. */
template<int M>
struct ConeCurveIntersector1
{
  typedef CurvePrecalculations1 Precalculations;

  /*! Functor that reads the ray's current tfar each time it is called. */
  struct ray_tfar
  {
    Ray& ray;

    __forceinline ray_tfar(Ray& r) : ray(r) {}

    __forceinline vfloat<M> operator() () const
    {
      return ray.tfar;
    }
  };

  /*! Intersects `ray` with the M segments (v0i,v1i).
   *
   *  Both end points are first passed through enlargeRadiusToMinWidth; cL and
   *  cR are forwarded unchanged, as are valid_i and the epilog. The `pre`
   *  argument is not used here. Returns the result of intersectCone. */
  template<typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      Ray& ray,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const vbool<M>& cL, const vbool<M>& cR,
                                      const Epilog& epilog)
  {
    /* broadcast the scalar ray over all lanes */
    const Vec3vf<M> org(ray.org.x, ray.org.y, ray.org.z);
    const Vec3vf<M> dir(ray.dir.x, ray.dir.y, ray.dir.z);
    const vfloat<M> tnear(ray.tnear());

    const Vec4vf<M> a = enlargeRadiusToMinWidth<M>(context, geom, org, v0i);
    const Vec4vf<M> b = enlargeRadiusToMinWidth<M>(context, geom, org, v1i);

    return __coneline_internal::intersectCone<M>(valid_i, org, dir, tnear, ray_tfar(ray), a, b, cL, cR, epilog);
  }
};
/*! Ray-packet front end of the cone segment intersector: picks ray k out of
 *  a packet of K rays, broadcasts it over M lanes and forwards to
 *  __coneline_internal::intersectCone. */
template<int M, int K>
struct ConeCurveIntersectorK
{
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Functor that reads tfar of ray k each time it is called. */
  struct ray_tfar
  {
    RayK<K>& ray;
    size_t k;

    __forceinline ray_tfar(RayK<K>& r, size_t idx) : ray(r), k(idx) {}

    __forceinline vfloat<M> operator() () const
    {
      return ray.tfar[k];
    }
  };

  /*! Intersects ray k of the packet with the M segments (v0i,v1i).
   *
   *  Both end points are first passed through enlargeRadiusToMinWidth; cL and
   *  cR are forwarded unchanged, as are valid_i and the epilog. The `pre`
   *  argument is not used here. Returns the result of intersectCone. */
  template<typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      RayK<K>& ray, size_t k,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const vbool<M>& cL, const vbool<M>& cR,
                                      const Epilog& epilog)
  {
    /* broadcast ray k over all lanes */
    const Vec3vf<M> org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Vec3vf<M> dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
    const vfloat<M> tnear = ray.tnear()[k];

    const Vec4vf<M> a = enlargeRadiusToMinWidth<M>(context, geom, org, v0i);
    const Vec4vf<M> b = enlargeRadiusToMinWidth<M>(context, geom, org, v1i);

    return __coneline_internal::intersectCone<M>(valid_i, org, dir, tnear, ray_tfar(ray,k), a, b, cL, cR, epilog);
  }
};
}
}

View File

@@ -0,0 +1,141 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "coneline_intersector.h"
#include "intersector_epilog.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for LineMi<M> primitives interpreted as cone
 *  segments. `filter` is forwarded to the epilog templates. */
template<int M, bool filter>
struct ConeCurveMiIntersector1
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with all valid segments of `line`; hits are
   *  reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    Vec4vf<M> v0,v1;
    vbool<M> cL,cR;
    line.gather(v0,v1,cL,cR,geom);
    const vbool<M> valid = line.valid();
    ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Occlusion test against all valid segments of `line`; returns the
   *  result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    Vec4vf<M> v0,v1;
    vbool<M> cL,cR;
    line.gather(v0,v1,cL,cR,geom);
    const vbool<M> valid = line.valid();
    return ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
  }
};
/*! Single-ray intersector for LineMi<M> primitives interpreted as cone
 *  segments, motion-blur variant: vertices are gathered at ray.time().
 *  `filter` is forwarded to the epilog templates. */
template<int M, bool filter>
struct ConeCurveMiMBIntersector1
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with all valid segments of `line`; hits are
   *  reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    Vec4vf<M> v0,v1;
    vbool<M> cL,cR;
    line.gather(v0,v1,cL,cR,geom,ray.time());
    const vbool<M> valid = line.valid();
    ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Occlusion test against all valid segments of `line`; returns the
   *  result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    Vec4vf<M> v0,v1;
    vbool<M> cL,cR;
    line.gather(v0,v1,cL,cR,geom,ray.time());
    const vbool<M> valid = line.valid();
    return ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
  }
};
/*! Ray-packet intersector for LineMi<M> primitives interpreted as cone
 *  segments; works on ray k of a packet of K rays. `filter` is forwarded to
 *  the epilog templates. */
template<int M, int K, bool filter>
struct ConeCurveMiIntersectorK
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with all valid segments of `line`; hits are reported
   *  through Intersect1KEpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);

    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> begin_pt;
    Vec4vf<M> end_pt;
    vbool<M> capL;
    vbool<M> capR;
    line.gather(begin_pt,end_pt,capL,capR,segments);

    const vbool<M> active = line.valid();
    ConeCurveIntersectorK<M,K>::intersect(active,ray,k,context,segments,pre,
                                          begin_pt,end_pt,capL,capR,
                                          Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }

  /*! Occlusion test of ray k against all valid segments of `line`; returns
   *  the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);

    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> begin_pt;
    Vec4vf<M> end_pt;
    vbool<M> capL;
    vbool<M> capR;
    line.gather(begin_pt,end_pt,capL,capR,segments);

    const vbool<M> active = line.valid();
    return ConeCurveIntersectorK<M,K>::intersect(active,ray,k,context,segments,pre,
                                                 begin_pt,end_pt,capL,capR,
                                                 Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }
};
/*! Ray-packet intersector for LineMi<M> primitives interpreted as cone
 *  segments, motion-blur variant: vertices are gathered at the time of
 *  ray k. `filter` is forwarded to the epilog templates. */
template<int M, int K, bool filter>
struct ConeCurveMiMBIntersectorK
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with all valid segments of `line`; hits are reported
   *  through Intersect1KEpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);

    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> begin_pt;
    Vec4vf<M> end_pt;
    vbool<M> capL;
    vbool<M> capR;
    line.gather(begin_pt,end_pt,capL,capR,segments,ray.time()[k]);

    const vbool<M> active = line.valid();
    ConeCurveIntersectorK<M,K>::intersect(active,ray,k,context,segments,pre,
                                          begin_pt,end_pt,capL,capR,
                                          Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }

  /*! Occlusion test of ray k against all valid segments of `line`; returns
   *  the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);

    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> begin_pt;
    Vec4vf<M> end_pt;
    vbool<M> capL;
    vbool<M> capR;
    line.gather(begin_pt,end_pt,capL,capR,segments,ray.time()[k]);

    const vbool<M> active = line.valid();
    return ConeCurveIntersectorK<M,K>::intersect(active,ray,k,context,segments,pre,
                                                 begin_pt,end_pt,capL,capR,
                                                 Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }
};
}
}

View File

@@ -0,0 +1,222 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
/*! Compressed leaf block holding up to M curve primitives of one geometry.
 *  Per primitive it stores the primID plus a quantized oriented bounding box
 *  (three char axis vectors and short lower/upper bounds per axis); per block
 *  it stores the geometry type, the primitive count, the geomID and a common
 *  offset/scale used for the quantization. The payload is packed by hand into
 *  `data` and its layout depends on the number of stored primitives, so all
 *  fields are reached through the offset-based accessors below. */
template<int M>
struct CurveNi
{
/*! Primitive type descriptor; the member functions are defined elsewhere. */
struct Type : public PrimitiveType {
const char* name() const;
size_t sizeActive(const char* This) const;
size_t sizeTotal(const char* This) const;
size_t getBytes(const char* This) const;
};
static Type type;
public:
/* Returns maximum number of stored primitives */
static __forceinline size_t max_size() { return M; }
/* Returns required number of primitive blocks for N primitives */
static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
/* Returns the number of bytes needed for N primitives: every full block
 * takes sizeof(CurveNi), a trailing partial block with r primitives only
 * takes the 22 header/footer bytes plus 25 bytes per primitive */
static __forceinline size_t bytes(size_t N)
{
const size_t f = N/M, r = N%M;
static_assert(sizeof(CurveNi) == 22+25*M, "internal data layout issue");
return f*sizeof(CurveNi) + (r!=0)*(22 + 25*r);
}
public:
/*! Default constructor. */
__forceinline CurveNi () {}
/*! fill curve from curve list */
/*! Encodes the primitives prims[begin .. min(begin+M,_end)) into this block.
 *  `begin` is passed by reference and is advanced past the consumed
 *  primitives. All primitives are expected to share the geomID of the first
 *  one (checked by assert only). */
__forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
{
size_t end = min(begin+M,_end);
N = (unsigned char)(end-begin);
const unsigned int geomID0 = prims[begin].geomID();
this->geomID(N) = geomID0;
ty = (unsigned char) scene->get(geomID0)->getType();
/* encode all primitives */
/* first pass: merge the bounds of all primitives of this block */
BBox3fa bounds = empty;
for (size_t i=0; i<N; i++)
{
const PrimRef& prim = prims[begin+i];
const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
const unsigned int primID = prim.primID();
bounds.extend(scene->get(geomID)->vbounds(primID));
}
/* calculate offset and scale */
/* the scale is derived from the block bounds; it is forced to zero when
 * the bounds have zero extent in all dimensions */
Vec3fa loffset = bounds.lower;
float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
*this->offset(N) = loffset;
*this->scale(N) = lscale;
/* encode all primitives */
/* second pass: quantize the oriented bounds of every primitive */
for (size_t i=0; i<M && begin<end; i++, begin++)
{
const PrimRef& prim = prims[begin];
const unsigned int geomID = prim.geomID();
const unsigned int primID = prim.primID();
/* the aligned space is scaled by 126 and truncated so that every
 * component can be stored in a char */
const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpace(primID);
const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
/* note: this `bounds` shadows the block bounds computed above */
const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID);
/* lower bounds are rounded down and upper bounds rounded up, then clamped
 * to the range stored in a short; the asserts flag values that needed the clamp */
bounds_vx_x(N)[i] = (char) space3.vx.x;
bounds_vx_y(N)[i] = (char) space3.vx.y;
bounds_vx_z(N)[i] = (char) space3.vx.z;
bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f);
bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f);
assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f);
bounds_vy_x(N)[i] = (char) space3.vy.x;
bounds_vy_y(N)[i] = (char) space3.vy.y;
bounds_vy_z(N)[i] = (char) space3.vy.z;
bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f);
bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f);
assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f);
bounds_vz_x(N)[i] = (char) space3.vz.x;
bounds_vz_y(N)[i] = (char) space3.vz.y;
bounds_vz_z(N)[i] = (char) space3.vz.z;
bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f);
bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f);
assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.upper.z) && ceil (bounds.upper.z) <= 32767.0f);
this->primID(N)[i] = primID;
}
}
/*! Allocates and fills the leaf blocks for the primitive range `set` and
 *  returns the encoded leaf reference. Only bytes(set.size()) bytes are
 *  allocated, so a trailing partial block is smaller than sizeof(CurveNi). */
template<typename BVH, typename Allocator>
__forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
{
size_t start = set.begin();
size_t items = CurveNi::blocks(set.size());
size_t numbytes = CurveNi::bytes(set.size());
CurveNi* accel = (CurveNi*) alloc.malloc1(numbytes,BVH::byteAlignment);
/* fill() advances `start`, so each iteration consumes the next up-to-M primitives */
for (size_t i=0; i<items; i++) {
accel[i].fill(prims,start,set.end(),bvh->scene);
}
return bvh->encodeLeaf((char*)accel,items);
};
public:
// 27.6 - 46 bytes per primitive
unsigned char ty; // geometry type, written by fill() from getType()
unsigned char N; // number of primitives stored in this block
unsigned char data[4+25*M+16]; // packed payload, see Layout below
/*
struct Layout
{
unsigned int geomID;
unsigned int primID[N];
char bounds_vx_x[N];
char bounds_vx_y[N];
char bounds_vx_z[N];
short bounds_vx_lower[N];
short bounds_vx_upper[N];
char bounds_vy_x[N];
char bounds_vy_y[N];
char bounds_vy_z[N];
short bounds_vy_lower[N];
short bounds_vy_upper[N];
char bounds_vz_x[N];
char bounds_vz_y[N];
char bounds_vz_z[N];
short bounds_vz_lower[N];
short bounds_vz_upper[N];
Vec3f offset;
float scale;
};
*/
/* Accessors into the packed payload. Every offset is counted in bytes from
 * `this`: +2 skips ty and N, +6 additionally skips geomID. The parameter N
 * (which shadows the member of the same name) is the primitive count of the
 * block and determines where each array starts. The const overloads cast
 * constness away from `this` internally but hand out const pointers or
 * references. */
__forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((char*)this+2); }
__forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((char*)this+2); }
__forceinline unsigned int* primID(size_t N) { return (unsigned int*)((char*)this+6); }
__forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((char*)this+6); }
/* first axis (vx) of the quantized space and the bounds along it */
__forceinline char* bounds_vx_x(size_t N) { return (char*)((char*)this+6+4*N); }
__forceinline const char* bounds_vx_x(size_t N) const { return (char*)((char*)this+6+4*N); }
__forceinline char* bounds_vx_y(size_t N) { return (char*)((char*)this+6+5*N); }
__forceinline const char* bounds_vx_y(size_t N) const { return (char*)((char*)this+6+5*N); }
__forceinline char* bounds_vx_z(size_t N) { return (char*)((char*)this+6+6*N); }
__forceinline const char* bounds_vx_z(size_t N) const { return (char*)((char*)this+6+6*N); }
__forceinline short* bounds_vx_lower(size_t N) { return (short*)((char*)this+6+7*N); }
__forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((char*)this+6+7*N); }
__forceinline short* bounds_vx_upper(size_t N) { return (short*)((char*)this+6+9*N); }
__forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((char*)this+6+9*N); }
/* second axis (vy) of the quantized space and the bounds along it */
__forceinline char* bounds_vy_x(size_t N) { return (char*)((char*)this+6+11*N); }
__forceinline const char* bounds_vy_x(size_t N) const { return (char*)((char*)this+6+11*N); }
__forceinline char* bounds_vy_y(size_t N) { return (char*)((char*)this+6+12*N); }
__forceinline const char* bounds_vy_y(size_t N) const { return (char*)((char*)this+6+12*N); }
__forceinline char* bounds_vy_z(size_t N) { return (char*)((char*)this+6+13*N); }
__forceinline const char* bounds_vy_z(size_t N) const { return (char*)((char*)this+6+13*N); }
__forceinline short* bounds_vy_lower(size_t N) { return (short*)((char*)this+6+14*N); }
__forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((char*)this+6+14*N); }
__forceinline short* bounds_vy_upper(size_t N) { return (short*)((char*)this+6+16*N); }
__forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((char*)this+6+16*N); }
/* third axis (vz) of the quantized space and the bounds along it */
__forceinline char* bounds_vz_x(size_t N) { return (char*)((char*)this+6+18*N); }
__forceinline const char* bounds_vz_x(size_t N) const { return (char*)((char*)this+6+18*N); }
__forceinline char* bounds_vz_y(size_t N) { return (char*)((char*)this+6+19*N); }
__forceinline const char* bounds_vz_y(size_t N) const { return (char*)((char*)this+6+19*N); }
__forceinline char* bounds_vz_z(size_t N) { return (char*)((char*)this+6+20*N); }
__forceinline const char* bounds_vz_z(size_t N) const { return (char*)((char*)this+6+20*N); }
__forceinline short* bounds_vz_lower(size_t N) { return (short*)((char*)this+6+21*N); }
__forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((char*)this+6+21*N); }
__forceinline short* bounds_vz_upper(size_t N) { return (short*)((char*)this+6+23*N); }
__forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((char*)this+6+23*N); }
/* block-wide quantization offset (12 bytes) followed by the scale (4 bytes) */
__forceinline Vec3f* offset(size_t N) { return (Vec3f*)((char*)this+6+25*N); }
__forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((char*)this+6+25*N); }
__forceinline float* scale(size_t N) { return (float*)((char*)this+6+25*N+12); }
__forceinline const float* scale(size_t N) const { return (float*)((char*)this+6+25*N+12); }
/* one past the last used byte of a block that stores N primitives */
__forceinline char* end(size_t N) { return (char*)this+6+25*N+16; }
__forceinline const char* end(size_t N) const { return (char*)this+6+25*N+16; }
};
/* out-of-class definition of the static 'type' member of CurveNi<M> */
template<int M>
typename CurveNi<M>::Type CurveNi<M>::type;
/* convenience names for the block sizes M=4 and M=8 */
typedef CurveNi<4> Curve4i;
typedef CurveNi<8> Curve8i;
}

View File

@@ -0,0 +1,648 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "curveNi.h"
#include "roundline_intersector.h"
#include "coneline_intersector.h"
#include "curve_intersector_ribbon.h"
#include "curve_intersector_oriented.h"
#include "curve_intersector_sweep.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for CurveNi<M> leaf blocks.
 *
 *  intersect() tests the ray against the quantized oriented bounds of
 *  all curves of a block at once and returns the lanes worth testing.
 *  The intersect_x / occluded_x entry points then walk these candidate
 *  lanes, gather the curve data from the CurveGeometry and forward it
 *  to the 'Intersector' functor together with a freshly built 'Epilog':
 *
 *    _t  : four control points (gather)
 *    _n  : four control points plus four normals (gather)
 *    _h  : hermite data p0,t0,p1,t1 (gather_hermite)
 *    _hn : hermite data plus n0,dn0,n1,dn1 (gather_hermite)
 *
 *  The occluded_x variants return true as soon as the Intersector
 *  reports a hit and false if no candidate produced one. */
template<int M>
struct CurveNiIntersector1
{
  typedef CurveNi<M> Primitive;
  typedef Vec3vf<M> Vec3vfM;
  typedef LinearSpace3<Vec3vfM> LinearSpace3vfM;
  typedef CurvePrecalculations1 Precalculations;

private:

  /*! Prefetches the vertices of the next candidates still set in 'mask':
   *  the lowest set lane goes to L1, the one after it to L2. 'mask' is
   *  taken by value so the caller's traversal mask is not modified. The
   *  bit scans are only executed on a non-zero word; scanning an empty
   *  mask (last candidate of a block) yields an unspecified index. */
  static __forceinline void prefetch_next_curves(const CurveGeometry* geom, const Primitive& prim, const size_t N, size_t mask)
  {
    if (mask == 0) return;
    const size_t i1 = bscf(mask);
    const unsigned int primID1 = prim.primID(N)[i1];
    geom->prefetchL1_vertices(geom->curve(primID1));

    if (mask == 0) return;
    const size_t i2 = bsf(mask);
    const unsigned int primID2 = prim.primID(N)[i2];
    geom->prefetchL2_vertices(geom->curve(primID2));
  }

public:

  /*! Slab test of the ray against the bounds of all curves in 'prim'.
   *
   *  The ray is first moved into the block's quantization frame
   *  ((org-offset)*scale, dir*scale) and then into the per-curve space
   *  stored in bounds_vx/vy/vz. The resulting entry distance of each
   *  lane is written to tNear_o.
   *
   *  \returns mask of lanes whose index is below prim.N and for which
   *           tNear <= tFar holds. */
  static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
  {
    const size_t N = prim.N;

    /* load quantization offset and scale of the block */
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    const Vec3fa offset = *prim.offset(N);
    const float scale = *prim.scale(N);
#else
    /* offset (xyz) and scale are adjacent in memory, one unaligned load fetches both */
    const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
    const Vec3fa offset = Vec3fa(offset_scale);
    const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
#endif

    /* ray in the quantization frame of the block */
    const Vec3fa org1 = (ray.org-offset)*scale;
    const Vec3fa dir1 = ray.dir*scale;

    /* per-curve space, one curve per SIMD lane */
    const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
                                vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
                                vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));

    const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
    const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
    const Vec3vfM rcp_dir2 = rcp_safe(dir2);

    /* distances to the six bounding planes */
    const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
    const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
    const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
    const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
    const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
    const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);

    /* entry/exit distances, scaled by (1 -/+ 3 ulp) and clipped to [ray.tnear(), ray.tfar] */
    const vfloat<M> round_up  (1.0f+3.0f*float(ulp));
    const vfloat<M> round_down(1.0f-3.0f*float(ulp));
    const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
    const vfloat<M> tFar  = round_up  *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
    tNear_o = tNear;

    /* only lanes that hold a curve (index < N) can be valid */
    return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
  }

  /*! Intersects the ray with all candidate curves of the block (four control points per curve). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));

      prefetch_next_curves(geom,prim,N,mask);

      Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
  }

  /*! Occlusion test against all candidate curves of the block (four control points per curve). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));

      prefetch_next_curves(geom,prim,N,mask);

      if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
    return false;
  }

  /*! Intersects the ray with all candidate curves of the block (control points plus normals). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

      unsigned int vertexID = geom->curve(primID);
      Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);

      prefetch_next_curves(geom,prim,N,mask);

      Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
  }

  /*! Occlusion test against all candidate curves of the block (control points plus normals). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

      unsigned int vertexID = geom->curve(primID);
      Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);

      prefetch_next_curves(geom,prim,N,mask);

      if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
    return false;
  }

  /*! Intersects the ray with all candidate curves of the block (hermite data p0,t0,p1,t1). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
      Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
  }

  /*! Occlusion test against all candidate curves of the block (hermite data p0,t0,p1,t1). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
      if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
    return false;
  }

  /*! Intersects the ray with all candidate curves of the block (hermite data plus n0,dn0,n1,dn1). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
      Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
  }

  /*! Occlusion test against all candidate curves of the block (hermite data plus n0,dn0,n1,dn1). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
      if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
    return false;
  }
};
/*! Intersector for lane 'k' of a ray packet (RayK<K>) against CurveNi<M>
 *  leaf blocks.
 *
 *  intersect() tests ray k against the quantized oriented bounds of all
 *  curves of a block at once and returns the lanes worth testing. The
 *  intersect_x / occluded_x entry points then walk these candidate
 *  lanes, gather the curve data from the CurveGeometry and forward it
 *  to the 'Intersector' functor together with a freshly built 'Epilog':
 *
 *    _t  : four control points (gather)
 *    _n  : four control points plus four normals (gather)
 *    _h  : hermite data p0,t0,p1,t1 (gather_hermite)
 *    _hn : hermite data plus n0,dn0,n1,dn1 (gather_hermite)
 *
 *  The occluded_x variants return true as soon as the Intersector
 *  reports a hit and false if no candidate produced one. */
template<int M, int K>
struct CurveNiIntersectorK
{
  typedef CurveNi<M> Primitive;
  typedef Vec3vf<M> Vec3vfM;
  typedef LinearSpace3<Vec3vfM> LinearSpace3vfM;
  typedef CurvePrecalculationsK<K> Precalculations;

private:

  /*! Prefetches the vertices of the next candidates still set in 'mask':
   *  the lowest set lane goes to L1, the one after it to L2. 'mask' is
   *  taken by value so the caller's traversal mask is not modified. The
   *  bit scans are only executed on a non-zero word; scanning an empty
   *  mask (last candidate of a block) yields an unspecified index. */
  static __forceinline void prefetch_next_curves(const CurveGeometry* geom, const Primitive& prim, const size_t N, size_t mask)
  {
    if (mask == 0) return;
    const size_t i1 = bscf(mask);
    const unsigned int primID1 = prim.primID(N)[i1];
    geom->prefetchL1_vertices(geom->curve(primID1));

    if (mask == 0) return;
    const size_t i2 = bsf(mask);
    const unsigned int primID2 = prim.primID(N)[i2];
    geom->prefetchL2_vertices(geom->curve(primID2));
  }

public:

  /*! Slab test of ray 'k' of the packet against the bounds of all
   *  curves in 'prim'.
   *
   *  The ray is first moved into the block's quantization frame
   *  ((org-offset)*scale, dir*scale) and then into the per-curve space
   *  stored in bounds_vx/vy/vz. The resulting entry distance of each
   *  lane is written to tNear_o.
   *
   *  \returns mask of lanes whose index is below prim.N and for which
   *           tNear <= tFar holds. */
  static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
  {
    const size_t N = prim.N;

    /* load quantization offset and scale of the block */
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    const Vec3fa offset = *prim.offset(N);
    const float scale = *prim.scale(N);
#else
    /* offset (xyz) and scale are adjacent in memory, one unaligned load fetches both */
    const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
    const Vec3fa offset = Vec3fa(offset_scale);
    const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
#endif

    /* extract ray k and move it into the quantization frame of the block */
    const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
    const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
    const Vec3fa org1 = (ray_org-offset)*scale;
    const Vec3fa dir1 = ray_dir*scale;

    /* per-curve space, one curve per SIMD lane */
    const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
                                vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
                                vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));

    const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
    const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
    const Vec3vfM rcp_dir2 = rcp_safe(dir2);

    /* distances to the six bounding planes */
    const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
    const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
    const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
    const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
    const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
    const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);

    /* entry/exit distances, scaled by (1 -/+ 3 ulp) and clipped to [ray.tnear()[k], ray.tfar[k]] */
    const vfloat<M> round_up  (1.0f+3.0f*float(ulp));
    const vfloat<M> round_down(1.0f-3.0f*float(ulp));
    const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
    const vfloat<M> tFar  = round_up  *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
    tNear_o = tNear;

    /* only lanes that hold a curve (index < N) can be valid */
    return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
  }

  /*! Intersects ray k with all candidate curves of the block (four control points per curve). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));

      prefetch_next_curves(geom,prim,N,mask);

      Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
  }

  /*! Occlusion test of ray k against all candidate curves of the block (four control points per curve). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));

      prefetch_next_curves(geom,prim,N,mask);

      if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
    return false;
  }

  /*! Intersects ray k with all candidate curves of the block (control points plus normals). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

      unsigned int vertexID = geom->curve(primID);
      Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);

      prefetch_next_curves(geom,prim,N,mask);

      Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
  }

  /*! Occlusion test of ray k against all candidate curves of the block (control points plus normals). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

      unsigned int vertexID = geom->curve(primID);
      Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);

      prefetch_next_curves(geom,prim,N,mask);

      if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
    return false;
  }

  /*! Intersects ray k with all candidate curves of the block (hermite data p0,t0,p1,t1). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
      Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
  }

  /*! Occlusion test of ray k against all candidate curves of the block (hermite data p0,t0,p1,t1). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
      if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
    return false;
  }

  /*! Intersects ray k with all candidate curves of the block (hermite data plus n0,dn0,n1,dn1). */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
      Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID));

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
  }

  /*! Occlusion test of ray k against all candidate curves of the block (hermite data plus n0,dn0,n1,dn1). */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
      Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
      if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID)))
        return true;

      /* drop remaining candidates that start beyond the current ray.tfar[k] */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
    return false;
  }
};
/*! Rewrites the four curve vertices (and, for oriented curves, the four
 *  normals) in place so that they describe the same curve in the bezier
 *  basis. The source basis is taken from gtype & GTY_BASIS_MASK; B-spline,
 *  hermite and Catmull-Rom input is converted, any other basis leaves all
 *  arguments untouched. Normals are only converted if
 *  gtype & GTY_SUBTYPE_MASK equals GTY_SUBTYPE_ORIENTED_CURVE. */
__forceinline void convert_to_bezier(const Geometry::GType gtype,
                                     Vec3ff& v0, Vec3ff& v1, Vec3ff& v2, Vec3ff& v3,
                                     Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3)
{
  const Geometry::GType curve_basis   = (Geometry::GType)(gtype & Geometry::GTY_BASIS_MASK);
  const Geometry::GType curve_subtype = (Geometry::GType)(gtype & Geometry::GTY_SUBTYPE_MASK);

  const bool from_bspline     = curve_basis == Geometry::GTY_BASIS_BSPLINE;
  const bool from_hermite     = curve_basis == Geometry::GTY_BASIS_HERMITE;
  const bool from_catmull_rom = curve_basis == Geometry::GTY_BASIS_CATMULL_ROM;

  /* nothing to convert for any other basis, neither vertices nor normals */
  if (!(from_bspline || from_hermite || from_catmull_rom))
    return;

  /* vertices */
  BezierCurveT<Vec3ff> vtx;
  if      (from_bspline) convert(BSplineCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  else if (from_hermite) convert(HermiteCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  else                   convert(CatmullRomCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  v0 = vtx.v0; v1 = vtx.v1; v2 = vtx.v2; v3 = vtx.v3;

  /* normals exist only for oriented curves */
  if (curve_subtype != Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
    return;

  BezierCurveT<Vec3fa> nrm;
  if      (from_bspline) convert(BSplineCurveT<Vec3fa>(n0,n1,n2,n3),nrm);
  else if (from_hermite) convert(HermiteCurveT<Vec3fa>(n0,n1,n2,n3),nrm);
  else                   convert(CatmullRomCurveT<Vec3fa>(n0,n1,n2,n3),nrm);
  n0 = nrm.v0; n1 = nrm.v1; n2 = nrm.v2; n3 = nrm.v3;
}
/*! Rewrites the four curve vertices in place so that they describe the
 *  same curve in the bezier basis. The source basis is taken from
 *  gtype & GTY_BASIS_MASK; B-spline, hermite and Catmull-Rom input is
 *  converted, any other basis leaves the vertices untouched. */
__forceinline void convert_to_bezier(const Geometry::GType gtype, Vec3ff& v0, Vec3ff& v1, Vec3ff& v2, Vec3ff& v3)
{
  const Geometry::GType curve_basis = (Geometry::GType)(gtype & Geometry::GTY_BASIS_MASK);

  const bool from_bspline     = curve_basis == Geometry::GTY_BASIS_BSPLINE;
  const bool from_hermite     = curve_basis == Geometry::GTY_BASIS_HERMITE;
  const bool from_catmull_rom = curve_basis == Geometry::GTY_BASIS_CATMULL_ROM;

  /* nothing to convert for any other basis */
  if (!(from_bspline || from_hermite || from_catmull_rom))
    return;

  BezierCurveT<Vec3ff> vtx;
  if      (from_bspline) convert(BSplineCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  else if (from_hermite) convert(HermiteCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  else                   convert(CatmullRomCurveT<Vec3ff>(v0,v1,v2,v3),vtx);
  v0 = vtx.v0; v1 = vtx.v1; v2 = vtx.v2; v3 = vtx.v3;
}
}
}

View File

@@ -0,0 +1,278 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
template<int M>
struct CurveNiMB
{
/* Primitive type descriptor of CurveNiMB, derived from PrimitiveType.
   Only declarations are given here; the member functions are defined
   elsewhere. Each size query receives a raw pointer to a block ('This'). */
struct Type : public PrimitiveType {
const char* name() const;
size_t sizeActive(const char* This) const;
size_t sizeTotal(const char* This) const;
size_t getBytes(const char* This) const;
};
/* descriptor instance shared by all CurveNiMB<M> blocks of the same M */
static Type type;
public:
/* Returns the maximum number of curves one block can store, which is
   the template parameter M. */
static __forceinline size_t max_size() { return M; }
/* Returns the number of blocks required to store N curves, i.e. N/M
   rounded up. */
static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
/* Returns the number of bytes needed to store N curves: every completely
   filled block takes sizeof(CurveNiMB) bytes, a trailing partially filled
   block with r curves only takes the 6+37*r+24 bytes it actually uses. */
static __forceinline size_t bytes(size_t N)
{
  static_assert(sizeof(CurveNiMB) == 6+37*M+24, "internal data layout issue");
  const size_t full_blocks = N/M;
  const size_t remainder   = N%M;
  size_t total = full_blocks*sizeof(CurveNiMB);
  if (remainder != 0)
    total += 6+37*remainder+24;
  return total;
}
public:
/*! Default constructor. Performs no initialization; the block content
 *  is written by fillMB(). */
__forceinline CurveNiMB () {}
/*! Fills this block with up to M motion-blurred curves taken from
 *  prims[begin .. _end).
 *
 *  \param prims       list of primitive references
 *  \param begin       index of the first primitive to store; advanced by the
 *                     number of primitives consumed (in/out)
 *  \param _end        one past the last primitive available
 *  \param scene       scene used to look up the geometry of each primitive
 *  \param time_range  time interval the stored bounds are valid for
 *  \returns the merged linear bounds of all stored curves over time_range
 *
 *  All primitives of one block are expected to share the geomID of the
 *  first one (checked by an assert only). */
__forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range)
{
/* number of curves stored in this block */
size_t end = min(begin+M,_end);
N = (unsigned char)(end-begin);
/* block header: shared geometry ID and geometry type */
const unsigned int geomID0 = prims[begin].geomID();
this->geomID(N) = geomID0;
ty = (unsigned char) scene->get(geomID0)->getType();
/* encode all primitives */
/* first pass: merge the linear bounds of all curves of the block */
LBBox3fa lbounds = empty;
for (size_t i=0; i<N; i++)
{
const PrimRefMB& prim = prims[begin+i];
const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
const unsigned int primID = prim.primID();
lbounds.extend(scene->get(geomID)->vlinearBounds(primID,time_range));
}
BBox3fa bounds = lbounds.bounds();
/* calculate offset and scale */
/* the quantization frame starts at the lower corner of the merged bounds;
   the scale is forced to zero when the merged bounds have zero size */
Vec3fa loffset = bounds.lower;
float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
*this->offset(N) = loffset;
*this->scale(N) = lscale;
/* time is stored as offset and reciprocal length of time_range */
this->time_offset(N) = time_range.lower;
this->time_scale(N) = 1.0f/time_range.size();
/* encode all primitives */
/* second pass: store per-curve space and quantized bounds; this loop
   advances the caller's 'begin' */
for (size_t i=0; i<M && begin<end; i++, begin++)
{
const PrimRefMB& prim = prims[begin];
const unsigned int geomID = prim.geomID();
const unsigned int primID = prim.primID();
/* aligned space of the curve, axes scaled by 126 and truncated so that
   each component is a whole number before it is stored as char below */
const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpaceMB(primID,time_range);
const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
/* linear bounds of the curve expressed in the quantized space (shadows the outer 'bounds') */
const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range);
// NOTE: this weird (char) (short) cast works around VS2015 Win32 compiler bug
/* vx axis: components as char; bounds0/bounds1 of 'bounds' as short, lower
   values rounded down, upper values rounded up, both clamped to +/-32767 */
bounds_vx_x(N)[i] = (char) (short) space3.vx.x;
bounds_vx_y(N)[i] = (char) (short) space3.vx.y;
bounds_vx_z(N)[i] = (char) (short) space3.vx.z;
bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f);
bounds_vx_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f);
bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f);
bounds_vx_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.x),-32767.0f,32767.0f);
/* debug check: the clamps above are not expected to change any value */
assert(-32767.0f <= floor(bounds.bounds0.lower.x) && floor(bounds.bounds0.lower.x) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds0.upper.x) && ceil (bounds.bounds0.upper.x) <= 32767.0f);
assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f);
/* vy axis, encoded the same way */
bounds_vy_x(N)[i] = (char) (short) space3.vy.x;
bounds_vy_y(N)[i] = (char) (short) space3.vy.y;
bounds_vy_z(N)[i] = (char) (short) space3.vy.z;
bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f);
bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f);
bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f);
bounds_vy_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.y),-32767.0f,32767.0f);
assert(-32767.0f <= floor(bounds.bounds0.lower.y) && floor(bounds.bounds0.lower.y) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds0.upper.y) && ceil (bounds.bounds0.upper.y) <= 32767.0f);
assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f);
/* vz axis, encoded the same way */
bounds_vz_x(N)[i] = (char) (short) space3.vz.x;
bounds_vz_y(N)[i] = (char) (short) space3.vz.y;
bounds_vz_z(N)[i] = (char) (short) space3.vz.z;
bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f);
bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f);
bounds_vz_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f);
bounds_vz_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.z),-32767.0f,32767.0f);
assert(-32767.0f <= floor(bounds.bounds0.lower.z) && floor(bounds.bounds0.lower.z) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds0.upper.z) && ceil (bounds.bounds0.upper.z) <= 32767.0f);
assert(-32767.0f <= floor(bounds.bounds1.lower.z) && floor(bounds.bounds1.lower.z) <= 32767.0f);
assert(-32767.0f <= ceil (bounds.bounds1.upper.z) && ceil (bounds.bounds1.upper.z) <= 32767.0f);
this->primID(N)[i] = primID;
}
return lbounds;
}
/*! Allocates a leaf for all primitives of the set 'prims', fills it
 *  block by block through fillMB() and returns the node record made of
 *  the encoded leaf reference, the merged linear bounds of all blocks
 *  and the time range of the set. */
template<typename BVH, typename SetMB, typename Allocator>
__forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
{
  /* primitive range to consume; 'cur' is advanced by each fillMB call */
  size_t cur = prims.begin();
  const size_t last = prims.end();

  /* allocate the storage of all blocks and encode the leaf reference */
  const size_t num_blocks = CurveNiMB::blocks(prims.size());
  const size_t num_bytes  = CurveNiMB::bytes(prims.size());
  CurveNiMB* leaf = (CurveNiMB*) alloc.malloc1(num_bytes,BVH::byteAlignment);
  const typename BVH::NodeRef ref = bvh->encodeLeaf((char*)leaf,num_blocks);

  /* fill the blocks and merge their linear bounds */
  LBBox3fa merged = empty;
  for (size_t b=0; b<num_blocks; b++)
    merged.extend(leaf[b].fillMB(prims.prims->data(),cur,last,bvh->scene,prims.time_range));

  return typename BVH::NodeRecordMB4D(ref,merged,prims.time_range);
}
public:
// A full block takes 6+37*M+24 bytes (see the static_assert in bytes()):
// 37 bytes per curve plus 30 bytes shared by the block.
unsigned char ty; // geometry type of the stored curves, written by fillMB()
unsigned char N;  // number of curves stored in this block
unsigned char data[4+37*M+24]; // packed payload, addressed only through the offset accessors of this struct
/*
struct Layout
{
unsigned int geomID;
unsigned int primID[N];
char bounds_vx_x[N];
char bounds_vx_y[N];
char bounds_vx_z[N];
short bounds_vx_lower0[N];
short bounds_vx_upper0[N];
short bounds_vx_lower1[N];
short bounds_vx_upper1[N];
char bounds_vy_x[N];
char bounds_vy_y[N];
char bounds_vy_z[N];
short bounds_vy_lower0[N];
short bounds_vy_upper0[N];
short bounds_vy_lower1[N];
short bounds_vy_upper1[N];
char bounds_vz_x[N];
char bounds_vz_y[N];
char bounds_vz_z[N];
short bounds_vz_lower0[N];
short bounds_vz_upper0[N];
short bounds_vz_lower1[N];
short bounds_vz_upper1[N];
Vec3f offset;
float scale;
float time_offset;
float time_scale;
};
*/
__forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((char*)this+2); }
__forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((char*)this+2); }
__forceinline unsigned int* primID(size_t N) { return (unsigned int*)((char*)this+6); }
__forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((char*)this+6); }
__forceinline char* bounds_vx_x(size_t N) { return (char*)((char*)this+6+4*N); }
__forceinline const char* bounds_vx_x(size_t N) const { return (char*)((char*)this+6+4*N); }
__forceinline char* bounds_vx_y(size_t N) { return (char*)((char*)this+6+5*N); }
__forceinline const char* bounds_vx_y(size_t N) const { return (char*)((char*)this+6+5*N); }
__forceinline char* bounds_vx_z(size_t N) { return (char*)((char*)this+6+6*N); }
__forceinline const char* bounds_vx_z(size_t N) const { return (char*)((char*)this+6+6*N); }
__forceinline short* bounds_vx_lower0(size_t N) { return (short*)((char*)this+6+7*N); }
__forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((char*)this+6+7*N); }
__forceinline short* bounds_vx_upper0(size_t N) { return (short*)((char*)this+6+9*N); }
__forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((char*)this+6+9*N); }
__forceinline short* bounds_vx_lower1(size_t N) { return (short*)((char*)this+6+11*N); }
__forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((char*)this+6+11*N); }
__forceinline short* bounds_vx_upper1(size_t N) { return (short*)((char*)this+6+13*N); }
__forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((char*)this+6+13*N); }
__forceinline char* bounds_vy_x(size_t N) { return (char*)((char*)this+6+15*N); }
__forceinline const char* bounds_vy_x(size_t N) const { return (char*)((char*)this+6+15*N); }
__forceinline char* bounds_vy_y(size_t N) { return (char*)((char*)this+6+16*N); }
__forceinline const char* bounds_vy_y(size_t N) const { return (char*)((char*)this+6+16*N); }
__forceinline char* bounds_vy_z(size_t N) { return (char*)((char*)this+6+17*N); }
__forceinline const char* bounds_vy_z(size_t N) const { return (char*)((char*)this+6+17*N); }
__forceinline short* bounds_vy_lower0(size_t N) { return (short*)((char*)this+6+18*N); }
__forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((char*)this+6+18*N); }
__forceinline short* bounds_vy_upper0(size_t N) { return (short*)((char*)this+6+20*N); }
__forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((char*)this+6+20*N); }
__forceinline short* bounds_vy_lower1(size_t N) { return (short*)((char*)this+6+22*N); }
__forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((char*)this+6+22*N); }
__forceinline short* bounds_vy_upper1(size_t N) { return (short*)((char*)this+6+24*N); }
__forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((char*)this+6+24*N); }
__forceinline char* bounds_vz_x(size_t N) { return (char*)((char*)this+6+26*N); }
__forceinline const char* bounds_vz_x(size_t N) const { return (char*)((char*)this+6+26*N); }
__forceinline char* bounds_vz_y(size_t N) { return (char*)((char*)this+6+27*N); }
__forceinline const char* bounds_vz_y(size_t N) const { return (char*)((char*)this+6+27*N); }
__forceinline char* bounds_vz_z(size_t N) { return (char*)((char*)this+6+28*N); }
__forceinline const char* bounds_vz_z(size_t N) const { return (char*)((char*)this+6+28*N); }
__forceinline short* bounds_vz_lower0(size_t N) { return (short*)((char*)this+6+29*N); }
__forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((char*)this+6+29*N); }
__forceinline short* bounds_vz_upper0(size_t N) { return (short*)((char*)this+6+31*N); }
__forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((char*)this+6+31*N); }
__forceinline short* bounds_vz_lower1(size_t N) { return (short*)((char*)this+6+33*N); }
__forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((char*)this+6+33*N); }
__forceinline short* bounds_vz_upper1(size_t N) { return (short*)((char*)this+6+35*N); }
__forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((char*)this+6+35*N); }
__forceinline Vec3f* offset(size_t N) { return (Vec3f*)((char*)this+6+37*N); }
__forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((char*)this+6+37*N); }
__forceinline float* scale(size_t N) { return (float*)((char*)this+6+37*N+12); }
__forceinline const float* scale(size_t N) const { return (float*)((char*)this+6+37*N+12); }
__forceinline float& time_offset(size_t N) { return *(float*)((char*)this+6+37*N+16); }
__forceinline const float& time_offset(size_t N) const { return *(float*)((char*)this+6+37*N+16); }
__forceinline float& time_scale(size_t N) { return *(float*)((char*)this+6+37*N+20); }
__forceinline const float& time_scale(size_t N) const { return *(float*)((char*)this+6+37*N+20); }
__forceinline char* end(size_t N) { return (char*)this+6+37*N+24; }
__forceinline const char* end(size_t N) const { return (char*)this+6+37*N+24; }
};
/*! out-of-class definition of the static type descriptor of every CurveNiMB<M> */
template<int M>
typename CurveNiMB<M>::Type CurveNiMB<M>::type;

/*! names for the M = 4 and M = 8 instantiations */
using Curve4iMB = CurveNiMB<4>;
using Curve8iMB = CurveNiMB<8>;
}

View File

@@ -0,0 +1,531 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "curveNi_mb.h"
#include "../subdiv/linear_bezier_patch.h"
#include "roundline_intersector.h"
#include "coneline_intersector.h"
#include "curve_intersector_ribbon.h"
#include "curve_intersector_oriented.h"
#include "curve_intersector_sweep.h"
namespace embree
{
namespace isa
{
template<int M>
struct CurveNiMBIntersector1
{
/* shorthand types shared by all routines of this intersector */
using Primitive = CurveNiMB<M>;
using Vec3vfM = Vec3vf<M>;
using LinearSpace3vfM = LinearSpace3<Vec3vfM>;
using Precalculations = CurvePrecalculations1;
/*! Conservative ray/box test against all curve slots of one block.
 *  For every slot the ray is moved into the slot's stored space, the stored
 *  bounds at time 0 and time 1 are blended with the ray's time, and a slab
 *  test is performed.
 *  \param ray      ray to test (org, dir, time(), tnear() and tfar are read)
 *  \param prim     block of curves
 *  \param tNear_o  receives the per-slot entry distance
 *  \return mask of slots with index < prim.N whose interval is non-empty */
static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
{
const size_t N = prim.N;
/* block-wide offset and scale; the non-SYCL path fetches the four contiguous
 * floats (offset.xyz, scale) with one unaligned load and splats lane 3 as scale */
#if __SYCL_DEVICE_ONLY__
const Vec3f offset = *prim.offset(N);
const float scale = *prim.scale(N);
#else
const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
const Vec3fa offset = Vec3fa(offset_scale);
const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
#endif
/* step 1: ray into the block's offset/scale space */
const Vec3fa org1 = (ray.org-offset)*scale;
const Vec3fa dir1 = ray.dir*scale;
/* step 2: per-slot linear space built from the nine char arrays
 * (the char/short overloads of vfloat<M>::load are defined elsewhere; presumably they widen to float) */
const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
const Vec3vfM rcp_dir2 = rcp_safe(dir2);
/* step 3: ray time relative to the block's time range, then blend each bound
 * as b0 + ltime*(b1-b0) between its time-0 and time-1 value */
const vfloat<M> ltime = (ray.time()-prim.time_offset(N))*prim.time_scale(N);
const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
/* step 4: slab distances per axis */
const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
/* scale factors of 1 +/- 3 ulp; applied below they push tNear down and tFar up
 * (for positive distances) so rounding cannot reject a touching box */
const vfloat<M> round_up (1.0f+3.0f*float(ulp));
const vfloat<M> round_down(1.0f-3.0f*float(ulp));
/* entry = largest per-axis minimum clipped by ray.tnear(), exit = smallest per-axis maximum clipped by ray.tfar */
const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
tNear_o = tNear;
/* only lanes below the stored curve count can report a hit
 * (vint<M>(step) presumably yields the lane indices 0,1,2,...) */
return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
}
/*! Intersects the ray with every curve of the block that passes the box test.
 *  The four curve vertices are gathered at ray.time() and handed to
 *  Intersector together with an Epilog built for the curve. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff cp0,cp1,cp2,cp3;
    geom->gather(cp0,cp1,cp2,cp3,geom->curve(primID),ray.time());
    Intersector().intersect(pre,ray,context,geom,primID,cp0,cp1,cp2,cp3,Epilog(ray,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_t: returns true as soon as Intersector
 *  reports a hit for one curve of the block, false if none does. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff cp0,cp1,cp2,cp3;
    geom->gather(cp0,cp1,cp2,cp3,geom->curve(primID),ray.time());
    const bool hit = Intersector().intersect(pre,ray,context,geom,primID,cp0,cp1,cp2,cp3,Epilog(ray,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_t, but obtains the curve through
 *  CurveGeometry::getNormalOrientedCurve (evaluated for ray.org and
 *  ray.time()) and passes the resulting surface to Intersector. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    const Surface curve = geom->getNormalOrientedCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray.org,primID,ray.time());
    Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_n: returns true as soon as Intersector
 *  reports a hit for one normal-oriented curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    const Surface curve = geom->getNormalOrientedCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray.org,primID,ray.time());
    const bool hit = Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_t, but fetches the curve through
 *  CurveGeometry::gather_hermite (p0,t0,p1,t1 at ray.time()) before
 *  calling Intersector. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff p0,t0,p1,t1;
    geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
    Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_h: returns true as soon as Intersector
 *  reports a hit for one hermite curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff p0,t0,p1,t1;
    geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
    const bool hit = Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_n, but obtains the curve through
 *  CurveGeometry::getNormalOrientedHermiteCurve (evaluated for ray.org and
 *  ray.time()). */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    const Surface curve = geom->getNormalOrientedHermiteCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray.org,primID,ray.time());
    Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_hn: returns true as soon as Intersector
 *  reports a hit for one normal-oriented hermite curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar)))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    const Surface curve = geom->getNormalOrientedHermiteCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray.org,primID,ray.time());
    const bool hit = Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
};
template<int M, int K>
struct CurveNiMBIntersectorK
{
/* shorthand types shared by all routines of this intersector */
using Primitive = CurveNiMB<M>;
using Vec3vfM = Vec3vf<M>;
using LinearSpace3vfM = LinearSpace3<Vec3vfM>;
using Precalculations = CurvePrecalculationsK<K>;
/*! Conservative ray/box test of ray k of a ray packet against all curve
 *  slots of one block. Same procedure as the single-ray version: move the ray
 *  into each slot's stored space, blend the stored time-0/time-1 bounds with
 *  the ray's time and run a slab test.
 *  \param ray      ray packet (org, dir, time(), tnear() and tfar of lane k are read)
 *  \param k        index of the ray inside the packet
 *  \param prim     block of curves
 *  \param tNear_o  receives the per-slot entry distance
 *  \return mask of slots with index < prim.N whose interval is non-empty */
static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
{
const size_t N = prim.N;
/* block-wide offset and scale; the non-SYCL path fetches the four contiguous
 * floats (offset.xyz, scale) with one unaligned load and splats lane 3 as scale */
#if __SYCL_DEVICE_ONLY__
const Vec3f offset = *prim.offset(N);
const float scale = *prim.scale(N);
#else
const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
const Vec3fa offset = Vec3fa(offset_scale);
const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
#endif
/* extract ray k from the packet */
const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
/* step 1: ray into the block's offset/scale space */
const Vec3fa org1 = (ray_org-offset)*scale;
const Vec3fa dir1 = ray_dir*scale;
/* step 2: per-slot linear space built from the nine char arrays
 * (the char/short overloads of vfloat<M>::load are defined elsewhere; presumably they widen to float) */
const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
const Vec3vfM rcp_dir2 = rcp_safe(dir2);
/* step 3: ray time relative to the block's time range, then blend each bound
 * as b0 + ltime*(b1-b0) between its time-0 and time-1 value */
const vfloat<M> ltime = (ray.time()[k]-prim.time_offset(N))*prim.time_scale(N);
const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
/* step 4: slab distances per axis */
const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
/* scale factors of 1 +/- 3 ulp; applied below they push tNear down and tFar up
 * (for positive distances) so rounding cannot reject a touching box */
const vfloat<M> round_up (1.0f+3.0f*float(ulp));
const vfloat<M> round_down(1.0f-3.0f*float(ulp));
/* entry = largest per-axis minimum clipped by tnear of ray k, exit = smallest per-axis maximum clipped by tfar of ray k */
const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
tNear_o = tNear;
/* only lanes below the stored curve count can report a hit
 * (vint<M>(step) presumably yields the lane indices 0,1,2,...) */
return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
}
/*! Intersects ray k of the packet with every curve of the block that passes
 *  the box test. The four curve vertices are gathered at the time of ray k
 *  and handed to Intersector together with an Epilog built for the curve. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar[k] */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff cp0,cp1,cp2,cp3;
    geom->gather(cp0,cp1,cp2,cp3,geom->curve(primID),ray.time()[k]);
    Intersector().intersect(pre,ray,k,context,geom,primID,cp0,cp1,cp2,cp3,Epilog(ray,k,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_t for ray k: returns true as soon as
 *  Intersector reports a hit for one curve of the block, false otherwise. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar[k] after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff cp0,cp1,cp2,cp3;
    geom->gather(cp0,cp1,cp2,cp3,geom->curve(primID),ray.time()[k]);
    const bool hit = Intersector().intersect(pre,ray,k,context,geom,primID,cp0,cp1,cp2,cp3,Epilog(ray,k,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_t, but obtains the curve through
 *  CurveGeometry::getNormalOrientedCurve, evaluated for the origin and time
 *  of ray k, and passes the resulting surface to Intersector. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar[k] */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    /* origin of ray k pulled out of the packet */
    const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Surface curve = geom->getNormalOrientedCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray_org,primID,ray.time()[k]);
    Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_n for ray k: returns true as soon as
 *  Intersector reports a hit for one normal-oriented curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar[k] after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    /* origin of ray k pulled out of the packet */
    const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Surface curve = geom->getNormalOrientedCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray_org,primID,ray.time()[k]);
    const bool hit = Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_t, but fetches the curve through
 *  CurveGeometry::gather_hermite (p0,t0,p1,t1 at the time of ray k) before
 *  calling Intersector. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar[k] */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff p0,t0,p1,t1;
    geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
    Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_h for ray k: returns true as soon as
 *  Intersector reports a hit for one hermite curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar[k] after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    Vec3ff p0,t0,p1,t1;
    geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
    const bool hit = Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
/*! Like intersect_n, but obtains the curve through
 *  CurveGeometry::getNormalOrientedHermiteCurve, evaluated for the origin and
 *  time of ray k. */
template<typename Intersector, typename Epilog>
static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; after each curve test keep only slots whose
     box entry distance is still within the current ray.tfar[k] */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(normal.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    /* origin of ray k pulled out of the packet */
    const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Surface curve = geom->getNormalOrientedHermiteCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray_org,primID,ray.time()[k]);
    Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
  }
}
/*! Occlusion variant of intersect_hn for ray k: returns true as soon as
 *  Intersector reports a hit for one normal-oriented hermite curve of the block. */
template<typename Intersector, typename Epilog>
static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
{
  typedef typename Intersector::SourceCurve3ff SrcCurve3ff;
  typedef typename Intersector::SourceCurve3fa SrcCurve3fa;
  typedef TensorLinearCubicBezierSurface3fa Surface;

  /* box test of all slots at once */
  vfloat<M> boxEntry;
  const vbool<M> boxHit = intersect(ray,k,prim,boxEntry);
  const size_t count = prim.N;

  /* visit surviving slots; re-filter against ray.tfar[k] after every miss */
  for (size_t todo = movemask(boxHit); todo != 0; todo &= movemask(boxEntry <= vfloat<M>(ray.tfar[k])))
  {
    const size_t slot = bscf(todo);
    STAT3(shadow.trav_prims,1,1,1);
    const unsigned int geomID = prim.geomID(count);
    const unsigned int primID = prim.primID(count)[slot];
    const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);

    /* origin of ray k pulled out of the packet */
    const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Surface curve = geom->getNormalOrientedHermiteCurve<SrcCurve3ff,SrcCurve3fa,Surface>(context,ray_org,primID,ray.time()[k]);
    const bool hit = Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
    if (hit)
      return true;
  }
  return false;
}
};
}
}

View File

@@ -0,0 +1,101 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "curveNi.h"
namespace embree
{
/*! Curve block that extends CurveNi<M> with a copy of the four vertices of
 *  every stored curve, placed directly behind the CurveNi data. */
template<int M>
struct CurveNv : public CurveNi<M>
{
  using CurveNi<M>::N;

  /*! primitive type descriptor; the member functions are only declared here */
  struct Type : public PrimitiveType {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /*! largest number of curves a single block stores */
  static __forceinline size_t max_size() { return M; }

  /*! number of blocks needed for N curves (N/M rounded up) */
  static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }

  /*! number of bytes needed for N curves: full blocks take sizeof(CurveNv),
   *  a trailing partial block only takes the bytes of its used slots */
  static __forceinline size_t bytes(size_t N)
  {
    static_assert(sizeof(CurveNv) == 22+25*M+4*16*M, "internal data layout issue");
    const size_t fullBlocks = N/M;
    const size_t rest = N%M;
    const size_t partial = (rest != 0) ? (22 + 25*rest + 4*16*rest) : 0;
    return fullBlocks*sizeof(CurveNv) + partial;
  }

public:

  /*! default constructor, leaves the block uninitialized */
  __forceinline CurveNv () {}

  /*! copies the four vertices of up to M curves, starting at prims[begin],
   *  into this block; 'begin' is read but not advanced here */
  __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
  {
    const size_t stop = min(begin+M,_end);
    const size_t count = stop-begin;

    for (size_t slot=0; slot<count; slot++)
    {
      const PrimRef& ref = prims[begin+slot];
      const unsigned int geomID = ref.geomID();
      const unsigned int primID = ref.primID();
      CurveGeometry* mesh = (CurveGeometry*) scene->get(geomID);
      const unsigned vtxID = mesh->curve(primID);

      /* the curve uses four consecutive vertices starting at vtxID */
      Vec3fa* const dst = this->vertices(slot,count);
      for (unsigned v=0; v<4; v++)
        Vec3fa::storeu(&dst[v],mesh->vertex(vtxID+v));
    }
  }

  /*! creates a leaf for the primitives of 'set'; returns BVH::emptyNode for
   *  an empty set and delegates to CurveNi<M>::createLeaf for oriented curves
   *  and for curves with hermite basis */
  template<typename BVH, typename Allocator>
  __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
  {
    if (set.size() == 0)
      return BVH::emptyNode;

    /* the geometry of the first primitive decides whether to fall back to CurveNi */
    unsigned int geomID = prims[set.begin()].geomID();
    if (bvh->scene->get(geomID)->getCurveType() == Geometry::GTY_SUBTYPE_ORIENTED_CURVE ||
        bvh->scene->get(geomID)->getCurveBasis() == Geometry::GTY_BASIS_HERMITE)
    {
      return CurveNi<M>::createLeaf(bvh,prims,set,alloc);
    }

    size_t cursor = set.begin();
    size_t numBlocks = CurveNv::blocks(set.size());
    size_t leafBytes = CurveNv::bytes(set.size());
    CurveNv* leaves = (CurveNv*) alloc.malloc1(leafBytes,BVH::byteAlignment);

    /* per block: store the vertices first (does not move cursor), then let
       CurveNi<M>::fill write its part (it takes cursor by reference and
       presumably advances it to the next block) */
    size_t blk = 0;
    while (blk < numBlocks)
    {
      leaves[blk].CurveNv<M>::fill(prims,cursor,set.end(),bvh->scene);
      leaves[blk].CurveNi<M>::fill(prims,cursor,set.end(),bvh->scene);
      ++blk;
    }
    return bvh->encodeLeaf((char*)leaves,numBlocks);
  }

public:

  /*! room for 4 vertices of 16 bytes for each of the M slots */
  unsigned char data[4*16*M];

  /*! pointer to the four vertices of curve i in a block that stores N curves;
   *  the vertex array starts where the CurveNi data for N curves ends */
  __forceinline Vec3fa* vertices(size_t i, size_t N)
  {
    Vec3fa* const first = (Vec3fa*)CurveNi<M>::end(N);
    return first+4*i;
  }

  __forceinline const Vec3fa* vertices(size_t i, size_t N) const
  {
    Vec3fa* const first = (Vec3fa*)CurveNi<M>::end(N);
    return first+4*i;
  }
};
/*! out-of-class definition of the static type descriptor of every CurveNv<M> */
template<int M>
typename CurveNv<M>::Type CurveNv<M>::type;

/*! names for the blocks that hold up to 4 and up to 8 curves */
using Curve4v = CurveNv<4>;
using Curve8v = CurveNv<8>;
}

View File

@@ -0,0 +1,181 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "curveNv.h"
#include "curveNi_intersector.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for CurveNv blocks. The box test is inherited from
 *  CurveNiIntersector1<M>; the curve vertices are read from the copy stored
 *  inside the block instead of being fetched from the geometry. */
template<int M>
struct CurveNvIntersector1 : public CurveNiIntersector1<M>
{
  typedef CurveNv<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with every curve of the block that passes the box test.
   *  \param pre      per-ray precalculations, forwarded to Intersector
   *  \param ray      ray/hit record; ray.tfar is re-read after every curve test
   *  \param context  query context, used to look up the geometry
   *  \param prim     block of curves with embedded vertices */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
      const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
      const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
      const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
      const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);

      /* prefetch the vertices of the next one or two candidate curves.
         The bit scan runs only when a candidate is left: scanning a zero
         mask has no defined result on every target (bscf is defined
         elsewhere; presumably it wraps a bit-scan-forward instruction). */
      if (mask) {
        size_t mask1 = mask;
        const size_t i1 = bscf(mask1);
        /* [0] and [4] are 64 bytes apart, i.e. start and one-past-end of the
           four 16-byte vertices, so both touched cache lines are requested */
        prefetchL1(&prim.vertices(i1,N)[0]);
        prefetchL1(&prim.vertices(i1,N)[4]);
        if (mask1) {
          const size_t i2 = bsf(mask1);
          prefetchL2(&prim.vertices(i2,N)[0]);
          prefetchL2(&prim.vertices(i2,N)[4]);
        }
      }

      Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));

      /* drop candidates whose box entry distance is beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
  }

  /*! Occlusion variant of intersect_t.
   *  \return true as soon as Intersector reports a hit for one curve of the
   *          block, false if no curve is hit */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
      const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
      const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
      const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
      const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);

      /* prefetch the vertices of the next one or two candidate curves; the
         bit scan is kept inside the guard so it never sees a zero mask */
      if (mask) {
        size_t mask1 = mask;
        const size_t i1 = bscf(mask1);
        prefetchL1(&prim.vertices(i1,N)[0]);
        prefetchL1(&prim.vertices(i1,N)[4]);
        if (mask1) {
          const size_t i2 = bsf(mask1);
          prefetchL2(&prim.vertices(i2,N)[0]);
          prefetchL2(&prim.vertices(i2,N)[4]);
        }
      }

      if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
        return true;

      /* drop candidates whose box entry distance is beyond the current ray.tfar */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar));
    }
    return false;
  }
};
/*! Leaf intersector for CurveNv<M> primitives that processes lane k of a
 *  K-wide ray packet.
 *
 *  CurveNiIntersectorK<M,K>::intersect produces a mask of candidate curves
 *  and fills tNear. Each candidate has its four control points loaded from
 *  the primitive and is passed to the Intersector, with an Epilog built from
 *  the ray, k, context, geomID and primID. */
template<int M, int K>
struct CurveNvIntersectorK : public CurveNiIntersectorK<M,K>
{
  typedef CurveNv<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Runs the Intersector on every candidate curve of prim for ray lane k.
   *  After each curve, candidates whose tNear is larger than the current
   *  ray.tfar[k] are removed from the remaining work. */
  template<typename Intersector, typename Epilog>
  static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(normal.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
      const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
      const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
      const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
      const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);

      /* prefetch the vertices of the next one or two candidates; the mask is
         only bit-scanned when it is non-empty, as scanning zero is not
         well defined on every platform */
      if (mask) {
        size_t mask1 = mask;
        const size_t i1 = bscf(mask1);
        prefetchL1(&prim.vertices(i1,N)[0]);
        prefetchL1(&prim.vertices(i1,N)[4]);
        if (mask1) {
          const size_t i2 = bsf(mask1);
          prefetchL2(&prim.vertices(i2,N)[0]);
          prefetchL2(&prim.vertices(i2,N)[4]);
        }
      }

      Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));

      /* drop candidates that start behind the current far distance */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
  }

  /*! Same traversal as intersect_t, but returns true as soon as the
   *  Intersector reports a hit for any candidate; returns false when no
   *  candidate reports one. */
  template<typename Intersector, typename Epilog>
  static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, RayQueryContext* context, const Primitive& prim)
  {
    vfloat<M> tNear;
    vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);

    const size_t N = prim.N;
    size_t mask = movemask(valid);
    while (mask)
    {
      const size_t i = bscf(mask);
      STAT3(shadow.trav_prims,1,1,1);
      const unsigned int geomID = prim.geomID(N);
      const unsigned int primID = prim.primID(N)[i];
      const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
      const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
      const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
      const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
      const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);

      /* prefetch the vertices of the next one or two candidates; the mask is
         only bit-scanned when it is non-empty */
      if (mask) {
        size_t mask1 = mask;
        const size_t i1 = bscf(mask1);
        prefetchL1(&prim.vertices(i1,N)[0]);
        prefetchL1(&prim.vertices(i1,N)[4]);
        if (mask1) {
          const size_t i2 = bsf(mask1);
          prefetchL2(&prim.vertices(i2,N)[0]);
          prefetchL2(&prim.vertices(i2,N)[4]);
        }
      }

      if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
        return true;

      /* drop candidates that start behind the current far distance */
      mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
    }
    return false;
  }
};
}
}

View File

@@ -0,0 +1,98 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../subdiv/bezier_curve.h"
#include "../common/primref.h"
#include "bezier_hair_intersector.h"
#include "bezier_ribbon_intersector.h"
#include "bezier_curve_intersector.h"
#include "oriented_curve_intersector.h"
#include "../bvh/node_intersector1.h"
// FIXME: this file seems to be a duplicate of curve_intersector_virtual.h
namespace embree
{
namespace isa
{
/*! Single-ray leaf intersector that dispatches through a table of curve
 *  intersectors. The value stored in the first byte of the leaf is cast to
 *  RTCGeometryType and used as the index into the vtbl of the
 *  VirtualCurvePrimitive referenced by This->leafIntersector. */
struct VirtualCurveIntersector1
{
  typedef unsigned char Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Forwards the intersection query for a single leaf (num must be 1) to
   *  the table entry selected by the leaf's type byte. tray and lazy_node
   *  are not used. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];
    entry.intersect<1>(&pre,&ray,context,prim);
  }

  /*! Forwards the occlusion query for a single leaf (num must be 1) to the
   *  table entry selected by the leaf's type byte and returns its result.
   *  tray and lazy_node are not used. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];
    return entry.occluded<1>(&pre,&ray,context,prim);
  }
};
/*! Ray-packet variant of the table-dispatching curve leaf intersector. The
 *  value stored in the first byte of the leaf is cast to RTCGeometryType and
 *  used as the index into the vtbl of the VirtualCurvePrimitive referenced by
 *  This->leafIntersector. Every query is issued one ray lane at a time. */
template<int K>
struct VirtualCurveIntersectorK
{
  typedef unsigned char Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects every lane set in valid_i with the leaf (num must be 1).
   *  lazy_node is not used. */
  static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];

    /* visit the active lanes from the lowest set bit upwards */
    for (size_t lanes = movemask(valid_i); lanes != 0; )
    {
      const size_t k = bscf(lanes);
      entry.intersect<K>(&pre,&ray,k,context,prim);
    }
  }

  /*! Tests every lane set in valid_i for occlusion by the leaf (num must be
   *  1) and returns a mask with the lanes for which the table entry reported
   *  occlusion. lazy_node is not used. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];

    vbool<K> valid_o = false;
    for (size_t lanes = movemask(valid_i); lanes != 0; )
    {
      const size_t k = bscf(lanes);
      if (entry.occluded<K>(&pre,&ray,k,context,prim))
        set(valid_o, k);
    }
    return valid_o;
  }

  /*! Intersects ray lane k with the leaf (num must be 1). lazy_node is not
   *  used. */
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];
    entry.intersect<K>(&pre,&ray,k,context,prim);
  }

  /*! Tests ray lane k for occlusion by the leaf (num must be 1) and returns
   *  the result of the table entry. lazy_node is not used. */
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType type = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurvePrimitive* const table = (VirtualCurvePrimitive*) This->leafIntersector;
    VirtualCurvePrimitive::Intersectors& entry = table->vtbl[type];
    return entry.occluded<K>(&pre,&ray,k,context,prim);
  }
};
}
}

View File

@@ -0,0 +1,170 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
/*! Hit record for up to M curve segments found by the distance based curve
 *  intersector. The constructor stores the per-segment values; finalize()
 *  converts them into the values returned by uv(), t() and Ng(). */
template<typename NativeCurve3fa, int M>
struct DistanceCurveHit
{
  __forceinline DistanceCurveHit() {}

  /*! Stores the valid mask, the per-segment U, V and T values, the index i
   *  of the first segment of this batch, the segment count N and the curve. */
  __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
                                 const NativeCurve3fa& curve3D)
    : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}

  /*! Computes vu = (lane index + U + i) / N and copies V and T into vv and vt. */
  __forceinline void finalize()
  {
    const float rcpN = 1.0f/float(N);
    const vfloat<M> segment_u = vfloat<M>(step)+U+vfloat<M>(float(i));
    vu = segment_u*rcpN;
    vv = V;
    vt = T;
  }

  /*! Accessors for a single lane; only meaningful after finalize(). */
  __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const {
    /* the curve derivative at the hit parameter is reported as normal */
    return curve3D.eval_du(vu[i]);
  }

  /* values passed to the constructor */
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> T;
  int i, N;
  NativeCurve3fa curve3D;

  /* valid mask and the values written by finalize() */
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
};
/*! Specialization of DistanceCurveHit for a single segment (M = 1); the
 *  accessors take no lane index. */
template<typename NativeCurve3fa>
struct DistanceCurveHit<NativeCurve3fa,1>
{
  enum { M = 1 };

  __forceinline DistanceCurveHit() {}

  /*! Stores the valid mask, the U, V and T values, the segment index i, the
   *  segment count N and the curve. */
  __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
                                 const NativeCurve3fa& curve3D)
    : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}

  /*! Computes vu = (lane index + U + i) / N and copies V and T into vv and vt. */
  __forceinline void finalize()
  {
    const float rcpN = 1.0f/float(N);
    const vfloat<M> segment_u = vfloat<M>(step)+U+vfloat<M>(float(i));
    vu = segment_u*rcpN;
    vv = V;
    vt = T;
  }

  /*! Accessors; only meaningful after finalize(). */
  __forceinline Vec2f uv () const { return Vec2f(vu,vv); }
  __forceinline float t () const { return vt; }
  /* the curve derivative at the hit parameter is reported as normal */
  __forceinline Vec3fa Ng() const { return curve3D.eval_du(vu); }

  /* values passed to the constructor */
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> T;
  int i, N;
  NativeCurve3fa curve3D;

  /* valid mask and the values written by finalize() */
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
};
/*! Approximate single-ray curve intersector. The curve is cut into
 *  geom->tessellationRate segments which are tested W at a time: for every
 *  segment the point closest to the ray (in the 2D ray space) is computed and
 *  accepted as a hit if its distance is within the interpolated radius. */
template<typename NativeCurve3fa, int W = VSIZEX>
struct DistanceCurve1Intersector1
{
  using vboolx = vbool<W>;
  using vintx = vint<W>;
  using vfloatx = vfloat<W>;
  using Vec4vfx = Vec4vf<W>;

  /*! Tests the curve given by control points v0..v3 against the ray and
   *  calls epilog(valid,hit) for every batch containing a hit. Returns true
   *  if any epilog call returned true. primID is not used here. */
  template<typename Epilog>
  __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                               const Epilog& epilog)
  {
    const int N = geom->tessellationRate;

    /* bring the (possibly widened) control points into ray space */
    const NativeCurve3fa inputCurve(v0,v1,v2,v3);
    const NativeCurve3fa curve3D = enlargeRadiusToMinWidth(context,geom,ray.org,inputCurve);
    const NativeCurve3fa curve2D = curve3D.xfm_pr(pre.ray_space,ray.org);

    bool ishit = false;

    /* first batch: segments 0 .. W-1 */
    {
      vboolx active = vfloatx(step) < vfloatx(float(N));
      const Vec4vfx segBegin = curve2D.template eval0<W>(0,N);
      const Vec4vfx segEnd = curve2D.template eval1<W>(0,N);

      /* closest point of the segment to the ray-space origin in x/y */
      const Vec4vfx axis = segEnd-segBegin;
      const Vec4vfx toOrigin = -segBegin;
      const vfloatx num = madd(toOrigin.x,axis.x,toOrigin.y*axis.y);
      const vfloatx den = madd(axis.x,axis.x,axis.y*axis.y);
      const vfloatx u = clamp(num*rcp(den),vfloatx(zero),vfloatx(one));
      const Vec4vfx closest = madd(u,axis,segBegin);
      const vfloatx t = closest.z*pre.depth_scale;
      const vfloatx dist2 = madd(closest.x,closest.x,closest.y*closest.y);
      const vfloatx radius = closest.w;
      const vfloatx radius2 = radius*radius;
      active &= (dist2 <= radius2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));

      /* reject hits too close to the ray origin (self intersections) */
      if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
        active &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*radius*pre.depth_scale;

      /* report the batch */
      if (unlikely(any(active))) {
        DistanceCurveHit<NativeCurve3fa,W> hit(active,u,0.0f,t,0,N,curve3D);
        ishit = ishit | epilog(active,hit);
      }
    }

    /* remaining batches, W segments per iteration */
    if (unlikely(W < N))
    {
      for (int first=W; first<N; first+=W)
      {
        vboolx active = vintx(first)+vintx(step) < vintx(N);
        const Vec4vfx segBegin = curve2D.template eval0<W>(first,N);
        const Vec4vfx segEnd = curve2D.template eval1<W>(first,N);

        /* closest point of the segment to the ray-space origin in x/y */
        const Vec4vfx axis = segEnd-segBegin;
        const Vec4vfx toOrigin = -segBegin;
        const vfloatx num = madd(toOrigin.x,axis.x,toOrigin.y*axis.y);
        const vfloatx den = madd(axis.x,axis.x,axis.y*axis.y);
        const vfloatx u = clamp(num*rcp(den),vfloatx(zero),vfloatx(one));
        const Vec4vfx closest = madd(u,axis,segBegin);
        const vfloatx t = closest.z*pre.depth_scale;
        const vfloatx dist2 = madd(closest.x,closest.x,closest.y*closest.y);
        const vfloatx radius = closest.w;
        const vfloatx radius2 = radius*radius;
        active &= (dist2 <= radius2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));

        /* reject hits too close to the ray origin (self intersections) */
        if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
          active &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*radius*pre.depth_scale;

        /* report the batch */
        if (unlikely(any(active))) {
          DistanceCurveHit<NativeCurve3fa,W> hit(active,u,0.0f,t,first,N,curve3D);
          ishit = ishit | epilog(active,hit);
        }
      }
    }
    return ishit;
  }
};
}
}

View File

@@ -0,0 +1,425 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "curve_intersector_precalculations.h"
#include "curve_intersector_sweep.h"
#include "../subdiv/linear_bezier_patch.h"
#define DBG(x)
namespace embree
{
namespace isa
{
/*! Intersects a ray with a TensorLinearCubicBezierSurface3fa. The surface is
 *  transformed into ray space and reduced to a 2D surface (curve2d); the
 *  solvers below search for parameters (u,v) at which curve2d evaluates to
 *  (approximately) zero and report them through the Epilog functor.
 *  N is the number of u-segments generated per subdivision step of
 *  solve_newton_raphson_recursion and V the SIMD width used to test them. */
template<typename Ray, typename Epilog, int N = VSIZEX-1, int V = VSIZEX>
struct TensorLinearCubicBezierSurfaceIntersector
{
/* references to the caller's ray space, ray and epilog; the caller must keep them alive */
const LinearSpace3fa& ray_space;
Ray& ray;
/* the surface in world space and its 2D projection in ray space */
TensorLinearCubicBezierSurface3fa curve3d;
TensorLinearCubicBezierSurface2fa curve2d;
/* tolerance used for the convergence test and the slab tests */
float eps;
const Epilog& epilog;
/* set to true once any epilog call returned true */
bool isHit;
/*! Transforms the surface into ray space, builds the 2D surface from its L and R
 *  curves, and derives eps as 8 ulp times the largest absolute coordinate of the
 *  2D bounds. */
__forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog)
: ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false)
{
const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org);
curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R));
const BBox2fa b2 = curve2d.bounds();
eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper)));
}
/*! Returns the parameter at which the line through (u0,p0) and (u1,p1) is zero.
 *  For p0 == p1 the whole interval [u0,u1] is returned if the value is zero and
 *  an empty interval otherwise. */
__forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1)
{
if (p1 == p0) {
if (p0 == 0.0f) return Interval1f(u0,u1);
else return Interval1f(empty);
}
const float t = -p0/(p1-p0);
const float tt = lerp(u0,u1,t);
return Interval1f(tt);
}
/*! Interval version: extends u by u0 and/or u1 when the respective interval has
 *  bounds of different sign, and by the zero crossings of the lines connecting the
 *  lower bounds and the upper bounds when their signs differ. */
__forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u)
{
if (sign(p0.lower) != sign(p0.upper)) u.extend(u0);
if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower));
if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper));
if (sign(p1.lower) != sign(p1.upper)) u.extend(u1);
}
/*! Computes a parameter interval from the control values of a cubic curve by
 *  applying solve_linear to all six pairs of control values (placed at 0, 1/3,
 *  2/3 and 1); the result is clamped to [0,1]. */
__forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve)
{
Interval1f u = empty;
solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u);
solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u);
solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u);
solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u);
solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u);
solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u);
return intersect(u,Interval1f(0.0f,1.0f));
}
/*! Same as above for a linear curve with its two control values at 0 and 1. */
__forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve)
{
Interval1f v = empty;
solve_linear(0.0f,1.0f,curve.v0,curve.v1,v);
return intersect(v,Interval1f(0.0f,1.0f));
}
/*! Recursive solver: cu and cv are the parameter ranges of the original surface
 *  that curve2 covers. The ranges are narrowed by clipping in v and u, and split
 *  in u when more than one root is possible, until both are smaller than 1E-4. */
__forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2)
{
/* nothing to do if the bounds of the patch do not contain the origin */
BBox2fa bounds = curve2.bounds();
if (bounds.upper.x < 0.0f) return;
if (bounds.upper.y < 0.0f) return;
if (bounds.lower.x > 0.0f) return;
if (bounds.lower.y > 0.0f) return;
/* parameter ranges are small enough: report the center as hit candidate */
if (max(cu.size(),cv.size()) < 1E-4f)
{
const float u = cu.center();
const float v = cv.center();
TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
const float t = curve_z.eval(u,v);
if (ray.tnear() <= t && t <= ray.tfar) {
const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
BezierCurveHit hit(t,u,v,Ng);
isHit |= epilog(hit);
}
return;
}
/* clip in v-direction */
const Vec2fa dv = curve2.axis_v();
const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
if (!curve0v.hasRoot()) return;
const Interval1f v = bezier_clipping(curve0v);
if (isEmpty(v)) return;
TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v);
cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
/* clip in u-direction if there is at most one root, otherwise split */
const Vec2fa du = curve2.axis_u();
const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du);
CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v();
int roots = curve0u.maxRoots();
if (roots == 0) return;
if (roots == 1)
{
const Interval1f u = bezier_clipping(curve0u);
if (isEmpty(u)) return;
TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u);
cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper));
solve_bezier_clipping(cu,cv,curve2b);
return;
}
TensorLinearCubicBezierSurface2fa curve2l, curve2r;
curve2a.split_u(curve2l,curve2r);
solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l);
solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r);
}
/*! Entry point of the bezier clipping solver; starts with the full [0,1]x[0,1]
 *  parameter domain and returns whether any hit was accepted. */
__forceinline bool solve_bezier_clipping()
{
solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d);
return isHit;
}
/*! Starts the iteration of solve_newton_raphson_loop at the center of cu x cv,
 *  using the inverse Jacobian evaluated at that center. */
__forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv)
{
Vec2fa uv(cu.center(),cv.center());
const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y);
const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y);
const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J);
}
/*! Iterates uv -= rcp_J*f(uv) for at most 200 steps with a fixed rcp_J (the
 *  Jacobian is not re-evaluated). As soon as the residual f evaluated before a
 *  step is below eps, the updated uv is validated and reported, and the function
 *  returns. cu, cv, dfdu and dfdv are not read here. */
__forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J)
{
Vec2fa uv = uv_in;
for (size_t i=0; i<200; i++)
{
const Vec2fa f = curve2d.eval(uv.x,uv.y);
const Vec2fa duv = rcp_J*f;
uv -= duv;
if (max(abs(f.x),abs(f.y)) < eps)
{
const float u = uv.x;
const float v = uv.y;
if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs
if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs
/* hit distance is obtained by evaluating the surface projected onto row2 of the ray space */
const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
const float t = curve_z.eval(u,v);
if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs
const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
BezierCurveHit hit(t,u,v,Ng);
isHit |= epilog(hit);
return;
}
}
}
/*! Narrows cv by bezier clipping of the surface restricted to cu x cv. The
 *  clipped interval is widened by 0.1 on both sides and clamped to [0,1] before
 *  it is applied. Returns false if no root can lie in the range. */
__forceinline bool clip_v(BBox1f& cu, BBox1f& cv)
{
const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower);
const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv);
LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
if (!curve0v.hasRoot()) return false;
Interval1f v = bezier_clipping(curve0v);
if (isEmpty(v)) return false;
v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
return true;
}
/*! Handles the range cu x cv; cv is narrowed in place. Returns true when the
 *  range needs no further processing (no root possible, or the newton raphson
 *  iteration was run) and false when the test K subset x failed and very_small
 *  is not set, in which case the caller subdivides further. */
__forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv)
{
/* perform bezier clipping in v-direction to get tight v-bounds */
TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv);
const Vec2fa dv = curve2.axis_v();
const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
if (unlikely(!curve0v.hasRoot())) return true;
Interval1f v = bezier_clipping(curve0v);
if (unlikely(isEmpty(v))) return true;
v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
curve2 = curve2.clip_v(v);
cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
/* perform one newton raphson iteration */
Vec2fa c(cu.center(),cv.center());
Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv);
const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
const Vec2fa c1 = c - rcp_J*f;
/* calculate bounds of derivatives */
const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds();
const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds();
/* calculate krawczyk test */
LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f),
Interval1f(0.0f), Interval1f(1.0f));
LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x),
Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y));
const LinearSpace2<Vec2f> rcp_J2(rcp_J);
const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2);
const Vec2<Interval1f> x(cu,cv);
const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c)));
/* test if there is no solution */
const Vec2<Interval1f> KK = intersect(K,x);
if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true;
/* exit if convergence cannot get proven, but terminate if we are very small */
if (unlikely(!subset(K,x) && !very_small)) return false;
/* solve using newton raphson iteration if convergence is guaranteed */
solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J);
return true;
}
/*! Clips cv and runs a single newton raphson solve on the range; does not subdivide. */
__forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv)
{
if (!clip_v(cu,cv)) return;
return solve_newton_raphson(cu,cv);
}
/*! Subdivision driver using an explicit stack of at most 4 entries. Each entry
 *  holds a u/v range and a bit mask of the u-segments of that range that still
 *  have to be processed. The initial range skips the pop step by jumping directly
 *  to the 'entry' label inside the loop body. */
__forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv)
{
unsigned int sptr = 0;
const unsigned int stack_size = 4;
unsigned int mask_stack[stack_size];
BBox1f cu_stack[stack_size];
BBox1f cv_stack[stack_size];
goto entry;
/* terminate if stack is empty */
while (sptr)
{
/* pop from stack */
{
sptr--;
size_t mask = mask_stack[sptr];
cu = cu_stack[sptr];
cv = cv_stack[sptr];
/* take the lowest pending segment; the entry stays on the stack while segments remain */
const size_t i = bscf(mask);
mask_stack[sptr] = mask;
if (mask) sptr++; // there are still items on the stack
/* process next element recurse into each hit curve segment */
const float u0 = float(i+0)*(1.0f/(N));
const float u1 = float(i+1)*(1.0f/(N));
const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1));
cu = cui;
}
#if 0
solve_newton_raphson_no_recursion(cu,cv);
continue;
#else
/* we assume convergence for small u ranges and verify using krawczyk */
if (cu.size() < 1.0f/6.0f) {
/* very_small makes solve_krawczyk always return true; it is also set when the stack is full */
const bool very_small = cu.size() < 0.001f || sptr >= stack_size;
if (solve_krawczyk(very_small,cu,cv)) {
continue;
}
}
#endif
entry:
/* split the curve into N segments in u-direction */
unsigned int mask = 0;
for (int i=0; i<N;)
{
/* vsplit_u receives i by name and is presumably what advances it, as this loop has no increment of its own -- TODO confirm */
int i0 = i;
vbool<V> valid = true;
TensorLinearCubicBezierSurface<Vec2vf<V>> subcurves = curve2d.clip_v(cv).template vsplit_u<V>(valid,cu,i,N);
/* slabs test in u-direction */
Vec2vf<V> ndv = cross(subcurves.axis_v());
BBox<vfloat<V>> boundsv = subcurves.template vxfm<V>(ndv).bounds();
valid &= boundsv.lower <= eps;
valid &= boundsv.upper >= -eps;
if (none(valid)) continue;
/* slabs test in v-direction */
Vec2vf<V> ndu = cross(subcurves.axis_u());
BBox<vfloat<V>> boundsu = subcurves.template vxfm<V>(ndu).bounds();
valid &= boundsu.lower <= eps;
valid &= boundsu.upper >= -eps;
if (none(valid)) continue;
mask |= movemask(valid) << i0;
}
if (!mask) continue;
/* push valid segments to stack */
assert(sptr < stack_size);
mask_stack [sptr] = mask;
cu_stack [sptr] = cu;
cv_stack [sptr] = cv;
sptr++;
}
}
/*! Entry point of the newton raphson solver; starts with the full [0,1]x[0,1]
 *  parameter domain and returns whether any hit was accepted. */
__forceinline bool solve_newton_raphson_main()
{
BBox1f vu(0.0f,1.0f);
BBox1f vv(0.0f,1.0f);
solve_newton_raphson_recursion(vu,vv);
return isHit;
}
};
/*! Single-ray intersector for normal oriented curves. The curve is turned
 *  into a TensorLinearCubicBezierSurface3fa which is intersected with the
 *  newton raphson solver of TensorLinearCubicBezierSurfaceIntersector
 *  (solve_bezier_clipping() of the same class is an alternative solver). */
template<template<typename Ty> class SourceCurve, int N = VSIZEX-1, int V = VSIZEX>
struct OrientedCurve1Intersector1
{
  typedef SourceCurve<Vec3ff> SourceCurve3ff;
  typedef SourceCurve<Vec3fa> SourceCurve3fa;

  __forceinline OrientedCurve1Intersector1() {}

  /*! Both arguments are ignored. */
  __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {}

  /*! Builds the surface from the center curve (v0i..v3i, after
   *  enlargeRadiusToMinWidth) and the normal curve (n0i..n3i) and intersects
   *  it. Returns the result of the solver. primID is not used here. */
  template<typename Ray, typename Epilog>
  __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
                               const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
                               const Epilog& epilog) const
  {
    STAT3(normal.trav_prims,1,1,1);

    const SourceCurve3ff centerInput(v0i,v1i,v2i,v3i);
    const SourceCurve3fa normals(n0i,n1i,n2i,n3i);
    const SourceCurve3ff center = enlargeRadiusToMinWidth(context,geom,ray.org,centerInput);
    const TensorLinearCubicBezierSurface3fa surface = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normals);

    TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog,N,V> solver(pre.ray_space,ray,surface,epilog);
    return solver.solve_newton_raphson_main();
  }

  /*! Intersects an already constructed surface. Returns the result of the
   *  solver. context, geom and primID are not used here. */
  template<typename Ray, typename Epilog>
  __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const
  {
    STAT3(normal.trav_prims,1,1,1);

    TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog,N,V> solver(pre.ray_space,ray,curve,epilog);
    return solver.solve_newton_raphson_main();
  }
};
/*! Ray-packet intersector for normal oriented curves. Lane k of the packet
 *  is extracted into a Ray1 and intersected with the newton raphson solver
 *  of TensorLinearCubicBezierSurfaceIntersector using its default N and V
 *  (solve_bezier_clipping() of the same class is an alternative solver). */
template<template<typename Ty> class SourceCurve, int K>
struct OrientedCurve1IntersectorK
{
  typedef SourceCurve<Vec3ff> SourceCurve3ff;
  typedef SourceCurve<Vec3fa> SourceCurve3fa;

  /*! View of a single lane of a ray packet. org, dir and the near distance
   *  are copied; tfar is a reference to ray.tfar[k] of the packet. */
  struct Ray1
  {
    __forceinline Ray1(RayK<K>& ray, size_t k)
      : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]),
        dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]),
        _tnear(ray.tnear()[k]),
        tfar(ray.tfar[k]) {}

    Vec3fa org;
    Vec3fa dir;
    float _tnear;
    float& tfar;

    __forceinline float& tnear() { return _tnear; }
    __forceinline const float& tnear() const { return _tnear; }
  };

  /*! Builds the surface from the center curve (v0i..v3i, after
   *  enlargeRadiusToMinWidth) and the normal curve (n0i..n3i) and intersects
   *  it with lane k of vray. Returns the result of the solver. primID is not
   *  used here. */
  template<typename Epilog>
  __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
                               const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
                               const Epilog& epilog)
  {
    STAT3(normal.trav_prims,1,1,1);

    Ray1 ray(vray,k);
    const SourceCurve3ff centerInput(v0i,v1i,v2i,v3i);
    const SourceCurve3fa normals(n0i,n1i,n2i,n3i);
    const SourceCurve3ff center = enlargeRadiusToMinWidth(context,geom,ray.org,centerInput);
    const TensorLinearCubicBezierSurface3fa surface = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normals);

    TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog> solver(pre.ray_space[k],ray,surface,epilog);
    return solver.solve_newton_raphson_main();
  }

  /*! Intersects an already constructed surface with lane k of vray. Returns
   *  the result of the solver. context, geom and primID are not used here. */
  template<typename Epilog>
  __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const TensorLinearCubicBezierSurface3fa& curve,
                               const Epilog& epilog)
  {
    STAT3(normal.trav_prims,1,1,1);

    Ray1 ray(vray,k);
    TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog> solver(pre.ray_space[k],ray,curve,epilog);
    return solver.solve_newton_raphson_main();
  }
};
}
}

View File

@@ -0,0 +1,49 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/geometry.h"
namespace embree
{
namespace isa
{
/*! Per-ray data shared by the single-ray curve intersectors. */
struct CurvePrecalculations1
{
  float depth_scale;        //!< reciprocal length of the ray direction
  LinearSpace3fa ray_space; //!< transposed frame built around the ray direction

  /*! Leaves both members uninitialized. */
  __forceinline CurvePrecalculations1() {}

  /*! Computes depth_scale and ray_space for the ray; ptr is ignored. */
  __forceinline CurvePrecalculations1(const Ray& ray, const void* ptr)
  {
    depth_scale = rsqrt(dot(ray.dir,ray.dir));

    /* frame around the normalized direction, with vz scaled once more by
       the reciprocal length */
    LinearSpace3fa basis = frame(depth_scale*ray.dir);
    basis.vz *= depth_scale;

    ray_space = basis.transposed();
  }
};
/*! Per-ray data shared by the ray-packet curve intersectors. */
template<int K>
struct CurvePrecalculationsK
{
  vfloat<K> depth_scale;       //!< reciprocal length of each ray direction
  LinearSpace3fa ray_space[K]; //!< transposed frame per ray; only written for valid lanes

  /*! Computes depth_scale for all lanes and ray_space for the lanes set in valid. */
  __forceinline CurvePrecalculationsK(const vbool<K>& valid, const RayK<K>& ray)
  {
    depth_scale = rsqrt(dot(ray.dir,ray.dir));

    for (size_t lanes = movemask(valid); lanes != 0; )
    {
      const size_t k = bscf(lanes);
      const Vec3fa dir_k = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);

      /* frame around the normalized direction, with vz scaled once more by
         the reciprocal length */
      LinearSpace3fa basis_k = frame(depth_scale[k]*dir_k);
      basis_k.vz *= depth_scale[k];

      ray_space[k] = basis_k.transposed();
    }
  }
};
}
}

View File

@@ -0,0 +1,225 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "quad_intersector.h"
#include "curve_intersector_precalculations.h"
#define Bezier1Intersector1 RibbonCurve1Intersector1
#define Bezier1IntersectorK RibbonCurve1IntersectorK
namespace embree
{
namespace isa
{
/*! Hit record for up to M ribbon segments. The constructor stores the
 *  per-segment values; finalize() converts them into the values returned by
 *  uv(), t() and Ng(). */
template<typename NativeCurve3ff, int M>
struct RibbonHit
{
  __forceinline RibbonHit() {}

  /*! Stores the valid mask, the per-segment U, V and T values, the index i
   *  of the first segment of this batch, the segment count N and the curve. */
  __forceinline RibbonHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
                          const NativeCurve3ff& curve3D)
    : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}

  /*! Computes vu = (lane index + U + i) / N and copies V and T into vv and vt. */
  __forceinline void finalize()
  {
    const float rcpN = 1.0f/float(N);
    const vfloat<M> segment_u = vfloat<M>(step)+U+vfloat<M>(float(i));
    vu = segment_u*rcpN;
    vv = V;
    vt = T;
  }

  /*! Accessors for a single lane; only meaningful after finalize(). The
   *  curve derivative at the hit parameter is reported as normal. */
  __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const { return curve3D.eval_du(vu[i]); }

  /*! Accessors for all lanes at once; only meaningful after finalize(). */
  __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); }
  __forceinline vfloat<M> t () const { return vt; }
  __forceinline Vec3vf<M> Ng() const { return (Vec3vf<M>) curve3D.template veval_du<M>(vu); }

  /* values passed to the constructor */
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> T;
  int i, N;
  NativeCurve3ff curve3D;

  /* valid mask and the values written by finalize() */
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
};
/* squared distance of point p0 to the line p1->p2, returned as the pair
   (numerator, denominator): first = det(p2-p1,p1-p0)^2 and
   second = |p2-p1|^2; the division is left to the caller */
template<int M>
__forceinline std::pair<vfloat<M>,vfloat<M>> sqr_point_line_distance(const Vec2vf<M>& p0, const Vec2vf<M>& p1, const Vec2vf<M>& p2)
{
  const Vec2vf<M> line = p2-p1;
  const vfloat<M> area = det(line,p1-p0);
  const vfloat<M> len2 = dot(line,line);
  return std::make_pair(area*area,len2);
}
/* culling against a cylinder: a lane passes when the squared distance of p0
   to the line p1->p2 is at most r^2; the test is done without division by
   comparing the numerator against r^2 times the denominator */
template<int M>
__forceinline vbool<M> cylinder_culling_test(const Vec2vf<M>& p0, const Vec2vf<M>& p1, const Vec2vf<M>& p2, const vfloat<M>& r)
{
  const std::pair<vfloat<M>,vfloat<M>> dist = sqr_point_line_distance<M>(p0,p1,p2);
  const vfloat<M> r2 = r*r;
  return dist.first <= r2*dist.second;
}
/*! Intersects a ray with a curve rendered as a ribbon of N segments, processed M
 *  segments at a time. The curve is transformed into ray space, where every
 *  segment that passes the cylinder culling test becomes a quad: both end points
 *  are offset by +/- their radius (the w component) along the normalized 2D
 *  perpendicular of the curve derivative. The quads are tested with
 *  intersect_quad_backface_culling using origin zero and direction (0,0,1).
 *  For every batch with hits, epilog(valid,hit) is called with v remapped to
 *  2*v-1. Returns true if any epilog call returned true.
 *  ray_dir is not read by this function. */
template<int M = VSIZEX, typename NativeCurve3ff, typename Epilog>
__forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar,
const LinearSpace3fa& ray_space, const float& depth_scale,
const NativeCurve3ff& curve3D, const int N,
const Epilog& epilog)
{
/* transform control points into ray space */
const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org);
/* threshold below which a derivative is treated as zero: 4 ulp times the largest absolute control point component */
float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3)));
int i=0;
bool ishit = false;
/* outside of SYCL device code the first M segments are handled by this block and the loop below only runs if segments remain */
#if !defined(__SYCL_DEVICE_ONLY__)
{
/* evaluate the bezier curve */
vbool<M> valid = vfloat<M>(step) < vfloat<M>(float(N));
const Vec4vf<M> p0 = curve2D.template eval0<M>(0,N);
const Vec4vf<M> p1 = curve2D.template eval1<M>(0,N);
valid &= cylinder_culling_test<M>(zero,Vec2vf<M>(p0.x,p0.y),Vec2vf<M>(p1.x,p1.y),max(p0.w,p1.w));
if (any(valid))
{
Vec3vf<M> dp0dt = curve2D.template derivative0<M>(0,N);
Vec3vf<M> dp1dt = curve2D.template derivative1<M>(0,N);
/* fall back to the segment direction where the derivative is below eps */
dp0dt = select(reduce_max(abs(dp0dt)) < vfloat<M>(eps),Vec3vf<M>(p1-p0),dp0dt);
dp1dt = select(reduce_max(abs(dp1dt)) < vfloat<M>(eps),Vec3vf<M>(p1-p0),dp1dt);
/* 2D perpendiculars of the derivatives at both segment ends */
const Vec3vf<M> n0(dp0dt.y,-dp0dt.x,0.0f);
const Vec3vf<M> n1(dp1dt.y,-dp1dt.x,0.0f);
const Vec3vf<M> nn0 = normalize(n0);
const Vec3vf<M> nn1 = normalize(n1);
/* quad corners: end points offset by +radius (lp) and -radius (up) along the perpendicular */
const Vec3vf<M> lp0 = madd(p0.w,nn0,Vec3vf<M>(p0));
const Vec3vf<M> lp1 = madd(p1.w,nn1,Vec3vf<M>(p1));
const Vec3vf<M> up0 = nmadd(p0.w,nn0,Vec3vf<M>(p0));
const Vec3vf<M> up1 = nmadd(p1.w,nn1,Vec3vf<M>(p1));
vfloat<M> vu,vv,vt;
vbool<M> valid0 = intersect_quad_backface_culling<M>(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt);
if (any(valid0))
{
/* ignore self intersections */
if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
vfloat<M> r = lerp(p0.w, p1.w, vu);
valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale;
}
if (any(valid0))
{
/* remap v to 2*v-1 */
vv = madd(2.0f,vv,vfloat<M>(-1.0f));
RibbonHit<NativeCurve3ff,M> bhit(valid0,vu,vv,vt,0,N,curve3D);
ishit |= epilog(bhit.valid,bhit);
}
}
}
i += M;
}
if (unlikely(i < N))
#endif
{
/* process SIMD-size many segments per iteration */
for (; i<N; i+=M)
{
/* evaluate the bezier curve */
vbool<M> valid = vint<M>(i)+vint<M>(step) < vint<M>(N);
const Vec4vf<M> p0 = curve2D.template eval0<M>(i,N);
const Vec4vf<M> p1 = curve2D.template eval1<M>(i,N);
valid &= cylinder_culling_test<M>(zero,Vec2vf<M>(p0.x,p0.y),Vec2vf<M>(p1.x,p1.y),max(p0.w,p1.w));
if (none(valid)) continue;
Vec3vf<M> dp0dt = curve2D.template derivative0<M>(i,N);
Vec3vf<M> dp1dt = curve2D.template derivative1<M>(i,N);
/* fall back to the segment direction where the derivative is below eps */
dp0dt = select(reduce_max(abs(dp0dt)) < vfloat<M>(eps),Vec3vf<M>(p1-p0),dp0dt);
dp1dt = select(reduce_max(abs(dp1dt)) < vfloat<M>(eps),Vec3vf<M>(p1-p0),dp1dt);
/* 2D perpendiculars of the derivatives at both segment ends */
const Vec3vf<M> n0(dp0dt.y,-dp0dt.x,0.0f);
const Vec3vf<M> n1(dp1dt.y,-dp1dt.x,0.0f);
const Vec3vf<M> nn0 = normalize(n0);
const Vec3vf<M> nn1 = normalize(n1);
/* quad corners: end points offset by +radius (lp) and -radius (up) along the perpendicular */
const Vec3vf<M> lp0 = madd(p0.w,nn0,Vec3vf<M>(p0));
const Vec3vf<M> lp1 = madd(p1.w,nn1,Vec3vf<M>(p1));
const Vec3vf<M> up0 = nmadd(p0.w,nn0,Vec3vf<M>(p0));
const Vec3vf<M> up1 = nmadd(p1.w,nn1,Vec3vf<M>(p1));
vfloat<M> vu,vv,vt;
vbool<M> valid0 = intersect_quad_backface_culling<M>(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt);
if (any(valid0))
{
/* ignore self intersections */
if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
vfloat<M> r = lerp(p0.w, p1.w, vu);
valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale;
}
if (any(valid0))
{
/* remap v to 2*v-1 */
vv = madd(2.0f,vv,vfloat<M>(-1.0f));
RibbonHit<NativeCurve3ff,M> bhit(valid0,vu,vv,vt,i,N,curve3D);
ishit |= epilog(bhit.valid,bhit);
}
}
}
}
return ishit;
}
/*! Single-ray front end for intersect_ribbon. NativeCurve selects the
 *  curve basis, M the SIMD width handed to intersect_ribbon. */
template<template<typename Ty> class NativeCurve, int M = VSIZEX>
struct RibbonCurve1Intersector1
{
  typedef NativeCurve<Vec3ff> NativeCurve3ff;

  /*! Builds the curve from its four control points, runs it through
   *  enlargeRadiusToMinWidth and intersects it with the ray using
   *  geom->tessellationRate segments. Hits are reported through epilog;
   *  the return value is whatever intersect_ribbon returns.
   *  primID is not used by this function. */
  template<typename Ray, typename Epilog>
  __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                               const Epilog& epilog)
  {
    NativeCurve3ff controlCurve(v0,v1,v2,v3);
    NativeCurve3ff widenedCurve = enlargeRadiusToMinWidth(context,geom,ray.org,controlCurve);
    const int numSegments = geom->tessellationRate;

    /* ray.tfar is passed straight through (not copied to a local first) */
    return intersect_ribbon<M,NativeCurve3ff>(ray.org,ray.dir,ray.tnear(),ray.tfar,
                                              pre.ray_space,pre.depth_scale,
                                              widenedCurve,numSegments,
                                              epilog);
  }
};
/*! Front end for intersect_ribbon that handles ray k of a K-wide ray
 *  packet. NativeCurve selects the curve basis, M the SIMD width handed
 *  to intersect_ribbon. */
template<template<typename Ty> class NativeCurve, int K, int M = VSIZEX>
struct RibbonCurve1IntersectorK
{
  typedef NativeCurve<Vec3ff> NativeCurve3ff;

  /*! Extracts origin and direction of ray k, builds the curve from its
   *  four control points, runs it through enlargeRadiusToMinWidth and
   *  intersects it using geom->tessellationRate segments. Hits are
   *  reported through epilog. primID is not used by this function. */
  template<typename Epilog>
  __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& ray, size_t k,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                               const Epilog& epilog)
  {
    /* gather the k-th ray out of the packet */
    const Vec3fa org_k(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
    const Vec3fa dir_k(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);

    NativeCurve3ff controlCurve(v0,v1,v2,v3);
    NativeCurve3ff widenedCurve = enlargeRadiusToMinWidth(context,geom,org_k,controlCurve);
    const int numSegments = geom->tessellationRate;

    /* ray.tfar[k] is passed straight through (not copied to a local first) */
    return intersect_ribbon<M,NativeCurve3ff>(org_k,dir_k,ray.tnear()[k],ray.tfar[k],
                                              pre.ray_space[k],pre.depth_scale[k],
                                              widenedCurve,numSegments,
                                              epilog);
  }
};
}
}

View File

@@ -0,0 +1,511 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "cylinder.h"
#include "plane.h"
#include "line_intersector.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
/* upper bound on the number of iterations of the loop in
   intersect_bezier_iterative_jacobian */
static const size_t numJacobianIterations = 5;

/* maximal subdivision depth (and stack size minus one) of the SIMD
   version of intersect_bezier_recursive_jacobian; SYCL device builds
   and AVX builds use 2, all other builds use 3 */
#if (defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)) || defined(__AVX__)
static const size_t numBezierSubdivisions = 2;
#else
static const size_t numBezierSubdivisions = 3;
#endif
/*! Hit record that the sweep-curve intersectors in this file hand to
 *  their epilog. */
struct BezierCurveHit
{
  /*! leaves all members uninitialized */
  __forceinline BezierCurveHit() {}

  /*! hit without an explicit v coordinate; v is set to 0 */
  __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
    : BezierCurveHit(t,u,0.0f,Ng) {}

  /*! fully specified hit */
  __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
    : t(t), u(u), v(v), Ng(Ng) {}

  /*! no post-processing is required for this hit type */
  __forceinline void finalize() {}

  float t;    //!< hit distance along the ray
  float u;    //!< curve parameter of the hit
  float v;    //!< second hit coordinate (0 when not provided)
  Vec3fa Ng;  //!< geometry normal as computed by the intersector
};
/*! Debug replacement for intersect_bezier_iterative_jacobian: reports
 *  lane i of the interval tp directly as a hit, without any iteration.
 *
 *  The hit distance is tp.lower[i]+dt; it is rejected (returns false)
 *  when it lies beyond ray.tfar. The normal is taken from Ng[i] unless
 *  the interval start coincides with the start of h0 (then -dP0du[i] is
 *  used) or with the start of h1 (then +dP3du[i] is used, taking
 *  precedence over h0). h0/h1 are presumably the cap half-plane intervals
 *  computed by the caller -- confirm against the call site.
 *  curve is not used. Returns the result of epilog(hit). */
template<typename NativeCurve3ff, typename Ray, typename Epilog>
__forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
                                                    const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
                                                    const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
                                                    const Epilog& epilog)
{
  const float t_enter = tp.lower[i];
  const float t_hit = t_enter+dt;
  if (t_hit > ray.tfar)
    return false;

  /* pick the normal; the h1 test wins when both intervals start at t_enter */
  Vec3fa Ng_o;
  if (h1.lower[i] == t_enter)
    Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
  else if (h0.lower[i] == t_enter)
    Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
  else
    Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);

  BezierCurveHit hit(t_hit,u[i],Ng_o);
  return epilog(hit);
}
/*! Refines an approximate ray/curve hit (u,t) with at most
 *  numJacobianIterations Newton steps and reports it through epilog.
 *
 *  Each step solves the 2x2 system J * d(u,t) = (f,g), where
 *    f = dot(R,T)         component of R = Q(t)-P(u) along the unit tangent T
 *    g = sqrt(K) - P.w    K = |R|^2 - f^2, i.e. the length of the part of R
 *                         perpendicular to T, minus the w component of the
 *                         curve point (the radius)
 *  and J holds the partial derivatives of f and g with respect to u and t.
 *
 *  \param ray    only ray.dir, ray.tnear() and ray.tfar are read
 *  \param dt     offset added to t before it is tested against the ray
 *                interval and stored in the hit
 *  \param curve  curve to intersect; the ray origin is taken to be zero
 *                here, so the curve has to be expressed relative to it
 *  \param u,t    initial guess (taken by value, updated locally)
 *  \param epilog called with a BezierCurveHit once the iteration converged
 *  \return       result of epilog, or false when the iteration did not
 *                converge or the converged (u,t) is out of range / NaN
 */
template<typename NativeCurve3ff, typename Ray, typename Epilog>
__forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
{
const Vec3fa org = zero;
const Vec3fa dir = ray.dir;
const float length_ray_dir = length(dir);
/* error of curve evaluations is proportional to largest coordinate */
const BBox3ff box = curve.bounds();
const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
for (size_t i=0; i<numJacobianIterations; i++)
{
/* point on the ray and its derivative with respect to t */
const Vec3fa Q = madd(Vec3fa(t),dir,org);
//const Vec3fa dQdu = zero;
const Vec3fa dQdt = dir;
const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
/* point on the curve with first and second derivative with respect to u */
Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
//const Vec3fa dPdt = zero;
/* vector from curve point to ray point and its partial derivatives */
const Vec3fa R = Q-P;
const float len_R = length(R); //reduce_max(abs(R));
const float R_err = max(Q_err,P_err);
const Vec3fa dRdu = /*dQdu*/-dPdu;
const Vec3fa dRdt = dQdt;//-dPdt;
/* unit tangent of the curve and its derivative with respect to u */
const Vec3fa T = normalize(dPdu);
const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
//const Vec3fa dTdt = zero;
const float cos_err = P_err/length(dPdu);
/* Error estimate for dot(R,T):
dot(R,T) = cos(R,T) |R| |T|
= (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
= cos(R,T)*|R|*|T|
+- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
+- cos_error*(|R| + |T|)
+- lower order terms
with cos(R,T) being in [0,1] and |T| = 1 we get:
dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
*/
/* first residual: R has to be perpendicular to the tangent */
const float f = dot(R,T);
const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
const float dfdu = dot(dRdu,T) + dot(R,dTdu);
const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
/* K is the squared length of the part of R perpendicular to T; the
   factor 2 of its derivatives is dropped as it cancels against the
   factor 0.5 of the derivative of sqrt(K) below */
const float K = dot(R,R)-sqr(f);
const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
const float rsqrt_K = rsqrt(K);
/* second residual: perpendicular distance has to equal P.w */
const float g = sqrt(K)-P.w;
const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
/* Newton update: (u,t) -= J^-1 * (f,g) */
const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
const Vec2f dut = rcp(J)*Vec2f(f,g);
const Vec2f ut = Vec2f(u,t) - dut;
u = ut.x; t = ut.y;
/* converged when both residuals (evaluated before the update above) are
   below their error bounds; the reported u,t are the updated values */
if (abs(f) < f_err && abs(g) < g_err)
{
t+=dt;
if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
/* geometry normal is cross(V,U) with U = dPdu + dPdu.w*R and
   V = cross(dPdu,R), R being the normalized direction from the curve
   point to the ray point (this R shadows the outer R) */
const Vec3fa R = normalize(Q-P);
const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
const Vec3fa V = cross(dPdu,R);
BezierCurveHit hit(t,u,cross(V,U));
return epilog(hit);
}
}
/* no convergence within numJacobianIterations steps */
return false;
}
#if !defined(__SYCL_DEVICE_ONLY__)
/*! SIMD ray/curve intersection by recursive subdivision (non-SYCL build).
 *
 *  The parameter range [u0,u1] (initially [0,1]) is split into
 *  vfloatx::size-1 segments that are processed in parallel. Each segment
 *  is enclosed by an outer and an inner cylinder around the line P0-P3.
 *  The ray interval inside the outer cylinder, clipped by the two cap
 *  half-planes and with the inner cylinder interval subtracted, yields up
 *  to two candidate intervals tp0/tp1 per segment. Segments that reached
 *  their termination depth are refined by
 *  intersect_bezier_iterative_jacobian, all others are pushed onto a small
 *  stack and subdivided further.
 *
 *  \param ray    ray.dir, ray.tnear() and ray.tfar are read; ray.tfar is
 *                re-read after each refinement, so the epilog may shorten it
 *  \param dt     offset between the t values computed here and ray t values
 *  \param curve  curve to intersect; the ray origin is taken to be zero
 *                here, so the curve has to be expressed relative to it
 *  \param epilog hit callback forwarded to intersect_bezier_iterative_jacobian
 *  \return       true if any refinement reported a hit
 */
template<typename NativeCurve3ff, typename Ray, typename Epilog>
__forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)
{
/* parameter range and depth of the segment currently being processed */
float u0 = 0.0f;
float u1 = 1.0f;
unsigned int depth = 1;
/* local SIMD types that shadow the global vboolx/vintx/vfloatx inside
   this function */
#if defined(__AVX__)
enum { VSIZEX_ = 8 };
typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
typedef vint8 vintx;
typedef vfloat8 vfloatx;
#else
enum { VSIZEX_ = 4 };
typedef vbool4 vboolx;
typedef vint4 vintx;
typedef vfloat4 vfloatx;
#endif
unsigned int maxDepth = numBezierSubdivisions;
bool found = false;
const Vec3fa org = zero;
const Vec3fa dir = ray.dir;
unsigned int sptr = 0;
const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
/* one entry per subdivision level: the lanes that still have to be
   visited, their entry distances and the parameter range of that level */
struct StackEntry {
vboolx valid;
vfloatx tlower;
float u0;
float u1;
unsigned int depth;
};
StackEntry stack[stack_size];
/* process the root range [0,1] first, skipping the pop below; every
   'continue' further down returns to the loop condition */
goto entry;
/* terminate if stack is empty */
while (sptr)
{
/* pop from stack */
{
sptr--;
vboolx valid = stack[sptr].valid;
const vfloatx tlower = stack[sptr].tlower;
/* drop lanes that start behind the current ray.tfar */
valid &= tlower+dt <= ray.tfar;
if (none(valid)) continue;
u0 = stack[sptr].u0;
u1 = stack[sptr].u1;
depth = stack[sptr].depth;
/* take one lane (presumably the one with the smallest tlower -- see
   select_min) and keep the entry on the stack while lanes remain */
const size_t i = select_min(valid,tlower); clear(valid,i);
stack[sptr].valid = valid;
if (any(valid)) sptr++; // there are still items on the stack
/* process next segment */
const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
u0 = vu0[i+0];
u1 = vu0[i+1];
}
entry:
/* subdivide curve */
/* lane j holds the segment from sample j to sample j+1; P0..P3 are the
   cubic Bezier control points built from the end points and the scaled
   derivatives at both ends */
const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
const Vec4vfx P3 = shift_right_1(P0);
const Vec4vfx dP3du = shift_right_1(dP0du);
const Vec4vfx P1 = P0 + dP0du;
const Vec4vfx P2 = P3 - dP3du;
/* calculate bounding cylinders */
const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
const vfloatx maxr12 = sqrt(max(rr1,rr2));
const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);
const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
/* enlarge the outer and shrink the inner radius slightly; the inner
   radius is clamped to zero */
r_outer = one_plus_ulp*r_outer;
r_inner = max(0.0f,one_minus_ulp*r_inner);
const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
/* the last lane has no following sample and thus no segment */
vboolx valid = true; clear(valid,vfloatx::size-1);
/* intersect with outer cylinder */
BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
if (none(valid)) continue;
/* intersect with cap-planes */
BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
tp = embree::intersect(tp,tc_outer);
BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
tp = embree::intersect(tp,h0);
BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
tp = embree::intersect(tp,h1);
valid &= tp.lower <= tp.upper;
if (none(valid)) continue;
/* clamp and correct u parameter */
u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
/* intersect with inner cylinder */
BBox<vfloatx> tc_inner;
vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
/* at the unstable area we subdivide deeper */
const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);
const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
/* subtract the inner interval from the current hit interval */
BBox<vfloatx> tp0, tp1;
subtract(tp,tc_inner,tp0,tp1);
vboolx valid0 = valid & (tp0.lower <= tp0.upper);
vboolx valid1 = valid & (tp1.lower <= tp1.upper);
if (none(valid0 | valid1)) continue;
/* iterate over all first hits front to back */
/* unstable lanes terminate one level deeper than maxDepth; lanes that
   have not reached their termination depth are kept for recursion */
const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));
vboolx recursion_valid0 = valid0 & (depth < termDepth0);
valid0 &= depth >= termDepth0;
while (any(valid0))
{
const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
//found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
/* re-test the remaining lanes against the current ray.tfar */
valid0 &= tp0.lower+dt <= ray.tfar;
}
valid1 &= tp1.lower+dt <= ray.tfar;
/* iterate over all second hits front to back */
const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
vboolx recursion_valid1 = valid1 & (depth < termDepth1);
valid1 &= depth >= termDepth1;
while (any(valid1))
{
const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
/* lanes are ordered by tp1.lower, but the refinement starts at tp1.upper */
found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
//found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
valid1 &= tp1.lower+dt <= ray.tfar;
}
/* push valid segments to stack */
recursion_valid0 &= tp0.lower+dt <= ray.tfar;
recursion_valid1 &= tp1.lower+dt <= ray.tfar;
const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
if (any(recursion_valid))
{
assert(sptr < stack_size);
stack[sptr].valid = recursion_valid;
/* entry distance per lane: tp0 where the first interval recurses, else tp1 */
stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
stack[sptr].u0 = u0;
stack[sptr].u1 = u1;
stack[sptr].depth = depth+1;
sptr++;
}
}
return found;
}
#else
/*! Scalar ray/curve intersection by recursive bisection (SYCL device build).
 *
 *  The parameter range [0,1] is traversed as a binary tree without storing
 *  nodes: ShortStack encodes the start of the current interval in
 *  short_stack (in units of 1/2^31) and its size in depth (the interval
 *  spans 2^(31-depth) units). An interval is bisected while the curve
 *  segment is not "well behaved" or the cylinder test is unstable, up to
 *  max_depth levels. Otherwise the segment is bounded by an outer and an
 *  inner cylinder and the resulting candidate intervals are refined by
 *  intersect_bezier_iterative_jacobian.
 *
 *  \param ray    ray.dir, ray.tnear() and ray.tfar are read
 *  \param dt     offset between the t values computed here and ray t values
 *  \param curve  curve to intersect; the ray origin is taken to be zero
 *                here, so the curve has to be expressed relative to it
 *  \param epilog hit callback forwarded to intersect_bezier_iterative_jacobian
 *  \return       true if any refinement reported a hit
 */
template<typename NativeCurve3ff, typename Ray, typename Epilog>
__forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)
{
const Vec3fa org = zero;
const Vec3fa dir = ray.dir;
const unsigned int max_depth = 7;
bool found = false;
struct ShortStack
{
/* pushes both children */
/* only the depth changes: the interval keeps its start and halves its size */
__forceinline void push() {
depth++;
}
/* pops next node */
/* advances the start by the size of the current interval; the new depth
   follows from the lowest set bit of the new start */
__forceinline void pop() {
short_stack += (1<<(31-depth));
depth = 31-bsf(short_stack);
}
unsigned int depth = 0;
unsigned int short_stack = 0;
};
ShortStack stack;
do
{
/* parameter range of the current interval; dividing by 2^31 maps
   short_stack to [0,1] */
const float u0 = (stack.short_stack+0*(1<<(31-stack.depth)))/float(0x80000000);
const float u1 = (stack.short_stack+1*(1<<(31-stack.depth)))/float(0x80000000);
/* subdivide bezier curve */
Vec3ff P0, dP0du; curve.eval(u0,P0,dP0du); dP0du = dP0du * (u1-u0);
Vec3ff P3, dP3du; curve.eval(u1,P3,dP3du); dP3du = dP3du * (u1-u0);
const Vec3ff P1 = P0 + dP0du*(1.0f/3.0f);
const Vec3ff P2 = P3 - dP3du*(1.0f/3.0f);
/* check if curve is well behaved, by checking deviation of tangents from straight line */
const Vec3ff W = Vec3ff(P3-P0,0.0f);
const Vec3ff dQ0 = abs(3.0f*(P1-P0) - W);
const Vec3ff dQ1 = abs(3.0f*(P2-P1) - W);
const Vec3ff dQ2 = abs(3.0f*(P3-P2) - W);
const Vec3ff max_dQ = max(dQ0,dQ1,dQ2);
const float m = max(max_dQ.x,max_dQ.y,max_dQ.z); //,max_dQ.w);
const float l = length(Vec3f(W));
const bool well_behaved = m < 0.2f*l;
if (!well_behaved && stack.depth < max_depth) {
stack.push();
continue;
}
/* calculate bounding cylinders */
const float rr1 = sqr_point_to_line_distance(Vec3f(dP0du),Vec3f(P3-P0));
const float rr2 = sqr_point_to_line_distance(Vec3f(dP3du),Vec3f(P3-P0));
const float maxr12 = sqrt(max(rr1,rr2));
const float one_plus_ulp = 1.0f+2.0f*float(ulp);
const float one_minus_ulp = 1.0f-2.0f*float(ulp);
float r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
float r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
/* enlarge the outer and shrink the inner radius slightly; the inner
   radius is clamped to zero */
r_outer = one_plus_ulp*r_outer;
r_inner = max(0.0f,one_minus_ulp*r_inner);
const Cylinder cylinder_outer(Vec3f(P0),Vec3f(P3),r_outer);
const Cylinder cylinder_inner(Vec3f(P0),Vec3f(P3),r_inner);
/* intersect with outer cylinder */
BBox<float> tc_outer; float u_outer0; Vec3fa Ng_outer0; float u_outer1; Vec3fa Ng_outer1;
if (!cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1))
{
stack.pop();
continue;
}
/* intersect with cap-planes */
BBox<float> tp(ray.tnear()-dt,ray.tfar-dt);
tp = embree::intersect(tp,tc_outer);
BBox<float> h0 = HalfPlane(Vec3f(P0),+Vec3f(dP0du)).intersect(org,dir);
tp = embree::intersect(tp,h0);
BBox<float> h1 = HalfPlane(Vec3f(P3),-Vec3f(dP3du)).intersect(org,dir);
tp = embree::intersect(tp,h1);
if (tp.lower > tp.upper)
{
stack.pop();
continue;
}
bool valid = true;
/* clamp and correct u parameter */
u_outer0 = clamp(u_outer0,float(0.0f),float(1.0f));
u_outer1 = clamp(u_outer1,float(0.0f),float(1.0f));
u_outer0 = lerp(u0,u1,u_outer0);
u_outer1 = lerp(u0,u1,u_outer1);
/* intersect with inner cylinder */
BBox<float> tc_inner;
float u_inner0 = zero; Vec3fa Ng_inner0 = zero; float u_inner1 = zero; Vec3fa Ng_inner1 = zero;
const bool valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
/* subtract the inner interval from the current hit interval */
BBox<float> tp0, tp1;
subtract(tp,tc_inner,tp0,tp1);
bool valid0 = valid & (tp0.lower <= tp0.upper);
bool valid1 = valid & (tp1.lower <= tp1.upper);
if (!(valid0 | valid1))
{
stack.pop();
continue;
}
/* at the unstable area we subdivide deeper */
const bool unstable0 = valid0 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner0)) < 0.3f));
const bool unstable1 = valid1 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner1)) < 0.3f));
if ((unstable0 | unstable1) && (stack.depth < max_depth)) {
stack.push();
continue;
}
/* refine the first interval starting at its lower bound */
if (valid0)
found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0,tp0.lower,epilog);
/* the far hit cannot be closer, thus skip if we hit entry already */
valid1 &= tp1.lower+dt <= ray.tfar;
/* iterate over second hit */
/* the refinement of the second interval starts at its upper bound */
if (valid1)
found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1,tp1.upper,epilog);
stack.pop();
/* 0x80000000 is the start value that corresponds to u0 == 1, i.e. the
   whole range [0,1] has been visited */
} while (stack.short_stack != 0x80000000);
return found;
}
#endif
/*! Single-ray front end for intersect_bezier_recursive_jacobian.
 *  NativeCurve selects the curve basis. */
template<template<typename Ty> class NativeCurve>
struct SweepCurve1Intersector1
{
  typedef NativeCurve<Vec3ff> NativeCurve3ff;

  /*! Builds the curve from its four control points, runs it through
   *  enlargeRadiusToMinWidth, re-expresses it relative to a reference
   *  point on the ray and intersects it. Hits are reported through
   *  epilog. pre and primID are not used by this function. */
  template<typename Ray, typename Epilog>
  __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                               const Epilog& epilog)
  {
    STAT3(normal.trav_prims,1,1,1);

    NativeCurve3ff inputCurve(v0,v1,v2,v3);
    NativeCurve3ff worldCurve = enlargeRadiusToMinWidth(context,geom,ray.org,inputCurve);

    /* move ray closer to make intersection stable: dt is the ray
       parameter of the projection of the curve center onto the ray */
    const float centerAlongDir = dot(worldCurve.center()-ray.org,ray.dir);
    const float rcpDirSqr = rcp(dot(ray.dir,ray.dir));
    const float dt = centerAlongDir*rcpDirSqr;

    /* reference point org+dt*dir; w is 0 so the radii stay untouched */
    const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
    const NativeCurve3ff localCurve = worldCurve-ref;
    return intersect_bezier_recursive_jacobian(ray,dt,localCurve,epilog);
  }
};
/*! Front end for intersect_bezier_recursive_jacobian that handles ray k
 *  of a K-wide ray packet. NativeCurve selects the curve basis. */
template<template<typename Ty> class NativeCurve, int K>
struct SweepCurve1IntersectorK
{
  typedef NativeCurve<Vec3ff> NativeCurve3ff;

  /*! Scalar view of ray k of a packet. org, dir and tnear are copies,
   *  tfar is a reference to the packet's tfar[k], so writes to it go
   *  straight into the packet. */
  struct Ray1
  {
    __forceinline Ray1(RayK<K>& ray, size_t k)
      : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]),
        dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]),
        _tnear(ray.tnear()[k]),
        tfar(ray.tfar[k]) {}

    /* tnear is reached through an accessor, tfar is a plain member */
    __forceinline float& tnear() { return _tnear; }
    __forceinline const float& tnear() const { return _tnear; }

    Vec3fa org;
    Vec3fa dir;
    float _tnear;
    float& tfar;
  };

  /*! Builds the curve from its four control points, runs it through
   *  enlargeRadiusToMinWidth, re-expresses it relative to a reference
   *  point on ray k and intersects it. Hits are reported through epilog.
   *  pre and primID are not used by this function. */
  template<typename Epilog>
  __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
                               RayQueryContext* context,
                               const CurveGeometry* geom, const unsigned int primID,
                               const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
                               const Epilog& epilog)
  {
    STAT3(normal.trav_prims,1,1,1);
    Ray1 ray(vray,k);

    NativeCurve3ff inputCurve(v0,v1,v2,v3);
    NativeCurve3ff worldCurve = enlargeRadiusToMinWidth(context,geom,ray.org,inputCurve);

    /* move ray closer to make intersection stable: dt is the ray
       parameter of the projection of the curve center onto the ray */
    const float centerAlongDir = dot(worldCurve.center()-ray.org,ray.dir);
    const float rcpDirSqr = rcp(dot(ray.dir,ray.dir));
    const float dt = centerAlongDir*rcpDirSqr;

    /* reference point org+dt*dir; w is 0 so the radii stay untouched */
    const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
    const NativeCurve3ff localCurve = worldCurve-ref;
    return intersect_bezier_recursive_jacobian(ray,dt,localCurve,epilog);
  }
};
}
}

View File

@@ -0,0 +1,671 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../subdiv/bezier_curve.h"
#include "../builders/primref.h"
#include "curve_intersector_precalculations.h"
#include "../bvh/node_intersector1.h"
#include "../bvh/node_intersector_packet.h"
#include "intersector_epilog.h"
#include "../subdiv/bezier_curve.h"
#include "../subdiv/bspline_curve.h"
#include "../subdiv/hermite_curve.h"
#include "../subdiv/catmullrom_curve.h"
#include "spherei_intersector.h"
#include "disci_intersector.h"
#include "linei_intersector.h"
#include "roundlinei_intersector.h"
#include "conelinei_intersector.h"
#include "curveNi_intersector.h"
#include "curveNv_intersector.h"
#include "curveNi_mb_intersector.h"
#include "curve_intersector_distance.h"
#include "curve_intersector_ribbon.h"
#include "curve_intersector_oriented.h"
#include "curve_intersector_sweep.h"
namespace embree
{
/*! Table of type-erased intersection entry points with one slot per
 *  geometry type (vtbl has Geometry::GTY_END slots). */
struct VirtualCurveIntersector
{
  /* single ray entry points */
  typedef void (*Intersect1Ty)(void* pre, void* ray, RayQueryContext* context, const void* primitive);
  typedef bool (*Occluded1Ty )(void* pre, void* ray, RayQueryContext* context, const void* primitive);

  /* packet entry points: k is the index of the ray inside the packet;
     the signatures are the same for every packet width */
  typedef void (*Intersect4Ty)(void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive);
  typedef bool (*Occluded4Ty) (void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive);
  typedef Intersect4Ty Intersect8Ty;
  typedef Occluded4Ty  Occluded8Ty;
  typedef Intersect4Ty Intersect16Ty;
  typedef Occluded4Ty  Occluded16Ty;

  /*! Entry points of one geometry type. */
  struct Intersectors
  {
    Intersectors() {} // WARNING: Do not zero initialize this, as we otherwise get problems with thread unsafe local static variable initialization (e.g. on VS2013) in curve_intersector_virtual.cpp.

    /* dispatch on the packet width K; only declared here */
    template<int K> void intersect(void* pre, void* ray, RayQueryContext* context, const void* primitive);
    template<int K> bool occluded (void* pre, void* ray, RayQueryContext* context, const void* primitive);
    template<int K> void intersect(void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive);
    template<int K> bool occluded (void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive);

    /* the constructor leaves all of these pointers uninitialized */
    Intersect1Ty  intersect1;
    Occluded1Ty   occluded1;
    Intersect4Ty  intersect4;
    Occluded4Ty   occluded4;
    Intersect8Ty  intersect8;
    Occluded8Ty   occluded8;
    Intersect16Ty intersect16;
    Occluded16Ty  occluded16;
  };

  Intersectors vtbl[Geometry::GTY_END];
};
/* width 1: forward to the single ray entry points */
template<>
__forceinline void VirtualCurveIntersector::Intersectors::intersect<1> (void* pre, void* ray, RayQueryContext* context, const void* primitive)
{
  assert(intersect1);
  intersect1(pre,ray,context,primitive);
}

template<>
__forceinline bool VirtualCurveIntersector::Intersectors::occluded<1> (void* pre, void* ray, RayQueryContext* context, const void* primitive)
{
  assert(occluded1);
  return occluded1(pre,ray,context,primitive);
}

/* width 4: forward to the 4-wide packet entry points */
template<>
__forceinline void VirtualCurveIntersector::Intersectors::intersect<4>(void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(intersect4);
  intersect4(pre,ray,k,context,primitive);
}

template<>
__forceinline bool VirtualCurveIntersector::Intersectors::occluded<4> (void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(occluded4);
  return occluded4(pre,ray,k,context,primitive);
}

#if defined(__AVX__)
/* width 8: only available in AVX builds */
template<>
__forceinline void VirtualCurveIntersector::Intersectors::intersect<8>(void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(intersect8);
  intersect8(pre,ray,k,context,primitive);
}

template<>
__forceinline bool VirtualCurveIntersector::Intersectors::occluded<8> (void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(occluded8);
  return occluded8(pre,ray,k,context,primitive);
}
#endif

#if defined(__AVX512F__)
/* width 16: only available in AVX-512 builds */
template<>
__forceinline void VirtualCurveIntersector::Intersectors::intersect<16>(void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(intersect16);
  intersect16(pre,ray,k,context,primitive);
}

template<>
__forceinline bool VirtualCurveIntersector::Intersectors::occluded<16> (void* pre, void* ray, size_t k, RayQueryContext* context, const void* primitive)
{
  assert(occluded16);
  return occluded16(pre,ray,k,context,primitive);
}
#endif
namespace isa
{
/*! Single-ray leaf intersector that dispatches on the geometry type
 *  stored in the first byte of the primitive. */
struct VirtualCurveIntersector1
{
  typedef unsigned char Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Looks up the entry points for the type read from *prim in the
   *  table behind This->leafIntersector and calls its single ray
   *  intersect function. Expects num == 1; tray and lazy_node are unused. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];
    entry.intersect<1>(&pre,&ray,context,prim);
  }

  /*! Same lookup as intersect, but calls the single ray occluded
   *  function and returns its result. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];
    return entry.occluded<1>(&pre,&ray,context,prim);
  }
};
/*! Leaf intersector for K-wide ray packets that dispatches on the
 *  geometry type stored in the first byte of the primitive. All
 *  functions expect num == 1 and ignore tray and lazy_node. */
template<int K>
struct VirtualCurveIntersectorK
{
  typedef unsigned char Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects every ray whose bit is set in valid_i, one ray at a time. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];

    /* bscf yields the index of a set bit and removes it from the mask */
    for (size_t mask = movemask(valid_i); mask != 0; )
    {
      const size_t k = bscf(mask);
      entry.intersect<K>(&pre,&ray,k,context,prim);
    }
  }

  /*! Tests every ray whose bit is set in valid_i for occlusion, one ray
   *  at a time, and returns the mask of the occluded rays. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];

    vbool<K> valid_o = false;
    for (size_t mask = movemask(valid_i); mask != 0; )
    {
      const size_t k = bscf(mask);
      if (entry.occluded<K>(&pre,&ray,k,context,prim))
        set(valid_o, k);
    }
    return valid_o;
  }

  /*! Intersects ray k of the packet. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];
    entry.intersect<K>(&pre,&ray,k,context,prim);
  }

  /*! Tests ray k of the packet for occlusion. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    assert(num == 1);
    const RTCGeometryType geomType = (RTCGeometryType)(*prim);
    assert(This->leafIntersector);
    VirtualCurveIntersector* const dispatch = (VirtualCurveIntersector*) This->leafIntersector;
    VirtualCurveIntersector::Intersectors& entry = dispatch->vtbl[geomType];
    return entry.occluded<K>(&pre,&ray,k,context,prim);
  }
};
/*! Returns the entry point table of the RoundLinearCurveMi intersectors
 *  (instantiated with <N,true>). The 8-wide and 16-wide slots are only
 *  assigned in AVX and AVX-512 builds respectively and are left
 *  uninitialized otherwise. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors()
{
  typedef VirtualCurveIntersector VCI;
  typedef RoundLinearCurveMiIntersector1<N,true> Single;
  typedef RoundLinearCurveMiIntersectorK<N,4,true> Packet4;

  VCI::Intersectors table;
  table.intersect1 = (VCI::Intersect1Ty) &Single::intersect;
  table.occluded1 = (VCI::Occluded1Ty) &Single::occluded;
  table.intersect4 = (VCI::Intersect4Ty) &Packet4::intersect;
  table.occluded4 = (VCI::Occluded4Ty) &Packet4::occluded;
#if defined(__AVX__)
  typedef RoundLinearCurveMiIntersectorK<N,8,true> Packet8;
  table.intersect8 = (VCI::Intersect8Ty)&Packet8::intersect;
  table.occluded8 = (VCI::Occluded8Ty) &Packet8::occluded;
#endif
#if defined(__AVX512F__)
  typedef RoundLinearCurveMiIntersectorK<N,16,true> Packet16;
  table.intersect16 = (VCI::Intersect16Ty)&Packet16::intersect;
  table.occluded16 = (VCI::Occluded16Ty) &Packet16::occluded;
#endif
  return table;
}
/*! Returns the entry point table of the ConeCurveMi intersectors
 *  (instantiated with <N,true>). The 8-wide and 16-wide slots are only
 *  assigned in AVX and AVX-512 builds respectively and are left
 *  uninitialized otherwise. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors()
{
  typedef VirtualCurveIntersector VCI;
  typedef ConeCurveMiIntersector1<N,true> Single;
  typedef ConeCurveMiIntersectorK<N,4,true> Packet4;

  VCI::Intersectors table;
  table.intersect1 = (VCI::Intersect1Ty) &Single::intersect;
  table.occluded1 = (VCI::Occluded1Ty) &Single::occluded;
  table.intersect4 = (VCI::Intersect4Ty) &Packet4::intersect;
  table.occluded4 = (VCI::Occluded4Ty) &Packet4::occluded;
#if defined(__AVX__)
  typedef ConeCurveMiIntersectorK<N,8,true> Packet8;
  table.intersect8 = (VCI::Intersect8Ty)&Packet8::intersect;
  table.occluded8 = (VCI::Occluded8Ty) &Packet8::occluded;
#endif
#if defined(__AVX512F__)
  typedef ConeCurveMiIntersectorK<N,16,true> Packet16;
  table.intersect16 = (VCI::Intersect16Ty)&Packet16::intersect;
  table.occluded16 = (VCI::Occluded16Ty) &Packet16::occluded;
#endif
  return table;
}
/*! Returns the entry point table of the RoundLinearCurveMiMB
 *  intersectors (instantiated with <N,true>). The 8-wide and 16-wide
 *  slots are only assigned in AVX and AVX-512 builds respectively and are
 *  left uninitialized otherwise. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors()
{
  typedef VirtualCurveIntersector VCI;
  typedef RoundLinearCurveMiMBIntersector1<N,true> Single;
  typedef RoundLinearCurveMiMBIntersectorK<N,4,true> Packet4;

  VCI::Intersectors table;
  table.intersect1 = (VCI::Intersect1Ty) &Single::intersect;
  table.occluded1 = (VCI::Occluded1Ty) &Single::occluded;
  table.intersect4 = (VCI::Intersect4Ty) &Packet4::intersect;
  table.occluded4 = (VCI::Occluded4Ty) &Packet4::occluded;
#if defined(__AVX__)
  typedef RoundLinearCurveMiMBIntersectorK<N,8,true> Packet8;
  table.intersect8 = (VCI::Intersect8Ty)&Packet8::intersect;
  table.occluded8 = (VCI::Occluded8Ty) &Packet8::occluded;
#endif
#if defined(__AVX512F__)
  typedef RoundLinearCurveMiMBIntersectorK<N,16,true> Packet16;
  table.intersect16 = (VCI::Intersect16Ty)&Packet16::intersect;
  table.occluded16 = (VCI::Occluded16Ty) &Packet16::occluded;
#endif
  return table;
}
/*! Returns the entry point table of the ConeCurveMiMB intersectors
 *  (instantiated with <N,true>). The 8-wide and 16-wide slots are only
 *  assigned in AVX and AVX-512 builds respectively and are left
 *  uninitialized otherwise. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors()
{
  typedef VirtualCurveIntersector VCI;
  typedef ConeCurveMiMBIntersector1<N,true> Single;
  typedef ConeCurveMiMBIntersectorK<N,4,true> Packet4;

  VCI::Intersectors table;
  table.intersect1 = (VCI::Intersect1Ty) &Single::intersect;
  table.occluded1 = (VCI::Occluded1Ty) &Single::occluded;
  table.intersect4 = (VCI::Intersect4Ty) &Packet4::intersect;
  table.occluded4 = (VCI::Occluded4Ty) &Packet4::occluded;
#if defined(__AVX__)
  typedef ConeCurveMiMBIntersectorK<N,8,true> Packet8;
  table.intersect8 = (VCI::Intersect8Ty)&Packet8::intersect;
  table.occluded8 = (VCI::Occluded8Ty) &Packet8::occluded;
#endif
#if defined(__AVX512F__)
  typedef ConeCurveMiMBIntersectorK<N,16,true> Packet16;
  table.intersect16 = (VCI::Intersect16Ty)&Packet16::intersect;
  table.occluded16 = (VCI::Occluded16Ty) &Packet16::occluded;
#endif
  return table;
}
/*! Returns the Intersectors table wired to FlatLinearCurveMiIntersector1<N,true> and
 *  FlatLinearCurveMiIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &FlatLinearCurveMiIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &FlatLinearCurveMiIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &FlatLinearCurveMiIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &FlatLinearCurveMiIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &FlatLinearCurveMiIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &FlatLinearCurveMiIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &FlatLinearCurveMiIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &FlatLinearCurveMiIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to FlatLinearCurveMiMBIntersector1<N,true> and
 *  FlatLinearCurveMiMBIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &FlatLinearCurveMiMBIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &FlatLinearCurveMiMBIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &FlatLinearCurveMiMBIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &FlatLinearCurveMiMBIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &FlatLinearCurveMiMBIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &FlatLinearCurveMiMBIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &FlatLinearCurveMiMBIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &FlatLinearCurveMiMBIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to SphereMiIntersector1<N,true> and
 *  SphereMiIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors SphereNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &SphereMiIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &SphereMiIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &SphereMiIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &SphereMiIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &SphereMiIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &SphereMiIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &SphereMiIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &SphereMiIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to SphereMiMBIntersector1<N,true> and
 *  SphereMiMBIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &SphereMiMBIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &SphereMiMBIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &SphereMiMBIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &SphereMiMBIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &SphereMiMBIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &SphereMiMBIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &SphereMiMBIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &SphereMiMBIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to DiscMiIntersector1<N,true> and
 *  DiscMiIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors DiscNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &DiscMiIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &DiscMiIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &DiscMiIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &DiscMiIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &DiscMiIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &DiscMiIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &DiscMiIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &DiscMiIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to DiscMiMBIntersector1<N,true> and
 *  DiscMiMBIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &DiscMiMBIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &DiscMiMBIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &DiscMiMBIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &DiscMiMBIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &DiscMiMBIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &DiscMiMBIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &DiscMiMBIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &DiscMiMBIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to OrientedDiscMiIntersector1<N,true> and
 *  OrientedDiscMiIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &OrientedDiscMiIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &OrientedDiscMiIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &OrientedDiscMiIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &OrientedDiscMiIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &OrientedDiscMiIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &OrientedDiscMiIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &OrientedDiscMiIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &OrientedDiscMiIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table wired to OrientedDiscMiMBIntersector1<N,true> and
 *  OrientedDiscMiMBIntersectorK<N,K,true>. K = 4 is always assigned, K = 8 only under
 *  __AVX__ and K = 16 only under __AVX512F__; fields for compiled-out widths are
 *  not assigned by this function. */
template<int N>
static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &OrientedDiscMiMBIntersector1<N,true>::intersect;
  table.occluded1 = (VCI::Occluded1Ty)
    &OrientedDiscMiMBIntersector1<N,true>::occluded;
  table.intersect4 = (VCI::Intersect4Ty)
    &OrientedDiscMiMBIntersectorK<N,4,true>::intersect;
  table.occluded4 = (VCI::Occluded4Ty)
    &OrientedDiscMiMBIntersectorK<N,4,true>::occluded;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &OrientedDiscMiMBIntersectorK<N,8,true>::intersect;
  table.occluded8 = (VCI::Occluded8Ty)
    &OrientedDiscMiMBIntersectorK<N,8,true>::occluded;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &OrientedDiscMiMBIntersectorK<N,16,true>::intersect;
  table.occluded16 = (VCI::Occluded16Ty)
    &OrientedDiscMiMBIntersectorK<N,16,true>::occluded;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  RibbonCurve1Intersector1 / RibbonCurve1IntersectorK intersectors for Curve and the
 *  *EpilogMU<VSIZEX,...> epilogs. K = 4 is always assigned, K = 8 only under __AVX__
 *  and K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors RibbonNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNvIntersector1<N> and CurveNvIntersectorK<N,K>, instantiated with the
 *  RibbonCurve1Intersector1 / RibbonCurve1IntersectorK intersectors for Curve and the
 *  *EpilogMU<VSIZEX,...> epilogs. K = 4 is always assigned, K = 8 only under __AVX__
 *  and K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors RibbonNvIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNvIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNvIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNvIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNvIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNvIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNvIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNvIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNvIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  RibbonCurve1Intersector1 / RibbonCurve1IntersectorK intersectors for Curve and the
 *  *EpilogMU<VSIZEX,...> epilogs. K = 4 is always assigned, K = 8 only under __AVX__
 *  and K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors RibbonNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  SweepCurve1Intersector1 / SweepCurve1IntersectorK intersectors for Curve and the
 *  *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors CurveNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNvIntersector1<N> and CurveNvIntersectorK<N,K>, instantiated with the
 *  SweepCurve1Intersector1 / SweepCurve1IntersectorK intersectors for Curve and the
 *  *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors CurveNvIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNvIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNvIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNvIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNvIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNvIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNvIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNvIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNvIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_t / occluded_t members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  SweepCurve1Intersector1 / SweepCurve1IntersectorK intersectors for Curve and the
 *  *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors CurveNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_n / occluded_n members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  OrientedCurve1Intersector1 / OrientedCurve1IntersectorK intersectors for Curve and
 *  the *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors OrientedCurveNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_n / occluded_n members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  OrientedCurve1Intersector1 / OrientedCurve1IntersectorK intersectors for Curve and
 *  the *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors OrientedCurveNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_h / occluded_h members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  RibbonCurve1Intersector1 / RibbonCurve1IntersectorK intersectors for Curve and the
 *  *EpilogMU<VSIZEX,...> epilogs. K = 4 is always assigned, K = 8 only under __AVX__
 *  and K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteRibbonNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_h / occluded_h members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  RibbonCurve1Intersector1 / RibbonCurve1IntersectorK intersectors for Curve and the
 *  *EpilogMU<VSIZEX,...> epilogs. K = 4 is always assigned, K = 8 only under __AVX__
 *  and K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteRibbonNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_h / occluded_h members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  SweepCurve1Intersector1 / SweepCurve1IntersectorK intersectors for Curve and the
 *  *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteCurveNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_h / occluded_h members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  SweepCurve1Intersector1 / SweepCurve1IntersectorK intersectors for Curve and the
 *  *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteCurveNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_hn / occluded_hn members of
 *  CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the
 *  OrientedCurve1Intersector1 / OrientedCurve1IntersectorK intersectors for Curve and
 *  the *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
/*! Returns the Intersectors table built from the intersect_hn / occluded_hn members of
 *  CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the
 *  OrientedCurve1Intersector1 / OrientedCurve1IntersectorK intersectors for Curve and
 *  the *Epilog1 epilogs. K = 4 is always assigned, K = 8 only under __AVX__ and
 *  K = 16 only under __AVX512F__; other fields are not assigned here. */
template<template<typename Ty> class Curve, int N>
static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiMBIntersectors()
{
  using VCI = VirtualCurveIntersector;
  VCI::Intersectors table;

  /* entries that exist on every ISA */
  table.intersect1 = (VCI::Intersect1Ty)
    &CurveNiMBIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
  table.occluded1 = (VCI::Occluded1Ty)
    &CurveNiMBIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
  table.intersect4 = (VCI::Intersect4Ty)
    &CurveNiMBIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
  table.occluded4 = (VCI::Occluded4Ty)
    &CurveNiMBIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;

#if defined(__AVX__)
  /* K = 8 entries */
  table.intersect8 = (VCI::Intersect8Ty)
    &CurveNiMBIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
  table.occluded8 = (VCI::Occluded8Ty)
    &CurveNiMBIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
#endif

#if defined(__AVX512F__)
  /* K = 16 entries */
  table.intersect16 = (VCI::Intersect16Ty)
    &CurveNiMBIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
  table.occluded16 = (VCI::Occluded16Ty)
    &CurveNiMBIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
#endif

  return table;
}
}
}

View File

@@ -0,0 +1,223 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Cylinder of constant radius around the line through p0 and p1. The
 *  intersection routines do not clip against the end points, i.e. the
 *  cylinder is treated as infinitely long. Only the squared radius is
 *  stored. */
struct Cylinder
{
  const Vec3fa p0; //!< start location
  const Vec3fa p1; //!< end position
  const float rr;   //!< squared radius of cylinder

  /*! Constructs the cylinder from its axis end points and its radius r. */
  __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float r)
    : p0(p0), p1(p1), rr(sqr(r)) {}

  /*! Constructs the cylinder from an already squared radius rr; the unnamed
   *  bool parameter only selects this overload. */
  __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float rr, bool)
    : p0(p0), p1(p1), rr(rr) {}

  /*! Intersects the line org + t*dir with the cylinder.
   *
   *  \param org    ray origin
   *  \param dir    ray direction (need not be normalized)
   *  \param t_o    receives the [near,far] parameter interval; set to the
   *                empty interval (pos_inf,neg_inf) on a miss and to
   *                (neg_inf,pos_inf) for a ray parallel to the axis that
   *                lies inside the cylinder
   *  \param u0_o   axial parameter of the near hit (0 at p0, 1 at p1)
   *  \param Ng0_o  vector from the axis to the near hit point (not normalized)
   *  \param u1_o   axial parameter of the far hit
   *  \param Ng1_o  vector from the axis to the far hit point (not normalized)
   *  \return true if the line hits the cylinder. The u/Ng outputs are only
   *          written in the non-parallel hit case. */
  __forceinline bool intersect(const Vec3fa& org,
                               const Vec3fa& dir,
                               BBox1f& t_o,
                               float& u0_o, Vec3fa& Ng0_o,
                               float& u1_o, Vec3fa& Ng1_o) const
  {
    /* calculate quadratic equation to solve */
    const float rl = rcp_length(p1-p0);
    const Vec3fa P0 = p0, dP = (p1-p0)*rl;
    const Vec3fa O = org-P0, dO = dir;

    const float dOdO = dot(dO,dO);
    const float OdO = dot(dO,O);
    const float OO = dot(O,O);
    const float dOz = dot(dP,dO);
    const float Oz = dot(dP,O);

    const float A = dOdO - sqr(dOz);
    const float B = 2.0f * (OdO - dOz*Oz);
    const float C = OO - sqr(Oz) - rr;

    /* we miss the cylinder if determinant is smaller than zero */
    const float D = B*B - 4.0f*A*C;
    if (D < 0.0f) {
      t_o = BBox1f(pos_inf,neg_inf);
      return false;
    }

    /* special case for rays that are parallel to the cylinder */
    const float eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
    if (abs(A) < eps)
    {
      /* C <= 0 means the origin lies inside (or on) the cylinder */
      if (C <= 0.0f) {
        t_o = BBox1f(neg_inf,pos_inf);
        return true;
      } else {
        t_o = BBox1f(pos_inf,neg_inf);
        return false;
      }
    }

    /* standard case for rays that are not parallel to the cylinder */
    const float Q = sqrt(D);
    const float rcp_2A = rcp(2.0f*A);
    const float t0 = (-B-Q)*rcp_2A;
    const float t1 = (-B+Q)*rcp_2A;

    /* calculates u and Ng for near hit */
    {
      u0_o = madd(t0,dOz,Oz)*rl;
      /* the hit point has to include the ray origin, because Pl below is an
       * absolute position on the axis (identical result for org == 0) */
      const Vec3fa Pr = madd(t0,dir,org);
      const Vec3fa Pl = madd(u0_o,p1-p0,p0);
      Ng0_o = Pr-Pl;
    }

    /* calculates u and Ng for far hit */
    {
      u1_o = madd(t1,dOz,Oz)*rl;
      const Vec3fa Pr = madd(t1,dir,org);
      const Vec3fa Pl = madd(u1_o,p1-p0,p0);
      Ng1_o = Pr-Pl;
    }

    t_o.lower = t0;
    t_o.upper = t1;
    return true;
  }

  /*! Convenience overload that only reports the hit interval t_o. */
  __forceinline bool intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox1f& t_o) const
  {
    float u0_o; Vec3fa Ng0_o;
    float u1_o; Vec3fa Ng1_o;
    return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
  }

  /*! Runs one self-test case: intersects ray with cylinder and compares the
   *  hit flag with shouldhit and, when a hit is expected, the interval with
   *  [t0,t1] (exact match for infinite bounds, tolerance 0.001 otherwise).
   *  Prints a diagnostic and returns false on mismatch. */
  static bool verify(const size_t id, const Cylinder& cylinder, const RayHit& ray, bool shouldhit, const float t0, const float t1)
  {
    float eps = 0.001f;
    BBox1f t; bool hit;
    hit = cylinder.intersect(ray.org,ray.dir,t);

    bool failed = hit != shouldhit;
    if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : abs(t0-t.lower) > eps;
    if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : abs(t1-t.upper) > eps;
    if (!failed) return true;
    embree_cout << "Cylinder test " << id << " failed: cylinder = " << cylinder << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
    return false;
  }

  /* verify cylinder class */
  static bool verify()
  {
    bool passed = true;
    const Cylinder cylinder(Vec3fa(0.0f,0.0f,0.0f),Vec3fa(1.0f,0.0f,0.0f),1.0f);
    passed &= verify(0,cylinder,RayHit(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
    passed &= verify(1,cylinder,RayHit(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
    passed &= verify(2,cylinder,RayHit(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
    passed &= verify(3,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
    passed &= verify(4,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
    passed &= verify(5,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
    passed &= verify(6,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
    return passed;
  }

  /*! output operator */
  friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cylinder& c) {
    return cout << "Cylinder { p0 = " << c.p0 << ", p1 = " << c.p1 << ", r = " << sqrtf(c.rr) << "}";
  }
};
/*! N cylinders stored in SIMD lanes; each lane is a cylinder of constant
 *  radius around the line through p0 and p1 (no clipping at the end
 *  points). Only the squared radius is stored. */
template<int N>
struct CylinderN
{
  const Vec3vf<N> p0; //!< start location
  const Vec3vf<N> p1; //!< end position
  const vfloat<N> rr; //!< squared radius of cylinder

  /*! Constructs the cylinders from axis end points and radii r. */
  __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& r)
    : p0(p0), p1(p1), rr(sqr(r)) {}

  /*! Constructs the cylinders from already squared radii rr; the unnamed
   *  bool parameter only selects this overload. */
  __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& rr, bool)
    : p0(p0), p1(p1), rr(rr) {}

  /*! Intersects the line org + t*dir with all N cylinders.
   *
   *  \param org    ray origin (shared by all lanes)
   *  \param dir    ray direction (shared by all lanes)
   *  \param t_o    per-lane [near,far] interval; (pos_inf,neg_inf) for
   *                missed lanes, (neg_inf,pos_inf) for lanes where the ray is
   *                parallel to the axis and inside the cylinder
   *  \param u0_o   axial parameter of the near hit (0 at p0, 1 at p1)
   *  \param Ng0_o  vector from the axis to the near hit point (not normalized)
   *  \param u1_o   axial parameter of the far hit
   *  \param Ng1_o  vector from the axis to the far hit point (not normalized)
   *  \return mask of lanes that are hit. The u/Ng outputs are computed for
   *          all lanes without masking, so only use them for lanes with a
   *          regular (non-parallel) hit. */
  __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
                                   BBox<vfloat<N>>& t_o,
                                   vfloat<N>& u0_o, Vec3vf<N>& Ng0_o,
                                   vfloat<N>& u1_o, Vec3vf<N>& Ng1_o) const
  {
    /* calculate quadratic equation to solve */
    const vfloat<N> rl = rcp_length(p1-p0);
    const Vec3vf<N> P0 = p0, dP = (p1-p0)*rl;
    const Vec3vf<N> O = Vec3vf<N>(org)-P0, dO = dir;

    const vfloat<N> dOdO = dot(dO,dO);
    const vfloat<N> OdO = dot(dO,O);
    const vfloat<N> OO = dot(O,O);
    const vfloat<N> dOz = dot(dP,dO);
    const vfloat<N> Oz = dot(dP,O);

    const vfloat<N> A = dOdO - sqr(dOz);
    const vfloat<N> B = 2.0f * (OdO - dOz*Oz);
    const vfloat<N> C = OO - sqr(Oz) - rr;

    /* we miss the cylinder if determinant is smaller than zero */
    const vfloat<N> D = B*B - 4.0f*A*C;
    vbool<N> valid = D >= 0.0f;
    if (none(valid)) {
      t_o = BBox<vfloat<N>>(empty);
      return valid;
    }

    /* standard case for rays that are not parallel to the cylinder */
    const vfloat<N> Q = sqrt(D);
    const vfloat<N> rcp_2A = rcp(2.0f*A);
    const vfloat<N> t0 = (-B-Q)*rcp_2A;
    const vfloat<N> t1 = (-B+Q)*rcp_2A;

    /* calculates u and Ng for near hit */
    {
      u0_o = madd(t0,dOz,Oz)*rl;
      /* the hit point has to include the ray origin, because Pl below is an
       * absolute position on the axis (identical result for org == 0) */
      const Vec3vf<N> Pr = madd(t0,Vec3vf<N>(dir),Vec3vf<N>(org));
      const Vec3vf<N> Pl = madd(u0_o,p1-p0,p0);
      Ng0_o = Pr-Pl;
    }

    /* calculates u and Ng for far hit */
    {
      u1_o = madd(t1,dOz,Oz)*rl;
      const Vec3vf<N> Pr = madd(t1,Vec3vf<N>(dir),Vec3vf<N>(org));
      const Vec3vf<N> Pl = madd(u1_o,p1-p0,p0);
      Ng1_o = Pr-Pl;
    }

    t_o.lower = select(valid, t0, vfloat<N>(pos_inf));
    t_o.upper = select(valid, t1, vfloat<N>(neg_inf));

    /* special case for rays that are parallel to the cylinder */
    const vfloat<N> eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
    vbool<N> validt = valid & (abs(A) < eps);
    if (unlikely(any(validt)))
    {
      /* C <= 0 means the origin lies inside (or on) the cylinder */
      vbool<N> inside = C <= 0.0f;
      t_o.lower = select(validt,select(inside,vfloat<N>(neg_inf),vfloat<N>(pos_inf)),t_o.lower);
      t_o.upper = select(validt,select(inside,vfloat<N>(pos_inf),vfloat<N>(neg_inf)),t_o.upper);
      valid &= !validt | inside;
    }
    return valid;
  }

  /*! Convenience overload that only reports the hit intervals t_o. */
  __forceinline vbool<N> intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
  {
    vfloat<N> u0_o; Vec3vf<N> Ng0_o;
    vfloat<N> u1_o; Vec3vf<N> Ng1_o;
    return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
  }
};
}
}

View File

@@ -0,0 +1,269 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/scene_points.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
/*! Hit record for M discs tested at once: per-lane uv coordinates, ray
 *  distance t and geometry normal Ng, with per-lane and whole-vector
 *  accessors. */
template<int M>
struct DiscIntersectorHitM
{
  /*! Default constructor; performs no explicit initialization. */
  __forceinline DiscIntersectorHitM() {}

  /*! Stores the given per-lane hit data. */
  __forceinline DiscIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
    : vu(u), vv(v), vt(t), vNg(Ng) {}

  /*! Nothing to post-process for disc hits. */
  __forceinline void finalize() {}

  /*! uv coordinates of lane i */
  __forceinline Vec2f uv(const size_t i) const
  {
    const float u = vu[i];
    const float v = vv[i];
    return Vec2f(u, v);
  }

  /*! uv coordinates of all lanes */
  __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu, vv); }

  /*! hit distance of lane i */
  __forceinline float t(const size_t i) const { return vt[i]; }

  /*! hit distances of all lanes */
  __forceinline vfloat<M> t() const { return vt; }

  /*! geometry normal of lane i */
  __forceinline Vec3fa Ng(const size_t i) const
  {
    const float nx = vNg.x[i];
    const float ny = vNg.y[i];
    const float nz = vNg.z[i];
    return Vec3fa(nx, ny, nz);
  }

  /*! geometry normals of all lanes */
  __forceinline Vec3vf<M> Ng() const { return vNg; }

public:
  vfloat<M> vu;   //!< u coordinates
  vfloat<M> vv;   //!< v coordinates
  vfloat<M> vt;   //!< hit distances
  Vec3vf<M> vNg;  //!< geometry normals
};
/*! Scalar specialization of the disc hit record: a single uv pair, ray
 *  distance t and geometry normal Ng. */
template<>
struct DiscIntersectorHitM<1>
{
  /*! Default constructor; performs no explicit initialization. */
  __forceinline DiscIntersectorHitM() {}

  /*! Stores the given hit data. */
  __forceinline DiscIntersectorHitM(const float& u, const float& v, const float& t, const Vec3fa& Ng)
    : vu(u), vv(v), vt(t), vNg(Ng) {}

  /*! Nothing to post-process for disc hits. */
  __forceinline void finalize() {}

  /*! uv coordinates of the hit */
  __forceinline Vec2f uv() const { return Vec2f(vu, vv); }

  /*! hit distance */
  __forceinline float t() const { return vt; }

  /*! geometry normal */
  __forceinline Vec3fa Ng() const { return vNg; }

public:
  float vu;    //!< u coordinate
  float vv;    //!< v coordinate
  float vt;    //!< hit distance
  Vec3fa vNg;  //!< geometry normal
};
/*! Intersects one ray with M discs. v0i holds the disc centers in xyz and
 *  the radii in w; the radii are first passed through
 *  enlargeRadiusToMinWidth. Hits are reported with uv = 0 and handed to the
 *  epilog, whose result is returned. */
template<int M>
struct DiscIntersector1
{
  typedef CurvePrecalculations1 Precalculations;

  /*! Ray-oriented discs: the disc lies in the plane through its center that
   *  is perpendicular to the ray. The reported normal is -ray.dir. The
   *  parameter pre is not used. Returns false when no lane survives the
   *  tests, otherwise the result of the epilog. */
  template<typename Ray, typename Epilog>
  static __forceinline bool intersect(
    const vbool<M>& valid_i,
    Ray& ray,
    RayQueryContext* context,
    const Points* geom,
    const Precalculations& pre,
    const Vec4vf<M>& v0i,
    const Epilog& epilog)
  {
    /* broadcast the ray to all lanes */
    const Vec3vf<M> org(ray.org.x, ray.org.y, ray.org.z);
    const Vec3vf<M> dir(ray.dir.x, ray.dir.y, ray.dir.z);
    const vfloat<M> rcpDir2 = rcp(dot(dir, dir));

    const Vec4vf<M> disc = enlargeRadiusToMinWidth<M>(context,geom,org,v0i);
    const Vec3vf<M> center = disc.xyz();
    const vfloat<M> radius = disc.w;

    /* ray parameter of the point on the ray that is closest to the center */
    const Vec3vf<M> toCenter = center - org;
    const vfloat<M> tHit = dot(toCenter, dir) * rcpDir2;

    vbool<M> active = valid_i;
    active &= (vfloat<M>(ray.tnear()) <= tHit) & (tHit <= vfloat<M>(ray.tfar));
    if (unlikely(none(active)))
      return false;

    /* the closest point has to lie within the disc radius */
    const Vec3vf<M> offAxis = toCenter - tHit * dir;
    const vfloat<M> dist2 = dot(offAxis, offAxis);
    const vfloat<M> radius2 = radius * radius;
    active &= (dist2 <= radius2);
    if (unlikely(none(active)))
      return false;

    /* We reject hits where the ray origin lies inside the ray
     * oriented disc to avoid self intersections. */
#if defined(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE)
    const vfloat<M> orgDist2 = dot(toCenter, toCenter);
    active &= (orgDist2 > radius2);
    if (unlikely(none(active)))
      return false;
#endif

    DiscIntersectorHitM<M> hit(zero, zero, tHit, -dir);
    return epilog(active, hit);
  }

  /*! Oriented discs: the disc lies in the plane through its center with the
   *  given normal. Lanes whose plane is parallel to the ray are rejected.
   *  The reported normal is the given normal. The parameter pre is not
   *  used. Returns false when no lane survives the tests, otherwise the
   *  result of the epilog. */
  template<typename Ray, typename Epilog>
  static __forceinline bool intersect(
    const vbool<M>& valid_i,
    Ray& ray,
    RayQueryContext* context,
    const Points* geom,
    const Precalculations& pre,
    const Vec4vf<M>& v0i,
    const Vec3vf<M>& normal,
    const Epilog& epilog)
  {
    /* broadcast the ray to all lanes */
    const Vec3vf<M> org(ray.org.x, ray.org.y, ray.org.z);
    const Vec3vf<M> dir(ray.dir.x, ray.dir.y, ray.dir.z);

    const Vec4vf<M> disc = enlargeRadiusToMinWidth<M>(context,geom,org,v0i);
    const Vec3vf<M> center = disc.xyz();
    const vfloat<M> radius = disc.w;

    /* ray/plane intersection */
    vbool<M> active = valid_i;
    vfloat<M> denom = dot(dir, normal);
    const vbool<M> parallel = denom == vfloat<M>(0.f);
    active &= !parallel;
    denom = select(parallel, 1.f, denom); // prevent divide by zero
    vfloat<M> t = dot(center - org, normal) / denom;

    active &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
    if (unlikely(none(active)))
      return false;

    /* the plane hit has to lie strictly inside the disc radius */
    Vec3vf<M> planeHit = org + dir * t;
    const Vec3vf<M> offset = planeHit - center;
    vfloat<M> dist2 = dot(offset, offset);
    active &= dist2 < radius * radius;
    if (unlikely(none(active)))
      return false;

    DiscIntersectorHitM<M> hit(zero, zero, t, normal);
    return epilog(active, hit);
  }
};
/*! Intersects ray k of a packet of K rays with M discs. v0i holds the disc
 *  centers in xyz and the radii in w; the radii are first passed through
 *  enlargeRadiusToMinWidth. Hits are reported with uv = 0 and handed to the
 *  epilog, whose result is returned. */
template<int M, int K>
struct DiscIntersectorK
{
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Ray-oriented discs: the disc lies in the plane through its center that
   *  is perpendicular to ray k. The reported normal is the negated ray
   *  direction. The parameter pre is not used. Returns false when no lane
   *  survives the tests, otherwise the result of the epilog. */
  template<typename Epilog>
  static __forceinline bool intersect(
    const vbool<M>& valid_i,
    RayK<K>& ray,
    size_t k,
    RayQueryContext* context,
    const Points* geom,
    const Precalculations& pre,
    const Vec4vf<M>& v0i,
    const Epilog& epilog)
  {
    /* broadcast ray k to all lanes */
    const Vec3vf<M> org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Vec3vf<M> dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
    const vfloat<M> rcpDir2 = rcp(dot(dir, dir));

    const Vec4vf<M> disc = enlargeRadiusToMinWidth<M>(context,geom,org,v0i);
    const Vec3vf<M> center = disc.xyz();
    const vfloat<M> radius = disc.w;

    /* ray parameter of the point on the ray that is closest to the center */
    const Vec3vf<M> toCenter = center - org;
    const vfloat<M> tHit = dot(toCenter, dir) * rcpDir2;

    vbool<M> active = valid_i;
    active &= (vfloat<M>(ray.tnear()[k]) <= tHit) & (tHit <= vfloat<M>(ray.tfar[k]));
    if (unlikely(none(active)))
      return false;

    /* the closest point has to lie within the disc radius */
    const Vec3vf<M> offAxis = toCenter - tHit * dir;
    const vfloat<M> dist2 = dot(offAxis, offAxis);
    const vfloat<M> radius2 = radius * radius;
    active &= (dist2 <= radius2);
    if (unlikely(none(active)))
      return false;

    /* We reject hits where the ray origin lies inside the ray
     * oriented disc to avoid self intersections. */
#if defined(EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE)
    const vfloat<M> orgDist2 = dot(toCenter, toCenter);
    active &= (orgDist2 > radius2);
    if (unlikely(none(active)))
      return false;
#endif

    DiscIntersectorHitM<M> hit(zero, zero, tHit, -dir);
    return epilog(active, hit);
  }

  /*! Oriented discs: the disc lies in the plane through its center with the
   *  given normal. Lanes whose plane is parallel to ray k are rejected. The
   *  reported normal is the given normal. The parameter pre is not used.
   *  Returns false when no lane survives the tests, otherwise the result of
   *  the epilog. */
  template<typename Epilog>
  static __forceinline bool intersect(
    const vbool<M>& valid_i,
    RayK<K>& ray,
    size_t k,
    RayQueryContext* context,
    const Points* geom,
    const Precalculations& pre,
    const Vec4vf<M>& v0i,
    const Vec3vf<M>& normal,
    const Epilog& epilog)
  {
    /* broadcast ray k to all lanes */
    const Vec3vf<M> org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Vec3vf<M> dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);

    const Vec4vf<M> disc = enlargeRadiusToMinWidth<M>(context,geom,org,v0i);
    const Vec3vf<M> center = disc.xyz();
    const vfloat<M> radius = disc.w;

    /* ray/plane intersection */
    vbool<M> active = valid_i;
    vfloat<M> denom = dot(dir, normal);
    const vbool<M> parallel = denom == vfloat<M>(0.f);
    active &= !parallel;
    denom = select(parallel, 1.f, denom); // prevent divide by zero
    vfloat<M> t = dot(center - org, normal) / denom;

    active &= (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
    if (unlikely(none(active)))
      return false;

    /* the plane hit has to lie strictly inside the disc radius */
    Vec3vf<M> planeHit = org + dir * t;
    const Vec3vf<M> offset = planeHit - center;
    vfloat<M> dist2 = dot(offset, offset);
    active &= dist2 < radius * radius;
    if (unlikely(none(active)))
      return false;

    DiscIntersectorHitM<M> hit(zero, zero, t, normal);
    return epilog(active, hit);
  }
};
} // namespace isa
} // namespace embree

View File

@@ -0,0 +1,277 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "disc_intersector.h"
#include "intersector_epilog.h"
#include "pointi.h"
namespace embree
{
namespace isa
{
/*! Single-ray leaf intersector for PointMi<M> primitives that calls the
 *  DiscIntersector1 overload without a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, bool filter>
struct DiscMiIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with the valid discs of the leaf; hits are
   *  processed by Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz) and radius (w) of the discs */
    Vec4vf<M> discs;
    Disc.gather(discs, points);
    const vbool<M> lanes = Disc.valid();

    DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs,
      Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether the ray is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Disc.gather(discs, points);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs,
      Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Single-ray leaf intersector for PointMi<M> primitives that gathers the
 *  discs at ray.time() and calls the DiscIntersector1 overload without a
 *  normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, bool filter>
struct DiscMiMBIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with the valid discs of the leaf; hits are
   *  processed by Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz) and radius (w) of the discs for the ray time */
    Vec4vf<M> discs;
    Disc.gather(discs, points, ray.time());
    const vbool<M> lanes = Disc.valid();

    DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs,
      Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether the ray is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Disc.gather(discs, points, ray.time());
    const vbool<M> lanes = Disc.valid();

    return DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs,
      Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Leaf intersector for ray k of a K-wide packet and PointMi<M> primitives
 *  that calls the DiscIntersectorK overload without a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, int K, bool filter>
struct DiscMiIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with the valid discs of the leaf; hits are processed
   *  by Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz) and radius (w) of the discs */
    Vec4vf<M> discs;
    Disc.gather(discs, points);
    const vbool<M> lanes = Disc.valid();

    DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs,
      Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether ray k is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Disc.gather(discs, points);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs,
      Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Leaf intersector for ray k of a K-wide packet and PointMi<M> primitives
 *  that gathers the discs at ray.time()[k] and calls the DiscIntersectorK
 *  overload without a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, int K, bool filter>
struct DiscMiMBIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with the valid discs of the leaf; hits are processed
   *  by Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz) and radius (w) of the discs for the time of ray k */
    Vec4vf<M> discs;
    Disc.gather(discs, points, ray.time()[k]);
    const vbool<M> lanes = Disc.valid();

    DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs,
      Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether ray k is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Disc.gather(discs, points, ray.time()[k]);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs,
      Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Single-ray leaf intersector for PointMi<M> primitives that gathers disc
 *  positions together with normals and calls the DiscIntersector1 overload
 *  taking a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, bool filter>
struct OrientedDiscMiIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with the valid discs of the leaf; hits are
   *  processed by Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz), radius (w) and normal of the discs */
    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points);
    const vbool<M> lanes = Disc.valid();

    DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs, normals,
      Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether the ray is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs, normals,
      Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Single-ray leaf intersector for PointMi<M> primitives that gathers disc
 *  positions and normals at ray.time() and calls the DiscIntersector1
 *  overload taking a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, bool filter>
struct OrientedDiscMiMBIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with the valid discs of the leaf; hits are
   *  processed by Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz), radius (w) and normal of the discs for the ray time */
    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points, ray.time());
    const vbool<M> lanes = Disc.valid();

    DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs, normals,
      Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether the ray is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points, ray.time());
    const vbool<M> lanes = Disc.valid();

    return DiscIntersector1<M>::intersect(
      lanes, ray, context, points, pre, discs, normals,
      Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Leaf intersector for ray k of a K-wide packet and PointMi<M> primitives
 *  that gathers disc positions together with normals and calls the
 *  DiscIntersectorK overload taking a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, int K, bool filter>
struct OrientedDiscMiIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with the valid discs of the leaf; hits are processed
   *  by Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz), radius (w) and normal of the discs */
    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points);
    const vbool<M> lanes = Disc.valid();

    DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs, normals,
      Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether ray k is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs, normals,
      Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }
};
/*! Leaf intersector for ray k of a K-wide packet and PointMi<M> primitives
 *  that gathers disc positions and normals at ray.time()[k] and calls the
 *  DiscIntersectorK overload taking a normal.
 *  NOTE(review): the epilog is intentionally kept as a temporary inside the
 *  call expression; presumably it keeps references to its constructor
 *  arguments - confirm in intersector_epilog.h before hoisting it. */
template<int M, int K, bool filter>
struct OrientedDiscMiMBIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with the valid discs of the leaf; hits are processed
   *  by Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    /* gather center (xyz), radius (w) and normal of the discs for the time of ray k */
    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points, ray.time()[k]);
    const vbool<M> lanes = Disc.valid();

    DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs, normals,
      Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }

  /*! Tests whether ray k is occluded by any valid disc of the leaf; the
   *  result comes from Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& Disc)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(Disc.geomID());

    Vec4vf<M> discs;
    Vec3vf<M> normals;
    Disc.gather(discs, normals, points, ray.time()[k]);
    const vbool<M> lanes = Disc.valid();

    return DiscIntersectorK<M, K>::intersect(
      lanes, ray, k, context, points, pre, discs, normals,
      Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
  }
};
} // namespace isa
} // namespace embree

View File

@@ -0,0 +1,157 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/geometry.h"
#include "../common/ray.h"
#include "../common/hit.h"
#include "../common/context.h"
namespace embree
{
namespace isa
{
/*! Runs the intersection filters for a single-ray hit described by args.
 *  The geometry's intersectionFilterN (if set) runs first. The filter
 *  returned by context->getFilter() (if set) runs afterwards, but only when
 *  the context enforces it or the geometry has argument filter functions.
 *  Returns false as soon as a filter has cleared args->valid[0]; otherwise
 *  copies the hit into the ray and returns true. */
__forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, RayQueryContext* context)
{
  /* filter attached to the geometry */
  if (geometry->intersectionFilterN)
  {
    geometry->intersectionFilterN(args);
    if (!args->valid[0])
      return false;
  }

  /* filter passed through the query context */
  if (context->getFilter())
  {
    const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
    if (invokeArgumentFilter)
      context->getFilter()(args);
    if (!args->valid[0])
      return false;
  }

  /* hit accepted: commit it to the ray */
  RayHit& rayhit = *(RayHit*)args->ray;
  Hit& accepted = *(Hit*)args->hit;
  copyHitToRay(rayhit,accepted);
  return true;
}
/*! Packs ray and hit into RTCFilterFunctionNArguments with N = 1 and a
 *  valid mask of -1, then runs the intersection filters. Returns the result
 *  of runIntersectionFilter1Helper. */
__forceinline bool runIntersectionFilter1(const Geometry* const geometry, RayHit& ray, RayQueryContext* context, Hit& hit)
{
  int accept = -1; // filters clear this to reject the hit

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = 1;
  filterArgs.valid = &accept;
  filterArgs.ray = (RTCRayN*)&ray;
  filterArgs.hit = (RTCHitN*)&hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runIntersectionFilter1Helper(&filterArgs,geometry,context);
}
/*! Runs the occlusion filters for a single-ray hit described by args.
 *  The geometry's occlusionFilterN (if set) runs first. The filter returned
 *  by context->getFilter() (if set) runs afterwards, but only when the
 *  context enforces it or the geometry has argument filter functions.
 *  Returns false as soon as a filter has cleared args->valid[0], true
 *  otherwise. The ray is not modified here. */
__forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, RayQueryContext* context)
{
  /* filter attached to the geometry */
  if (geometry->occlusionFilterN)
  {
    geometry->occlusionFilterN(args);
    if (!args->valid[0])
      return false;
  }

  /* filter passed through the query context */
  if (context->getFilter())
  {
    const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
    if (invokeArgumentFilter)
      context->getFilter()(args);
    if (!args->valid[0])
      return false;
  }

  return true;
}
/*! Packs ray and hit into RTCFilterFunctionNArguments with N = 1 and a
 *  valid mask of -1, then runs the occlusion filters. Returns the result of
 *  runOcclusionFilter1Helper. */
__forceinline bool runOcclusionFilter1(const Geometry* const geometry, Ray& ray, RayQueryContext* context, Hit& hit)
{
  int accept = -1; // filters clear this to reject the hit

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = 1;
  filterArgs.valid = &accept;
  filterArgs.ray = (RTCRayN*)&ray;
  filterArgs.hit = (RTCHitN*)&hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runOcclusionFilter1Helper(&filterArgs,geometry,context);
}
/*! K-wide variant of the intersection filter helper. args->valid is read
 *  as a vint<K> mask. The geometry's intersectionFilterN (if set) runs
 *  first; if lanes remain, the filter from context->getFilter() (if set)
 *  runs when the context enforces it or the geometry has argument filter
 *  functions. For the lanes that are still non-zero the hit is copied into
 *  the ray. Returns the mask of accepted lanes. */
template<int K>
__forceinline vbool<K> runIntersectionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, RayQueryContext* context)
{
  vint<K>* lanes = (vint<K>*) args->valid;

  /* filter attached to the geometry */
  if (geometry->intersectionFilterN)
    geometry->intersectionFilterN(args);

  vbool<K> accepted = *lanes != vint<K>(zero);
  if (none(accepted))
    return accepted;

  /* filter passed through the query context */
  if (context->getFilter())
  {
    const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
    if (invokeArgumentFilter)
      context->getFilter()(args);
  }

  accepted = *lanes != vint<K>(zero);
  if (none(accepted))
    return accepted;

  /* commit the surviving hits to the rays */
  copyHitToRay(accepted,*(RayHitK<K>*)args->ray,*(HitK<K>*)args->hit);
  return accepted;
}
/*! Packs a K-wide ray/hit pair into RTCFilterFunctionNArguments (N = K,
 *  valid mask taken from valid.mask32()) and runs the intersection filters.
 *  Returns the mask of lanes accepted by runIntersectionFilterHelper<K>. */
template<int K>
__forceinline vbool<K> runIntersectionFilter(const vbool<K>& valid, const Geometry* const geometry, RayHitK<K>& ray, RayQueryContext* context, HitK<K>& hit)
{
  vint<K> lanes = valid.mask32(); // integer lane mask that the filters may clear

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = K;
  filterArgs.valid = (int*)&lanes;
  filterArgs.ray = (RTCRayN*)&ray;
  filterArgs.hit = (RTCHitN*)&hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runIntersectionFilterHelper<K>(&filterArgs,geometry,context);
}
/*! K-wide variant of the occlusion filter helper. args->valid is read as a
 *  vint<K> mask. The geometry's occlusionFilterN (if set) runs first; if
 *  lanes remain, the filter from context->getFilter() (if set) runs when
 *  the context enforces it or the geometry has argument filter functions.
 *  For the lanes that are still non-zero, tfar of the ray is set to neg_inf.
 *  Returns the mask of accepted lanes. */
template<int K>
__forceinline vbool<K> runOcclusionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, RayQueryContext* context)
{
  vint<K>* lanes = (vint<K>*) args->valid;

  /* filter attached to the geometry */
  if (geometry->occlusionFilterN)
    geometry->occlusionFilterN(args);

  vbool<K> accepted = *lanes != vint<K>(zero);
  if (none(accepted))
    return accepted;

  /* filter passed through the query context */
  if (context->getFilter())
  {
    const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
    if (invokeArgumentFilter)
      context->getFilter()(args);
  }

  accepted = *lanes != vint<K>(zero);

  /* write neg_inf into tfar of the accepted lanes */
  RayK<K>& rays = *(RayK<K>*) args->ray;
  rays.tfar = select(accepted, vfloat<K>(neg_inf), rays.tfar);
  return accepted;
}
/*! Packs a K-wide ray/hit pair into RTCFilterFunctionNArguments (N = K,
 *  valid mask taken from valid.mask32()) and runs the occlusion filters.
 *  Returns the mask of lanes accepted by runOcclusionFilterHelper<K>. */
template<int K>
__forceinline vbool<K> runOcclusionFilter(const vbool<K>& valid, const Geometry* const geometry, RayK<K>& ray, RayQueryContext* context, HitK<K>& hit)
{
  vint<K> lanes = valid.mask32(); // integer lane mask that the filters may clear

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = K;
  filterArgs.valid = (int*)&lanes;
  filterArgs.ray = (RTCRayN*)&ray;
  filterArgs.hit = (RTCHitN*)&hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runOcclusionFilterHelper<K>(&filterArgs,geometry,context);
}
}
}

View File

@@ -0,0 +1,109 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/geometry.h"
#include "../common/ray.h"
#include "../common/hit.h"
#include "../common/context.h"
namespace embree
{
/*! SYCL variant of the single-ray intersection filter helper. Which filters
 *  run is selected by context->args->feature_mask: the geometry's
 *  intersectionFilterN (only when EMBREE_SYCL_GEOMETRY_CALLBACK is enabled)
 *  and the filter stored in context->args->filter. The latter is only
 *  invoked when the context enforces it or the geometry has argument filter
 *  functions. mask is the valid flag the filters write to; the function
 *  returns false as soon as it is 0 after a filter stage, true otherwise. */
__forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, int& mask, const Geometry* const geometry, RayQueryContext* context)
{
  typedef void (*RTCFilterFunctionSYCL)(const void* args);
  const RTCFeatureFlags feature_mask MAYBE_UNUSED = context->args->feature_mask;

#if EMBREE_SYCL_GEOMETRY_CALLBACK
  /* filter attached to the geometry */
  if (feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY)
  {
    if (RTCFilterFunctionSYCL geometryFilter = (RTCFilterFunctionSYCL) geometry->intersectionFilterN)
    {
      geometryFilter(args);
      if (mask == 0)
        return false;
    }
  }
#endif

  /* filter passed through the query arguments */
  if (feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS)
  {
    if (RTCFilterFunctionSYCL argumentFilter = (RTCFilterFunctionSYCL) context->args->filter)
    {
      const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
      if (invokeArgumentFilter)
        argumentFilter(args);
      if (mask == 0)
        return false;
    }
  }

  return true;
}
/*! SYCL variant of the single-ray occlusion filter helper. Which filters
 *  run is selected by context->args->feature_mask: the geometry's
 *  occlusionFilterN (only when EMBREE_SYCL_GEOMETRY_CALLBACK is enabled)
 *  and the filter stored in context->args->filter. The latter is only
 *  invoked when the context enforces it or the geometry has argument filter
 *  functions. mask is the valid flag the filters write to; the function
 *  returns false as soon as it is 0 after a filter stage, true otherwise. */
__forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, int& mask, const Geometry* const geometry, RayQueryContext* context)
{
  typedef void (*RTCFilterFunctionSYCL)(const void* args);
  const RTCFeatureFlags feature_mask MAYBE_UNUSED = context->args->feature_mask;

#if EMBREE_SYCL_GEOMETRY_CALLBACK
  /* filter attached to the geometry */
  if (feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY)
  {
    if (RTCFilterFunctionSYCL geometryFilter = (RTCFilterFunctionSYCL) geometry->occlusionFilterN)
    {
      geometryFilter(args);
      if (mask == 0)
        return false;
    }
  }
#endif

  /* filter passed through the query arguments */
  if (feature_mask & RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS)
  {
    if (RTCFilterFunctionSYCL argumentFilter = (RTCFilterFunctionSYCL) context->args->filter)
    {
      const bool invokeArgumentFilter = context->enforceArgumentFilterFunction() || geometry->hasArgumentFilterFunctions();
      if (invokeArgumentFilter)
        argumentFilter(args);
      if (mask == 0)
        return false;
    }
  }

  return true;
}
/*! RayHit overload: packs ray and hit into RTCFilterFunctionNArguments with
 *  N = 1 and a valid mask of -1 and runs the intersection filter helper.
 *  Returns the helper's result. */
__forceinline bool runIntersectionFilter1SYCL(Geometry* geometry, RayHit& ray, sycl::private_ptr<RayQueryContext> context, Hit& hit)
{
  int mask = -1; // filters clear this to reject the hit

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = 1;
  filterArgs.valid = &mask;
  filterArgs.ray = (RTCRayN*) &ray;
  filterArgs.hit = (RTCHitN*) &hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runIntersectionFilter1Helper(&filterArgs,mask,geometry,context);
}
/*! Ray overload: packs ray and hit into RTCFilterFunctionNArguments with
 *  N = 1 and a valid mask of -1. Despite the function name, this overload
 *  runs the occlusion filter helper. Returns the helper's result. */
__forceinline bool runIntersectionFilter1SYCL(Geometry* geometry, Ray& ray, sycl::private_ptr<RayQueryContext> context, Hit& hit)
{
  int mask = -1; // filters clear this to reject the hit

  RTCFilterFunctionNArguments filterArgs;
  filterArgs.N = 1;
  filterArgs.valid = &mask;
  filterArgs.ray = (RTCRayN*) &ray;
  filterArgs.hit = (RTCHitN*) &hit;
  filterArgs.context = context->user;
  filterArgs.geometryUserPtr = geometry->userPtr;

  return runOcclusionFilter1Helper(&filterArgs,mask,geometry,context);
}
}

View File

@@ -0,0 +1,99 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "grid_soa.h"
#include "grid_soa_intersector1.h"
#include "grid_soa_intersector_packet.h"
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Thin wrapper around the precalculation type T for single rays; it only
 *  forwards its constructor arguments to T. */
template<typename T>
class SubdivPatch1Precalculations : public T
{
public:
  /*! Forwards the ray and the opaque pointer to the constructor of T. */
  __forceinline SubdivPatch1Precalculations (const Ray& ray_i, const void* ptr_i)
    : T(ray_i,ptr_i)
  {
  }
};
/*! Thin wrapper around the precalculation type T for packets of K rays; it
 *  only forwards its constructor arguments to T. */
template<int K, typename T>
class SubdivPatch1PrecalculationsK : public T
{
public:
  /*! Forwards the valid mask and the ray packet to the constructor of T. */
  __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid_i, RayK<K>& ray_i)
    : T(valid_i,ray_i)
  {
  }
};
/*! Single-ray intersector for GridSOA primitives that forwards to
 *  GridSOAIntersector1. The ty / ty0 parameters are accepted for interface
 *  compatibility and are not used by the forwarding code. */
class Grid1Intersector1
{
public:
  typedef GridSOA Primitive;
  /* NOTE(review): Grid1Precalculations is not declared in the visible part
   * of this file (only SubdivPatch1Precalculations is) - confirm that it is
   * provided by one of the included headers. */
  typedef Grid1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;

  /*! Intersect a ray with the primitive. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
  }

  /*! Overload with an additional leading type argument ty0, which is ignored. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
    intersect(pre,ray,context,prim,ty,lazy_node);
  }

  /*! Test if the ray is occluded by the primitive */
  static __forceinline bool occluded(Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    /* the result has to be returned: flowing off the end of a non-void
     * function is undefined behaviour */
    return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
  }

  /*! Overload with an additional leading type argument ty0, which is ignored. */
  static __forceinline bool occluded(Precalculations& pre, Ray& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
    return occluded(pre,ray,context,prim,ty,lazy_node);
  }

  /*! Point queries are not implemented: asserts and returns false. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) {
    assert(false && "not implemented");
    return false;
  }

  /*! Point queries are not implemented: asserts and returns false. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
    assert(false && "not implemented");
    return false;
  }
};
/*! Ray-packet front end for GridSOA primitives. Every entry point forwards to
 *  GridSOAIntersectorK<K>; the ty argument is accepted but never used. */
template <int K>
struct GridIntersectorK
{
  typedef GridSOA Primitive;
  typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;

  /*! Intersects the rays selected by valid with the primitive. */
  static __forceinline void intersect(const vbool<K>& valid, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
  }

  /*! Occlusion test for the rays selected by valid.
   *  \return the mask reported by GridSOAIntersectorK<K>::occluded */
  static __forceinline vbool<K> occluded(const vbool<K>& valid, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    /* the result has to be returned: flowing off the end of a non-void function is undefined behaviour */
    return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
  }

  /*! Intersects ray k of the packet with the primitive. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
  }

  /*! Occlusion test for ray k of the packet.
   *  \return the result reported by GridSOAIntersectorK<K>::occluded */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  {
    /* the result has to be returned: flowing off the end of a non-void function is undefined behaviour */
    return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
  }
};
/* Aliases for K = 4, 8 and 16.
   NOTE(review): these instantiate Grid1IntersectorK, while the template defined in this
   header is spelled GridIntersectorK - confirm that Grid1IntersectorK is declared elsewhere. */
typedef Grid1IntersectorK<4> SubdivPatch1Intersector4;
typedef Grid1IntersectorK<8> SubdivPatch1Intersector8;
typedef Grid1IntersectorK<16> SubdivPatch1Intersector16;
}
}

View File

@@ -0,0 +1,275 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/scene_subdiv_mesh.h"
#include "../bvh/bvh.h"
#include "../subdiv/tessellation.h"
#include "../subdiv/tessellation_cache.h"
#include "subdivpatch1.h"
namespace embree
{
namespace isa
{
/*! Grid of vertices stored together with a BVH4 over it in one allocation. The object
 *  header is followed by the BVH, then one grid per time step, and finally one root
 *  reference per time step (see the data member at the end of the class). */
class GridSOA
{
public:
/*! GridSOA constructor */
GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
/*! Subgrid creation */
/* Computes the storage needed for the inclusive vertex range [x0,x1] x [y0,y1], obtains it
   from alloc and placement-constructs the GridSOA into it. */
template<typename Allocator>
static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
unsigned x0, unsigned x1, unsigned y0, unsigned y1,
const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
{
const unsigned width = x1-x0+1;
const unsigned height = y1-y0+1;
const GridRange range(0,width-1,0,height-1);
size_t bvhBytes = 0;
/* one time step: a single BVH of AABBNode nodes; otherwise one AABBNodeMB BVH per time
   segment plus a temporal BVH of AABBNodeMB4D nodes on top */
if (time_steps == 1)
bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);
else {
bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);
bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));
}
/* four floats are reserved per grid vertex */
const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
#if !defined(__64BIT__)
rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
#endif
/* header up to the data member + BVH + all grids + roots */
void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
assert(data);
return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);
}
/*! Grid creation */
/* Convenience overload covering the full patch grid: 0..grid_u_res-1 by 0..grid_v_res-1. */
template<typename Allocator>
static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
{
return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
}
/*! returns reference to root */
/* root t lives at byte offset rootOffset + t*sizeof(BVH4::NodeRef) inside data */
__forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
__forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
/*! returns pointer to BVH array */
__forceinline char* bvhData() { return &data[0]; }
__forceinline const char* bvhData() const { return &data[0]; }
/*! returns pointer to Grid array */
/* grid t starts at byte offset gridOffset + t*gridBytes inside data */
__forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }
__forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
/*! packs the vertex index v*width+u into a pointer-sized value: 16*(index+1) */
__forceinline void* encodeLeaf(size_t u, size_t v) {
return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf
}
/*! inverse of encodeLeaf: returns the address of the encoded vertex inside the grid of time step t */
__forceinline float* decodeLeaf(size_t t, const void* ptr) {
return gridData(t) + (((size_t) (ptr) >> 4) - 1);
}
/*! returns the size of the BVH over the grid in bytes */
static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
/*! returns the size of the temporal BVH over the time range BVHs */
static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
/*! calculates bounding box of grid range */
__forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
{
const float* const grid_array = gridData(time);
/* x, y and z are stored as separate arrays, dim_offset floats apart */
const float* const grid_x_array = grid_array + 0 * dim_offset;
const float* const grid_y_array = grid_array + 1 * dim_offset;
const float* const grid_z_array = grid_array + 2 * dim_offset;
/* compute the bounds just for the range! */
BBox3fa bounds( empty );
/* both range ends are inclusive */
for (unsigned v = range.v_start; v<=range.v_end; v++)
{
for (unsigned u = range.u_start; u<=range.u_end; u++)
{
const float x = grid_x_array[ v * width + u];
const float y = grid_y_array[ v * width + u];
const float z = grid_z_array[ v * width + u];
bounds.extend( Vec3fa(x,y,z) );
}
}
assert(is_finite(bounds));
return bounds;
}
/*! Evaluates grid over patch and builds BVH4 tree over the grid. */
std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
/*! Create BVH4 tree over grid. */
std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
/*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
/*! Create MBlur BVH4 tree over grid. */
std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
/*! Create MSMBlur BVH4 tree over grid. */
std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
/*! Functor that replaces the (u,v) pair of a hit by the combination of the encoded
 *  per-vertex UVs gathered through Loader, weighted with u, v and 1-u-v. Ng is not touched. */
template<typename Loader>
struct MapUV
{
typedef typename Loader::vfloat vfloat;
const float* const grid_uv;
size_t line_offset;
size_t lines;
__forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
: grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
__forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const {
/* the encoded UVs are gathered with the same gather pattern as the vertex positions */
const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
u = uv[0];v = uv[1];
}
};
/*! 4-wide loader: reads two grid rows and arranges them into the three vertex vectors of
 *  four triangles (see the lane layout comments on the return statement). */
struct Gather2x3
{
enum { M = 4 };
typedef vbool4 vbool;
typedef vint4 vint;
typedef vfloat4 vfloat;
/* the lines argument is not used by this loader */
static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
{
vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
/* rows with only two elements: repeat element 1 in the remaining lanes */
if (unlikely(line_offset == 2))
{
r0 = shuffle<0,1,1,1>(r0);
r1 = shuffle<0,1,1,1>(r1);
}
return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
}
/* gathers x, y and z separately and regroups them into one Vec3vf4 per triangle vertex */
static __forceinline void gather(const float* const grid_x,
const float* const grid_y,
const float* const grid_z,
const size_t line_offset,
const size_t lines,
Vec3vf4& v0_o,
Vec3vf4& v1_o,
Vec3vf4& v2_o)
{
const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
}
};
#if defined (__AVX__)
/*! 8-wide loader: reads up to three grid rows and arranges them into the three vertex
 *  vectors of eight triangles (see the lane layout comments on the return statement). */
struct Gather3x3
{
enum { M = 8 };
typedef vbool8 vbool;
typedef vint8 vint;
typedef vfloat8 vfloat;
static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
{
vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
vfloat4 rc;
/* without a third row the second row is reused instead of loading one */
if (likely(lines > 2))
rc = vfloat4::loadu(grid + 2*line_offset);
else
rc = rb;
/* rows with only two elements: repeat element 1 in the remaining lanes */
if (unlikely(line_offset == 2))
{
ra = shuffle<0,1,1,1>(ra);
rb = shuffle<0,1,1,1>(rb);
rc = shuffle<0,1,1,1>(rc);
}
/* lower half handles rows a/b, upper half rows b/c */
const vfloat8 r0 = vfloat8(ra,rb);
const vfloat8 r1 = vfloat8(rb,rc);
return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
}
/* gathers x, y and z separately and regroups them into one Vec3vf8 per triangle vertex */
static __forceinline void gather(const float* const grid_x,
const float* const grid_y,
const float* const grid_z,
const size_t line_offset,
const size_t lines,
Vec3vf8& v0_o,
Vec3vf8& v1_o,
Vec3vf8& v2_o)
{
const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
}
};
#endif
/*! Unpacks a UV pair stored in the bit pattern of a float: u is taken from the low 16 bits,
 *  v from the bits above them (srl by 16); both are scaled by 8.0f/0x10000. */
template<typename vfloat>
static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
{
typedef typename vfloat::Int vint;
const vint iu = asInt(uv) & 0xffff;
const vint iv = srl(asInt(uv),16);
const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);
const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);
return Vec2<vfloat>(u,v);
}
/*! returns the stored geometry ID */
__forceinline unsigned int geomID() const {
return _geomID;
}
/*! returns the stored primitive ID */
__forceinline unsigned int primID() const {
return _primID;
}
public:
BVH4::NodeRef troot;
#if !defined(__64BIT__)
unsigned align1;
#endif
unsigned time_steps;
unsigned width; // row stride used to index the grid arrays (v*width+u)
unsigned height;
unsigned dim_offset; // distance in floats between the x, y and z arrays of one grid
unsigned _geomID;
unsigned _primID;
unsigned align2;
unsigned gridOffset; // byte offset of the first grid inside data
unsigned gridBytes; // size in bytes of the grid of one time step
unsigned rootOffset; // byte offset of the first root reference inside data
char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
};
}
}

View File

@@ -0,0 +1,207 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "grid_soa.h"
#include "../common/ray.h"
#include "triangle_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for the block of grid vertices a GridSOA leaf points to. Always
 *  reads the grid of time step 0. */
class GridSOAIntersector1
{
public:
  typedef void Primitive;

  /*! Traversal state. The constructor only sets grid (to nullptr); itime and ftime are
   *  left for other code to fill in. */
  class Precalculations
  {
  public:
    __forceinline Precalculations (const Ray& ray, const void* ptr)
      : grid(nullptr)
    {
    }

  public:
    GridSOA* grid;
    int itime;
    float ftime;
  };

  /*! Gathers one block of triangles through Loader starting at grid_x and intersects the
   *  ray with it using the Pluecker test; hits are reported through Intersect1EpilogMU with
   *  the grid's geomID/primID and UVs remapped by GridSOA::MapUV. */
  template<typename Loader>
  static __forceinline void intersect(RayHit& ray,
                                      RayQueryContext* context,
                                      const float* const grid_x,
                                      const size_t line_offset,
                                      const size_t lines,
                                      Precalculations& pre)
  {
    typedef typename Loader::vfloat simd_float;
    const GridSOA& grid = *pre.grid;

    /* y, z and uv follow x, each one dim_offset floats further on */
    const size_t plane = grid.dim_offset;
    const float* const ys = grid_x + 1 * plane;
    const float* const zs = grid_x + 2 * plane;
    const float* const uvs = grid_x + 3 * plane;

    Vec3<simd_float> p0, p1, p2;
    Loader::gather(grid_x,ys,zs,line_offset,lines,p0,p1,p2);

    GridSOA::MapUV<Loader> uvMapper(uvs,line_offset,lines);
    PlueckerIntersector1<Loader::M> pluecker(ray,nullptr);
    pluecker.intersect(ray,p0,p1,p2,uvMapper,
                       Intersect1EpilogMU<Loader::M,true>(ray,context,grid.geomID(),grid.primID()));
  }

  /*! Occlusion counterpart of the function above: same gathering, but the hit is handled by
   *  Occluded1EpilogMU and the result of the Pluecker test is returned. */
  template<typename Loader>
  static __forceinline bool occluded(Ray& ray,
                                     RayQueryContext* context,
                                     const float* const grid_x,
                                     const size_t line_offset,
                                     const size_t lines,
                                     Precalculations& pre)
  {
    typedef typename Loader::vfloat simd_float;
    const GridSOA& grid = *pre.grid;

    /* y, z and uv follow x, each one dim_offset floats further on */
    const size_t plane = grid.dim_offset;
    const float* const ys = grid_x + 1 * plane;
    const float* const zs = grid_x + 2 * plane;
    const float* const uvs = grid_x + 3 * plane;

    Vec3<simd_float> p0, p1, p2;
    Loader::gather(grid_x,ys,zs,line_offset,lines,p0,p1,p2);

    GridSOA::MapUV<Loader> uvMapper(uvs,line_offset,lines);
    PlueckerIntersector1<Loader::M> pluecker(ray,nullptr);
    return pluecker.intersect(ray,p0,p1,p2,uvMapper,
                              Occluded1EpilogMU<Loader::M,true>(ray,context,grid.geomID(),grid.primID()));
  }

  /*! Intersect a ray with the primitive. With AVX a single Gather3x3 pass is used;
   *  otherwise one Gather2x3 pass, plus a second one shifted by one row when the grid has
   *  more than two rows. lazy_node is not used. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    GridSOA* const grid = pre.grid;
    const size_t stride = grid->width;
    const size_t rows = grid->height;
    const float* const firstRow = grid->decodeLeaf(0,prim);

#if defined(__AVX__)
    intersect<GridSOA::Gather3x3>(ray, context, firstRow, stride, rows, pre);
#else
    intersect<GridSOA::Gather2x3>(ray, context, firstRow, stride, rows, pre);
    if (likely(rows > 2))
      intersect<GridSOA::Gather2x3>(ray, context, firstRow+stride, stride, rows, pre);
#endif
  }

  /*! Test if the ray is occluded by the primitive. Uses the same pass structure as
   *  intersect and stops at the first pass that reports occlusion. lazy_node is not used. */
  static __forceinline bool occluded(Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    GridSOA* const grid = pre.grid;
    const size_t stride = grid->width;
    const size_t rows = grid->height;
    const float* const firstRow = grid->decodeLeaf(0,prim);

#if defined(__AVX__)
    return occluded<GridSOA::Gather3x3>(ray, context, firstRow, stride, rows, pre);
#else
    if (occluded<GridSOA::Gather2x3>(ray, context, firstRow, stride, rows, pre))
      return true;
    if (likely(rows > 2)) {
      if (occluded<GridSOA::Gather2x3>(ray, context, firstRow+stride, stride, rows, pre))
        return true;
    }
    return false;
#endif
  }
};
/*! Motion-blur variant of the single-ray grid intersector: vertices are read from the grid
 *  of time step pre.itime and from the grid stored gridBytes after it, and blended with
 *  pre.ftime before the triangle test. */
class GridSOAMBIntersector1
{
public:
  typedef void Primitive;
  typedef GridSOAIntersector1::Precalculations Precalculations;

  /*! Gathers one block of triangles from two consecutive grids, lerps them with ftime and
   *  intersects the ray with the result; hits go through Intersect1EpilogMU. */
  template<typename Loader>
  static __forceinline void intersect(RayHit& ray, const float ftime,
                                      RayQueryContext* context,
                                      const float* const grid_x,
                                      const size_t line_offset,
                                      const size_t lines,
                                      Precalculations& pre)
  {
    typedef typename Loader::vfloat simd_float;
    const GridSOA& grid = *pre.grid;

    const size_t plane = grid.dim_offset;
    const size_t nextGrid = grid.gridBytes >> 2; // distance to the following grid, in floats
    const float* const ys = grid_x + 1 * plane;
    const float* const zs = grid_x + 2 * plane;
    const float* const uvs = grid_x + 3 * plane;

    /* vertices at the start and at the end of the time segment */
    Vec3<simd_float> begin0, begin1, begin2;
    Loader::gather(grid_x,ys,zs,line_offset,lines,begin0,begin1,begin2);
    Vec3<simd_float> end0, end1, end2;
    Loader::gather(grid_x+nextGrid,ys+nextGrid,zs+nextGrid,line_offset,lines,end0,end1,end2);

    Vec3<simd_float> p0 = lerp(begin0,end0,simd_float(ftime));
    Vec3<simd_float> p1 = lerp(begin1,end1,simd_float(ftime));
    Vec3<simd_float> p2 = lerp(begin2,end2,simd_float(ftime));

    GridSOA::MapUV<Loader> uvMapper(uvs,line_offset,lines);
    PlueckerIntersector1<Loader::M> pluecker(ray,nullptr);
    pluecker.intersect(ray,p0,p1,p2,uvMapper,
                       Intersect1EpilogMU<Loader::M,true>(ray,context,grid.geomID(),grid.primID()));
  }

  /*! Occlusion counterpart of the function above; returns the result of the Pluecker test
   *  run with Occluded1EpilogMU. */
  template<typename Loader>
  static __forceinline bool occluded(Ray& ray, const float ftime,
                                     RayQueryContext* context,
                                     const float* const grid_x,
                                     const size_t line_offset,
                                     const size_t lines,
                                     Precalculations& pre)
  {
    typedef typename Loader::vfloat simd_float;
    const GridSOA& grid = *pre.grid;

    const size_t plane = grid.dim_offset;
    const size_t nextGrid = grid.gridBytes >> 2; // distance to the following grid, in floats
    const float* const ys = grid_x + 1 * plane;
    const float* const zs = grid_x + 2 * plane;
    const float* const uvs = grid_x + 3 * plane;

    /* vertices at the start and at the end of the time segment */
    Vec3<simd_float> begin0, begin1, begin2;
    Loader::gather(grid_x,ys,zs,line_offset,lines,begin0,begin1,begin2);
    Vec3<simd_float> end0, end1, end2;
    Loader::gather(grid_x+nextGrid,ys+nextGrid,zs+nextGrid,line_offset,lines,end0,end1,end2);

    Vec3<simd_float> p0 = lerp(begin0,end0,simd_float(ftime));
    Vec3<simd_float> p1 = lerp(begin1,end1,simd_float(ftime));
    Vec3<simd_float> p2 = lerp(begin2,end2,simd_float(ftime));

    GridSOA::MapUV<Loader> uvMapper(uvs,line_offset,lines);
    PlueckerIntersector1<Loader::M> pluecker(ray,nullptr);
    return pluecker.intersect(ray,p0,p1,p2,uvMapper,
                              Occluded1EpilogMU<Loader::M,true>(ray,context,grid.geomID(),grid.primID()));
  }

  /*! Intersect a ray with the primitive. With AVX a single Gather3x3 pass is used;
   *  otherwise one Gather2x3 pass, plus a second one shifted by one row when the grid has
   *  more than two rows. lazy_node is not used. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    GridSOA* const grid = pre.grid;
    const size_t stride = grid->width;
    const size_t rows = grid->height;
    const float* const firstRow = grid->decodeLeaf(pre.itime,prim);

#if defined(__AVX__)
    intersect<GridSOA::Gather3x3>(ray, pre.ftime, context, firstRow, stride, rows, pre);
#else
    intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, firstRow, stride, rows, pre);
    if (likely(rows > 2))
      intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, firstRow+stride, stride, rows, pre);
#endif
  }

  /*! Test if the ray is occluded by the primitive. Uses the same pass structure as
   *  intersect and stops at the first pass that reports occlusion. lazy_node is not used. */
  static __forceinline bool occluded(Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    GridSOA* const grid = pre.grid;
    const size_t stride = grid->width;
    const size_t rows = grid->height;
    const float* const firstRow = grid->decodeLeaf(pre.itime,prim);

#if defined(__AVX__)
    return occluded<GridSOA::Gather3x3>(ray, pre.ftime, context, firstRow, stride, rows, pre);
#else
    if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, firstRow, stride, rows, pre))
      return true;
    if (likely(rows > 2)) {
      if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, firstRow+stride, stride, rows, pre))
        return true;
    }
    return false;
#endif
  }
};
}
}

View File

@@ -0,0 +1,445 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "grid_soa.h"
#include "../common/ray.h"
#include "triangle_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! UV mapper for a triangle whose vertices sit at grid offsets ofs00, ofs01 and ofs10.
 *  Replaces (u,v) by the combination of the three decoded vertex UVs weighted with
 *  1-u-v, u and v. ofs11 is stored for a uniform constructor signature but is not read;
 *  Ng is not touched. */
template<int K>
struct MapUV0
{
  const float* const grid_uv;
  size_t ofs00, ofs01, ofs10, ofs11;

  __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
    : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}

  __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
    /* broadcast the encoded UV of each vertex that takes part in this triangle */
    const vfloat<K> uv00(grid_uv[ofs00]);
    const vfloat<K> uv01(grid_uv[ofs01]);
    const vfloat<K> uv10(grid_uv[ofs10]);
    const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);
    const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
    const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);
    const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
    u = uv[0]; v = uv[1];
  }
};
/*! UV mapper for a triangle whose vertices sit at grid offsets ofs10, ofs01 and ofs11.
 *  Replaces (u,v) by the combination of the three decoded vertex UVs weighted with
 *  1-u-v, u and v. ofs00 is stored for a uniform constructor signature but is not read;
 *  Ng is not touched. */
template<int K>
struct MapUV1
{
  const float* const grid_uv;
  size_t ofs00, ofs01, ofs10, ofs11;

  __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
    : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}

  __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
    /* broadcast the encoded UV of each vertex that takes part in this triangle */
    const vfloat<K> uv01(grid_uv[ofs01]);
    const vfloat<K> uv10(grid_uv[ofs10]);
    const vfloat<K> uv11(grid_uv[ofs11]);
    const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);
    const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
    const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);
    const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
    u = uv[0]; v = uv[1];
  }
};
/*! Intersector for packets of K rays against the block of grid vertices a GridSOA leaf
 *  points to (time step 0). Offers packet-wide entry points (valid mask) and entry points
 *  for a single ray k of the packet. */
template<int K>
class GridSOAIntersectorK
{
public:
  typedef void Primitive;

  /*! Traversal state: the grid being visited (nullptr after construction) and a Pluecker
   *  intersector built from the packet. M is 8 with AVX and 4 otherwise. */
  class Precalculations
  {
#if defined(__AVX__)
    static const int M = 8;
#else
    static const int M = 4;
#endif

  public:
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)
      : grid(nullptr), intersector(valid,ray) {}

  public:
    GridSOA* grid;
    PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
  };

  /*! Intersect a ray with the primitive. Walks the quads of the leaf (one per direction
   *  when width/height is 2, otherwise two) and tests both triangles of each quad against
   *  the whole packet. lazy_node is not used. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    const size_t dim_offset = pre.grid->dim_offset;
    const size_t line_offset = pre.grid->width;
    const float* const grid_x = pre.grid->decodeLeaf(0,prim);
    const float* const grid_y = grid_x + 1 * dim_offset;
    const float* const grid_z = grid_x + 2 * dim_offset;
    const float* const grid_uv = grid_x + 3 * dim_offset;

    const size_t max_x = pre.grid->width == 2 ? 1 : 2;
    const size_t max_y = pre.grid->height == 2 ? 1 : 2;
    for (size_t y=0; y<max_y; y++)
    {
      for (size_t x=0; x<max_x; x++)
      {
        /* offsets of the four corners of quad (x,y) */
        const size_t ofs00 = (y+0)*line_offset+(x+0);
        const size_t ofs01 = (y+0)*line_offset+(x+1);
        const size_t ofs10 = (y+1)*line_offset+(x+0);
        const size_t ofs11 = (y+1)*line_offset+(x+1);
        const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
        const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
        const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
        const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
        pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
        pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
      }
    }
  }

  /*! Test if the ray is occluded by the primitive. The local mask valid tracks the rays
   *  that still need testing; the function returns its negation. lazy_node is not used. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    const size_t dim_offset = pre.grid->dim_offset;
    const size_t line_offset = pre.grid->width;
    const float* const grid_x = pre.grid->decodeLeaf(0,prim);
    const float* const grid_y = grid_x + 1 * dim_offset;
    const float* const grid_z = grid_x + 2 * dim_offset;
    const float* const grid_uv = grid_x + 3 * dim_offset;

    vbool<K> valid = valid_i;
    const size_t max_x = pre.grid->width == 2 ? 1 : 2;
    const size_t max_y = pre.grid->height == 2 ? 1 : 2;
    for (size_t y=0; y<max_y; y++)
    {
      for (size_t x=0; x<max_x; x++)
      {
        /* offsets of the four corners of quad (x,y) */
        const size_t ofs00 = (y+0)*line_offset+(x+0);
        const size_t ofs01 = (y+0)*line_offset+(x+1);
        const size_t ofs10 = (y+1)*line_offset+(x+0);
        const size_t ofs11 = (y+1)*line_offset+(x+1);
        const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
        const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
        const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
        const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);

        /* Once no ray is left, leave the function right away. A plain break would only exit
           the x-loop and the y-loop would keep testing with an empty mask. */
        pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
        if (none(valid)) return !valid;
        pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
        if (none(valid)) return !valid;
      }
    }
    return !valid;
  }

  /*! Gathers one block of triangles through Loader starting at grid_x and intersects ray k
   *  of the packet with it; hits go through Intersect1KEpilogMU with UVs remapped by
   *  GridSOA::MapUV. */
  template<typename Loader>
  static __forceinline void intersect(RayHitK<K>& ray, size_t k,
                                      RayQueryContext* context,
                                      const float* const grid_x,
                                      const size_t line_offset,
                                      const size_t lines,
                                      Precalculations& pre)
  {
    typedef typename Loader::vfloat vfloat;
    const size_t dim_offset = pre.grid->dim_offset;
    const float* const grid_y = grid_x + 1 * dim_offset;
    const float* const grid_z = grid_x + 2 * dim_offset;
    const float* const grid_uv = grid_x + 3 * dim_offset;
    Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
    pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  }

  /*! Occlusion counterpart of the function above; returns the result of the triangle test
   *  run with Occluded1KEpilogMU. */
  template<typename Loader>
  static __forceinline bool occluded(RayK<K>& ray, size_t k,
                                     RayQueryContext* context,
                                     const float* const grid_x,
                                     const size_t line_offset,
                                     const size_t lines,
                                     Precalculations& pre)
  {
    typedef typename Loader::vfloat vfloat;
    const size_t dim_offset = pre.grid->dim_offset;
    const float* const grid_y = grid_x + 1 * dim_offset;
    const float* const grid_z = grid_x + 2 * dim_offset;
    const float* const grid_uv = grid_x + 3 * dim_offset;
    Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
    return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  }

  /*! Intersect a ray with the primitive (ray k of the packet). With AVX a single Gather3x3
   *  pass is used; otherwise one Gather2x3 pass, plus a second one shifted by one row when
   *  the grid has more than two rows. lazy_node is not used. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    const size_t line_offset = pre.grid->width;
    const size_t lines = pre.grid->height;
    const float* const grid_x = pre.grid->decodeLeaf(0,prim);
#if defined(__AVX__)
    intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
#else
    intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);
    if (likely(lines > 2))
      intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
#endif
  }

  /*! Test if the ray is occluded by the primitive (ray k of the packet). Uses the same pass
   *  structure as intersect and stops at the first pass that reports occlusion. lazy_node
   *  is not used. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    const size_t line_offset = pre.grid->width;
    const size_t lines = pre.grid->height;
    const float* const grid_x = pre.grid->decodeLeaf(0,prim);
#if defined(__AVX__)
    return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
#else
    if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;
    if (likely(lines > 2))
      if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
#endif
    return false;
  }
};
template<int K>
class GridSOAMBIntersectorK
{
public:
typedef void Primitive;
typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;
/*! Intersect a ray with the primitive. The active rays are split into groups that share
 *  the same time segment index (from getTimeSegment); each group is handed to the
 *  fixed-segment overload together with the per-ray fractional times. */
static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
  vfloat<K> vftime;
  vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);

  for (vbool<K> pending = valid_i; any(pending); )
  {
    /* take the segment of the first pending ray and collect every ray that shares it */
    const size_t lane = bsf(movemask(pending));
    const int itime = vitime[lane];
    const vbool<K> group = pending & (itime == vitime);
    pending = pending & !group;

    intersect(group,pre,ray,vftime,itime,context,prim,lazy_node);
  }
}
/*! Intersect a ray with the primitive. Packet version for rays that all use time segment
 *  itime: the quad corners are read from the grid of time step itime and from the grid
 *  stored gridBytes after it, blended per ray with ftime, and both triangles of each quad
 *  are tested against the packet. lazy_node is not used. */
static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
/* distance in floats from one grid to the following one */
const size_t grid_offset = pre.grid->gridBytes >> 2;
const size_t dim_offset = pre.grid->dim_offset;
const size_t line_offset = pre.grid->width;
const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
const float* const grid_y = grid_x + 1 * dim_offset;
const float* const grid_z = grid_x + 2 * dim_offset;
const float* const grid_uv = grid_x + 3 * dim_offset;
/* one quad per direction when width/height is 2, otherwise two */
const size_t max_x = pre.grid->width == 2 ? 1 : 2;
const size_t max_y = pre.grid->height == 2 ? 1 : 2;
for (size_t y=0; y<max_y; y++)
{
for (size_t x=0; x<max_x; x++)
{
size_t ofs00 = (y+0)*line_offset+(x+0);
size_t ofs01 = (y+0)*line_offset+(x+1);
size_t ofs10 = (y+1)*line_offset+(x+0);
size_t ofs11 = (y+1)*line_offset+(x+1);
/* quad corners at the start of the time segment */
const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
/* move the offsets on to the following grid; they stay advanced for the UV lookup below */
ofs00 += grid_offset;
ofs01 += grid_offset;
ofs10 += grid_offset;
ofs11 += grid_offset;
/* quad corners at the end of the time segment */
const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
const Vec3vf<K> p00 = lerp(a00,b00,ftime);
const Vec3vf<K> p01 = lerp(a01,b01,ftime);
const Vec3vf<K> p10 = lerp(a10,b10,ftime);
const Vec3vf<K> p11 = lerp(a11,b11,ftime);
/* the UV mappers receive the advanced offsets, i.e. they read the UVs of the following grid */
pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
}
}
}
/*! Test if the ray is occluded by the primitive. The active rays are split into groups
 *  that share the same time segment index; each group is passed to the fixed-segment
 *  overload, whose result is the mask of rays of that group that remain unoccluded. The
 *  negation of the accumulated mask is returned. */
static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
  vfloat<K> vftime;
  vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);

  vbool<K> unoccluded = valid_i;
  for (vbool<K> pending = valid_i; any(pending); )
  {
    /* take the segment of the first pending ray and collect every ray that shares it */
    const int lane = int(bsf(movemask(pending)));
    const int itime = vitime[lane];
    const vbool<K> group = pending & (itime == vitime);
    pending = pending & !group;

    /* rays outside the group keep their state, rays inside take the overload's answer */
    unoccluded &= !group | occluded(group,pre,ray,vftime,itime,context,prim,lazy_node);
  }
  return !unoccluded;
}
/*! Occlusion test for a packet whose rays all use time segment itime. The quad corners are
 *  read from the grid of time step itime and from the grid stored gridBytes after it, and
 *  blended per ray with ftime. lazy_node is not used.
 *  \return the local mask valid after all tests, i.e. the rays of valid_i that are still
 *          to be considered unoccluded. Note that this is NOT negated, unlike the result
 *          of the time-dispatching overload. */
static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
  /* distance in floats from one grid to the following one */
  const size_t grid_offset = pre.grid->gridBytes >> 2;
  const size_t dim_offset = pre.grid->dim_offset;
  const size_t line_offset = pre.grid->width;
  const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  const float* const grid_y = grid_x + 1 * dim_offset;
  const float* const grid_z = grid_x + 2 * dim_offset;
  const float* const grid_uv = grid_x + 3 * dim_offset;

  vbool<K> valid = valid_i;
  /* one quad per direction when width/height is 2, otherwise two */
  const size_t max_x = pre.grid->width == 2 ? 1 : 2;
  const size_t max_y = pre.grid->height == 2 ? 1 : 2;
  for (size_t y=0; y<max_y; y++)
  {
    for (size_t x=0; x<max_x; x++)
    {
      size_t ofs00 = (y+0)*line_offset+(x+0);
      size_t ofs01 = (y+0)*line_offset+(x+1);
      size_t ofs10 = (y+1)*line_offset+(x+0);
      size_t ofs11 = (y+1)*line_offset+(x+1);
      /* quad corners at the start of the time segment */
      const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
      const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
      const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
      const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
      /* move the offsets on to the following grid; they stay advanced for the UV lookup below */
      ofs00 += grid_offset;
      ofs01 += grid_offset;
      ofs10 += grid_offset;
      ofs11 += grid_offset;
      /* quad corners at the end of the time segment */
      const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
      const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
      const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
      const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
      const Vec3vf<K> p00 = lerp(a00,b00,ftime);
      const Vec3vf<K> p01 = lerp(a01,b01,ftime);
      const Vec3vf<K> p10 = lerp(a10,b10,ftime);
      const Vec3vf<K> p11 = lerp(a11,b11,ftime);

      /* Once no ray is left, leave the function right away. A plain break would only exit
         the x-loop and the y-loop would keep testing with an empty mask. */
      pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
      if (none(valid)) return valid;
      pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
      if (none(valid)) return valid;
    }
  }
  return valid;
}
/*! Intersects ray k of the packet with the triangles that Loader gathers
 *  from the grid rows starting at grid_x.
 *
 *  Two vertex sets are gathered - one at grid_x and one gridBytes/4 floats
 *  further on - and blended with ftime before being passed to the
 *  intersector together with an Intersect1KEpilogMU epilog.
 *
 *  \param ray         ray/hit packet
 *  \param k           lane of the packet to intersect
 *  \param ftime       blend factor between the two vertex sets
 *  \param context     ray query context forwarded to the epilog
 *  \param grid_x      start of the x plane; y, z and uv planes follow at
 *                     multiples of dim_offset floats
 *  \param line_offset forwarded to Loader::gather and GridSOA::MapUV
 *  \param lines       forwarded to Loader::gather and GridSOA::MapUV
 *  \param pre         precalculations; provides the grid and the intersector
 */
template<typename Loader>
static __forceinline void intersect(RayHitK<K>& ray, size_t k,
                                    const float ftime,
                                    RayQueryContext* context,
                                    const float* const grid_x,
                                    const size_t line_offset,
                                    const size_t lines,
                                    Precalculations& pre)
{
  using vfloatM = typename Loader::vfloat;

  /* locate the remaining data planes relative to the x plane */
  const size_t plane_stride = pre.grid->dim_offset;
  const float* const grid_y  = grid_x + 1 * plane_stride;
  const float* const grid_z  = grid_x + 2 * plane_stride;
  const float* const grid_uv = grid_x + 3 * plane_stride;

  /* first vertex set */
  Vec3<vfloatM> a0, a1, a2;
  Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);

  /* second vertex set, gridBytes/4 floats further on */
  const size_t next_set = pre.grid->gridBytes >> 2;
  Vec3<vfloatM> b0, b1, b2;
  Loader::gather(grid_x+next_set,grid_y+next_set,grid_z+next_set,line_offset,lines,b0,b1,b2);

  /* blend both sets with ftime */
  const vfloatM blend(ftime);
  Vec3<vfloatM> v0 = lerp(a0,b0,blend);
  Vec3<vfloatM> v1 = lerp(a1,b1,blend);
  Vec3<vfloatM> v2 = lerp(a2,b2,blend);

  pre.intersector.intersect(ray,k,v0,v1,v2,
                            GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),
                            Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
}
/*! Occlusion counterpart of intersect<Loader>: gathers two vertex sets
 *  (at grid_x and gridBytes/4 floats further on), blends them with ftime
 *  and runs the intersector with an Occluded1KEpilogMU epilog.
 *
 *  \param ray         ray packet
 *  \param k           lane of the packet to test
 *  \param ftime       blend factor between the two vertex sets
 *  \param context     ray query context forwarded to the epilog
 *  \param grid_x      start of the x plane; y, z and uv planes follow at
 *                     multiples of dim_offset floats
 *  \param line_offset forwarded to Loader::gather and GridSOA::MapUV
 *  \param lines       forwarded to Loader::gather and GridSOA::MapUV
 *  \param pre         precalculations; provides the grid and the intersector
 *  \return the result of the intersector call
 */
template<typename Loader>
static __forceinline bool occluded(RayK<K>& ray, size_t k,
                                   const float ftime,
                                   RayQueryContext* context,
                                   const float* const grid_x,
                                   const size_t line_offset,
                                   const size_t lines,
                                   Precalculations& pre)
{
  using vfloatM = typename Loader::vfloat;

  /* locate the remaining data planes relative to the x plane */
  const size_t plane_stride = pre.grid->dim_offset;
  const float* const grid_y  = grid_x + 1 * plane_stride;
  const float* const grid_z  = grid_x + 2 * plane_stride;
  const float* const grid_uv = grid_x + 3 * plane_stride;

  /* first vertex set */
  Vec3<vfloatM> a0, a1, a2;
  Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);

  /* second vertex set, gridBytes/4 floats further on */
  const size_t next_set = pre.grid->gridBytes >> 2;
  Vec3<vfloatM> b0, b1, b2;
  Loader::gather(grid_x+next_set,grid_y+next_set,grid_z+next_set,line_offset,lines,b0,b1,b2);

  /* blend both sets with ftime */
  const vfloatM blend(ftime);
  Vec3<vfloatM> v0 = lerp(a0,b0,blend);
  Vec3<vfloatM> v1 = lerp(a1,b1,blend);
  Vec3<vfloatM> v2 = lerp(a2,b2,blend);

  return pre.intersector.intersect(ray,k,v0,v1,v2,
                                   GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),
                                   Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
}
/*! Intersect ray k of the packet with the primitive.
 *
 *  getTimeSegment yields the time segment index for the ray's time and fills
 *  in ftime; the leaf is decoded for that segment. With AVX a single 3x3
 *  gather is used. Otherwise a 2x3 gather handles the first row pair and,
 *  when the grid has more than two lines, a second 2x3 gather shifted by one
 *  line handles the rest.
 *
 *  lazy_node is not used by this function.
 */
static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
  float ftime;
  const int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);

  const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  const size_t line_offset = pre.grid->width;
  const size_t lines = pre.grid->height;

#if defined(__AVX__)
  intersect<GridSOA::Gather3x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
#else
  intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
  if (likely(lines > 2)) {
    const float* const second_row = grid_x + line_offset;
    intersect<GridSOA::Gather2x3>(ray, k, ftime, context, second_row, line_offset, lines, pre);
  }
#endif
}
/*! Test if ray k of the packet is occluded by the primitive.
 *
 *  Mirrors the single-lane intersect(): the leaf is decoded for the time
 *  segment returned by getTimeSegment and tested with one 3x3 gather (AVX)
 *  or with up to two 2x3 gathers (otherwise). Returns true as soon as one
 *  of the gathers reports occlusion.
 *
 *  lazy_node is not used by this function.
 */
static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
{
  float ftime;
  const int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);

  const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  const size_t line_offset = pre.grid->width;
  const size_t lines = pre.grid->height;

#if defined(__AVX__)
  return occluded<GridSOA::Gather3x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
#else
  if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre))
    return true;

  /* the second row pair only exists for grids with more than two lines */
  if (likely(lines > 2)) {
    const float* const second_row = grid_x + line_offset;
    if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, second_row, line_offset, lines, pre))
      return true;
  }
  return false;
#endif
}
};
}
}

View File

@@ -0,0 +1,78 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../common/scene_instance.h"
namespace embree
{
/*! Leaf primitive that refers to a single Instance geometry. It stores the
 *  instance pointer together with the geometry ID it was looked up with. */
struct InstancePrimitive
{
  /* Interface to query information about the primitive type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* primitive supports multiple time segments */
  static const bool singleTimeSegment = false;

  /* Returns maximum number of stored primitives */
  static __forceinline size_t max_size() { return 1; }

  /* Returns required number of primitive blocks for N primitives */
  static __forceinline size_t blocks(size_t N) { return N; }

public:

  /* Construction from instance pointer and instance ID */
  InstancePrimitive (const Instance* instance, unsigned int instID)
    : instance(instance)
    , instID_(instID)
  {}

  /* Re-constructs this object in place from the primitive reference at
     prims[i] and advances i; exactly one reference is expected */
  __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
  {
    assert(end-i == 1);
    const unsigned int geomID = prims[i++].geomID();
    const Instance* const inst = scene->get<Instance>(geomID);
    new (this) InstancePrimitive(inst, geomID);
  }

  /* Same as fill(), additionally returns the linear bounds of the
     instance for time segment itime */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
  {
    assert(end-i == 1);
    const unsigned int geomID = prims[i++].geomID();
    const Instance* const inst = scene->get<Instance>(geomID);
    new (this) InstancePrimitive(inst, geomID);
    return inst->linearBounds(0,itime);
  }

  /* Same as fill(), additionally returns the linear bounds of the
     instance over time_range */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
  {
    assert(end-i == 1);
    const unsigned int geomID = prims[i++].geomID();
    const Instance* const inst = scene->get<Instance>(geomID);
    new (this) InstancePrimitive(inst, geomID);
    return inst->linearBounds(0,time_range);
  }

  /* Updates the primitive; returns the bounds of the passed instance */
  __forceinline BBox3fa update(Instance* instance) {
    return instance->bounds(0);
  }

public:
  const Instance* instance;
  const unsigned int instID_ = std::numeric_limits<unsigned int>::max ();
};
}

View File

@@ -0,0 +1,85 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../common/scene_instance_array.h"
namespace embree
{
//template<int M>
/*! Leaf primitive that refers to one element of an InstanceArray geometry.
 *  It stores the geometry ID of the array (instID_) and the index of the
 *  element inside the array (primID_). */
struct InstanceArrayPrimitive
{
  /* Interface to query information about the primitive type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* primitive supports multiple time segments */
  static const bool singleTimeSegment = false;

  /* Returns maximum number of stored primitives */
  static __forceinline size_t max_size() { return 1; }

  /* Returns required number of primitive blocks for N primitives */
  static __forceinline size_t blocks(size_t N) { return N; }

public:

  /* Construction from the geometry ID of the array and the element index */
  InstanceArrayPrimitive (const uint32_t geomID, const uint32_t primID)
    : primID_(primID)
    , instID_(geomID)
  {}

  /* A primitive ID of all ones marks an invalid primitive. The cast makes
     the unsigned comparison explicit; the result is the same as comparing
     against a plain -1. */
  __forceinline bool valid() const {
    return primID_ != static_cast<unsigned int>(-1);
  }

  /* Re-constructs this object in place from the primitive reference at
     prims[i] and advances i; exactly one reference is expected.
     The scene is not needed here. */
  void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
  {
    assert(end-i == 1);
    const PrimRef& prim = prims[i]; i++;
    const unsigned int geomID = prim.geomID();
    const unsigned int primID = prim.primID();
    new (this) InstanceArrayPrimitive(geomID, primID);
  }

  /* Same as fill(), additionally returns the linear bounds of element
     primID of the instance array for time segment itime */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
  {
    assert(end-i == 1);
    const PrimRef& prim = prims[i]; i++;
    const unsigned int geomID = prim.geomID();
    const size_t primID = prim.primID();
    new (this) InstanceArrayPrimitive(geomID, primID);
    const InstanceArray* instanceArray = scene->get<InstanceArray>(geomID);
    return instanceArray->linearBounds(primID,itime);
  }

  /* Same as fill(), additionally returns the linear bounds of element
     primID of the instance array over time_range */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
  {
    assert(end-i == 1);
    const PrimRefMB& prim = prims[i]; i++;
    const unsigned int geomID = prim.geomID();
    const size_t primID = prim.primID();
    new (this) InstanceArrayPrimitive(geomID, primID);
    const InstanceArray* instanceArray = scene->get<InstanceArray>(geomID);
    return instanceArray->linearBounds(primID,time_range);
  }

  /* Updates the primitive; returns the bounds of the array element this
     primitive refers to. The element index has to be the stored primID_
     (the fillMB overloads query bounds with the same index); a constant 0
     would report the bounds of the first element for every primitive. */
  __forceinline BBox3fa update(InstanceArray* instanceArray) {
    return instanceArray->bounds(primID_);
  }

public:
  unsigned int primID_;
  unsigned int instID_;
};
}

View File

@@ -0,0 +1,85 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "instance_array.h"
#include "../common/ray.h"
#include "../common/point_query.h"
#include "../common/scene.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector interface for instance array primitives.
 *  The query functions are only declared here. */
struct InstanceArrayIntersector1
{
  using Primitive = InstanceArrayPrimitive;

  /* nothing is precomputed per ray; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const Ray& ray, const void *ptr) {}
  };

  static void intersect (const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim);
  static bool occluded  (const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim);
  static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
};
/*! Single-ray intersector interface for instance array primitives, MB
 *  variant. The query functions are only declared here. */
struct InstanceArrayIntersector1MB
{
  using Primitive = InstanceArrayPrimitive;

  /* nothing is precomputed per ray; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const Ray& ray, const void *ptr) {}
  };

  static void intersect (const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim);
  static bool occluded  (const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim);
  static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
};
/*! Packet intersector interface (K rays) for instance array primitives.
 *  The packet functions are only declared here; the single-lane overloads
 *  forward to them with a one-lane mask. */
template<int K>
struct InstanceArrayIntersectorK
{
  using Primitive = InstanceArrayPrimitive;

  /* nothing is precomputed per packet; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
  };

  static void     intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& prim);
  static vbool<K> occluded (const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& prim);

  /* Intersects lane k only: calls the packet version with a mask built from 1<<k */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    intersect(lane_mask,pre,ray,context,prim);
  }

  /* Occlusion test for lane k only: runs the packet version with a mask
     built from 1<<k and reports occlusion when tfar[k] is negative afterwards */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    occluded(lane_mask,pre,ray,context,prim);
    return ray.tfar[k] < 0.0f;
  }
};
/*! Packet intersector interface (K rays) for instance array primitives, MB
 *  variant. The packet functions are only declared here; the single-lane
 *  overloads forward to them with a one-lane mask. */
template<int K>
struct InstanceArrayIntersectorKMB
{
  using Primitive = InstanceArrayPrimitive;

  /* nothing is precomputed per packet; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
  };

  static void     intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& prim);
  static vbool<K> occluded (const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& prim);

  /* Intersects lane k only: calls the packet version with a mask built from 1<<k */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    intersect(lane_mask,pre,ray,context,prim);
  }

  /* Occlusion test for lane k only: runs the packet version with a mask
     built from 1<<k and reports occlusion when tfar[k] is negative afterwards */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    occluded(lane_mask,pre,ray,context,prim);
    return ray.tfar[k] < 0.0f;
  }
};
}
}

View File

@@ -0,0 +1,84 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "instance.h"
#include "../common/ray.h"
#include "../common/point_query.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector interface for instance primitives.
 *  The query functions are only declared here. */
struct InstanceIntersector1
{
  using Primitive = InstancePrimitive;

  /* nothing is precomputed per ray; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const Ray& ray, const void *ptr) {}
  };

  static void intersect (const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim);
  static bool occluded  (const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim);
  static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
};
/*! Single-ray intersector interface for instance primitives, MB variant.
 *  The query functions are only declared here. */
struct InstanceIntersector1MB
{
  using Primitive = InstancePrimitive;

  /* nothing is precomputed per ray; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const Ray& ray, const void *ptr) {}
  };

  static void intersect (const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim);
  static bool occluded  (const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim);
  static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
};
/*! Packet intersector interface (K rays) for instance primitives.
 *  The packet functions are only declared here; the single-lane overloads
 *  forward to them with a one-lane mask. */
template<int K>
struct InstanceIntersectorK
{
  using Primitive = InstancePrimitive;

  /* nothing is precomputed per packet; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
  };

  static void     intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& prim);
  static vbool<K> occluded (const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& prim);

  /* Intersects lane k only: calls the packet version with a mask built from 1<<k */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    intersect(lane_mask,pre,ray,context,prim);
  }

  /* Occlusion test for lane k only: runs the packet version with a mask
     built from 1<<k and reports occlusion when tfar[k] is negative afterwards */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    occluded(lane_mask,pre,ray,context,prim);
    return ray.tfar[k] < 0.0f;
  }
};
/*! Packet intersector interface (K rays) for instance primitives, MB
 *  variant. The packet functions are only declared here; the single-lane
 *  overloads forward to them with a one-lane mask. */
template<int K>
struct InstanceIntersectorKMB
{
  using Primitive = InstancePrimitive;

  /* nothing is precomputed per packet; both arguments are ignored */
  struct Precalculations {
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
  };

  static void     intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& prim);
  static vbool<K> occluded (const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& prim);

  /* Intersects lane k only: calls the packet version with a mask built from 1<<k */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    intersect(lane_mask,pre,ray,context,prim);
  }

  /* Occlusion test for lane k only: runs the packet version with a mask
     built from 1<<k and reports occlusion when tfar[k] is negative afterwards */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim)
  {
    const vbool<K> lane_mask(1<<int(k));
    occluded(lane_mask,pre,ray,context,prim);
    return ray.tfar[k] < 0.0f;
  }
};
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,207 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/context.h"
#include "filter_sycl.h"
namespace embree
{
/* Primary template of the single-ray hit epilog; it is only declared here.
   Explicit specializations for RayHit and Ray follow in this file. */
template<typename Ray>
struct Intersect1Epilog1_HWIF;
/*! Hit epilog for RayHit: validates a candidate hit (ray mask, optional
 *  intersection filter) and, if it is accepted, writes it into the ray.
 *
 *  Two call operators exist. The first reads the hit through data members
 *  (u, v, t, Ng); the second, selected by an extra leading bool argument,
 *  reads it through accessor functions (uv(), t(), Ng()). */
template<>
struct Intersect1Epilog1_HWIF<RayHit>
{
  RayHit& ray;
  sycl::private_ptr<RayQueryContext> context;
  const unsigned int geomID;
  const unsigned int primID;
  const bool filter;

  __forceinline Intersect1Epilog1_HWIF(RayHit& ray,
                                       sycl::private_ptr<RayQueryContext> context,
                                       const unsigned int geomID,
                                       const unsigned int primID,
                                       const bool filter)
    : ray(ray), context(context), geomID(geomID), primID(primID), filter(filter) {}

  /*! Processes a hit that exposes u, v, t and Ng as data members.
   *  Returns false if the hit was rejected by the ray mask or the filter,
   *  true after the hit has been stored in the ray. */
  template<typename Hit_i>
  __forceinline bool operator() (Hit_i& hit_i) const
  {
    hit_i.finalize();

    Scene* scene MAYBE_UNUSED = context->scene;
    Geometry* geometry MAYBE_UNUSED = scene->get(geomID);

    /* ray mask test */
#if defined(EMBREE_RAY_MASK)
    if ((geometry->mask & ray.mask) == 0)
      return false;
#endif

    /* call intersection filter function */
#if defined(EMBREE_FILTER_FUNCTION)
    if (filter && (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())))
    {
      Hit candidate(context->user,geomID,primID,Vec2f(hit_i.u,hit_i.v),hit_i.Ng);

      /* the filter runs with tfar already set to the candidate distance;
         the previous value is restored if the filter rejects the hit */
      const float saved_tfar = ray.tfar;
      ray.tfar = hit_i.t;
      const bool accepted = runIntersectionFilter1SYCL(geometry,ray,context,candidate);
      if (!accepted) {
        ray.tfar = saved_tfar;
        return false;
      }
    }
#endif

    /* commit the hit */
    ray.tfar = hit_i.t;
    ray.u = hit_i.u;
    ray.v = hit_i.v;
    ray.Ng.x = hit_i.Ng.x;
    ray.Ng.y = hit_i.Ng.y;
    ray.Ng.z = hit_i.Ng.z;
    ray.geomID = geomID;
    ray.primID = primID;
    instance_id_stack::copy_UU(context->user, context->user->instID, ray.instID);
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
    instance_id_stack::copy_UU(context->user, context->user->instPrimID, ray.instPrimID);
#endif
    return true;
  }

  /*! Processes a hit that exposes uv(), t() and Ng() as accessor functions.
   *  The unnamed bool argument only selects this overload.
   *  Returns false if the hit was rejected by the ray mask or the filter,
   *  true after the hit has been stored in the ray. */
  template<typename Hit_i>
  __forceinline bool operator() (bool, Hit_i& hit_i) const
  {
    hit_i.finalize();

    Scene* scene MAYBE_UNUSED = context->scene;
    Geometry* geometry MAYBE_UNUSED = scene->get(geomID);

    /* ray mask test */
#if defined(EMBREE_RAY_MASK)
    if ((geometry->mask & ray.mask) == 0)
      return false;
#endif

    const Vec3fa Ng = hit_i.Ng();
    const Vec2f uv = hit_i.uv();

    /* call intersection filter function */
#if defined(EMBREE_FILTER_FUNCTION)
    if (filter && (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())))
    {
      Hit candidate(context->user,geomID,primID,uv,Ng);

      /* the filter runs with tfar already set to the candidate distance;
         the previous value is restored if the filter rejects the hit */
      const float saved_tfar = ray.tfar;
      ray.tfar = hit_i.t();
      const bool accepted = runIntersectionFilter1SYCL(geometry,ray,context,candidate);
      if (!accepted) {
        ray.tfar = saved_tfar;
        return false;
      }
    }
#endif

    /* commit the hit */
    ray.tfar = hit_i.t();
    ray.u = uv.x;
    ray.v = uv.y;
    ray.Ng.x = Ng.x;
    ray.Ng.y = Ng.y;
    ray.Ng.z = Ng.z;
    ray.geomID = geomID;
    ray.primID = primID;
    instance_id_stack::copy_UU(context->user, context->user->instID, ray.instID);
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
    instance_id_stack::copy_UU(context->user, context->user->instPrimID, ray.instPrimID);
#endif
    return true;
  }
};
/*! Hit epilog for Ray (no hit record): validates a candidate hit (ray mask,
 *  optional filter) and, if it is accepted, marks the ray by setting its
 *  tfar to neg_inf.
 *
 *  Two call operators exist. The first reads the hit through data members
 *  (u, v, t, Ng); the second, selected by an extra leading bool argument,
 *  reads it through accessor functions (uv(), t(), Ng()). */
template<>
struct Intersect1Epilog1_HWIF<Ray>
{
  Ray& ray;
  sycl::private_ptr<RayQueryContext> context;
  const unsigned int geomID;
  const unsigned int primID;
  const bool filter;

  __forceinline Intersect1Epilog1_HWIF(Ray& ray,
                                       sycl::private_ptr<RayQueryContext> context,
                                       const unsigned int geomID,
                                       const unsigned int primID,
                                       const bool filter)
    : ray(ray), context(context), geomID(geomID), primID(primID), filter(filter) {}

  /*! Processes a hit that exposes u, v, t and Ng as data members.
   *  Returns false if the hit was rejected by the ray mask or the filter,
   *  true after tfar has been set to neg_inf. */
  template<typename Hit_i>
  __forceinline bool operator() (Hit_i& hit_i) const
  {
    hit_i.finalize();

    Scene* scene MAYBE_UNUSED = context->scene;
    Geometry* geometry MAYBE_UNUSED = scene->get(geomID);

    /* ray mask test */
#if defined(EMBREE_RAY_MASK)
    if ((geometry->mask & ray.mask) == 0)
      return false;
#endif

    /* call filter function */
#if defined(EMBREE_FILTER_FUNCTION)
    if (filter && (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())))
    {
      Hit candidate(context->user,geomID,primID,Vec2f(hit_i.u,hit_i.v),hit_i.Ng);

      /* the filter runs with tfar already set to the candidate distance;
         the previous value is restored if the filter rejects the hit */
      const float saved_tfar = ray.tfar;
      ray.tfar = hit_i.t;
      const bool accepted = runIntersectionFilter1SYCL(geometry,ray,context,candidate);
      if (!accepted) {
        ray.tfar = saved_tfar;
        return false;
      }
    }
#endif

    ray.tfar = neg_inf;
    return true;
  }

  /*! Processes a hit that exposes uv(), t() and Ng() as accessor functions.
   *  The unnamed bool argument only selects this overload.
   *  Returns false if the hit was rejected by the ray mask or the filter,
   *  true after tfar has been set to neg_inf. */
  template<typename Hit_i>
  __forceinline bool operator() (bool, Hit_i& hit_i) const
  {
    hit_i.finalize();

    Scene* scene MAYBE_UNUSED = context->scene;
    Geometry* geometry MAYBE_UNUSED = scene->get(geomID);

    /* ray mask test */
#if defined(EMBREE_RAY_MASK)
    if ((geometry->mask & ray.mask) == 0)
      return false;
#endif

    /* call filter function */
#if defined(EMBREE_FILTER_FUNCTION)
    if (filter && (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())))
    {
      const Vec3fa Ng = hit_i.Ng();
      const Vec2f uv = hit_i.uv();
      Hit candidate(context->user,geomID,primID,uv,Ng);

      /* the filter runs with tfar already set to the candidate distance;
         the previous value is restored if the filter rejects the hit */
      const float saved_tfar = ray.tfar;
      ray.tfar = hit_i.t();
      const bool accepted = runIntersectionFilter1SYCL(geometry,ray,context,candidate);
      if (!accepted) {
        ray.tfar = saved_tfar;
        return false;
      }
    }
#endif

    ray.tfar = neg_inf;
    return true;
  }
};
}

View File

@@ -0,0 +1,173 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/scene.h"
#include "../common/ray.h"
#include "../common/point_query.h"
#include "../bvh/node_intersector1.h"
#include "../bvh/node_intersector_packet.h"
namespace embree
{
namespace isa
{
/*! Adapter that applies a single-primitive Intersector to an array of
 *  `num` primitives for single-ray queries. The packet entry points
 *  intersectK/occludedK are stubs. */
template<typename Intersector>
struct ArrayIntersector1
{
  using Primitive = typename Intersector::Primitive;
  using Precalculations = typename Intersector::Precalculations;

  /* Intersects the ray with every primitive of the array */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur)
      Intersector::intersect(pre,ray,context,*cur);
  }

  /* Returns true as soon as one primitive of the array occludes the ray */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur) {
      if (Intersector::occluded(pre,ray,context,*cur))
        return true;
    }
    return false;
  }

  /* Runs the point query on every primitive (no early out) and reports
     whether any of the calls returned true */
  template<int N>
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    bool changed = false;
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur)
      changed |= Intersector::pointQuery(query, context, *cur);
    return changed;
  }

  /* Packet intersection stub: does nothing */
  template<int K>
  static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
  }

  /* Packet occlusion stub: returns the input mask unchanged */
  template<int K>
  static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    return valid;
  }
};
/*! Adapter that applies a single-primitive Intersector to an array of
 *  `num` primitives, for ray packets of size K and for single lanes of
 *  such packets. */
template<int K, typename Intersector>
struct ArrayIntersectorK_1
{
  using Primitive = typename Intersector::Primitive;
  using Precalculations = typename Intersector::Precalculations;

  /* Intersects the packet with every primitive of the array */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur)
      Intersector::intersect(valid,pre,ray,context,*cur);
  }

  /* Tests the packet against the primitives of the array. Lanes reported
     occluded by a primitive are removed from the working mask; the loop
     stops once the mask is empty. Returns the negated working mask. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    vbool<K> remaining = valid;
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur) {
      remaining &= !Intersector::occluded(remaining,pre,ray,context,*cur);
      if (none(remaining)) break;
    }
    return !remaining;
  }

  /* Intersects lane k of the packet with every primitive of the array */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur)
      Intersector::intersect(pre,ray,k,context,*cur);
  }

  /* Returns true as soon as one primitive of the array occludes lane k */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    const Primitive* const last = prim + num;
    for (const Primitive* cur = prim; cur != last; ++cur) {
      if (Intersector::occluded(pre,ray,k,context,*cur))
        return true;
    }
    return false;
  }
};
// =============================================================================================
/*! Adapter that applies a packet intersector IntersectorK to an array of
 *  `num` primitives. Unlike the other adapters the precalculations are not
 *  passed in but constructed inside every entry point. */
template<int K, typename IntersectorK>
struct ArrayIntersectorKStream
{
  typedef typename IntersectorK::Primitive PrimitiveK;
  typedef typename IntersectorK::Precalculations PrecalculationsK;

  /* Intersects the packet with every primitive of the array */
  static __forceinline void intersectK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayHitK<K>& ray, RayQueryContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
  {
    PrecalculationsK pre(valid,ray); // FIXME: might cause trouble

    for (size_t i=0; i<num; i++) {
      IntersectorK::intersect(valid,pre,ray,context,prim[i]);
    }
  }

  /* Tests the packet against the primitives of the array. Lanes reported
     occluded by a primitive are removed from the working mask; the loop
     stops once the mask is empty. Returns the negated working mask. */
  static __forceinline vbool<K> occludedK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayK<K>& ray, RayQueryContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
  {
    PrecalculationsK pre(valid,ray); // FIXME: might cause trouble
    vbool<K> valid0 = valid;
    for (size_t i=0; i<num; i++) {
      valid0 &= !IntersectorK::occluded(valid0,pre,ray,context,prim[i]);
      if (none(valid0)) break;
    }
    return !valid0;
  }

  /* Intersects lane k of the packet with every primitive of the array;
     the precalculations are built for the lanes with tnear <= tfar */
  static __forceinline void intersect(const Accel::Intersectors* This, RayHitK<K>& ray, size_t k, RayQueryContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
  {
    PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
    for (size_t i=0; i<num; i++) {
      IntersectorK::intersect(pre,ray,k,context,prim[i]);
    }
  }

  /* Returns true as soon as one primitive of the array occludes lane k;
     the precalculations are built for the lanes with tnear <= tfar */
  static __forceinline bool occluded(const Accel::Intersectors* This, RayK<K>& ray, size_t k, RayQueryContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
  {
    PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
    for (size_t i=0; i<num; i++) {
      if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
        return true;
    }
    return false;
  }

  /* Occlusion test for a stream of packets. Bit `rayID` of cur_mask selects
     lane rayID % K of packet inputPackets[rayID / K]. Every selected ray
     that is not yet marked occluded is tested against each primitive; on
     occlusion its tfar is set to neg_inf and its bit is set in the result.
     Returns the mask of rays found occluded. */
  static __forceinline size_t occluded(const Accel::Intersectors* This, size_t cur_mask, RayK<K>** __restrict__ inputPackets, RayQueryContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
  {
    size_t m_occluded = 0;
    for (size_t i=0; i<num; i++) {
      size_t bits = cur_mask & (~m_occluded);
      for (; bits!=0; )
      {
        const size_t rayID = bscf(bits);
        /* The packets are RayK<K>; binding them to a RayHitK<K>& would be an
           invalid base-to-derived reference conversion and is not needed,
           since only RayK members are used below. */
        RayK<K> &ray = *inputPackets[rayID / K];
        const size_t k = rayID % K;
        PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
        if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
        {
          m_occluded |= (size_t)1 << rayID;
          ray.tfar[k] = neg_inf;
        }
      }
    }
    return m_occluded;
  }
};
}
}

View File

@@ -0,0 +1,145 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
/*! Holds the hit data (u, v, t and Ng) of M line segment intersection
 *  tests in vector form. Each quantity can be read for a single lane i or
 *  for all lanes at once. */
template<int M>
struct LineIntersectorHitM
{
  /* leaves all members uninitialized */
  __forceinline LineIntersectorHitM() {}

  __forceinline LineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
    : vu(u), vv(v), vt(t), vNg(Ng) {}

  /* nothing to finalize for this hit type */
  __forceinline void finalize() {}

  /* per-lane access */
  __forceinline Vec2f  uv(const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float  t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }

  /* access to all lanes */
  __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); }
  __forceinline vfloat<M> t () const { return vt; }
  __forceinline Vec3vf<M> Ng() const { return vNg; }

public:
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
  Vec3vf<M> vNg;
};
/*! Intersects a single ray with M flat linear curve segments. */
template<int M>
struct FlatLinearCurveIntersector1
{
  typedef CurvePrecalculations1 Precalculations;

  /*! Tests the ray against the M segments (v0i[i], v1i[i]); the w
   *  component of the end points is used as radius.
   *
   *  \param valid_i segments to test
   *  \param ray     the ray (org, tnear(), tfar are read)
   *  \param context forwarded to enlargeRadiusToMinWidth
   *  \param geom    forwarded to enlargeRadiusToMinWidth
   *  \param pre     provides depth_scale and ray_space
   *  \param v0i     segment start points
   *  \param v1i     segment end points
   *  \param epilog  called with the mask of hit segments and the hit data
   *  \return false if no segment is hit, otherwise the result of epilog
   */
  template<typename Ray, typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      Ray& ray,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const Epilog& epilog)
  {
    vbool<M> valid = valid_i;
    const vfloat<M> depth_scale = pre.depth_scale;
    const LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space;

    /* transform end points into ray space */
    const Vec3vf<M> ray_org ((Vec3fa)ray.org);
    const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i);
    const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i);
    Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
    Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);

    /* approximative intersection with cone: take the point of the segment
       that is closest to the origin in the xy plane of ray space */
    const Vec4vf<M> seg    = p1-p0;
    const Vec4vf<M> to_org = -p0;
    const vfloat<M> num = madd(to_org.x,seg.x,to_org.y*seg.y);
    const vfloat<M> den = madd(seg.x,seg.x,seg.y*seg.y);
    const vfloat<M> u = clamp(num*rcp(den),vfloat<M>(zero),vfloat<M>(one));
    const Vec4vf<M> closest = madd(u,seg,p0);

    /* hit if that point lies within the interpolated radius and inside
       the ray's [tnear,tfar] interval */
    const vfloat<M> t = closest.z;
    const vfloat<M> dist2 = madd(closest.x,closest.x,closest.y*closest.y);
    const vfloat<M> radius = closest.w;
    const vfloat<M> radius2 = radius*radius;
    valid &= (dist2 <= radius2) & (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
    if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
      valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*radius*depth_scale; // ignore self intersections
    if (unlikely(none(valid))) return false;

    /* ignore denormalized segments */
    const Vec3vf<M> tangent = v1.xyz()-v0.xyz();
    valid &= (tangent.x != vfloat<M>(zero)) | (tangent.y != vfloat<M>(zero)) | (tangent.z != vfloat<M>(zero));
    if (unlikely(none(valid))) return false;

    /* update hit information; v is always zero and the segment direction
       is stored in the Ng slot */
    LineIntersectorHitM<M> hit(u,zero,t,tangent);
    return epilog(valid,hit);
  }
};
/*! Intersects lane k of a ray packet with M flat linear curve segments. */
template<int M, int K>
struct FlatLinearCurveIntersectorK
{
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Tests ray k of the packet against the M segments (v0i[i], v1i[i]);
   *  the w component of the end points is used as radius.
   *
   *  \param valid_i segments to test
   *  \param ray     the ray packet
   *  \param k       lane of the packet to use (org, tnear(), tfar are read)
   *  \param context forwarded to enlargeRadiusToMinWidth
   *  \param geom    forwarded to enlargeRadiusToMinWidth
   *  \param pre     provides depth_scale[k] and ray_space[k]
   *  \param v0i     segment start points
   *  \param v1i     segment end points
   *  \param epilog  called with the mask of hit segments and the hit data
   *  \return false if no segment is hit, otherwise the result of epilog
   */
  template<typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      RayK<K>& ray, size_t k,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const Epilog& epilog)
  {
    /* transform end points into ray space
       (the ray direction is not needed here; the previously built, unused
       ray_dir vector has been dropped) */
    vbool<M> valid = valid_i;
    vfloat<M> depth_scale = pre.depth_scale[k];
    LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space[k];
    const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
    const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i);
    const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i);
    Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
    Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);

    /* approximative intersection with cone: take the point of the segment
       that is closest to the origin in the xy plane of ray space */
    const Vec4vf<M> v = p1-p0;
    const Vec4vf<M> w = -p0;
    const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y);
    const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y);
    const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one));
    const Vec4vf<M> p = madd(u,v,p0);

    /* hit if that point lies within the interpolated radius and inside
       the ray's [tnear,tfar] interval */
    const vfloat<M> t = p.z;
    const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y);
    const vfloat<M> r = p.w;
    const vfloat<M> r2 = r*r;
    valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
    if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
      valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections
    if (unlikely(none(valid))) return false;

    /* ignore denormalized segments */
    const Vec3vf<M> T = v1.xyz()-v0.xyz();
    valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero));
    if (unlikely(none(valid))) return false;

    /* update hit information; v is always zero and the segment direction
       is stored in the Ng slot */
    LineIntersectorHitM<M> hit(u,zero,t,T);
    return epilog(valid,hit);
  }
};
}
}

View File

@@ -0,0 +1,711 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/*! Block of up to M line segments that stores only vertex indices and
 *  primitive IDs; vertex data is fetched from the LineSegments geometry
 *  on demand. All segments of one block share a single geometry ID. */
template<int M>
struct LineMi
{
  /* Type descriptor used to query name and sizes of this primitive type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* the primitive is not limited to a single time segment */
  static const bool singleTimeSegment = false;

  /* Capacity of one block: at most M line segments */
  static __forceinline size_t max_size() { return M; }

  /* Number of blocks needed to store N line segments (N/M rounded up) */
  static __forceinline size_t blocks(size_t N)
  {
    const size_t capacity = max_size();
    return (N+capacity-1)/capacity;
  }

  /* Number of bytes needed to store N line segments */
  static __forceinline size_t bytes(size_t N)
  {
    return blocks(N)*sizeof(LineMi);
  }

public:

  /* Default constructor, leaves all members uninitialized */
  __forceinline LineMi() { }

  /* Builds a block from start-vertex indices, neighbour bit masks and IDs.
   * m is set to the number of lanes whose primID differs from -1, and the
   * geometry ID of lane 0 is stored for the whole block. */
  __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype)
    : gtype((unsigned char)gtype),
      m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))),
      sharedGeomID(geomIDs[0]),
      leftExists (leftExists),
      rightExists(rightExists),
      v0(v0),
      primIDs(primIDs)
  {
    /* every lane has to reference the same geometry */
    assert(all(vuint<M>(geomID()) == geomIDs));
  }

  /* Lane mask of the stored segments; a primID of -1 marks an unused lane */
  __forceinline vbool<M> valid() const
  {
    return primIDs != vuint<M>(-1);
  }

  /* Tells whether lane i holds a segment */
  __forceinline bool valid(const size_t i) const
  {
    assert(i<M);
    return primIDs[i] != -1;
  }

  /* Number of stored segments: index of the first unused lane */
  __forceinline size_t size() const
  {
    return bsf(~movemask(valid()));
  }

  /* Geometry ID; the index argument is ignored because the ID is shared by all lanes */
  __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; }

  /* Primitive IDs, as a whole vector or for a single lane */
  __forceinline vuint<M>& primID() { return primIDs; }
  __forceinline const vuint<M>& primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const
  {
    assert(i<M);
    return primIDs[i];
  }

  /* gather start (p0) and end (p1) vertices of the segments */
  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            const LineSegments* geom) const;

  __forceinline void gatheri(Vec4vf<M>& p0,
                             Vec4vf<M>& p1,
                             const LineSegments* geom,
                             const int itime) const;

  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            const LineSegments* geom,
                            float time) const;

  /* gather the segments together with their left (pL) and right (pR) neighbour vertices */
  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            Vec4vf<M>& pL,
                            Vec4vf<M>& pR,
                            const LineSegments* geom) const;

  __forceinline void gatheri(Vec4vf<M>& p0,
                             Vec4vf<M>& p1,
                             Vec4vf<M>& pL,
                             Vec4vf<M>& pR,
                             const LineSegments* geom,
                             const int itime) const;

  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            Vec4vf<M>& pL,
                            Vec4vf<M>& pR,
                            const LineSegments* geom,
                            float time) const;

  /* gather the segments together with masks cL/cR (negated leftExists/rightExists bits) */
  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            vbool<M>& cL,
                            vbool<M>& cR,
                            const LineSegments* geom) const;

  __forceinline void gatheri(Vec4vf<M>& p0,
                             Vec4vf<M>& p1,
                             vbool<M>& cL,
                             vbool<M>& cR,
                             const LineSegments* geom,
                             const int itime) const;

  __forceinline void gather(Vec4vf<M>& p0,
                            Vec4vf<M>& p1,
                            vbool<M>& cL,
                            vbool<M>& cR,
                            const LineSegments* geom,
                            float time) const;

  /* Bounds of all stored segments at time step itime: the box around both
   * end points of each segment, enlarged by the larger of the two w components */
  __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
  {
    BBox3fa result = empty;
    for (size_t lane=0; lane<M; lane++)
    {
      if (!valid(lane)) break; // stored segments occupy the leading lanes
      const LineSegments* mesh = scene->get<LineSegments>(geomID(lane));
      const Vec3ff& a = mesh->vertex(v0[lane]+0,itime);
      const Vec3ff& b = mesh->vertex(v0[lane]+1,itime);
      BBox3fa box = merge(BBox3fa(a),BBox3fa(b));
      box = enlarge(box,Vec3fa(max(a.w,b.w)));
      result.extend(box);
    }
    return result;
  }

  /* Linear bounds built from the bounds at time steps itime and itime+1 */
  __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime)
  {
    const BBox3fa atStart = bounds(scene,itime+0);
    const BBox3fa atEnd   = bounds(scene,itime+1);
    return LBBox3fa(atStart,atEnd);
  }

  /* Union of the per-segment linear bounds reported by the geometry for (itime, numTimeSteps) */
  __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
  {
    LBBox3fa merged = empty;
    for (size_t lane=0; lane<M; lane++)
    {
      if (!valid(lane)) break;
      const LineSegments* mesh = scene->get<LineSegments>(geomID(lane));
      merged.extend(mesh->linearBounds(primID(lane), itime, numTimeSteps));
    }
    return merged;
  }

  /* Union of the per-segment linear bounds reported by the geometry for a time range */
  __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
  {
    LBBox3fa merged = empty;
    for (size_t lane=0; lane<M; lane++)
    {
      if (!valid(lane)) break;
      const LineSegments* mesh = scene->get<LineSegments>(geomID((unsigned int)lane));
      merged.extend(mesh->linearBounds(primID(lane), time_range));
    }
    return merged;
  }

  /* Fills this block from prims[begin..end) and advances begin past the
   * consumed references. Lanes beyond the input get primID -1 and repeat the
   * previous lane's geomID and start vertex. */
  template<typename PrimRefT>
  __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
  {
    Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
    vuint<M> laneGeomID, lanePrimID;
    vuint<M> laneV0;
    unsigned short leftBits = 0;
    unsigned short rightBits = 0;
    const PrimRefT* cur = &prims[begin];

    for (size_t lane=0; lane<M; lane++)
    {
      const LineSegments* mesh = scene->get<LineSegments>(cur->geomID());
      if (begin>=end)
      {
        /* padding lane: only reachable after at least one real segment */
        assert(lane);
        if (lane>0) {
          laneGeomID[lane] = laneGeomID[lane-1];
          lanePrimID[lane] = -1;
          laneV0[lane] = laneV0[lane-1];
        }
      }
      else
      {
        laneGeomID[lane] = cur->geomID();
        lanePrimID[lane] = cur->primID();
        laneV0[lane] = mesh->segment(cur->primID());
        leftBits  |= mesh->segmentLeftExists(lanePrimID[lane]) << lane;
        rightBits |= mesh->segmentRightExists(lanePrimID[lane]) << lane;
        begin++;
      }
      if (begin<end) cur = &prims[begin]; // FIXME: remove this line
    }

    new (this) LineMi(laneV0,leftBits,rightBits,laneGeomID,lanePrimID,gty); // FIXME: use non temporal store
  }

  /* Allocates the blocks for a primitive range, fills them and returns the encoded leaf */
  template<typename BVH, typename Allocator>
  __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
  {
    size_t cursor = set.begin();
    const size_t numBlocks = LineMi::blocks(set.size());
    const size_t numBytes  = LineMi::bytes(set.size());
    LineMi* leaf = (LineMi*) alloc.malloc1(numBytes,M*sizeof(float));
    for (size_t b=0; b<numBlocks; b++)
      leaf[b].fill(prims,cursor,set.end(),bvh->scene);
    return bvh->encodeLeaf((char*)leaf,numBlocks);
  }

  /* Fills the block and returns its linear bounds for time steps itime and itime+1 */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  {
    fill(prims,begin,end,scene);
    return linearBounds(scene,itime);
  }

  /* Fills the block and returns its linear bounds over time_range */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  {
    fill(prims,begin,end,scene);
    return linearBounds(scene,time_range);
  }

  /* Allocates and fills the blocks of a motion blur leaf and returns the
   * node record carrying the merged linear bounds and the time range */
  template<typename BVH, typename SetMB, typename Allocator>
  __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
  {
    size_t cursor = prims.begin();
    const size_t last = prims.end();
    const size_t numBlocks = LineMi::blocks(prims.size());
    const size_t numBytes  = LineMi::bytes(prims.size());
    LineMi* leaf = (LineMi*) alloc.malloc1(numBytes,M*sizeof(float));
    const typename BVH::NodeRef node = bvh->encodeLeaf((char*)leaf,numBlocks);

    LBBox3fa merged = empty;
    for (size_t b=0; b<numBlocks; b++)
      merged.extend(leaf[b].fillMB(prims.prims->data(),cursor,last,bvh->scene,prims.time_range));

    return typename BVH::NodeRecordMB4D(node,merged,prims.time_range);
  }

  /* Recomputes the bounds of the stored segments from the geometry's current vertices */
  __forceinline BBox3fa update(LineSegments* geom)
  {
    BBox3fa result = empty;
    for (size_t lane=0; lane<M; lane++)
    {
      if (!valid(lane)) break;
      const Vec3ff& a = geom->vertex(v0[lane]+0);
      const Vec3ff& b = geom->vertex(v0[lane]+1);
      BBox3fa box = merge(BBox3fa(a),BBox3fa(b));
      box = enlarge(box,Vec3fa(max(a.w,b.w)));
      result.extend(box);
    }
    return result;
  }

  /*! output operator */
  friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line)
  {
    return cout << "Line" << M << "i {"
                << line.v0 << ", "
                << line.geomID() << ", "
                << line.primID() << "}";
  }

public:
  unsigned char gtype;                    // geometry type, stored as a byte
  unsigned char m;                        // number of lanes with a primID other than -1
  unsigned int sharedGeomID;              // geometry ID shared by all lanes
  unsigned short leftExists, rightExists; // per-lane bits: segment has a left/right neighbour
  vuint<M> v0;                            // index of start vertex
private:
  vuint<M> primIDs;                       // primitive ID, -1 for unused lanes
};
/* Loads the start and end vertex of all four lanes and transposes them
 * into the component-wise (x,y,z,w) layout of p0 and p1. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     const LineSegments* geom) const
{
  /* start vertices */
  const vfloat4 s0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  const vfloat4 s1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  const vfloat4 s2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  const vfloat4 s3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  transpose(s0,s1,s2,s3, p0.x,p0.y,p0.z,p0.w);

  /* end vertices are stored directly after the start vertices */
  const vfloat4 e0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  const vfloat4 e1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  const vfloat4 e2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  const vfloat4 e3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  transpose(e0,e1,e2,e3, p1.x,p1.y,p1.z,p1.w);
}
/* Same as the static gather, but reads the vertices of time step itime. */
template<>
__forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
                                      Vec4vf4& p1,
                                      const LineSegments* geom,
                                      const int itime) const
{
  /* start vertices at the requested time step */
  const vfloat4 s0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  const vfloat4 s1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  const vfloat4 s2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  const vfloat4 s3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  transpose(s0,s1,s2,s3, p0.x,p0.y,p0.z,p0.w);

  /* end vertices at the requested time step */
  const vfloat4 e0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  const vfloat4 e1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  const vfloat4 e2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  const vfloat4 e3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  transpose(e0,e1,e2,e3, p1.x,p1.y,p1.z,p1.w);
}
/* Gathers the segments at an arbitrary time by linearly blending the
 * vertices of the two time steps that enclose it. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     const LineSegments* geom,
                                     float time) const
{
  /* timeSegment returns the time step index and writes the blend factor to ftime */
  float ftime;
  const int itime = geom->timeSegment(time, ftime);

  Vec4vf4 lo0,lo1;
  gatheri(lo0,lo1,geom,itime);
  Vec4vf4 hi0,hi1;
  gatheri(hi0,hi1,geom,itime+1);

  const vfloat4 t(ftime);
  p0 = lerp(lo0,hi0,t);
  p1 = lerp(lo1,hi1,t);
}
/* Gathers the segment end points and reports in cL/cR the lanes whose
 * leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     vbool4& cL,
                                     vbool4& cR,
                                     const LineSegments* geom) const
{
  gather(p0,p1,geom);

  const vbool4 hasLeft(leftExists);
  const vbool4 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
/* Gathers the segment end points of time step itime and reports in cL/cR
 * the lanes whose leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
                                      Vec4vf4& p1,
                                      vbool4& cL,
                                      vbool4& cR,
                                      const LineSegments* geom,
                                      const int itime) const
{
  gatheri(p0,p1,geom,itime);

  const vbool4 hasLeft(leftExists);
  const vbool4 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
/* Gathers the time-interpolated segment end points and reports in cL/cR
 * the lanes whose leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     vbool4& cL,
                                     vbool4& cR,
                                     const LineSegments* geom,
                                     float time) const
{
  /* the end points are blended exactly as in the four-argument overload */
  gather(p0,p1,geom,time);

  const vbool4 hasLeft(leftExists);
  const vbool4 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
/* Gathers the segment end points plus the vertex before the start (pL) and
 * the vertex after the end (pR). Lanes without the corresponding neighbour
 * bit receive inf in all components instead of a loaded vertex. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     Vec4vf4& pL,
                                     Vec4vf4& pR,
                                     const LineSegments* geom) const
{
  /* end points: identical loads to the two-point gather */
  gather(p0,p1,geom);

  /* left neighbours sit one vertex before the start vertex */
  const vfloat4 prev0 = ((leftExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
  const vfloat4 prev1 = ((leftExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
  const vfloat4 prev2 = ((leftExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
  const vfloat4 prev3 = ((leftExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
  transpose(prev0,prev1,prev2,prev3, pL.x,pL.y,pL.z,pL.w);

  /* right neighbours sit two vertices after the start vertex */
  const vfloat4 next0 = ((rightExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
  const vfloat4 next1 = ((rightExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
  const vfloat4 next2 = ((rightExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
  const vfloat4 next3 = ((rightExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
  transpose(next0,next1,next2,next3, pR.x,pR.y,pR.z,pR.w);
}
/* Time-step variant of the neighbour gather: reads end points and left/right
 * neighbour vertices of time step itime. Lanes without the corresponding
 * neighbour bit receive inf in all components. */
template<>
__forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
                                      Vec4vf4& p1,
                                      Vec4vf4& pL,
                                      Vec4vf4& pR,
                                      const LineSegments* geom,
                                      const int itime) const
{
  /* end points: identical loads to the two-point gatheri */
  gatheri(p0,p1,geom,itime);

  /* left neighbours sit one vertex before the start vertex */
  const vfloat4 prev0 = ((leftExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
  const vfloat4 prev1 = ((leftExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
  const vfloat4 prev2 = ((leftExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
  const vfloat4 prev3 = ((leftExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
  transpose(prev0,prev1,prev2,prev3, pL.x,pL.y,pL.z,pL.w);

  /* right neighbours sit two vertices after the start vertex */
  const vfloat4 next0 = ((rightExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
  const vfloat4 next1 = ((rightExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
  const vfloat4 next2 = ((rightExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
  const vfloat4 next3 = ((rightExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
  transpose(next0,next1,next2,next3, pR.x,pR.y,pR.z,pR.w);
}
/* Gathers end points and neighbour vertices at an arbitrary time by blending
 * the two enclosing time steps. After blending, lanes without a neighbour
 * are explicitly reset to inf. */
template<>
__forceinline void LineMi<4>::gather(Vec4vf4& p0,
                                     Vec4vf4& p1,
                                     Vec4vf4& pL,
                                     Vec4vf4& pR,
                                     const LineSegments* geom,
                                     float time) const
{
  /* timeSegment returns the time step index and writes the blend factor to ftime */
  float ftime;
  const int itime = geom->timeSegment(time, ftime);

  Vec4vf4 lo0,lo1,loL,loR;
  gatheri(lo0,lo1,loL,loR,geom,itime);
  Vec4vf4 hi0,hi1,hiL,hiR;
  gatheri(hi0,hi1,hiL,hiR,geom,itime+1);

  const vfloat4 t(ftime);
  p0 = lerp(lo0,hi0,t);
  p1 = lerp(lo1,hi1,t);
  pL = lerp(loL,hiL,t);
  pR = lerp(loR,hiR,t);

  /* overwrite the blended value with inf wherever the neighbour bit is clear */
  const vboolf4 hasLeft(leftExists);
  const vboolf4 hasRight(rightExists);
  pL = select(hasLeft, pL, Vec4vf4(inf));
  pR = select(hasRight, pR, Vec4vf4(inf));
}
#if defined(__AVX__)
/* Loads the start and end vertex of all eight lanes and transposes them
 * into the component-wise (x,y,z,w) layout of p0 and p1. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     const LineSegments* geom) const
{
  /* start vertices */
  const vfloat4 s0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
  const vfloat4 s1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
  const vfloat4 s2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
  const vfloat4 s3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
  const vfloat4 s4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
  const vfloat4 s5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
  const vfloat4 s6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
  const vfloat4 s7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
  transpose(s0,s1,s2,s3,s4,s5,s6,s7, p0.x,p0.y,p0.z,p0.w);

  /* end vertices are stored directly after the start vertices */
  const vfloat4 e0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
  const vfloat4 e1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
  const vfloat4 e2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
  const vfloat4 e3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
  const vfloat4 e4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
  const vfloat4 e5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
  const vfloat4 e6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
  const vfloat4 e7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
  transpose(e0,e1,e2,e3,e4,e5,e6,e7, p1.x,p1.y,p1.z,p1.w);
}
/* Same as the static eight-wide gather, but reads the vertices of time step itime. */
template<>
__forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
                                      Vec4vf8& p1,
                                      const LineSegments* geom,
                                      const int itime) const
{
  /* start vertices at the requested time step */
  const vfloat4 s0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
  const vfloat4 s1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
  const vfloat4 s2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
  const vfloat4 s3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
  const vfloat4 s4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
  const vfloat4 s5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
  const vfloat4 s6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
  const vfloat4 s7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
  transpose(s0,s1,s2,s3,s4,s5,s6,s7, p0.x,p0.y,p0.z,p0.w);

  /* end vertices at the requested time step */
  const vfloat4 e0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
  const vfloat4 e1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
  const vfloat4 e2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
  const vfloat4 e3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
  const vfloat4 e4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
  const vfloat4 e5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
  const vfloat4 e6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
  const vfloat4 e7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
  transpose(e0,e1,e2,e3,e4,e5,e6,e7, p1.x,p1.y,p1.z,p1.w);
}
/* Gathers the eight segments at an arbitrary time by linearly blending the
 * vertices of the two time steps that enclose it. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     const LineSegments* geom,
                                     float time) const
{
  /* timeSegment returns the time step index and writes the blend factor to ftime */
  float ftime;
  const int itime = geom->timeSegment(time, ftime);

  Vec4vf8 lo0,lo1;
  gatheri(lo0,lo1,geom,itime);
  Vec4vf8 hi0,hi1;
  gatheri(hi0,hi1,geom,itime+1);

  const vfloat8 t(ftime);
  p0 = lerp(lo0,hi0,t);
  p1 = lerp(lo1,hi1,t);
}
/* Gathers the segment end points plus the vertex before the start (pL) and
 * the vertex after the end (pR) for eight lanes. Lanes without the
 * corresponding neighbour bit receive inf in all components. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     Vec4vf8& pL,
                                     Vec4vf8& pR,
                                     const LineSegments* geom) const
{
  /* end points: identical loads to the two-point gather */
  gather(p0,p1,geom);

  /* left neighbours sit one vertex before the start vertex */
  const vfloat4 prev0 = ((leftExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
  const vfloat4 prev1 = ((leftExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
  const vfloat4 prev2 = ((leftExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
  const vfloat4 prev3 = ((leftExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
  const vfloat4 prev4 = ((leftExists >> 4) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf);
  const vfloat4 prev5 = ((leftExists >> 5) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf);
  const vfloat4 prev6 = ((leftExists >> 6) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf);
  const vfloat4 prev7 = ((leftExists >> 7) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf);
  transpose(prev0,prev1,prev2,prev3,prev4,prev5,prev6,prev7, pL.x,pL.y,pL.z,pL.w);

  /* right neighbours sit two vertices after the start vertex */
  const vfloat4 next0 = ((rightExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
  const vfloat4 next1 = ((rightExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
  const vfloat4 next2 = ((rightExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
  const vfloat4 next3 = ((rightExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
  const vfloat4 next4 = ((rightExists >> 4) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf);
  const vfloat4 next5 = ((rightExists >> 5) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf);
  const vfloat4 next6 = ((rightExists >> 6) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf);
  const vfloat4 next7 = ((rightExists >> 7) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf);
  transpose(next0,next1,next2,next3,next4,next5,next6,next7, pR.x,pR.y,pR.z,pR.w);
}
/* Time-step variant of the eight-wide neighbour gather: reads end points and
 * left/right neighbour vertices of time step itime. Lanes without the
 * corresponding neighbour bit receive inf in all components. */
template<>
__forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
                                      Vec4vf8& p1,
                                      Vec4vf8& pL,
                                      Vec4vf8& pR,
                                      const LineSegments* geom,
                                      const int itime) const
{
  /* end points: identical loads to the two-point gatheri */
  gatheri(p0,p1,geom,itime);

  /* left neighbours sit one vertex before the start vertex */
  const vfloat4 prev0 = ((leftExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
  const vfloat4 prev1 = ((leftExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
  const vfloat4 prev2 = ((leftExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
  const vfloat4 prev3 = ((leftExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
  const vfloat4 prev4 = ((leftExists >> 4) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf);
  const vfloat4 prev5 = ((leftExists >> 5) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf);
  const vfloat4 prev6 = ((leftExists >> 6) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf);
  const vfloat4 prev7 = ((leftExists >> 7) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf);
  transpose(prev0,prev1,prev2,prev3,prev4,prev5,prev6,prev7, pL.x,pL.y,pL.z,pL.w);

  /* right neighbours sit two vertices after the start vertex */
  const vfloat4 next0 = ((rightExists >> 0) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
  const vfloat4 next1 = ((rightExists >> 1) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
  const vfloat4 next2 = ((rightExists >> 2) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
  const vfloat4 next3 = ((rightExists >> 3) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
  const vfloat4 next4 = ((rightExists >> 4) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf);
  const vfloat4 next5 = ((rightExists >> 5) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf);
  const vfloat4 next6 = ((rightExists >> 6) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf);
  const vfloat4 next7 = ((rightExists >> 7) & 1) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf);
  transpose(next0,next1,next2,next3,next4,next5,next6,next7, pR.x,pR.y,pR.z,pR.w);
}
/* Gathers end points and neighbour vertices of eight segments at an
 * arbitrary time by blending the two enclosing time steps. After blending,
 * lanes without a neighbour are explicitly reset to inf.
 *
 * The neighbour masks are built as 8-wide masks: leftExists/rightExists
 * carry one bit per lane (bits 0..7), so a 4-wide mask cannot represent the
 * upper four lanes. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     Vec4vf8& pL,
                                     Vec4vf8& pR,
                                     const LineSegments* geom,
                                     float time) const
{
  /* timeSegment returns the time step index and writes the blend factor to ftime */
  float ftime;
  const int itime = geom->timeSegment(time, ftime);

  Vec4vf8 a0,a1,aL,aR;
  gatheri(a0,a1,aL,aR,geom,itime);
  Vec4vf8 b0,b1,bL,bR;
  gatheri(b0,b1,bL,bR,geom,itime+1);

  p0 = lerp(a0,b0,vfloat8(ftime));
  p1 = lerp(a1,b1,vfloat8(ftime));
  pL = lerp(aL,bL,vfloat8(ftime));
  pR = lerp(aR,bR,vfloat8(ftime));

  /* overwrite the blended value with inf wherever the neighbour bit is clear;
   * the mask width has to match the eight lanes of this specialisation */
  pL = select(vboolf8(leftExists), pL, Vec4vf8(inf));
  pR = select(vboolf8(rightExists), pR, Vec4vf8(inf));
}
/* Gathers the end points of eight segments and reports in cL/cR the lanes
 * whose leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     vbool8& cL,
                                     vbool8& cR,
                                     const LineSegments* geom) const
{
  gather(p0,p1,geom);

  const vbool8 hasLeft(leftExists);
  const vbool8 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
/* Gathers the end points of eight segments at time step itime and reports
 * in cL/cR the lanes whose leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
                                      Vec4vf8& p1,
                                      vbool8& cL,
                                      vbool8& cR,
                                      const LineSegments* geom,
                                      const int itime) const
{
  gatheri(p0,p1,geom,itime);

  const vbool8 hasLeft(leftExists);
  const vbool8 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
/* Gathers the time-interpolated end points of eight segments and reports in
 * cL/cR the lanes whose leftExists/rightExists bit is NOT set. */
template<>
__forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                     Vec4vf8& p1,
                                     vbool8& cL,
                                     vbool8& cR,
                                     const LineSegments* geom,
                                     float time) const
{
  /* the end points are blended exactly as in the four-argument overload */
  gather(p0,p1,geom,time);

  const vbool8 hasLeft(leftExists);
  const vbool8 hasRight(rightExists);
  cL = !hasLeft;
  cR = !hasRight;
}
#endif
/* Definition of the per-instantiation static type descriptor */
template<int M>
typename LineMi<M>::Type LineMi<M>::type;

/* Short names for the 4-wide and 8-wide line blocks */
using Line4i = LineMi<4>;
using Line8i = LineMi<8>;
}

View File

@@ -0,0 +1,124 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "linei.h"
#include "line_intersector.h"
#include "intersector_epilog.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for LineMi<M> blocks: gathers the segment end
 *  points from the geometry and forwards them to FlatLinearCurveIntersector1. */
template<int M, bool filter>
struct FlatLinearCurveMiIntersector1
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Runs the intersection test with the Intersect1EpilogM epilog. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom);
    FlatLinearCurveIntersector1<M>::intersect(active,ray,context,geom,pre,p0,p1,
                                              Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Runs the test with the Occluded1EpilogM epilog and returns its result. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom);
    return FlatLinearCurveIntersector1<M>::intersect(active,ray,context,geom,pre,p0,p1,
                                                     Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Point queries are delegated to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
  }
};
/*! Motion blur variant of the single-ray LineMi<M> intersector: the segment
 *  end points are gathered at the time of the ray. */
template<int M, bool filter>
struct FlatLinearCurveMiMBIntersector1
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Runs the intersection test with the Intersect1EpilogM epilog. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom,ray.time());
    FlatLinearCurveIntersector1<M>::intersect(active,ray,context,geom,pre,p0,p1,
                                              Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Runs the test with the Occluded1EpilogM epilog and returns its result. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom,ray.time());
    return FlatLinearCurveIntersector1<M>::intersect(active,ray,context,geom,pre,p0,p1,
                                                     Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID()));
  }

  /*! Point queries are delegated to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
  }
};
/*! Intersector for ray k of a K-wide ray packet against a LineMi<M> block:
 *  gathers the segment end points and forwards them to FlatLinearCurveIntersectorK. */
template<int M, int K, bool filter>
struct FlatLinearCurveMiIntersectorK
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Runs the intersection test for ray k with the Intersect1KEpilogM epilog. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom);
    FlatLinearCurveIntersectorK<M,K>::intersect(active,ray,k,context,geom,pre,p0,p1,
                                                Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }

  /*! Runs the test for ray k with the Occluded1KEpilogM epilog and returns its result. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom);
    return FlatLinearCurveIntersectorK<M,K>::intersect(active,ray,k,context,geom,pre,p0,p1,
                                                       Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }
};
/*! Motion blur variant of the packet intersector: the segment end points
 *  are gathered at the time of ray k. */
template<int M, int K, bool filter>
struct FlatLinearCurveMiMBIntersectorK
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Runs the intersection test for ray k with the Intersect1KEpilogM epilog. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom,ray.time()[k]);
    FlatLinearCurveIntersectorK<M,K>::intersect(active,ray,k,context,geom,pre,p0,p1,
                                                Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }

  /*! Runs the test for ray k with the Occluded1KEpilogM epilog and returns its result. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
    const vbool<M> active = line.valid();
    Vec4vf<M> p0,p1;
    line.gather(p0,p1,geom,ray.time()[k]);
    return FlatLinearCurveIntersectorK<M,K>::intersect(active,ray,k,context,geom,pre,p0,p1,
                                                       Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }
};
}
}

View File

@@ -0,0 +1,95 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/*! Leaf primitive that stores only a (geomID, primID) pair. Bounds are
 *  obtained by casting the referenced scene geometry to AccelSet. */
struct Object
{
  /* Type descriptor used to query name and sizes of this primitive type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* primitive supports multiple time segments */
  static const bool singleTimeSegment = false;

  /* Returns maximum number of stored primitives */
  static __forceinline size_t max_size() { return 1; }

  /* Returns required number of primitive blocks for N primitives */
  static __forceinline size_t blocks(size_t N) { return N; }

public:

  /*! constructs a virtual object */
  Object (unsigned geomID, unsigned primID)
    : _geomID(geomID), _primID(primID) {}

  /*! returns the geometry ID */
  __forceinline unsigned geomID() const {
    return _geomID;
  }

  /*! returns the primitive ID */
  __forceinline unsigned primID() const {
    return _primID;
  }

  /*! fills the object from prims[i] and advances i; end and scene are unused */
  __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
  {
    const PrimRef& prim = prims[i]; i++;
    new (this) Object(prim.geomID(), prim.primID());
  }

  /*! fills the object from prims[i], advances i, and returns the linear
   *  bounds the geometry reports for time step itime */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
  {
    const PrimRef& prim = prims[i]; i++;
    const unsigned geomID = prim.geomID();
    const unsigned primID = prim.primID();
    new (this) Object(geomID, primID);
    AccelSet* accel = (AccelSet*) scene->get(geomID);
    return accel->linearBounds(primID,itime);
  }

  /*! fills the object from prims[i] and returns the linear bounds the
   *  geometry reports for time_range. The index is taken by value, so the
   *  caller's index is NOT advanced by this overload.
   *  NOTE(review): the reference-taking overloads advance i; confirm that
   *  by-value is intended for the callers of this one. */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t i, Scene* scene, const BBox1f time_range)
  {
    const PrimRefMB& prim = prims[i];
    const unsigned geomID = prim.geomID();
    const unsigned primID = prim.primID();
    new (this) Object(geomID, primID);
    AccelSet* accel = (AccelSet*) scene->get(geomID);
    return accel->linearBounds(primID,time_range);
  }

  /*! fills the object from prims[i], advances i, and returns the linear
   *  bounds the geometry reports for time_range */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
  {
    const PrimRefMB& prim = prims[i]; i++;
    const unsigned geomID = prim.geomID();
    const unsigned primID = prim.primID();
    new (this) Object(geomID, primID);
    AccelSet* accel = (AccelSet*) scene->get(geomID);
    return accel->linearBounds(primID,time_range);
  }

  /* Updates the primitive: returns the bounds the given AccelSet reports for the stored primID */
  __forceinline BBox3fa update(AccelSet* mesh) {
    return mesh->bounds(primID());
  }

private:
  unsigned int _geomID;  //!< geometry ID
  unsigned int _primID;  //!< primitive ID
};
}

View File

@@ -0,0 +1,161 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "object.h"
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for Object primitives. Every query is forwarded to
 *  the AccelSet that the scene stores under the primitive's geometry ID. The
 *  mblur template flag is not referenced inside this struct. */
template<bool mblur>
struct ObjectIntersector1
{
  typedef Object Primitive;

  static const bool validIntersectorK = false;

  /*! Empty placeholder: nothing is precomputed per ray for objects. */
  struct Precalculations {
    __forceinline Precalculations() {}
    __forceinline Precalculations (const Ray& ray, const void *ptr) {}
  };

  /*! Intersects one ray with the object; the AccelSet records the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = accelOf(context,prim);
#if defined(EMBREE_RAY_MASK)
    /* ray mask test: skip geometries that share no mask bit with the ray */
    if ((ray.mask & geometry->mask) == 0)
      return;
#endif
    geometry->intersect(ray,prim.geomID(),prim.primID(),context);
  }

  /*! Occlusion test for one ray; reports occlusion when the AccelSet left
   *  ray.tfar negative. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = accelOf(context,prim);
#if defined(EMBREE_RAY_MASK)
    /* ray mask test */
    if ((ray.mask & geometry->mask) == 0)
      return false;
#endif
    geometry->occluded(ray,prim.geomID(),prim.primID(),context);
    const bool isOccluded = ray.tfar < 0.0f;
    return isOccluded;
  }

  /*! For a plain Ray (no hit record) intersection is the occlusion test. */
  static __forceinline bool intersect(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim) {
    return occluded(pre,ray,context,prim);
  }

  /*! Variant of intersect that additionally passes the index k on to the AccelSet. */
  static __forceinline void intersect(unsigned int k, const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = accelOf(context,prim);
#if defined(EMBREE_RAY_MASK)
    /* ray mask test */
    if ((ray.mask & geometry->mask) == 0)
      return;
#endif
    geometry->intersect(k,ray,prim.geomID(),prim.primID(),context);
  }

  /*! Variant of occluded that additionally passes the index k on to the AccelSet. */
  static __forceinline bool occluded(unsigned int k, const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = accelOf(context,prim);
#if defined(EMBREE_RAY_MASK)
    /* ray mask test */
    if ((ray.mask & geometry->mask) == 0)
      return false;
#endif
    geometry->occluded(k, ray,prim.geomID(),prim.primID(),context);
    const bool isOccluded = ray.tfar < 0.0f;
    return isOccluded;
  }

  static __forceinline bool intersect(unsigned int k, const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& prim) {
    return occluded(k,pre,ray,context,prim);
  }

  /*! Publishes the primitive's IDs in the query context, then runs the
   *  AccelSet's point query and returns its result. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = accelOf(context,prim);
    context->geomID = prim.geomID();
    context->primID = prim.primID();
    return geometry->pointQuery(query, context);
  }

  /*! Packet entry point that must never be reached for this intersector. */
  template<int K>
  static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(false);
  }

  /*! Packet entry point that must never be reached; hands back the input mask. */
  template<int K>
  static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
  {
    assert(false);
    return valid;
  }

private:

  /*! Looks up the scene entry for the primitive's geometry ID and views it as an AccelSet. */
  template<typename Context>
  static __forceinline AccelSet* accelOf(Context* context, const Primitive& prim) {
    return (AccelSet*) context->scene->get(prim.geomID());
  }
};
/*! Ray-packet (width K) intersector for Object primitives. Queries are
 *  forwarded to the AccelSet stored in the scene under the primitive's
 *  geometry ID. The mblur flag is not referenced inside this struct. */
template<int K, bool mblur>
struct ObjectIntersectorK
{
  typedef Object Primitive;

  /*! Empty placeholder: nothing is precomputed per packet. */
  struct Precalculations {
    __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
  };

  /*! Intersects the active lanes of the packet with the object. */
  static __forceinline void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = (AccelSet*) context->scene->get(prim.geomID());
    vbool<K> active = valid_i;
#if defined(EMBREE_RAY_MASK)
    /* ray mask test: deactivate lanes that share no mask bit with the geometry */
    active &= (ray.mask & geometry->mask) != 0;
    if (none(active)) return;
#endif
    geometry->intersect(active,ray,prim.geomID(),prim.primID(),context);
  }

  /*! Occlusion test for the active lanes; the result mask is set for lanes
   *  whose tfar is negative after the AccelSet call. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& prim)
  {
    AccelSet* const geometry = (AccelSet*) context->scene->get(prim.geomID());
    vbool<K> active = valid_i;
#if defined(EMBREE_RAY_MASK)
    /* ray mask test */
    active &= (ray.mask & geometry->mask) != 0;
    if (none(active)) return false;
#endif
    geometry->occluded(active,ray,prim.geomID(),prim.primID(),context);
    return ray.tfar < 0.0f;
  }

  /*! Intersects only ray k of the packet by building a mask from the bit pattern 1<<k. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim) {
    const vbool<K> lane(1<<int(k));
    intersect(lane,pre,ray,context,prim);
  }

  /*! Occlusion test for only ray k of the packet. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& prim) {
    const vbool<K> lane(1<<int(k));
    occluded(lane,pre,ray,context,prim);
    return ray.tfar[k] < 0.0f;
  }
};
/* Aliases of ObjectIntersectorK for packet widths 4, 8 and 16 with mblur = false */
typedef ObjectIntersectorK<4,false> ObjectIntersector4;
typedef ObjectIntersectorK<8,false> ObjectIntersector8;
typedef ObjectIntersectorK<16,false> ObjectIntersector16;
/* The MB aliases instantiate the same template with mblur = true */
typedef ObjectIntersectorK<4,true> ObjectIntersector4MB;
typedef ObjectIntersectorK<8,true> ObjectIntersector8MB;
typedef ObjectIntersectorK<16,true> ObjectIntersector16MB;
}
}

View File

@@ -0,0 +1,57 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Half space bounded by the plane through P with normal N. */
struct HalfPlane
{
  const Vec3fa P; //!< plane origin
  const Vec3fa N; //!< plane normal

  __forceinline HalfPlane(const Vec3fa& P, const Vec3fa& N)
    : P(P), N(N) {}

  /*! Returns the interval of ray parameters t for which ray_org + t*ray_dir
   *  lies on the side of the plane that N points toward. One end of the
   *  interval is the plane crossing, the other is infinite. When the ray is
   *  (nearly) parallel to the plane the full range (-inf,+inf) is returned. */
  __forceinline BBox1f intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
  {
    const Vec3fa rel = Vec3fa(ray_org) - P;
    const Vec3fa dir = Vec3fa(ray_dir);
    const float relN = dot(rel,N);
    const float dirN = dot(dir,N);

    /* direction too close to parallel for the reciprocal to be usable */
    const bool parallel = abs(dirN) < min_rcp_input;
    const float tCross = -relN*rcp(dirN);

    const bool openBelow = parallel || dirN < 0.0f;
    const bool openAbove = parallel || dirN > 0.0f;
    const float tLower = select(openBelow, float(neg_inf), tCross);
    const float tUpper = select(openAbove, float(pos_inf), tCross);
    return BBox1f(tLower,tUpper);
  }
};
/*! M half spaces stored in SIMD form, each bounded by the plane through P
 *  with normal N. */
template<int M>
struct HalfPlaneN
{
  const Vec3vf<M> P; //!< plane origin
  const Vec3vf<M> N; //!< plane normal

  __forceinline HalfPlaneN(const Vec3vf<M>& P, const Vec3vf<M>& N)
    : P(P), N(N) {}

  /*! Intersects one ray with all M half spaces at once. Per lane, the result
   *  is the interval of ray parameters on the side N points toward; lanes
   *  whose plane is (nearly) parallel to the ray get (-inf,+inf). */
  __forceinline BBox<vfloat<M>> intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
  {
    const Vec3vf<M> rel = Vec3vf<M>((Vec3fa)ray_org) - P;
    const Vec3vf<M> dir = Vec3vf<M>((Vec3fa)ray_dir);
    const vfloat<M> relN = dot(rel,N);
    const vfloat<M> dirN = dot(dir,N);

    /* lanes whose direction is too close to parallel for the reciprocal */
    const vbool<M> parallel = abs(dirN) < min_rcp_input;
    const vfloat<M> tCross = -relN*rcp(dirN);

    const vfloat<M> tLower = select(parallel | (dirN < 0.0f), vfloat<M>(neg_inf), tCross);
    const vfloat<M> tUpper = select(parallel | (dirN > 0.0f), vfloat<M>(pos_inf), tCross);
    return BBox<vfloat<M>>(tLower,tUpper);
  }
};
}
}

View File

@@ -0,0 +1,412 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/*! Block of up to M point primitives stored by index: one geometry ID shared
 *  by all slots plus one primitive ID per slot. Vertex (and normal) data is
 *  fetched from the Points geometry by the gather functions. */
template<int M>
struct PointMi
{
  /* Virtual interface to query information about the point primitive type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:
  /* primitive supports multiple time segments */
  static const bool singleTimeSegment = false;

  /* Returns maximum number of stored points */
  static __forceinline size_t max_size()
  {
    return M;
  }

  /* Returns required number of primitive blocks for N points */
  static __forceinline size_t blocks(size_t N)
  {
    return (N + max_size() - 1) / max_size();
  }

  /* Returns required number of bytes for N points */
  static __forceinline size_t bytes(size_t N)
  {
    return blocks(N) * sizeof(PointMi);
  }

public:
  /* Default constructor */
  __forceinline PointMi() {}

  /* Construction from geometry IDs, primitive IDs, geometry type and the
   * number of valid slots. Only geomIDs[0] is stored; the assertion checks
   * that every slot carries that same geometry ID. */
  __forceinline PointMi(const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype, uint32_t numPrimitives)
    : gtype((unsigned char)gtype),
      numPrimitives(numPrimitives),
      sharedGeomID(geomIDs[0]),
      primIDs(primIDs)
  {
    assert(all(vuint<M>(geomID()) == geomIDs));
  }

  /* Returns a mask that tells which points are valid */
  __forceinline vbool<M> valid() const {
    return vint<M>(step) < vint<M>(numPrimitives);
  }

  /* Returns if the specified point is valid */
  __forceinline bool valid(const size_t i) const
  {
    assert(i < M);
    return i < numPrimitives;
  }

  /* Returns the number of stored points */
  __forceinline size_t size() const {
    return numPrimitives;
  }

  /* Returns the geometry ID; the slot index is ignored because all slots share one ID */
  __forceinline unsigned int geomID(unsigned int i = 0) const {
    return sharedGeomID;
  }

  /* Returns all primitive IDs (mutable access) */
  __forceinline vuint<M>& primID() {
    return primIDs;
  }

  /* Returns all primitive IDs */
  __forceinline const vuint<M>& primID() const {
    return primIDs;
  }

  /* Returns the primitive ID stored in slot i */
  __forceinline unsigned int primID(const size_t i) const {
    assert(i < M);
    return primIDs[i];
  }

  /* gather the points (optionally with normals), either for the first time
   * step, for an explicit time step index, or for a ray time */
  __forceinline void gather(Vec4vf<M>& p0, const Points* geom) const;
  __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom) const;
  __forceinline void gatheri(Vec4vf<M>& p0, const Points* geom, const int itime) const;
  __forceinline void gatheri(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, const int itime) const;
  __forceinline void gather(Vec4vf<M>& p0, const Points* geom, float time) const;
  __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, float time) const;

  /* Calculate the bounds of the valid points at time step itime */
  __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
  {
    BBox3fa bounds = empty;
    for (size_t i = 0; i < M && valid(i); i++) {
      const Points* geom = scene->get<Points>(geomID(i));
      bounds.extend(geom->bounds(primID(i),itime));
    }
    return bounds;
  }

  /* Calculate the linear bounds of the primitive between time steps itime and itime+1 */
  __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
    return LBBox3fa(bounds(scene, itime + 0), bounds(scene, itime + 1));
  }

  /* Merges the per-point linear bounds that the geometry reports for (itime, numTimeSteps) */
  __forceinline LBBox3fa linearBounds(const Scene* const scene, size_t itime, size_t numTimeSteps)
  {
    LBBox3fa allBounds = empty;
    for (size_t i = 0; i < M && valid(i); i++) {
      const Points* geom = scene->get<Points>(geomID(i));
      allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
    }
    return allBounds;
  }

  /* Merges the per-point linear bounds that the geometry reports for time_range */
  __forceinline LBBox3fa linearBounds(const Scene* const scene, const BBox1f time_range)
  {
    LBBox3fa allBounds = empty;
    for (size_t i = 0; i < M && valid(i); i++) {
      const Points* geom = scene->get<Points>(geomID((unsigned int)i));
      allBounds.extend(geom->linearBounds(primID(i), time_range));
    }
    return allBounds;
  }

  /* Fill point block from point list: consumes up to M references from
   * prims[begin,end) and advances begin accordingly. Slots beyond the last
   * consumed reference replicate the IDs of the previous slot.
   * prims[begin] is read unconditionally, so begin < end must hold on entry. */
  template<typename PrimRefT>
  __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
  {
    Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
    vuint<M> geomID, primID;
    int numPrimitives = 0;
    for (size_t i = 0; i < M; i++) {
      if (begin < end) {
        const PrimRefT& prim = prims[begin];
        geomID[i] = prim.geomID();
        primID[i] = prim.primID();
        begin++;
        numPrimitives++;
      } else {
        assert(i);
        if (i > 0) {
          geomID[i] = geomID[i - 1];
          primID[i] = primID[i - 1];
        }
      }
    }
    new (this) PointMi(geomID, primID, gty, numPrimitives); // FIXME: use non temporal store
  }

  /* Allocates the blocks needed for the primitive range 'set', fills them
   * from prims and returns the encoded leaf reference */
  template<typename BVH, typename Allocator>
  __forceinline static typename BVH::NodeRef createLeaf(BVH* bvh,
                                                        const PrimRef* prims,
                                                        const range<size_t>& set,
                                                        const Allocator& alloc)
  {
    size_t start = set.begin();
    size_t items = PointMi::blocks(set.size());
    size_t numbytes = PointMi::bytes(set.size());
    PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
    for (size_t i = 0; i < items; i++) {
      accel[i].fill(prims, start, set.end(), bvh->scene);
    }
    return bvh->encodeLeaf((char*)accel, items);
  }

  /* Fills the block and returns its linear bounds between time steps itime and itime+1 */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  {
    fill(prims, begin, end, scene);
    return linearBounds(scene, itime);
  }

  /* Fills the block and returns its linear bounds over time_range */
  __forceinline LBBox3fa fillMB(
      const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  {
    fill(prims, begin, end, scene);
    return linearBounds(scene, time_range);
  }

  /* Motion-blur leaf creation: allocates and fills the blocks for
   * prims.object_range and returns the leaf together with the merged linear
   * bounds and the time range of the set */
  template<typename BVH, typename SetMB, typename Allocator>
  __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
  {
    size_t start = prims.object_range.begin();
    size_t end = prims.object_range.end();
    size_t items = PointMi::blocks(prims.object_range.size());
    size_t numbytes = PointMi::bytes(prims.object_range.size());
    PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
    const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel, items);
    LBBox3fa bounds = empty;
    for (size_t i = 0; i < items; i++)
      bounds.extend(accel[i].fillMB(prims.prims->data(), start, end, bvh->scene, prims.time_range));
    return typename BVH::NodeRecordMB4D(node, bounds, prims.time_range);
  }

  /*! output operator */
  friend __forceinline embree_ostream operator<<(embree_ostream cout, const PointMi& point)
  {
    return cout << "Point" << M << "i {" << point.geomID() << ", " << point.primID() << "}";
  }

public:
  unsigned char gtype;          // geometry type, narrowed from Geometry::GType
  unsigned char numPrimitives;  // number of valid slots in this block
  unsigned int sharedGeomID;    // geometry ID shared by all slots

private:
  vuint<M> primIDs; // primitive ID
};
/*! Loads the vertex referenced by each of the four slots and transposes the
 *  four loaded vectors into the components p0.x, p0.y, p0.z and p0.w. */
template<>
__forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom) const
{
  vfloat4 vtx[4];
  for (size_t slot = 0; slot < 4; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot)));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], p0.x, p0.y, p0.z, p0.w);
}
/*! Loads vertex and normal of each of the four slots and transposes them
 *  into the components of p0 (x,y,z,w) and n0 (x,y,z). */
template<>
__forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom) const
{
  vfloat4 vtx[4];
  for (size_t slot = 0; slot < 4; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot)));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], p0.x, p0.y, p0.z, p0.w);

  vfloat4 nrm[4];
  for (size_t slot = 0; slot < 4; slot++)
    nrm[slot] = vfloat4(geom->normal(primID(slot)));
  transpose(nrm[0], nrm[1], nrm[2], nrm[3], n0.x, n0.y, n0.z);
}
/*! Same as the untimed gather, but reads the vertices of time step itime. */
template<>
__forceinline void PointMi<4>::gatheri(Vec4vf4& p0, const Points* geom, const int itime) const
{
  vfloat4 vtx[4];
  for (size_t slot = 0; slot < 4; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot), itime));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], p0.x, p0.y, p0.z, p0.w);
}
/*! Loads vertex and normal of each of the four slots for time step itime and
 *  transposes them into the components of p0 and n0. */
template<>
__forceinline void PointMi<4>::gatheri(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, const int itime) const
{
  vfloat4 vtx[4];
  for (size_t slot = 0; slot < 4; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot), itime));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], p0.x, p0.y, p0.z, p0.w);

  vfloat4 nrm[4];
  for (size_t slot = 0; slot < 4; slot++)
    nrm[slot] = vfloat4(geom->normal((size_t)primID(slot), (size_t)itime));
  transpose(nrm[0], nrm[1], nrm[2], nrm[3], n0.x, n0.y, n0.z);
}
/*! Gathers the points at a ray time: the geometry maps time to a segment
 *  index and a fraction, and the result is the linear interpolation between
 *  the gathers at the two time steps bounding that segment. */
template<>
__forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom, float time) const
{
  float frac;
  const int segment = geom->timeSegment(time, frac);

  Vec4vf4 atStart;
  Vec4vf4 atEnd;
  gatheri(atStart, geom, segment);
  gatheri(atEnd, geom, segment + 1);

  const vfloat4 weight(frac);
  p0 = lerp(atStart, atEnd, weight);
}
/*! Gathers points and normals at a ray time by linearly interpolating the
 *  gathers of the two time steps bounding the segment that contains time. */
template<>
__forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, float time) const
{
  float frac;
  const int segment = geom->timeSegment(time, frac);

  Vec4vf4 posStart, posEnd;
  Vec3vf4 nrmStart, nrmEnd;
  gatheri(posStart, nrmStart, geom, segment);
  gatheri(posEnd, nrmEnd, geom, segment + 1);

  const vfloat4 weight(frac);
  p0 = lerp(posStart, posEnd, weight);
  n0 = lerp(nrmStart, nrmEnd, weight);
}
#if defined(__AVX__)
/*! Loads the vertex referenced by each of the eight slots and transposes the
 *  eight loaded vectors into the components p0.x, p0.y, p0.z and p0.w. */
template<>
__forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom) const
{
  vfloat4 vtx[8];
  for (size_t slot = 0; slot < 8; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot)));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], vtx[4], vtx[5], vtx[6], vtx[7], p0.x, p0.y, p0.z, p0.w);
}
/*! Loads vertex and normal of each of the eight slots and transposes them
 *  into the components of p0 (x,y,z,w) and n0 (x,y,z). */
template<>
__forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom) const
{
  vfloat4 vtx[8];
  for (size_t slot = 0; slot < 8; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot)));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], vtx[4], vtx[5], vtx[6], vtx[7], p0.x, p0.y, p0.z, p0.w);

  vfloat4 nrm[8];
  for (size_t slot = 0; slot < 8; slot++)
    nrm[slot] = vfloat4(geom->normal(primID(slot)));
  transpose(nrm[0], nrm[1], nrm[2], nrm[3], nrm[4], nrm[5], nrm[6], nrm[7], n0.x, n0.y, n0.z);
}
/*! Same as the untimed gather, but reads the vertices of time step itime. */
template<>
__forceinline void PointMi<8>::gatheri(Vec4vf8& p0, const Points* geom, const int itime) const
{
  vfloat4 vtx[8];
  for (size_t slot = 0; slot < 8; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot), itime));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], vtx[4], vtx[5], vtx[6], vtx[7], p0.x, p0.y, p0.z, p0.w);
}
/*! Loads vertex and normal of each of the eight slots for time step itime and
 *  transposes them into the components of p0 and n0. */
template<>
__forceinline void PointMi<8>::gatheri(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, const int itime) const
{
  vfloat4 vtx[8];
  for (size_t slot = 0; slot < 8; slot++)
    vtx[slot] = vfloat4::loadu(geom->vertexPtr(primID(slot), itime));
  transpose(vtx[0], vtx[1], vtx[2], vtx[3], vtx[4], vtx[5], vtx[6], vtx[7], p0.x, p0.y, p0.z, p0.w);

  vfloat4 nrm[8];
  for (size_t slot = 0; slot < 8; slot++)
    nrm[slot] = vfloat4(geom->normal((size_t)primID(slot), (size_t)itime));
  transpose(nrm[0], nrm[1], nrm[2], nrm[3], nrm[4], nrm[5], nrm[6], nrm[7], n0.x, n0.y, n0.z);
}
/*! Gathers the points at a ray time: the geometry maps time to a segment
 *  index and a fraction, and the result is the linear interpolation between
 *  the gathers at the two time steps bounding that segment. */
template<>
__forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom, float time) const
{
  float frac;
  const int segment = geom->timeSegment(time, frac);

  Vec4vf8 atStart;
  Vec4vf8 atEnd;
  gatheri(atStart, geom, segment);
  gatheri(atEnd, geom, segment + 1);

  const vfloat8 weight(frac);
  p0 = lerp(atStart, atEnd, weight);
}
/*! Gathers points and normals at a ray time by linearly interpolating the
 *  gathers of the two time steps bounding the segment that contains time. */
template<>
__forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, float time) const
{
  float frac;
  const int segment = geom->timeSegment(time, frac);

  Vec4vf8 posStart, posEnd;
  Vec3vf8 nrmStart, nrmEnd;
  gatheri(posStart, nrmStart, geom, segment);
  gatheri(posEnd, nrmEnd, geom, segment + 1);

  const vfloat8 weight(frac);
  p0 = lerp(posStart, posEnd, weight);
  n0 = lerp(nrmStart, nrmEnd, weight);
}
#endif
/* Out-of-class definition of the static type descriptor declared inside PointMi<M> */
template<int M>
typename PointMi<M>::Type PointMi<M>::type;
/* Aliases for the 4-wide and 8-wide point blocks */
typedef PointMi<4> Point4i;
typedef PointMi<8> Point8i;
} // namespace embree

View File

@@ -0,0 +1,49 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/default.h"
#include "../common/scene.h"
#include "../../common/simd/simd.h"
#include "../builders/primref.h"
#include "../builders/primref_mb.h"
namespace embree
{
/*! Abstract interface for querying the name of a primitive type and the size
 *  of one of its primitive blocks without knowing the concrete type.
 *  The This argument of the size queries is the address of a primitive block
 *  passed as a raw char pointer. */
struct PrimitiveType
{
/*! returns name of this primitive type */
virtual const char* name() const = 0;
/*! Returns the number of stored active primitives in a block. */
virtual size_t sizeActive(const char* This) const = 0;
/*! Returns the number of stored active and inactive primitives in a block. */
virtual size_t sizeTotal(const char* This) const = 0;
/*! Returns the number of bytes of block. */
virtual size_t getBytes(const char* This) const = 0;
};
/*! Generic point query over the slots of a primitive block: every valid slot
 *  is forwarded to the AccelSet registered under the slot's geometry ID. */
template<typename Primitive>
struct PrimitivePointQuery1
{
  /*! Visits the slots in order and stops at the first invalid one. Before
   *  each forwarded query the slot's geometry and primitive ID are written to
   *  the context. Returns true if any forwarded query returned true. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
  {
    bool anyChanged = false;
    for (size_t slot = 0; slot < Primitive::max_size() && prim.valid(slot); slot++)
    {
      STAT3(point_query.trav_prims,1,1,1);
      AccelSet* const geometry = (AccelSet*)context->scene->get(prim.geomID(slot));
      context->geomID = prim.geomID(slot);
      context->primID = prim.primID(slot);
      if (geometry->pointQuery(query, context))
        anyChanged = true;
    }
    return anyChanged;
  }

  /*! Point query that intentionally does nothing. */
  static __forceinline void pointQueryNoop(PointQuery* query, PointQueryContext* context, const Primitive& prim) { }
};
}

View File

@@ -0,0 +1,400 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "primitive.h"
#include "curveNv.h"
#include "curveNi.h"
#include "curveNi_mb.h"
#include "linei.h"
#include "triangle.h"
#include "trianglev.h"
#include "trianglev_mb.h"
#include "trianglei.h"
#include "quadv.h"
#include "quadi.h"
#include "subdivpatch1.h"
#include "object.h"
#include "instance.h"
#include "instance_array.h"
#include "subgrid.h"
namespace embree
{
/********************** Curve4v **************************/

/*! Name of the Curve4v primitive type. */
template<>
const char* Curve4v::Type::name () const {
  return "curve4v";
}

/*! Number of active primitives in the block. The first byte of the block is
 *  tested against the basis mask: blocks with linear basis are read as
 *  Line4i, all others as Curve4v. */
template<>
size_t Curve4v::Type::sizeActive(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4v*)This)->N;
  return ((Line4i*)This)->size();
}

/*! Total number of primitive slots: 4 for linear-basis blocks, N otherwise. */
template<>
size_t Curve4v::Type::sizeTotal(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4v*)This)->N;
  return 4;
}

/*! Byte size of the block, computed from the active primitive count with the
 *  bytes() function of the layout selected by the basis. */
template<>
size_t Curve4v::Type::getBytes(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  const size_t active = sizeActive(This);
  if (!linearBasis)
    return Curve4v::bytes(active);
  return Line4i::bytes(active);
}
/********************** Curve4i **************************/

/*! Name of the Curve4i primitive type. */
template<>
const char* Curve4i::Type::name () const {
  return "curve4i";
}

/*! Number of active primitives in the block. The first byte of the block is
 *  tested against the basis mask: blocks with linear basis are read as
 *  Line4i, all others as Curve4i. */
template<>
size_t Curve4i::Type::sizeActive(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4i*)This)->N;
  return ((Line4i*)This)->size();
}

/*! Total number of primitive slots: 4 for linear-basis blocks, N otherwise. */
template<>
size_t Curve4i::Type::sizeTotal(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4i*)This)->N;
  return 4;
}

/*! Byte size of the block, computed from the active primitive count with the
 *  bytes() function of the layout selected by the basis. */
template<>
size_t Curve4i::Type::getBytes(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  const size_t active = sizeActive(This);
  if (!linearBasis)
    return Curve4i::bytes(active);
  return Line4i::bytes(active);
}
/********************** Curve4iMB **************************/

/*! Name of the Curve4iMB primitive type. */
template<>
const char* Curve4iMB::Type::name () const {
  return "curve4imb";
}

/*! Number of active primitives in the block. The first byte of the block is
 *  tested against the basis mask: blocks with linear basis are read as
 *  Line4i, all others as Curve4iMB. */
template<>
size_t Curve4iMB::Type::sizeActive(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4iMB*)This)->N;
  return ((Line4i*)This)->size();
}

/*! Total number of primitive slots: 4 for linear-basis blocks, N otherwise. */
template<>
size_t Curve4iMB::Type::sizeTotal(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  if (!linearBasis)
    return ((Curve4iMB*)This)->N;
  return 4;
}

/*! Byte size of the block, computed from the active primitive count with the
 *  bytes() function of the layout selected by the basis. */
template<>
size_t Curve4iMB::Type::getBytes(const char* This) const
{
  const bool linearBasis = (*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR;
  const size_t active = sizeActive(This);
  if (!linearBasis)
    return Curve4iMB::bytes(active);
  return Line4i::bytes(active);
}
/********************** Line4i **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Line4i::Type::name () const {
return "line4i";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Line4i and its size() is returned. */
template<>
size_t Line4i::Type::sizeActive(const char* This) const {
return ((Line4i*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Line4i::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Line4i::Type::getBytes(const char* This) const {
return sizeof(Line4i);
}
/********************** Triangle4 **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Triangle4::Type::name () const {
return "triangle4";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Triangle4 and its size() is returned. */
template<>
size_t Triangle4::Type::sizeActive(const char* This) const {
return ((Triangle4*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Triangle4::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Triangle4::Type::getBytes(const char* This) const {
return sizeof(Triangle4);
}
/********************** Triangle4v **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Triangle4v::Type::name () const {
return "triangle4v";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Triangle4v and its size() is returned. */
template<>
size_t Triangle4v::Type::sizeActive(const char* This) const {
return ((Triangle4v*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Triangle4v::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Triangle4v::Type::getBytes(const char* This) const {
return sizeof(Triangle4v);
}
/********************** Triangle4i **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Triangle4i::Type::name () const {
return "triangle4i";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Triangle4i and its size() is returned. */
template<>
size_t Triangle4i::Type::sizeActive(const char* This) const {
return ((Triangle4i*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Triangle4i::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Triangle4i::Type::getBytes(const char* This) const {
return sizeof(Triangle4i);
}
/********************** Triangle4vMB **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Triangle4vMB::Type::name () const {
return "triangle4vmb";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Triangle4vMB and its size() is returned. */
template<>
size_t Triangle4vMB::Type::sizeActive(const char* This) const {
return ((Triangle4vMB*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Triangle4vMB::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Triangle4vMB::Type::getBytes(const char* This) const {
return sizeof(Triangle4vMB);
}
/********************** Quad4v **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Quad4v::Type::name () const {
return "quad4v";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Quad4v and its size() is returned. */
template<>
size_t Quad4v::Type::sizeActive(const char* This) const {
return ((Quad4v*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Quad4v::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Quad4v::Type::getBytes(const char* This) const {
return sizeof(Quad4v);
}
/********************** Quad4i **************************/
/*! Returns the name of this primitive type. */
template<>
const char* Quad4i::Type::name () const {
return "quad4i";
}
/*! Returns the number of active primitives: the block at This is read as a
 *  Quad4i and its size() is returned. */
template<>
size_t Quad4i::Type::sizeActive(const char* This) const {
return ((Quad4i*)This)->size();
}
/*! Returns the total number of primitive slots in a block, which is always 4. */
template<>
size_t Quad4i::Type::sizeTotal(const char* This) const {
return 4;
}
/*! Returns the size of one block in bytes; This is not inspected. */
template<>
size_t Quad4i::Type::getBytes(const char* This) const {
return sizeof(Quad4i);
}
/********************** SubdivPatch1 **************************/
/*! Returns the name of this primitive type. */
const char* SubdivPatch1::Type::name () const {
return "subdivpatch1";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
size_t SubdivPatch1::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
size_t SubdivPatch1::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one SubdivPatch1 in bytes; This is not inspected. */
size_t SubdivPatch1::Type::getBytes(const char* This) const {
return sizeof(SubdivPatch1);
}
/*! Definition of the static type descriptor instance. */
SubdivPatch1::Type SubdivPatch1::type;
/********************** Virtual Object **************************/
/*! Returns the name of this primitive type. */
const char* Object::Type::name () const {
return "object";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
size_t Object::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
size_t Object::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one Object in bytes; This is not inspected. */
size_t Object::Type::getBytes(const char* This) const {
return sizeof(Object);
}
/*! Definition of the static type descriptor instance. */
Object::Type Object::type;
/********************** Instance **************************/
/*! Returns the name of this primitive type. */
const char* InstancePrimitive::Type::name () const {
return "instance";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
size_t InstancePrimitive::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
size_t InstancePrimitive::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one InstancePrimitive in bytes; This is not inspected. */
size_t InstancePrimitive::Type::getBytes(const char* This) const {
return sizeof(InstancePrimitive);
}
/*! Definition of the static type descriptor instance. */
InstancePrimitive::Type InstancePrimitive::type;
/********************** InstanceArray **************************/
/*! Returns the name of this primitive type. */
const char* InstanceArrayPrimitive::Type::name () const {
return "instance_array";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
size_t InstanceArrayPrimitive::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
size_t InstanceArrayPrimitive::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one InstanceArrayPrimitive in bytes; This is not inspected. */
size_t InstanceArrayPrimitive::Type::getBytes(const char* This) const {
return sizeof(InstanceArrayPrimitive);
}
/*! Definition of the static type descriptor instance. */
InstanceArrayPrimitive::Type InstanceArrayPrimitive::type;
/********************** SubGrid **************************/
/*! Returns the name of this primitive type. */
const char* SubGrid::Type::name () const {
return "subgrid";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
size_t SubGrid::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
size_t SubGrid::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one SubGrid in bytes; This is not inspected. */
size_t SubGrid::Type::getBytes(const char* This) const {
return sizeof(SubGrid);
}
/*! Definition of the static type descriptor instance. */
SubGrid::Type SubGrid::type;
/********************** SubGridQBVH4 **************************/
/*! Returns the name of this primitive type. */
template<>
const char* SubGridQBVH4::Type::name () const {
return "SubGridQBVH4";
}
/*! Returns the number of active primitives, which is always 1; This is not inspected. */
template<>
size_t SubGridQBVH4::Type::sizeActive(const char* This) const {
return 1;
}
/*! Returns the total number of primitive slots, which is always 1; This is not inspected. */
template<>
size_t SubGridQBVH4::Type::sizeTotal(const char* This) const {
return 1;
}
/*! Returns the size of one SubGridQBVH4 in bytes; This is not inspected. */
template<>
size_t SubGridQBVH4::Type::getBytes(const char* This) const {
return sizeof(SubGridQBVH4);
}
}

View File

@@ -0,0 +1,76 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
namespace embree
{
namespace isa
{
/*! Intersects a single ray with N quads with backface culling enabled.
 *  Each quad v0,v1,v2,v3 is split along the diagonal v1,v3 into the
 *  triangles v0,v1,v3 and v2,v3,v1. The side of that diagonal on which the
 *  ray passes selects the triangle that gets tested in each lane.
 *
 *  Returns the mask of lanes with a hit whose distance lies inside
 *  [ray_tnear, ray_tfar]. u_o, v_o and t_o are written only if at least one
 *  lane survives all tests, and are meaningful only for the returned lanes. */
template<int N>
__forceinline vbool<N> intersect_quad_backface_culling(const vbool<N>& valid0,
                                                       const Vec3fa& ray_org,
                                                       const Vec3fa& ray_dir,
                                                       const float ray_tnear,
                                                       const float ray_tfar,
                                                       const Vec3vf<N>& quad_v0,
                                                       const Vec3vf<N>& quad_v1,
                                                       const Vec3vf<N>& quad_v2,
                                                       const Vec3vf<N>& quad_v3,
                                                       vfloat<N>& u_o,
                                                       vfloat<N>& v_o,
                                                       vfloat<N>& t_o)
{
  vbool<N> active = valid0;

  /* move the quad vertices into a frame centered at the ray origin */
  const Vec3vf<N> O = Vec3vf<N>(ray_org);
  const Vec3vf<N> D = Vec3vf<N>(ray_dir);
  const Vec3vf<N> va = quad_v0-O;
  const Vec3vf<N> vb = quad_v1-O;
  const Vec3vf<N> vc = quad_v2-O;
  const Vec3vf<N> vd = quad_v3-O;

  /* edge test against the diagonal picks the triangle per lane */
  const Vec3vf<N> diagonal = vb-vd;
  const vfloat<N> WW = dot(cross(vd,diagonal),D);
  const vbool<N> firstTri = WW <= 0.0f;
  const Vec3vf<N> v0 = select(firstTri,va,vc);
  const Vec3vf<N> v1 = select(firstTri,vb,vd);
  const Vec3vf<N> v2 = select(firstTri,vd,vb);

  /* remaining two edges of the selected triangle */
  const Vec3vf<N> e0 = v2-v0;
  const Vec3vf<N> e1 = v0-v1;

  /* edge tests: both values must be non-positive */
  const vfloat<N> U = dot(cross(v0,e0),D);
  const vfloat<N> V = dot(cross(v1,e1),D);
  active &= max(U,V) <= 0.0f;
  if (unlikely(none(active))) return false;

  /* geometry normal and denominator */
  const Vec3vf<N> Ng = cross(e1,e0);
  const vfloat<N> den = dot(Ng,D);
  const vfloat<N> rcpDen = rcp(den);

  /* depth test against the ray interval */
  const vfloat<N> t = rcpDen*dot(v0,Ng);
  active &= (vfloat<N>(ray_tnear) <= t) & (t <= vfloat<N>(ray_tfar));
  if (unlikely(none(active))) return false;

  /* lanes with zero denominator carry no usable result */
  active &= den != vfloat<N>(zero);
  if (unlikely(none(active))) return false;

  /* hit output; coordinates of the second triangle are mirrored */
  const vfloat<N> u = U * rcpDen;
  const vfloat<N> v = V * rcpDen;
  t_o = t;
  u_o = select(firstTri,u,1.0f-u);
  v_o = select(firstTri,v,1.0f-v);
  return active;
}
}
}

View File

@@ -0,0 +1,460 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "quadv.h"
#include "triangle_intersector_moeller.h"
namespace embree
{
namespace isa
{
/*! Hit record for M quads. The constructor stores the unnormalized values
 *  U, V, T together with absDen; finalize() divides by absDen and fills the
 *  public vu, vv, vt and vNg members that the accessors read. */
template<int M>
struct QuadHitM
{
  __forceinline QuadHitM() {}

  __forceinline QuadHitM(const vbool<M>& valid,
                         const vfloat<M>& U,
                         const vfloat<M>& V,
                         const vfloat<M>& T,
                         const vfloat<M>& absDen,
                         const Vec3vf<M>& Ng,
                         const vbool<M>& flags)
    : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {}

  /*! Normalizes the stored values. u and v are clamped to at most 1; lanes
   *  with flags set receive mirrored coordinates (1-u, 1-v). In the second
   *  preprocessor branch those lanes additionally get u and v swapped and
   *  the normal negated. */
  __forceinline void finalize()
  {
    const vfloat<M> invDen = rcp(absDen);
    vt = T * invDen;
    const vfloat<M> uPlain = min(U * invDen,1.0f);
    const vfloat<M> vPlain = min(V * invDen,1.0f);
    const vfloat<M> uMirror = vfloat<M>(1.0f) - uPlain;
    const vfloat<M> vMirror = vfloat<M>(1.0f) - vPlain;
#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
    vu = select(flags,uMirror,uPlain);
    vv = select(flags,vMirror,vPlain);
    vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
#else
    const vfloat<M> orient = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
    vv = select(flags,uMirror,vPlain);
    vu = select(flags,vMirror,uPlain);
    vNg = Vec3vf<M>(orient*tri_Ng.x,orient*tri_Ng.y,orient*tri_Ng.z);
#endif
  }

  /*! Returns the finalized (u,v) pair of lane i. */
  __forceinline Vec2f uv(const size_t i)
  {
    return Vec2f(vu[i],vv[i]);
  }

  /*! Returns the finalized hit distance of lane i. */
  __forceinline float t(const size_t i) { return vt[i]; }

  /*! Returns the finalized geometry normal of lane i. */
  __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }

private:
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> T;
  vfloat<M> absDen;
  Vec3vf<M> tri_Ng;

public:
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
  Vec3vf<M> vNg;

public:
  const vbool<M> flags;
};
/*! Deferred hit record for a packet of K rays against one triangle of a
 *  quad. The raw values U, V and T are still scaled by absDen; calling
 *  the object performs the division and returns (u,v,t,Ng). Lanes with
 *  'flags' set report the mirrored coordinates (1-u,1-v). */
template<int K>
struct QuadHitK
{
  __forceinline QuadHitK(const vfloat<K>& U,
                         const vfloat<K>& V,
                         const vfloat<K>& T,
                         const vfloat<K>& absDen,
                         const Vec3vf<K>& Ng,
                         const vbool<K>& flags)
    : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {}

  /*! Evaluates the hit and returns the tuple (u, v, t, Ng). */
  __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
  {
    const vfloat<K> invDen = rcp(absDen);
    const vfloat<K> dist = T * invDen;

    /* normalized coordinates, clamped to at most 1 */
    const vfloat<K> uPlain = min(U * invDen,1.0f);
    const vfloat<K> vPlain = min(V * invDen,1.0f);

    /* mirrored coordinates, used by the flagged lanes */
    const vfloat<K> uMirror = vfloat<K>(1.0f) - uPlain;
    const vfloat<K> vMirror = vfloat<K>(1.0f) - vPlain;

    const vfloat<K> uOut = select(flags,uMirror,uPlain);
    const vfloat<K> vOut = select(flags,vMirror,vPlain);
    const Vec3vf<K> normal(tri_Ng.x,tri_Ng.y,tri_Ng.z);
    return std::make_tuple(uOut,vOut,dist,normal);
  }

private:
  const vfloat<K> U;
  const vfloat<K> V;
  const vfloat<K> T;
  const vfloat<K> absDen;
  const vbool<K> flags;
  const Vec3vf<K> tri_Ng;
};
/* ----------------------------- */
/* -- single ray intersectors -- */
/* ----------------------------- */
template<int M, bool filter>
struct QuadMIntersector1MoellerTrumbore;

/*! Intersects M quads with 1 ray. Each quad is tested as the two
 *  triangles (v0,v1,v3) and (v2,v3,v1); for the second triangle the raw
 *  U and V values are mirrored (absDen - value) before the epilog runs. */
template<int M, bool filter>
struct QuadMIntersector1MoellerTrumbore
{
  __forceinline QuadMIntersector1MoellerTrumbore() {}

  __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}

  /*! Finds hits of the ray with the M quads and hands them to Intersect1EpilogM. */
  __forceinline void intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const vuint<M>& geomID, const vuint<M>& primID) const
  {
    UVIdentity<M> uvMap;
    MoellerTrumboreHitM<M,UVIdentity<M>> triHit(uvMap);
    MoellerTrumboreIntersector1<M> triTest(ray,nullptr);
    Intersect1EpilogM<M,filter> commit(ray,context,geomID,primID);

    /* first triangle (v0,v1,v3) */
    if (triTest.intersect(ray,v0,v1,v3,uvMap,triHit))
      commit(triHit.valid,triHit);

    /* second triangle (v2,v3,v1) */
    if (!triTest.intersect(ray,v2,v3,v1,uvMap,triHit))
      return;

    triHit.U = triHit.absDen - triHit.U;
    triHit.V = triHit.absDen - triHit.V;
    commit(triHit.valid,triHit);
  }

  /*! Returns true as soon as Occluded1EpilogM reports an occluding hit. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                              const vuint<M>& geomID, const vuint<M>& primID) const
  {
    UVIdentity<M> uvMap;
    MoellerTrumboreHitM<M,UVIdentity<M>> triHit(uvMap);
    MoellerTrumboreIntersector1<M> triTest(ray,nullptr);
    Occluded1EpilogM<M,filter> shadowTest(ray,context,geomID,primID);

    /* first triangle (v0,v1,v3) */
    if (triTest.intersect(ray,v0,v1,v3,uvMap,triHit))
    {
      if (shadowTest(triHit.valid,triHit))
        return true;
    }

    /* second triangle (v2,v3,v1) */
    if (!triTest.intersect(ray,v2,v3,v1,uvMap,triHit))
      return false;

    triHit.U = triHit.absDen - triHit.U;
    triHit.V = triHit.absDen - triHit.V;
    if (shadowTest(triHit.valid,triHit))
      return true;

    return false;
  }
};
#if defined(__AVX__)
/*! Intersects 4 quads with 1 ray using AVX. Both triangles of the 4
 *  quads are packed into one 8-wide triangle test; 'flags' marks the
 *  4 lanes that hold the second triangle of each quad. */
template<bool filter>
struct QuadMIntersector1MoellerTrumbore<4,filter>
{
__forceinline QuadMIntersector1MoellerTrumbore() {}
__forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
/*! Shared implementation of intersect/occluded: runs the 8-wide triangle
 *  test, remaps U,V for the flagged lanes and returns true only if the
 *  epilog returns true. */
template<typename Epilog>
__forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
{
/* first vertex: v0 for one half of the lanes, v2 for the other half */
const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
/* without culling v1 and v3 are replicated to both halves, giving the triangles (v0,v1,v3) and (v2,v1,v3) */
const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
/* with culling the second triangle is built as (v2,v3,v1) */
const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
UVIdentity<8> mapUV;
MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV);
MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
/* NOTE(review): presumably the set lanes are the half that holds v2 in vtx0 - confirm the vbool8/vfloat8 lane order */
const vbool8 flags(0,0,0,0,1,1,1,1);
if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit)))
{
vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
#if !defined(EMBREE_BACKFACE_CULLING)
/* flagged lanes: swap and mirror U,V and negate the normal, since their triangle is (v2,v1,v3) */
hit.U = select(flags,absDen-V,U);
hit.V = select(flags,absDen-U,V);
hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR
#else
/* flagged lanes: mirror U,V only */
hit.U = select(flags,absDen-U,U);
hit.V = select(flags,absDen-V,V);
#endif
if (unlikely(epilog(hit.valid,hit)))
return true;
}
return false;
}
/*! Intersection query; geomID/primID are widened to 8 lanes for the 8-wide epilog. */
__forceinline bool intersect(RayHit& ray, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
}
/*! Occlusion query; same as above but with the occlusion epilog. */
__forceinline bool occluded(Ray& ray, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
}
};
#endif
/* ----------------------------- */
/* -- ray packet intersectors -- */
/* ----------------------------- */
/*! Triangle test used by the quad packet intersectors: intersects a
 *  single ray k of a ray packet with M triangles and reports the hits
 *  as a QuadHitM to the epilog. */
struct MoellerTrumboreIntersector1KTriangleM
{
/*! Intersect k'th ray from ray packet of size K with M triangles.
 *  The triangles are given by first vertex, the two edges e1,e2 and the
 *  normal Ng; 'flags' is forwarded unchanged into the QuadHitM.
 *  Returns false if no lane survives the tests, otherwise the result
 *  of the epilog. */
template<int M, int K, typename Epilog>
static __forceinline bool intersect(RayK<K>& ray,
size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const Vec3vf<M>& tri_Ng,
const vbool<M>& flags,
const Epilog& epilog)
{
/* calculate denominator */
const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
const Vec3vf<M> R = cross(C,D);
const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
const vfloat<M> absDen = abs(den);
const vfloat<M> sgnDen = signmsk(den);
/* perform edge tests */
/* XOR with the sign of den, so that U and V can be compared against absDen */
const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
/* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#else
vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#endif
if (likely(none(valid))) return false;
/* perform depth test */
/* compared in scaled form (absDen*tnear < T <= absDen*tfar) to avoid the division */
const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
if (likely(none(valid))) return false;
/* calculate hit information */
QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags);
return epilog(valid,hit);
}
/*! Same test for triangles given by their three vertices: derives
 *  e1 = v0-v1, e2 = v2-v0 and Ng = cross(e2,e1) and forwards to intersect(). */
template<int M, int K, typename Epilog>
static __forceinline bool intersect1(RayK<K>& ray,
size_t k,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const vbool<M>& flags,
const Epilog& epilog)
{
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v2-v0;
const Vec3vf<M> Ng = cross(e2,e1);
return intersect<M,K>(ray,k,v0,e1,e2,Ng,flags,epilog);
}
};
/*! Base class of the quad packet intersectors: tests all K rays of a
 *  packet against a single quad, which is handled as the two triangles
 *  (v0,v1,v3) and (v2,v3,v1). */
template<int M, int K, bool filter>
struct QuadMIntersectorKMoellerTrumboreBase
{
__forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
/*! Intersects K rays with one of M triangles. The triangle is given by
 *  its first vertex, the edges e1,e2 and the normal Ng. Lanes are
 *  removed from the valid mask test by test, with an early exit as soon
 *  as no lane is left; surviving lanes are passed to the epilog as a
 *  QuadHitK. */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_e1,
const Vec3vf<K>& tri_e2,
const Vec3vf<K>& tri_Ng,
const vbool<K>& flags,
const Epilog& epilog) const
{
/* calculate denominator */
vbool<K> valid = valid0;
const Vec3vf<K> C = tri_v0 - ray.org;
const Vec3vf<K> R = cross(C,ray.dir);
const vfloat<K> den = dot(tri_Ng,ray.dir);
const vfloat<K> absDen = abs(den);
const vfloat<K> sgnDen = signmsk(den);
/* test against edge p2 p0 */
const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
valid &= U >= 0.0f;
if (likely(none(valid))) return false;
/* test against edge p0 p1 */
const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
valid &= V >= 0.0f;
if (likely(none(valid))) return false;
/* test against edge p1 p2 */
const vfloat<K> W = absDen-U-V;
valid &= W >= 0.0f;
if (likely(none(valid))) return false;
/* perform depth test */
/* compared in scaled form (absDen*tnear < T <= absDen*tfar) to avoid the division */
const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar);
if (unlikely(none(valid))) return false;
/* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
valid &= den < vfloat<K>(zero);
if (unlikely(none(valid))) return false;
#else
/* without culling only rays with a zero denominator are rejected */
valid &= den != vfloat<K>(zero);
if (unlikely(none(valid))) return false;
#endif
/* calculate hit information */
QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags);
return epilog(valid,hit);
}
/*! Intersects K rays with one triangle given by its three vertices:
 *  derives e1 = v0-v1, e2 = v2-v0 and Ng = cross(e2,e1) and forwards to
 *  the overload above. */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const vbool<K>& flags,
const Epilog& epilog) const
{
const Vec3vf<K> e1 = tri_v0-tri_v1;
const Vec3vf<K> e2 = tri_v2-tri_v0;
const Vec3vf<K> Ng = cross(e2,e1);
return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog);
}
/*! Intersects K rays with one of M quads. The quad is tested as the
 *  triangles (v0,v1,v3) with flags=false and (v2,v3,v1) with flags=true.
 *  Returns true if no lane of valid0 is set afterwards.
 *  NOTE(review): valid0 is a const reference and is not modified here, so
 *  the none(valid0) checks can only change if the epilog modifies the
 *  referenced mask through another alias - confirm against the callers. */
template<typename Epilog>
__forceinline bool intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& v0,
const Vec3vf<K>& v1,
const Vec3vf<K>& v2,
const Vec3vf<K>& v3,
const Epilog& epilog) const
{
intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
if (none(valid0)) return true;
intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
return none(valid0);
}
};
/*! Intersects the k'th ray of a packet of size K with M quads. Every
 *  quad is tested as the two triangles (v0,v1,v3) and (v2,v3,v1); the
 *  second one is tagged through the 'flags' mask so that the hit record
 *  remaps its uv coordinates. */
template<int M, int K, bool filter>
struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter>
{
  __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
    : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}

  /*! Intersection query: both triangles are always tested. */
  __forceinline void intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                                const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef MoellerTrumboreIntersector1KTriangleM TriangleTest;
    const vbool<M> firstTriangle(false);
    const vbool<M> secondTriangle(true);

    Intersect1KEpilogM<M,K,filter> commit(ray,k,context,geomID,primID);
    TriangleTest::intersect1<M,K>(ray,k,v0,v1,v3,firstTriangle,commit);
    TriangleTest::intersect1<M,K>(ray,k,v2,v3,v1,secondTriangle,commit);
  }

  /*! Occlusion query: stops after the first triangle that reports a hit. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef MoellerTrumboreIntersector1KTriangleM TriangleTest;

    Occluded1KEpilogM<M,K,filter> shadowTest(ray,k,context,geomID,primID);
    const bool firstHit = TriangleTest::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),shadowTest);
    if (firstHit)
      return true;

    const bool secondHit = TriangleTest::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),shadowTest);
    return secondHit;
  }
};
#if defined(__AVX__)
/*! Intersects 4 quads with the k'th ray of a packet of size K using AVX.
 *  Both triangles of the 4 quads are packed into a single 8-wide
 *  triangle test; 'flags' marks the 4 lanes holding the second triangle. */
template<int K, bool filter>
struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
{
__forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
: QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
/*! Shared implementation of intersect1/occluded1: builds the 8-wide
 *  triangle vertices and returns the result of the triangle test. */
template<typename Epilog>
__forceinline bool intersect1(RayK<K>& ray, size_t k,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
{
/* first vertex: v0 for one half of the lanes, v2 for the other half */
const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
/* without culling v1 and v3 are replicated to both halves, giving the triangles (v0,v1,v3) and (v2,v1,v3) */
const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
/* with culling the second triangle is built as (v2,v3,v1) */
const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
/* NOTE(review): presumably the set lanes are the half that holds v2 in vtx0 - confirm the vbool8/vfloat8 lane order */
const vbool8 flags(0,0,0,0,1,1,1,1);
return MoellerTrumboreIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog);
}
/*! Intersection query; geomID/primID are widened to 8 lanes for the 8-wide epilog. */
__forceinline bool intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
}
/*! Occlusion query; same as above but with the occlusion epilog. */
__forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
}
};
#endif
}
}

View File

@@ -0,0 +1,438 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "quad_intersector_moeller.h"
/*! Modified Pluecker ray/triangle intersector. The test first shifts
* the ray origin into the origin of the coordinate system and then
* uses Pluecker coordinates for the intersection. Due to the shift,
* the Pluecker coordinate calculation simplifies and the tests get
* numerically stable. The edge equations are watertight along the
* edge for neighboring triangles. */
namespace embree
{
namespace isa
{
/*! Hit record of the Pluecker test for M triangles that were produced
 *  by splitting quads. The constructor stores the raw edge-test values
 *  U,V, their sum UVW and the already computed distance t; finalize()
 *  normalizes U,V by UVW and applies the per-lane uv remapping selected
 *  by 'flags'. The callers in this file set 'flags' for the lanes that
 *  hold the second triangle of a quad. */
template<int M>
struct QuadHitPlueckerM
{
__forceinline QuadHitPlueckerM() {}
__forceinline QuadHitPlueckerM(const vbool<M>& valid,
const vfloat<M>& U,
const vfloat<M>& V,
const vfloat<M>& UVW,
const vfloat<M>& t,
const Vec3vf<M>& Ng,
const vbool<M>& flags)
: U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {}
/*! Converts the stored raw values into the final hit data vu, vv and vNg. */
__forceinline void finalize()
{
/* lanes with |UVW| below min_rcp_input use a reciprocal of 0 instead of rcp(UVW) */
const vbool<M> invalid = abs(UVW) < min_rcp_input;
const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
/* clamp to 1 so that the mirrored coordinates 1-u and 1-v cannot get negative */
const vfloat<M> u = min(U * rcpUVW,1.0f);
const vfloat<M> v = min(V * rcpUVW,1.0f);
const vfloat<M> u1 = vfloat<M>(1.0f) - u;
const vfloat<M> v1 = vfloat<M>(1.0f) - v;
#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
/* flagged lanes report (1-u,1-v), the geometry normal is passed through unchanged */
vu = select(flags,u1,u);
vv = select(flags,v1,v);
vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
#else
/* flagged lanes report (1-v,1-u) and get a negated geometry normal.
   NOTE(review): presumably pairs with the AVX paths in this file that build
   the second triangle as (v2,v1,v3) instead of (v2,v3,v1) - confirm */
const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
vv = select(flags,u1,v);
vu = select(flags,v1,u);
vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
#endif
}
/*! Returns the uv coordinates of lane i (only meaningful after finalize()). */
__forceinline Vec2f uv(const size_t i)
{
const float u = vu[i];
const float v = vv[i];
return Vec2f(u,v);
}
/*! Returns the hit distance of lane i (set by the constructor). */
__forceinline float t(const size_t i) { return vt[i]; }
/*! Returns the geometry normal of lane i (only meaningful after finalize()). */
__forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
private:
/* raw inputs, consumed by finalize() */
vfloat<M> U;
vfloat<M> V;
vfloat<M> UVW;
Vec3vf<M> tri_Ng;
public:
/* valid mask and distance from the constructor, vu/vv/vNg written by finalize() */
vbool<M> valid;
vfloat<M> vu;
vfloat<M> vv;
vfloat<M> vt;
Vec3vf<M> vNg;
public:
/* lanes whose uv coordinates get remapped in finalize() */
const vbool<M> flags;
};
/*! Deferred hit record of the Pluecker test for a packet of K rays
 *  against one triangle of a quad. Calling the object normalizes the raw
 *  values U and V by UVW and returns (u,v,t,Ng). Lanes with 'flags' set
 *  report the mirrored coordinates (1-u,1-v). */
template<int K>
struct QuadHitPlueckerK
{
  __forceinline QuadHitPlueckerK(const vfloat<K>& U,
                                 const vfloat<K>& V,
                                 const vfloat<K>& UVW,
                                 const vfloat<K>& t,
                                 const Vec3vf<K>& Ng,
                                 const vbool<K>& flags)
    : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {}

  /*! Evaluates the hit and returns the tuple (u, v, t, Ng). */
  __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
  {
    /* lanes whose |UVW| is below min_rcp_input use a reciprocal of 0 */
    const vbool<K> tooSmall = abs(UVW) < min_rcp_input;
    const vfloat<K> invUVW = select(tooSmall,vfloat<K>(0.0f),rcp(UVW));

    /* normalized coordinates, clamped to at most 1 */
    const vfloat<K> uPlain = min(U * invUVW,1.0f);
    const vfloat<K> vPlain = min(V * invUVW,1.0f);

    /* mirrored coordinates, used by the flagged lanes */
    const vfloat<K> uMirror = vfloat<K>(1.0f) - uPlain;
    const vfloat<K> vMirror = vfloat<K>(1.0f) - vPlain;

    const vfloat<K> uOut = select(flags,uMirror,uPlain);
    const vfloat<K> vOut = select(flags,vMirror,vPlain);
    const Vec3vf<K> normal(tri_Ng.x,tri_Ng.y,tri_Ng.z);
    return std::make_tuple(uOut,vOut,t,normal);
  }

private:
  const vfloat<K> U;
  const vfloat<K> V;
  const vfloat<K> UVW;
  const vfloat<K> t;
  const vbool<K> flags;
  const Vec3vf<K> tri_Ng;
};
/*! Pluecker triangle test used by the single-ray quad intersectors. */
struct PlueckerIntersectorTriangle1
{
/*! Intersects one ray with M triangles given by their vertices.
 *  'flags' is forwarded unchanged into the QuadHitPlueckerM. Returns
 *  false if no lane survives the tests, otherwise the result of the
 *  epilog. */
template<int M, typename Epilog>
static __forceinline bool intersect(Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const vbool<M>& flags,
const Epilog& epilog)
{
/* calculate vertices relative to ray origin */
const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
const Vec3vf<M> v0 = tri_v0-O;
const Vec3vf<M> v1 = tri_v1-O;
const Vec3vf<M> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<M> e0 = v2-v0;
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v1-v2;
/* perform edge tests */
const vfloat<M> U = dot(cross(e0,v2+v0),D);
const vfloat<M> V = dot(cross(e1,v0+v1),D);
const vfloat<M> W = dot(cross(e2,v1+v2),D);
const vfloat<M> UVW = U+V+W;
/* tolerance of the sign tests, relative to the magnitude of UVW */
const vfloat<M> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
/* with culling only lanes where U,V,W are all <= eps pass */
vbool<M> valid = max(U,V,W) <= eps;
#else
/* without culling U,V,W have to be all >= -eps or all <= eps */
vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(none(valid))) return false;
/* calculate geometry normal and denominator */
const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<M> den = twice(dot(Ng,D));
/* perform depth test */
const vfloat<M> T = twice(dot(v0,Ng));
const vfloat<M> t = rcp(den)*T;
valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
/* reject lanes with a zero denominator, t was computed from rcp(den) */
valid &= den != vfloat<M>(zero);
if (unlikely(none(valid))) return false;
/* update hit information */
QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
return epilog(valid,hit);
}
};
/*! Intersects M quads with 1 ray using the Pluecker triangle test. Every
 *  quad is tested as the two triangles (v0,v1,v3) and (v2,v3,v1); the
 *  second one is tagged through the 'flags' mask so that the hit record
 *  remaps its uv coordinates. */
template<int M, bool filter>
struct QuadMIntersector1Pluecker
{
  __forceinline QuadMIntersector1Pluecker() {}

  __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}

  /*! Intersection query: both triangles are always tested. */
  __forceinline void intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef PlueckerIntersectorTriangle1 TriangleTest;
    const vbool<M> firstTriangle(false);
    const vbool<M> secondTriangle(true);

    Intersect1EpilogM<M,filter> commit(ray,context,geomID,primID);
    TriangleTest::intersect<M>(ray,v0,v1,v3,firstTriangle,commit);
    TriangleTest::intersect<M>(ray,v2,v3,v1,secondTriangle,commit);
  }

  /*! Occlusion query: stops after the first triangle that reports a hit. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                              const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef PlueckerIntersectorTriangle1 TriangleTest;

    Occluded1EpilogM<M,filter> shadowTest(ray,context,geomID,primID);
    const bool firstHit = TriangleTest::intersect<M>(ray,v0,v1,v3,vbool<M>(false),shadowTest);
    if (firstHit)
      return true;

    const bool secondHit = TriangleTest::intersect<M>(ray,v2,v3,v1,vbool<M>(true ),shadowTest);
    return secondHit;
  }
};
#if defined(__AVX__)
/*! Intersects 4 quads with 1 ray using AVX. Both triangles of the 4
 *  quads are packed into a single 8-wide Pluecker triangle test;
 *  'flags' marks the 4 lanes holding the second triangle. */
template<bool filter>
struct QuadMIntersector1Pluecker<4,filter>
{
__forceinline QuadMIntersector1Pluecker() {}
__forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
/*! Shared implementation of intersect/occluded: builds the 8-wide
 *  triangle vertices and returns the result of the triangle test. */
template<typename Epilog>
__forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
{
/* first vertex: v0 for one half of the lanes, v2 for the other half */
const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
/* without culling v1 and v3 are replicated to both halves, giving the triangles (v0,v1,v3) and (v2,v1,v3) */
const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
/* with culling the second triangle is built as (v2,v3,v1) */
const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
/* NOTE(review): presumably the set lanes are the half that holds v2 in vtx0 - confirm the vbool8/vfloat8 lane order */
const vbool8 flags(0,0,0,0,1,1,1,1);
return PlueckerIntersectorTriangle1::intersect<8>(ray,vtx0,vtx1,vtx2,flags,epilog);
}
/*! Intersection query; geomID/primID are widened to 8 lanes for the 8-wide epilog. */
__forceinline bool intersect(RayHit& ray, RayQueryContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
}
/*! Occlusion query; same as above but with the occlusion epilog. */
__forceinline bool occluded(Ray& ray, RayQueryContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
}
};
#endif
/* ----------------------------- */
/* -- ray packet intersectors -- */
/* ----------------------------- */
/*! Pluecker triangle test used by the quad packet intersectors when a
 *  single ray of a packet is intersected with M triangles. */
struct PlueckerIntersector1KTriangleM
{
/*! Intersect k'th ray from ray packet of size K with M triangles.
 *  'flags' is forwarded unchanged into the QuadHitPlueckerM. Returns
 *  false if no lane survives the tests, otherwise the result of the
 *  epilog. */
template<int M, int K, typename Epilog>
static __forceinline bool intersect1(RayK<K>& ray,
size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const vbool<M>& flags,
const Epilog& epilog)
{
/* calculate vertices relative to ray origin */
const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
const Vec3vf<M> v0 = tri_v0-O;
const Vec3vf<M> v1 = tri_v1-O;
const Vec3vf<M> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<M> e0 = v2-v0;
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v1-v2;
/* perform edge tests */
const vfloat<M> U = dot(cross(e0,v2+v0),D);
const vfloat<M> V = dot(cross(e1,v0+v1),D);
const vfloat<M> W = dot(cross(e2,v1+v2),D);
const vfloat<M> UVW = U+V+W;
/* tolerance of the sign tests, relative to the magnitude of UVW */
const vfloat<M> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
/* with culling only lanes where U,V,W are all <= eps pass */
vbool<M> valid = max(U,V,W) <= eps;
#else
/* without culling U,V,W have to be all >= -eps or all <= eps */
vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(none(valid))) return false;
/* calculate geometry normal and denominator */
const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<M> den = twice(dot(Ng,D));
/* perform depth test */
const vfloat<M> T = twice(dot(v0,Ng));
const vfloat<M> t = rcp(den)*T;
valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
if (unlikely(none(valid))) return false;
/* avoid division by 0 */
valid &= den != vfloat<M>(zero);
if (unlikely(none(valid))) return false;
/* update hit information */
QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
return epilog(valid,hit);
}
};
/*! Base class of the Pluecker quad packet intersectors: tests all K rays
 *  of a packet against a single quad, which is handled as the two
 *  triangles (v0,v1,v3) and (v2,v3,v1). */
template<int M, int K, bool filter>
struct QuadMIntersectorKPlueckerBase
{
__forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
/*! Intersects K rays with one of M triangles. The triangle is given by
 *  its three vertices; 'flags' is forwarded unchanged into the
 *  QuadHitPlueckerK. Returns false if no lane survives the tests,
 *  otherwise the result of the epilog. */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const vbool<K>& flags,
const Epilog& epilog) const
{
/* calculate vertices relative to ray origin */
vbool<K> valid = valid0;
const Vec3vf<K> O = ray.org;
const Vec3vf<K> D = ray.dir;
const Vec3vf<K> v0 = tri_v0-O;
const Vec3vf<K> v1 = tri_v1-O;
const Vec3vf<K> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<K> e0 = v2-v0;
const Vec3vf<K> e1 = v0-v1;
const Vec3vf<K> e2 = v1-v2;
/* perform edge tests */
const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
const vfloat<K> UVW = U+V+W;
/* tolerance of the sign tests, relative to the magnitude of UVW */
const vfloat<K> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
/* with culling only lanes where U,V,W are all <= eps pass */
valid &= max(U,V,W) <= eps;
#else
/* without culling U,V,W have to be all >= -eps or all <= eps */
valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(none(valid))) return false;
/* calculate geometry normal and denominator */
const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
/* perform depth test */
const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
const vfloat<K> t = rcp(den)*T;
valid &= ray.tnear() <= t & t <= ray.tfar;
/* reject lanes with a zero denominator, t was computed from rcp(den) */
valid &= den != vfloat<K>(zero);
if (unlikely(none(valid))) return false;
/* calculate hit information */
QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags);
return epilog(valid,hit);
}
/*! Intersects K rays with one of M quads. The quad is tested as the
 *  triangles (v0,v1,v3) with flags=false and (v2,v3,v1) with flags=true.
 *  Returns true if no lane of valid0 is set afterwards.
 *  NOTE(review): valid0 is a const reference and is not modified here, so
 *  the none(valid0) checks can only change if the epilog modifies the
 *  referenced mask through another alias - confirm against the callers. */
template<typename Epilog>
__forceinline bool intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& v0,
const Vec3vf<K>& v1,
const Vec3vf<K>& v2,
const Vec3vf<K>& v3,
const Epilog& epilog) const
{
intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
if (none(valid0)) return true;
intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
return none(valid0);
}
};
/*! Intersects the k'th ray of a packet of size K with M quads using the
 *  Pluecker triangle test. Every quad is tested as the two triangles
 *  (v0,v1,v3) and (v2,v3,v1); the second one is tagged through the
 *  'flags' mask so that the hit record remaps its uv coordinates. */
template<int M, int K, bool filter>
struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter>
{
  __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
    : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}

  /*! Intersection query: both triangles are always tested. */
  __forceinline void intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                                const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef PlueckerIntersector1KTriangleM TriangleTest;
    const vbool<M> firstTriangle(false);
    const vbool<M> secondTriangle(true);

    Intersect1KEpilogM<M,K,filter> commit(ray,k,context,geomID,primID);
    TriangleTest::intersect1<M,K>(ray,k,v0,v1,v3,firstTriangle,commit);
    TriangleTest::intersect1<M,K>(ray,k,v2,v3,v1,secondTriangle,commit);
  }

  /*! Occlusion query: stops after the first triangle that reports a hit. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const vuint<M>& geomID, const vuint<M>& primID) const
  {
    typedef PlueckerIntersector1KTriangleM TriangleTest;

    Occluded1KEpilogM<M,K,filter> shadowTest(ray,k,context,geomID,primID);
    const bool firstHit = TriangleTest::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),shadowTest);
    if (firstHit)
      return true;

    const bool secondHit = TriangleTest::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),shadowTest);
    return secondHit;
  }
};
#if defined(__AVX__)
/*! Intersects 4 quads with the k'th ray of a packet of size K using AVX.
 *  Both triangles of the 4 quads are packed into a single 8-wide
 *  Pluecker triangle test; 'flags' marks the 4 lanes holding the second
 *  triangle. */
template<int K, bool filter>
struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
{
__forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
: QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
/*! Shared implementation of intersect1/occluded1: builds the 8-wide
 *  triangle vertices and returns the result of the triangle test. */
template<typename Epilog>
__forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
{
/* first vertex: v0 for one half of the lanes, v2 for the other half */
const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
/* NOTE(review): presumably the set lanes are the half that holds v2 in vtx0 - confirm the vbool8/vfloat8 lane order */
const vbool8 flags(0,0,0,0,1,1,1,1);
#if !defined(EMBREE_BACKFACE_CULLING)
/* without culling v1 and v3 are replicated to both halves, giving the triangles (v0,v1,v3) and (v2,v1,v3) */
const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
/* with culling the second triangle is built as (v2,v3,v1) */
const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
return PlueckerIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog);
}
/*! Intersection query; geomID/primID are widened to 8 lanes for the 8-wide epilog. */
__forceinline bool intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
}
/*! Occlusion query; same as above but with the occlusion epilog. */
__forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
const vuint4& geomID, const vuint4& primID) const
{
return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
}
};
#endif
}
}

View File

@@ -0,0 +1,483 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../common/scene.h"
namespace embree
{
/* Stores M quads from an indexed face set. Each slot holds a geometry ID
   and a primitive ID; unless EMBREE_COMPACT_POLYS is defined it also
   holds the offsets of the four vertices. A slot whose primID is -1 is
   an unused (padding) slot. */
template <int M>
struct QuadMi
{
/* Virtual interface to query information about the quad type */
struct Type : public PrimitiveType
{
const char* name() const;
size_t sizeActive(const char* This) const;
size_t sizeTotal(const char* This) const;
size_t getBytes(const char* This) const;
};
static Type type;
public:
/* primitive supports multiple time segments */
static const bool singleTimeSegment = false;
/* Returns maximum number of stored quads */
static __forceinline size_t max_size() { return M; }
/* Returns required number of primitive blocks for N primitives */
static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
public:
/* Default constructor */
__forceinline QuadMi() { }
/* Construction from vertices and IDs (the vertex offsets are ignored when EMBREE_COMPACT_POLYS is defined) */
__forceinline QuadMi(const vuint<M>& v0,
const vuint<M>& v1,
const vuint<M>& v2,
const vuint<M>& v3,
const vuint<M>& geomIDs,
const vuint<M>& primIDs)
#if defined(EMBREE_COMPACT_POLYS)
: geomIDs(geomIDs), primIDs(primIDs) {}
#else
: v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {}
#endif
/* Returns a mask that tells which quads are valid */
__forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
/* Returns if the specified quad is valid */
__forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
/* Returns the number of stored quads, computed as bsf() of the inverted
   valid mask. NOTE(review): presumably relies on the valid quads being
   packed at the front, as fill() produces them - confirm */
__forceinline size_t size() const { return bsf(~movemask(valid())); }
/* Returns the geometry IDs */
__forceinline vuint<M>& geomID() { return geomIDs; }
__forceinline const vuint<M>& geomID() const { return geomIDs; }
__forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; }
/* Returns the primitive IDs */
__forceinline vuint<M>& primID() { return primIDs; }
__forceinline const vuint<M>& primID() const { return primIDs; }
__forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
/* Calculate the bounds of the quads (the loop stops at the first invalid slot) */
__forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
{
BBox3fa bounds = empty;
for (size_t i=0; i<M && valid(i); i++) {
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
bounds.extend(mesh->bounds(primID(i),itime));
}
return bounds;
}
/* Calculate the linear bounds of the primitive from the bounds at time steps itime and itime+1 */
__forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) {
return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
}
/* Calculate the linear bounds by merging the per-quad linear bounds that the mesh reports for (itime, numTimeSteps) */
__forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
{
LBBox3fa allBounds = empty;
for (size_t i=0; i<M && valid(i); i++)
{
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
}
return allBounds;
}
/* Calculate the linear bounds by merging the per-quad linear bounds that the mesh reports for the time range */
__forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
{
LBBox3fa allBounds = empty;
for (size_t i=0; i<M && valid(i); i++)
{
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
allBounds.extend(mesh->linearBounds(primID(i), time_range));
}
return allBounds;
}
/* Fill quad from quad list: consumes up to M primitive references
   starting at prims[begin], advances 'begin' by the number consumed and
   re-constructs this object in place. Remaining slots are padded. */
template<typename PrimRefT>
__forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
{
vuint<M> geomID = -1, primID = -1;
const PrimRefT* prim = &prims[begin];
vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
for (size_t i=0; i<M; i++)
{
if (begin<end) {
geomID[i] = prim->geomID();
primID[i] = prim->primID();
#if !defined(EMBREE_COMPACT_POLYS)
const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID());
const QuadMesh::Quad& q = mesh->quad(prim->primID());
/* vertex stride divided by 4, so that index*int_stride is an offset in 4 byte units */
unsigned int_stride = mesh->vertices0.getStride()/4;
v0[i] = q.v[0] * int_stride;
v1[i] = q.v[1] * int_stride;
v2[i] = q.v[2] * int_stride;
v3[i] = q.v[3] * int_stride;
#endif
begin++;
} else {
/* padding slot: expects at least one real quad in slot 0 */
assert(i);
if (likely(i > 0)) {
geomID[i] = geomID[0]; // always valid geomIDs
primID[i] = -1; // indicates invalid data
/* all four vertices reference the first vertex of quad 0 */
v0[i] = v0[0];
v1[i] = v0[0];
v2[i] = v0[0];
v3[i] = v0[0];
}
}
if (begin<end) prim = &prims[begin];
}
new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store
}
/* Fills the quads like fill() and returns their linear bounds for time step itime */
__forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
{
fill(prims, begin, end, scene);
return linearBounds(scene, itime);
}
/* Fills the quads like fill() and returns their linear bounds for the time range */
__forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
{
fill(prims, begin, end, scene);
return linearBounds(scene, time_range);
}
/* Prints the stored vertex offsets (if present) and IDs */
friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) {
return cout << "QuadMi<" << M << ">( "
#if !defined(EMBREE_COMPACT_POLYS)
<< "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", "
#endif
<< "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )";
}
protected:
#if !defined(EMBREE_COMPACT_POLYS)
vuint<M> v0_; // 4 byte offset of 1st vertex
vuint<M> v1_; // 4 byte offset of 2nd vertex
vuint<M> v2_; // 4 byte offset of 3rd vertex
vuint<M> v3_; // 4 byte offset of 4th vertex
#endif
vuint<M> geomIDs; // geometry ID of mesh
vuint<M> primIDs; // primitive ID of primitive inside mesh
};
namespace isa
{
/* ISA-specific extension of embree::QuadMi<M>: adds vertex fetching, gathering and
   bounds recomputation on top of the IDs (and vertex offsets) stored in the base class */
template<int M>
struct QuadMi : public embree::QuadMi<M>
{
/* make the dependent base class members visible without this-> qualification */
#if !defined(EMBREE_COMPACT_POLYS)
using embree::QuadMi<M>::v0_;
using embree::QuadMi<M>::v1_;
using embree::QuadMi<M>::v2_;
using embree::QuadMi<M>::v3_;
#endif
using embree::QuadMi<M>::geomIDs;
using embree::QuadMi<M>::primIDs;
using embree::QuadMi<M>::geomID;
using embree::QuadMi<M>::primID;
using embree::QuadMi<M>::valid;
/* Returns vertex number vid (0..3) of the quad stored in lane index.
   With EMBREE_COMPACT_POLYS the vertex index is read from the mesh's quad and looked up
   in mesh->vertices[0]; otherwise the stored per-lane offset is applied to the float
   array scene->vertices[geomID(index)]. */
template<int vid>
__forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
{
#if defined(EMBREE_COMPACT_POLYS)
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
const QuadMesh::Quad& quad = mesh->quad(primID(index));
return (Vec3f) mesh->vertices[0][quad.v[vid]];
#else
const vuint<M>& v = getVertexOffset<vid>();
const float* vertices = scene->vertices[geomID(index)];
return (Vec3f&) vertices[v[index]];
#endif
}
/* Returns vertex number vid of the quad in lane index, linearly interpolated with
   weight ftime between the vertex data of time steps itime and itime+1 */
template<int vid, typename T>
__forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
{
#if defined(EMBREE_COMPACT_POLYS)
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
const QuadMesh::Quad& quad = mesh->quad(primID(index));
const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]];
const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]];
#else
const vuint<M>& v = getVertexOffset<vid>();
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
#endif
const Vec3<T> p0(v0.x,v0.y,v0.z);
const Vec3<T> p1(v1.x,v1.y,v1.z);
return lerp(p0,p1,ftime);
}
/* Variant for K rays that may each use a different time step: for every lane i set in
   valid, vertex vid of quad index is loaded at time steps itime[i] and itime[i]+1 and
   written to component i of p0/p1. Components of lanes not set in valid are never
   assigned before the final blend. */
template<int vid, int K, typename T>
__forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
{
Vec3<T> p0, p1;
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
/* iterate over the set bits of the valid mask */
for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
{
#if defined(EMBREE_COMPACT_POLYS)
const QuadMesh::Quad& quad = mesh->quad(primID(index));
const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]];
const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]];
#else
const vuint<M>& v = getVertexOffset<vid>();
const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
#endif
p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
}
return (T(one)-ftime)*p0 + ftime*p1;
}
/* The four vertices of a single quad, each held in a vfloat4 */
struct Quad {
vfloat4 v0,v1,v2,v3;
};
#if defined(EMBREE_COMPACT_POLYS)
/* Loads the four vertices of the quad in lane i from mesh->vertices0 through the
   mesh's index data; a lane whose primID is -1 yields an all-zero quad */
__forceinline Quad loadQuad(const int i, const Scene* const scene) const
{
const unsigned int geomID = geomIDs[i];
const unsigned int primID = primIDs[i];
if (unlikely(primID == -1)) return { zero, zero, zero, zero };
const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
const QuadMesh::Quad& quad = mesh->quad(primID);
const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]];
const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]];
const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]];
const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]];
return { v0, v1, v2, v3 };
}
/* Same as above, but reads the vertices from mesh->vertices[itime] */
__forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
{
const unsigned int geomID = geomIDs[i];
const unsigned int primID = primIDs[i];
if (unlikely(primID == -1)) return { zero, zero, zero, zero };
const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
const QuadMesh::Quad& quad = mesh->quad(primID);
const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]];
const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]];
const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]];
const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]];
return { v0, v1, v2, v3 };
}
#else
/* Loads the four vertices of the quad in lane i with unaligned loads, applying the
   stored offsets v0_..v3_ to the float array scene->vertices[geomID(i)].
   No primID check is done in this variant. */
__forceinline Quad loadQuad(const int i, const Scene* const scene) const
{
const float* vertices = scene->vertices[geomID(i)];
const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
return { v0, v1, v2, v3 };
}
/* Same as above, but the base pointer is mesh->vertexPtr(0,itime) of the lane's mesh */
__forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
{
const unsigned int geomID = geomIDs[i];
const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
const float* vertices = (const float*) mesh->vertexPtr(0,itime);
const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
return { v0, v1, v2, v3 };
}
#endif
/* Gather the quads */
/* (declaration only; the definition is supplied outside of the class body) */
__forceinline void gather(Vec3vf<M>& p0,
Vec3vf<M>& p1,
Vec3vf<M>& p2,
Vec3vf<M>& p3,
const Scene *const scene) const;
#if defined(__AVX512F__)
/* 16-wide overload, declared only when AVX-512F is available */
__forceinline void gather(Vec3vf16& p0,
Vec3vf16& p1,
Vec3vf16& p2,
Vec3vf16& p3,
const Scene *const scene) const;
#endif
/* Gathers the four vertices of quad index for K rays with per-ray times.
   The time segment of every ray is computed through mesh->timeSegment<K>; when all
   valid rays share the time step of the first valid ray the scalar-itime getVertex is
   used, otherwise the per-lane variant is used. */
template<int K>
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
__noinline
#else
__forceinline
#endif
void gather(const vbool<K>& valid,
Vec3vf<K>& p0,
Vec3vf<K>& p1,
Vec3vf<K>& p2,
Vec3vf<K>& p3,
const size_t index,
const Scene* const scene,
const vfloat<K>& time) const
{
const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
vfloat<K> ftime;
const vint<K> itime = mesh->timeSegment<K>(time, ftime);
/* index of the first ray that is set in valid */
const size_t first = bsf(movemask(valid));
if (likely(all(valid,itime[first] == itime)))
{
p0 = getVertex<0>(index, scene, itime[first], ftime);
p1 = getVertex<1>(index, scene, itime[first], ftime);
p2 = getVertex<2>(index, scene, itime[first], ftime);
p3 = getVertex<3>(index, scene, itime[first], ftime);
}
else
{
p0 = getVertex<0,K>(valid, index, scene, itime, ftime);
p1 = getVertex<1,K>(valid, index, scene, itime, ftime);
p2 = getVertex<2,K>(valid, index, scene, itime, ftime);
p3 = getVertex<3,K>(valid, index, scene, itime, ftime);
}
}
/* Gathers the quads at a given time step (declaration only) */
__forceinline void gather(Vec3vf<M>& p0,
Vec3vf<M>& p1,
Vec3vf<M>& p2,
Vec3vf<M>& p3,
const QuadMesh* mesh,
const Scene *const scene,
const int itime) const;
/* Gathers the quads at a given floating point time (declaration only) */
__forceinline void gather(Vec3vf<M>& p0,
Vec3vf<M>& p1,
Vec3vf<M>& p2,
Vec3vf<M>& p3,
const Scene *const scene,
const float time) const;
/* Updates the primitive */
/* Re-reads the vertices of every valid quad from mesh and returns their merged
   bounding box; no member of this primitive is written here. */
__forceinline BBox3fa update(QuadMesh* mesh)
{
BBox3fa bounds = empty;
for (size_t i=0; i<M; i++)
{
if (!valid(i)) break;
const unsigned primId = primID(i);
const QuadMesh::Quad& q = mesh->quad(primId);
const Vec3fa p0 = mesh->vertex(q.v[0]);
const Vec3fa p1 = mesh->vertex(q.v[1]);
const Vec3fa p2 = mesh->vertex(q.v[2]);
const Vec3fa p3 = mesh->vertex(q.v[3]);
bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
}
return bounds;
}
private:
#if !defined(EMBREE_COMPACT_POLYS)
/* Returns the offset vector belonging to vertex slot N (declaration only) */
template<int N> const vuint<M>& getVertexOffset() const;
#endif
};
#if !defined(EMBREE_COMPACT_POLYS)
/* Explicit specializations for QuadMi<4>: map the compile-time vertex slot 0..3
   to the offset vector that stores it */
template<> template<>
__forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const
{
  return v0_;
}

template<> template<>
__forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const
{
  return v1_;
}

template<> template<>
__forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const
{
  return v2_;
}

template<> template<>
__forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const
{
  return v3_;
}
#endif
/* Gathers the vertices of the 4 stored quads and transposes them, so that
   p0..p3 each hold one quad corner for all 4 quads in x/y/z lanes */
template<>
__forceinline void QuadMi<4>::gather(Vec3vf4& p0,
                                     Vec3vf4& p1,
                                     Vec3vf4& p2,
                                     Vec3vf4& p3,
                                     const Scene *const scene) const
{
  /* prefetch the first two 64 byte chunks of this primitive block */
  char* const self = (char*)this;
  prefetchL1(self+0*64);
  prefetchL1(self+1*64);

  /* one load per quad lane */
  const Quad quad0 = loadQuad(0,scene);
  const Quad quad1 = loadQuad(1,scene);
  const Quad quad2 = loadQuad(2,scene);
  const Quad quad3 = loadQuad(3,scene);

  /* AoS -> SoA conversion, one transpose per quad corner */
  transpose(quad0.v0,quad1.v0,quad2.v0,quad3.v0,p0.x,p0.y,p0.z);
  transpose(quad0.v1,quad1.v1,quad2.v1,quad3.v1,p1.x,p1.y,p1.z);
  transpose(quad0.v2,quad1.v2,quad2.v2,quad3.v2,p2.x,p2.y,p2.z);
  transpose(quad0.v3,quad1.v3,quad2.v3,quad3.v3,p3.x,p3.y,p3.z);
}
/* Gathers the vertices of the 4 stored quads at time step itime and transposes
   them into p0..p3. The mesh argument is not read; every lane looks up its own
   mesh through the scene inside loadQuad. */
template<>
__forceinline void QuadMi<4>::gather(Vec3vf4& p0,
                                     Vec3vf4& p1,
                                     Vec3vf4& p2,
                                     Vec3vf4& p3,
                                     const QuadMesh* mesh,
                                     const Scene *const scene,
                                     const int itime) const
{
  // FIXME: for trianglei there all geometries are identical, is this the case here too?
  const Quad quad0 = loadQuad(0,itime,scene);
  const Quad quad1 = loadQuad(1,itime,scene);
  const Quad quad2 = loadQuad(2,itime,scene);
  const Quad quad3 = loadQuad(3,itime,scene);

  /* AoS -> SoA conversion, one transpose per quad corner */
  transpose(quad0.v0,quad1.v0,quad2.v0,quad3.v0,p0.x,p0.y,p0.z);
  transpose(quad0.v1,quad1.v1,quad2.v1,quad3.v1,p1.x,p1.y,p1.z);
  transpose(quad0.v2,quad1.v2,quad2.v2,quad3.v2,p2.x,p2.y,p2.z);
  transpose(quad0.v3,quad1.v3,quad2.v3,quad3.v3,p3.x,p3.y,p3.z);
}
/* Gathers the vertices of the 4 stored quads at a floating point time: the two
   time steps surrounding `time` are gathered and blended linearly */
template<>
__forceinline void QuadMi<4>::gather(Vec3vf4& p0,
                                     Vec3vf4& p1,
                                     Vec3vf4& p2,
                                     Vec3vf4& p3,
                                     const Scene *const scene,
                                     const float time) const
{
  /* in mblur mode all geometries are identical, so lane 0 selects the mesh */
  const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0));

  /* split the time into a time step and a fractional weight */
  float ftime;
  const int itime = mesh->timeSegment(time, ftime);

  /* vertices at the start and at the end of the time segment */
  Vec3vf4 a0,a1,a2,a3;
  gather(a0,a1,a2,a3,mesh,scene,itime);
  Vec3vf4 b0,b1,b2,b3;
  gather(b0,b1,b2,b3,mesh,scene,itime+1);

  const vfloat4 weight(ftime);
  p0 = lerp(a0,b0,weight);
  p1 = lerp(a1,b1,weight);
  p2 = lerp(a2,b2,weight);
  p3 = lerp(a3,b3,weight);
}
}
/* Definition of the static primitive type descriptor of QuadMi<M> */
template<int M>
typename QuadMi<M>::Type QuadMi<M>::type;

/* Indexed quad block holding 4 quads */
using Quad4i = QuadMi<4>;
}

View File

@@ -0,0 +1,350 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "quadi.h"
#include "quad_intersector_moeller.h"
#include "quad_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! Single ray intersector for M indexed quads (Moeller-Trumbore test) */
template<int M, bool filter>
struct QuadMiIntersector1Moeller
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;

  /*! Gathers the quad vertices and intersects them with the ray, updating the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene);
    pre.intersect(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Gathers the quad vertices and reports whether any of the M quads occludes the ray. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene);
    return pre.occluded(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
  }
};
/*! Intersects M quads with K rays (Moeller-Trumbore test). */
template<int M, int K, bool filter>
struct QuadMiIntersectorKMoeller
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;

  /*! Intersects K rays with M quads, one quad at a time; the quad's vertices are
   *  fetched once and converted to K-wide vectors. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    Scene* scene = context->scene;
    for (size_t i=0; i<QuadMi<M>::max_size(); i++)
    {
      /* valid quads are stored first */
      if (!quad.valid(i)) break;
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
      const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
    }
  }

  /*! Test for K rays if they are occluded by any of the M quads. Returns the
   *  negation of the mask of rays that are still active after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    Scene* scene = context->scene;
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<QuadMi<M>::max_size(); i++)
    {
      if (!quad.valid(i)) break;
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
      const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
      /* stop testing further quads as soon as intersectK returns true */
      if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
        break;
    }
    return !valid0;
  }

  /*! Intersect ray k of the packet with the M quads and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    /* M-wide vectors: matches the gather() declaration instead of hard-coding a width of 4 */
    Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
    pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
  }

  /*! Test if ray k of the packet is occluded by one of the M quads. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    /* M-wide vectors: matches the gather() declaration instead of hard-coding a width of 4 */
    Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
  }
};
/*! Single ray intersector for M indexed quads (Pluecker test) */
template<int M, bool filter>
struct QuadMiIntersector1Pluecker
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersector1Pluecker<M,filter> Precalculations;

  /*! Gathers the quad vertices and intersects them with the ray, updating the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene);
    pre.intersect(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Gathers the quad vertices and reports whether any of the M quads occludes the ray. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene);
    return pre.occluded(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
  }
};
/*! Intersects M quads with K rays (Pluecker test). */
template<int M, int K, bool filter>
struct QuadMiIntersectorKPluecker
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;

  /*! Intersects K rays with M quads, one quad at a time; the quad's vertices are
   *  fetched once and converted to K-wide vectors. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    Scene* scene = context->scene;
    for (size_t i=0; i<QuadMi<M>::max_size(); i++)
    {
      /* valid quads are stored first */
      if (!quad.valid(i)) break;
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
      const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
    }
  }

  /*! Test for K rays if they are occluded by any of the M quads. Returns the
   *  negation of the mask of rays that are still active after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    Scene* scene = context->scene;
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<QuadMi<M>::max_size(); i++)
    {
      if (!quad.valid(i)) break;
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
      const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
      /* stop testing further quads as soon as intersectK returns true */
      if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
        break;
    }
    return !valid0;
  }

  /*! Intersect ray k of the packet with the M quads and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    /* M-wide vectors: matches the gather() declaration instead of hard-coding a width of 4 */
    Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
    pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
  }

  /*! Test if ray k of the packet is occluded by one of the M quads. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    /* M-wide vectors: matches the gather() declaration instead of hard-coding a width of 4 */
    Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
  }
};
/*! Single ray intersector for M indexed motion blur quads (Moeller-Trumbore test) */
template<int M, bool filter>
struct QuadMiMBIntersector1Moeller
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;

  /*! Gathers the quad vertices at the ray's time and intersects them with the ray, updating the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time());
    pre.intersect(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Gathers the quad vertices at the ray's time and reports whether any of the M quads occludes the ray. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time());
    return pre.occluded(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
  }
};
/*! Packet intersector for M indexed motion blur quads and K rays (Moeller-Trumbore test) */
template<int M, int K, bool filter>
struct QuadMiMBIntersectorKMoeller
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;

  /*! Intersects K rays with M quads; each quad is gathered at the per-ray times. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    /* valid quads are stored first, so the loop ends at the first invalid lane */
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      Vec3vf<K> p0,p1,p2,p3;
      quad.template gather<K>(valid_i,p0,p1,p2,p3,i,context->scene,ray.time());
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
    }
  }

  /*! Tests K rays for occlusion by any of the M quads. Returns the negation of
   *  the mask of rays that are still active after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      Vec3vf<K> p0,p1,p2,p3;
      quad.template gather<K>(valid_i,p0,p1,p2,p3,i,context->scene,ray.time());
      /* stop testing further quads as soon as intersectK returns true */
      if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
        break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with the M quads gathered at that ray's time and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
    pre.intersect1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Tests whether ray k of the packet is occluded by one of the M quads gathered at that ray's time. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
    return pre.occluded1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }
};
/*! Single ray intersector for M indexed motion blur quads (Pluecker test) */
template<int M, bool filter>
struct QuadMiMBIntersector1Pluecker
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersector1Pluecker<M,filter> Precalculations;

  /*! Gathers the quad vertices at the ray's time and intersects them with the ray, updating the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time());
    pre.intersect(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Gathers the quad vertices at the ray's time and reports whether any of the M quads occludes the ray. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time());
    return pre.occluded(ray,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
  }
};
/*! Packet intersector for M indexed motion blur quads and K rays (Pluecker test) */
template<int M, int K, bool filter>
struct QuadMiMBIntersectorKPluecker
{
  typedef QuadMi<M> Primitive;
  typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;

  /*! Intersects K rays with M quads; each quad is gathered at the per-ray times. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    /* valid quads are stored first, so the loop ends at the first invalid lane */
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      Vec3vf<K> p0,p1,p2,p3;
      quad.template gather<K>(valid_i,p0,p1,p2,p3,i,context->scene,ray.time());
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
    }
  }

  /*! Tests K rays for occlusion by any of the M quads. Returns the negation of
   *  the mask of rays that are still active after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMi<M>& quad)
  {
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      Vec3vf<K> p0,p1,p2,p3;
      quad.template gather<K>(valid_i,p0,p1,p2,p3,i,context->scene,ray.time());
      /* stop testing further quads as soon as intersectK returns true */
      if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
        break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with the M quads gathered at that ray's time and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
    pre.intersect1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }

  /*! Tests whether ray k of the packet is occluded by one of the M quads gathered at that ray's time. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMi<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    Vec3vf<M> p0,p1,p2,p3;
    quad.gather(p0,p1,p2,p3,context->scene,ray.time()[k]);
    return pre.occluded1(ray,k,context,p0,p1,p2,p3,quad.geomID(),quad.primID());
  }
};
}
}

View File

@@ -0,0 +1,165 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/* Stores the vertices of M quads in struct of array layout */
template <int M>
struct QuadMv
{
public:
  /* Primitive type descriptor; the member functions are only declared here */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* Returns maximum number of stored quads */
  static __forceinline size_t max_size() { return M; }

  /* Returns required number of primitive blocks for N primitives */
  static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }

public:

  /* Default constructor (leaves all members uninitialized) */
  __forceinline QuadMv() {}

  /* Construction from vertices and IDs */
  __forceinline QuadMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomIDs, const vuint<M>& primIDs)
    : v0(v0), v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs) {}

  /* Returns a mask that tells which quads are valid (a geomID of -1 marks an unused lane) */
  __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }

  /* Returns true if the specified quad is valid */
  __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }

  /* Returns the number of stored quads (index of the first invalid lane) */
  __forceinline size_t size() const { return bsf(~movemask(valid())); }

  /* Returns the geometry IDs */
  __forceinline       vuint<M>& geomID()       { return geomIDs; }
  __forceinline const vuint<M>& geomID() const { return geomIDs; }
  __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }

  /* Returns the primitive IDs */
  __forceinline       vuint<M> primID()       { return primIDs; }
  __forceinline const vuint<M> primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }

  /* Calculate the bounds of the quads */
  __forceinline BBox3fa bounds() const
  {
    Vec3vf<M> lower = min(v0,v1,v2,v3);
    Vec3vf<M> upper = max(v0,v1,v2,v3);

    /* invalid lanes get +inf/-inf so that they do not influence the reduction */
    vbool<M> mask = valid();
    lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
    lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
    lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
    upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
    upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
    upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
    return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
                   Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
  }

  /* Non temporal store of all vertex components and both ID vectors */
  __forceinline static void store_nt(QuadMv* dst, const QuadMv& src)
  {
    vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
    vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
    vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
    vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
    vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
    vfloat<M>::store_nt(&dst->v1.z,src.v1.z);
    vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
    vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
    vfloat<M>::store_nt(&dst->v2.z,src.v2.z);
    vfloat<M>::store_nt(&dst->v3.x,src.v3.x);
    vfloat<M>::store_nt(&dst->v3.y,src.v3.y);
    vfloat<M>::store_nt(&dst->v3.z,src.v3.z);
    vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
    vuint<M>::store_nt(&dst->primIDs,src.primIDs);
  }

  /* Fill quad from quad list: consumes up to M primitives from prims[begin..end),
     advancing begin. Unused lanes keep zero vertices and IDs of -1. */
  __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
  {
    vuint<M> vgeomID = -1, vprimID = -1;
    Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;

    for (size_t i=0; i<M && begin<end; i++, begin++)
    {
      const PrimRef& prim = prims[begin];
      const unsigned geomID = prim.geomID();
      const unsigned primID = prim.primID();
      const QuadMesh* __restrict__ const mesh = scene->get<QuadMesh>(geomID);
      const QuadMesh::Quad& quad = mesh->quad(primID);
      const Vec3fa& p0 = mesh->vertex(quad.v[0]);
      const Vec3fa& p1 = mesh->vertex(quad.v[1]);
      const Vec3fa& p2 = mesh->vertex(quad.v[2]);
      const Vec3fa& p3 = mesh->vertex(quad.v[3]);
      vgeomID [i] = geomID;
      vprimID [i] = primID;
      v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
      v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
      v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
      v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
    }
    QuadMv::store_nt(this,QuadMv(v0,v1,v2,v3,vgeomID,vprimID));
  }

  /* Updates the primitive: re-reads the vertices of all stored quads from mesh,
     rebuilds this block from them and returns the merged bounds of those quads */
  __forceinline BBox3fa update(QuadMesh* mesh)
  {
    BBox3fa bounds = empty;
    vuint<M> vgeomID = -1, vprimID = -1;
    /* all four vertex vectors are built in locals; v3 must not alias the member
       this->v3, which is overwritten by the placement new below */
    Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;

    for (size_t i=0; i<M; i++)
    {
      /* a primID of -1 terminates the list of stored quads */
      if (primID(i) == -1) break;
      const unsigned geomId = geomID(i);
      const unsigned primId = primID(i);
      const QuadMesh::Quad& quad = mesh->quad(primId);
      const Vec3fa p0 = mesh->vertex(quad.v[0]);
      const Vec3fa p1 = mesh->vertex(quad.v[1]);
      const Vec3fa p2 = mesh->vertex(quad.v[2]);
      const Vec3fa p3 = mesh->vertex(quad.v[3]);
      bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
      vgeomID [i] = geomId;
      vprimID [i] = primId;
      v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
      v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
      v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
      v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
    }
    new (this) QuadMv(v0,v1,v2,v3,vgeomID,vprimID);
    return bounds;
  }

public:
  Vec3vf<M> v0;      // 1st vertex of the quads
  Vec3vf<M> v1;      // 2nd vertex of the quads
  Vec3vf<M> v2;      // 3rd vertex of the quads
  Vec3vf<M> v3;      // 4th vertex of the quads
private:
  vuint<M> geomIDs; // geometry ID
  vuint<M> primIDs; // primitive ID
};
/* Definition of the static primitive type descriptor of QuadMv<M> */
template<int M>
typename QuadMv<M>::Type QuadMv<M>::type;

/* Quad block storing the vertices of 4 quads */
using Quad4v = QuadMv<4>;
}

View File

@@ -0,0 +1,181 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "quadv.h"
#include "quad_intersector_moeller.h"
#include "quad_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! Single ray intersector for M quads with stored vertices (Moeller-Trumbore test) */
template<int M, bool filter>
struct QuadMvIntersector1Moeller
{
  typedef QuadMv<M> Primitive;
  typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;

  /*! Intersects the ray with the M quads using the vertices stored in the primitive and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect(ray,context,
                  quad.v0,quad.v1,quad.v2,quad.v3,
                  quad.geomID(),quad.primID());
  }

  /*! Reports whether the ray is occluded by one of the M quads. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    return pre.occluded(ray,context,
                        quad.v0,quad.v1,quad.v2,quad.v3,
                        quad.geomID(),quad.primID());
  }

  /*! Forwards the point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad);
  }
};
/*! Packet intersector for M quads with stored vertices and K rays (Moeller-Trumbore test) */
template<int M, int K, bool filter>
struct QuadMvIntersectorKMoeller
{
  typedef QuadMv<M> Primitive;
  typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;

  /*! Intersects K rays with M quads; the vertices of quad i are broadcast to K-wide vectors. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMv<M>& quad)
  {
    /* valid quads are stored first, so the loop ends at the first invalid lane */
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,i);
      const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,i);
      const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,i);
      const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,i);
      pre.intersectK(valid_i,ray,a,b,c,d,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
    }
  }

  /*! Tests K rays for occlusion by any of the M quads. Returns the negation of
   *  the mask of rays that are still active after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMv<M>& quad)
  {
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<Primitive::max_size() && quad.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      const Vec3vf<K> a = broadcast<vfloat<K>>(quad.v0,i);
      const Vec3vf<K> b = broadcast<vfloat<K>>(quad.v1,i);
      const Vec3vf<K> c = broadcast<vfloat<K>>(quad.v2,i);
      const Vec3vf<K> d = broadcast<vfloat<K>>(quad.v3,i);
      /* stop testing further quads as soon as intersectK returns true */
      if (pre.intersectK(valid0,ray,a,b,c,d,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
        break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with the M quads and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMv<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect1(ray,k,context,
                   quad.v0,quad.v1,quad.v2,quad.v3,
                   quad.geomID(),quad.primID());
  }

  /*! Tests whether ray k of the packet is occluded by one of the M quads. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMv<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    return pre.occluded1(ray,k,context,
                         quad.v0,quad.v1,quad.v2,quad.v3,
                         quad.geomID(),quad.primID());
  }
};
/*! Intersects M quads with 1 ray, using the Pluecker based quad intersector. */
template<int M, bool filter>
struct QuadMvIntersector1Pluecker
{
  using Primitive = QuadMv<M>;
  using Precalculations = QuadMIntersector1Pluecker<M,filter>;

  /*! Intersects the ray with the M quads of the block and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
  }

  /*! Tests whether the ray is occluded by one of the M quads. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const bool blocked = pre.occluded(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
    return blocked;
  }

  /*! Point query; delegates to the PrimitivePointQuery1 helper for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
  {
    typedef PrimitivePointQuery1<Primitive> QueryImpl;
    return QueryImpl::pointQuery(query, context, quad);
  }
};
/*! Intersects M quads with K rays, using the Pluecker based quad intersector. */
template<int M, int K, bool filter>
struct QuadMvIntersectorKPluecker
{
  using Primitive = QuadMv<M>;
  using Precalculations = QuadMIntersectorKPluecker<M,K,filter>;

  /*! Intersects K rays with the M quads (one quad per iteration) and updates the hits. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const QuadMv<M>& quad)
  {
    /* iteration ends at the first invalid slot of the block */
    for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);

      /* replicate the four vertices of quad 'slot' across all K lanes */
      const Vec3vf<K> q0 = broadcast<vfloat<K>>(quad.v0,slot);
      const Vec3vf<K> q1 = broadcast<vfloat<K>>(quad.v1,slot);
      const Vec3vf<K> q2 = broadcast<vfloat<K>>(quad.v2,slot);
      const Vec3vf<K> q3 = broadcast<vfloat<K>>(quad.v3,slot);

      pre.intersectK(valid_i,ray,q0,q1,q2,q3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),slot));
    }
  }

  /*! Tests for K rays whether they are occluded by any of the M quads.
   *  Returns the complement of the lanes that are still active afterwards. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const QuadMv<M>& quad)
  {
    vbool<K> active = valid_i;
    for (size_t slot=0; slot<Primitive::max_size() && quad.valid(slot); slot++)
    {
      STAT3(shadow.trav_prims,1,popcnt(active),K);

      const Vec3vf<K> q0 = broadcast<vfloat<K>>(quad.v0,slot);
      const Vec3vf<K> q1 = broadcast<vfloat<K>>(quad.v1,slot);
      const Vec3vf<K> q2 = broadcast<vfloat<K>>(quad.v2,slot);
      const Vec3vf<K> q3 = broadcast<vfloat<K>>(quad.v3,slot);

      /* a true result from intersectK ends the loop over the remaining quads */
      if (pre.intersectK(active,ray,q0,q1,q2,q3,OccludedKEpilogM<M,K,filter>(active,ray,context,quad.geomID(),quad.primID(),slot)))
        break;
    }
    return !active;
  }

  /*! Intersects ray k of the packet with the M quads and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const QuadMv<M>& quad)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
  }

  /*! Tests whether ray k of the packet is occluded by one of the M quads. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const QuadMv<M>& quad)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const bool blocked = pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
    return blocked;
  }
};
}
}

View File

@@ -0,0 +1,715 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "curve_intersector_precalculations.h"
/*
This file implements the intersection of a ray with a round linear
curve segment. We define the geometry of such a round linear curve
segment from point p0 with radius r0 to point p1 with radius r1
using the cone that touches spheres p0/r0 and p1/r1 tangentially
plus the sphere p1/r1. We denote the tangentially touching cone from
p0/r0 to p1/r1 with cone(p0,r0,p1,r1) and the cone plus the ending
sphere with cone_sphere(p0,r0,p1,r1).
For multiple connected round linear curve segments this construction
yields a proper shape when viewed from the outside. Using the
following CSG we can also handle the interior in most common cases:
round_linear_curve(pl,rl,p0,r0,p1,r1,pr,rr) =
cone_sphere(p0,r0,p1,r1) - cone(pl,rl,p0,r0) - cone(p1,r1,pr,rr)
Thus by subtracting the neighboring cone geometries, we cut away
parts of the center cone_sphere surface which lie inside the
combined curve. This approach works as long as geometry of the
current cone_sphere penetrates into direct neighbor segments only,
and not into segments further away.
To construct a cone that touches two spheres at p0 and p1 with r0
and r1, one has to increase the cone radius at r0 and r1 to obtain
larger radii w0 and w1, such that the infinite cone properly touches
the spheres. From the paper "Ray Tracing Generalized Tube
Primitives: Method and Applications"
(https://www.researchgate.net/publication/334378683_Ray_Tracing_Generalized_Tube_Primitives_Method_and_Applications)
one can derive the following equations for these increased
radii:
sr = 1.0f / sqrt(1-sqr(dr)/sqr(p1-p0))
w0 = sr*r0
w1 = sr*r1
Further, we want the cone to start where it touches the sphere at p0
and to end where it touches the sphere at p1. Therefore, we need to
construct clipping locations Y0 and Y1 for the start and end of the
cone. The start and end clipping locations of the cone can be
calculated as:
Y0 = - r0 * (r1-r0) / length(p1-p0)
Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
where the cone starts at distance Y0 and ends at distance Y1 from
point p0 along the cone center line. The distance Y1-Y0 between the
two clipping locations can be calculated as:
dY = length(p1-p0) - (r1-r0)^2 / length(p1-p0)
In the code below, Y will always be scaled by length(p1-p0) to
obtain y and you will find the terms r0*(r1-r0) and
(p1-p0)^2-(r1-r0)^2.
*/
namespace embree
{
namespace isa
{
/*! Hit record for M round line segments: stores u, v, the hit distance t
 *  and the geometry normal Ng per lane, and offers both per-lane (scalar)
 *  and whole-vector accessors. */
template<int M>
struct RoundLineIntersectorHitM
{
  /*! performs no explicit initialization of the members */
  __forceinline RoundLineIntersectorHitM() {}

  /*! stores the given u, v, t and Ng vectors */
  __forceinline RoundLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
    : vu(u)
    , vv(v)
    , vt(t)
    , vNg(Ng)
  {}

  /*! nothing to do for this hit type */
  __forceinline void finalize() {}

  /* accessors for a single lane i */
  __forceinline Vec2f uv (const size_t i) const
  {
    return Vec2f(vu[i],vv[i]);
  }

  __forceinline float t (const size_t i) const
  {
    return vt[i];
  }

  __forceinline Vec3fa Ng(const size_t i) const
  {
    return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]);
  }

  /* accessors for all M lanes at once */
  __forceinline Vec2vf<M> uv() const
  {
    return Vec2vf<M>(vu,vv);
  }

  __forceinline vfloat<M> t () const
  {
    return vt;
  }

  __forceinline Vec3vf<M> Ng() const
  {
    return vNg;
  }

public:
  vfloat<M> vu;  //!< u coordinate per lane
  vfloat<M> vv;  //!< v coordinate per lane
  vfloat<M> vt;  //!< hit distance per lane
  Vec3vf<M> vNg; //!< geometry normal per lane
};
namespace __roundline_internal
{
template<int M>
struct ConeGeometry
{
ConeGeometry (const Vec4vf<M>& a, const Vec4vf<M>& b)
: p0(a.xyz()), p1(b.xyz()), dP(p1-p0), dPdP(dot(dP,dP)), r0(a.w), sqr_r0(sqr(r0)), r1(b.w), dr(r1-r0), drdr(dr*dr), r0dr (r0*dr), g(dPdP - drdr) {}
/*
This function tests if a point is accepted by first cone
clipping plane.
First, we need to project the point onto the line p0->p1:
Y = (p-p0)*(p1-p0)/length(p1-p0)
This value y is the distance to the projection point from
p0. The clip distances are calculated as:
Y0 = - r0 * (r1-r0) / length(p1-p0)
Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
Thus to test if the point p is accepted by the first
clipping plane we need to test Y > Y0 and to test if it
is accepted by the second clipping plane we need to test
Y < Y1.
By multiplying the calculations with length(p1-p0) these
calculation can get simplied to:
y = (p-p0)*(p1-p0)
y0 = - r0 * (r1-r0)
y1 = (p1-p0)^2 - r1 * (r1-r0)
and the test y > y0 and y < y1.
*/
__forceinline vbool<M> isClippedByPlane (const vbool<M>& valid_i, const Vec3vf<M>& p) const
{
const Vec3vf<M> p0p = p - p0;
const vfloat<M> y = dot(p0p,dP);
const vfloat<M> cap0 = -r0dr;
const vbool<M> inside_cone = y > cap0;
return valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)) & inside_cone;
}
/*
This function tests whether a point lies inside the capped cone
tangential to its ending spheres.
Therefore one has to check if the point is inside the
region defined by the cone clipping planes, which is
performed similar as in the previous function.
To perform the inside cone test we need to project the
point onto the line p0->p1:
dP = p1-p0
Y = (p-p0)*dP/length(dP)
This value Y is the distance to the projection point from
p0. To obtain a parameter value u going from 0 to 1 along
the line p0->p1 we calculate:
U = Y/length(dP)
The radii to use at points p0 and p1 are:
w0 = sr * r0
w1 = sr * r1
dw = w1-w0
Using these radii and u one can directly test if the point
lies inside the cone using the formula dP*dP < wy*wy with:
wy = w0 + u*dw
py = p0 + u*dP - p
By multiplying the calculations with length(p1-p0) and
inserting the definition of w can obtain simpler equations:
y = (p-p0)*dP
ry = r0 + y/dP^2 * dr
wy = sr*ry
py = p0 + y/dP^2*dP - p
y0 = - r0 * dr
y1 = dP^2 - r1 * dr
Thus for the in-cone test we get:
py^2 < wy^2
<=> py^2 < sr^2 * ry^2
<=> py^2 * ( dP^2 - dr^2 ) < dP^2 * ry^2
This can further get simplified to:
(p0-p)^2 * (dP^2 - dr^2) - y^2 < dP^2 * r0^2 + 2.0f*r0*dr*y;
*/
__forceinline vbool<M> isInsideCappedCone (const vbool<M>& valid_i, const Vec3vf<M>& p) const
{
const Vec3vf<M> p0p = p - p0;
const vfloat<M> y = dot(p0p,dP);
const vfloat<M> cap0 = -r0dr+vfloat<M>(ulp);
const vfloat<M> cap1 = -r1*dr + dPdP;
vbool<M> inside_cone = valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf));
inside_cone &= y > cap0; // start clipping plane
inside_cone &= y < cap1; // end clipping plane
inside_cone &= sqr(p0p)*g - sqr(y) < dPdP * sqr_r0 + 2.0f*r0dr*y; // in cone test
return inside_cone;
}
protected:
Vec3vf<M> p0;
Vec3vf<M> p1;
Vec3vf<M> dP;
vfloat<M> dPdP;
vfloat<M> r0;
vfloat<M> sqr_r0;
vfloat<M> r1;
vfloat<M> dr;
vfloat<M> drdr;
vfloat<M> r0dr;
vfloat<M> g;
};
/*
Extends ConeGeometry with per-ray state and implements the ray/cone
and ray/sphere intersection tests as well as the geometry normal and
u computations for the resulting hits.
The intersect* functions cache the computed hit distances (and, for
the cone, the y values) in members; Ng_cone, Ng_sphere0, Ng_sphere1
and u_cone read these cached values, so they are only meaningful
after the corresponding intersect function has been called.
*/
template<int M>
struct ConeGeometryIntersector : public ConeGeometry<M>
{
/* re-export the base class members; as these using-declarations are in
 * the public part of the struct, the members also become accessible
 * from outside (intersectConeSphere reads cone.g) */
using ConeGeometry<M>::p0;
using ConeGeometry<M>::p1;
using ConeGeometry<M>::dP;
using ConeGeometry<M>::dPdP;
using ConeGeometry<M>::r0;
using ConeGeometry<M>::sqr_r0;
using ConeGeometry<M>::r1;
using ConeGeometry<M>::dr;
using ConeGeometry<M>::r0dr;
using ConeGeometry<M>::g;
/*
ray_org, ray_dir: ray origin and direction
dOdO, rcp_dOdO: passed in by the caller (intersectConeSphere passes
sqr(ray_dir) and its reciprocal)
a, b: start and end point of the cone, radius stored in w
Derived here: O = ray_org-p0, OdP = dP*O, dOdP = dP*dO and
yp = OdP + r0*dr.
*/
ConeGeometryIntersector (const Vec3vf<M>& ray_org, const Vec3vf<M>& ray_dir, const vfloat<M>& dOdO, const vfloat<M>& rcp_dOdO, const Vec4vf<M>& a, const Vec4vf<M>& b)
: ConeGeometry<M>(a,b), org(ray_org), O(ray_org-p0), dO(ray_dir), dOdO(dOdO), rcp_dOdO(rcp_dOdO), OdP(dot(dP,O)), dOdP(dot(dP,dO)), yp(OdP + r0dr) {}
/*
This function intersects a ray with a cone that touches a
start sphere p0/r0 and end sphere p1/r1.
To find this ray/cone intersections one could just
calculate radii w0 and w1 as described above and use a
standard ray/cone intersection routine with these
radii. However, it turns out that calculations can get
simplified when deriving a specialized ray/cone
intersection for this special case. We perform
calculations relative to the cone origin p0 and define:
O = ray_org - p0
dO = ray_dir
dP = p1-p0
dr = r1-r0
dw = w1-w0
For some t we can compute the potential hit point h = O + t*dO and
project it onto the cone vector dP to obtain u = (h*dP)/(dP*dP). In
case of an intersection, the squared distance from the hit point
projected onto the cone center line to the hit point should be equal
to the squared cone radius at u:
(u*dP - h)^2 = (w0 + u*dw)^2
Inserting the definition of h, u, w0, and dw into this formula, then
factoring out all terms, and sorting by t^2, t^1, and t^0 terms
yields a quadratic equation to solve.
Inserting u:
( (h*dP)*dP/dP^2 - h )^2 = ( w0 + (h*dP)*dw/dP^2 )^2
Multiplying by dP^4:
( (h*dP)*dP - h*dP^2 )^2 = ( w0*dP^2 + (h*dP)*dw )^2
Inserting w0 and dw:
( (h*dP)*dP - h*dP^2 )^2 = ( r0*dP^2 + (h*dP)*dr )^2 / (1-dr^2/dP^2)
( (h*dP)*dP - h*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (h*dP)*dr )^2
Now one can insert the definition of h, factor out, and presort by t:
( ((O + t*dO)*dP)*dP - (O + t*dO)*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + ((O + t*dO)*dP)*dr )^2
( (O*dP)*dP-O*dP^2 + t*( (dO*dP)*dP - dO*dP^2 ) )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (O*dP)*dr + t*(dO*dP)*dr )^2
Factoring out further and sorting by t^2, t^1 and t^0 yields:
0 = t^2 * [ ((dO*dP)*dP - dO*dP^2)^2 * (dP^2 - dr^2) - dP^2*(dO*dP)^2*dr^2 ]
+ 2*t^1 * [ ((O*dP)*dP - O*dP^2) * ((dO*dP)*dP - dO*dP^2) * (dP^2 - dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)*(dO*dP)*dr ]
+ t^0 * [ ( (O*dP)*dP - O*dP^2)^2 * (dP^2-dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)^2 ]
This can be simplified to:
0 = t^2 * [ (dP^2 - dr^2)*dO^2 - (dO*dP)^2 ]
+ 2*t^1 * [ (dP^2 - dr^2)*(O*dO) - (dO*dP)*(O*dP + r0*dr) ]
+ t^0 * [ (dP^2 - dr^2)*O^2 - (O*dP)^2 - r0^2*dP^2 - 2.0f*r0*dr*(O*dP) ]
Solving this quadratic equation yields the values for t at which the
ray intersects the cone.
The mask 'valid' is narrowed to the lanes for which the equation is
solved; 'lower' and 'upper' receive the front and back hit distance,
or pos_inf / neg_inf where there is no accepted hit. The function
returns false if no lane remains valid.
*/
__forceinline bool intersectCone(vbool<M>& valid, vfloat<M>& lower, vfloat<M>& upper)
{
/* return no hit by default */
lower = pos_inf;
upper = neg_inf;
/* compute quadratic equation A*t^2 + B*t + C = 0 */
const vfloat<M> OO = dot(O,O);
const vfloat<M> OdO = dot(dO,O);
const vfloat<M> A = g * dOdO - sqr(dOdP);
const vfloat<M> B = 2.0f * (g*OdO - dOdP*yp);
const vfloat<M> C = g*OO - sqr(OdP) - sqr_r0*dPdP - 2.0f*r0dr*OdP;
/* we miss the cone if the discriminant is smaller than zero */
const vfloat<M> D = B*B - 4.0f*A*C;
valid &= (D >= 0.0f & g > 0.0f); // if g <= 0 then the cone is inside a sphere end
/* When rays are parallel to the cone surface, then the
* ray may be inside or outside the cone. We just assume a
* miss in that case, which is fine as rays inside the
* cone would anyway hit the ending spheres in that
* case. */
valid &= abs(A) > min_rcp_input;
if (unlikely(none(valid))) {
return false;
}
/* compute distance to front and back hit */
const vfloat<M> Q = sqrt(D);
const vfloat<M> rcp_2A = rcp(2.0f*A);
t_cone_front = (-B-Q)*rcp_2A;
y_cone_front = yp + t_cone_front*dOdP;
/* accept the hit only if its y lies between the two clipping planes (y is zero at the first plane and g at the second) */
lower = select( (y_cone_front > -(float)ulp) & (y_cone_front <= g) & (g > 0.0f), t_cone_front, vfloat<M>(pos_inf));
#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
t_cone_back = (-B+Q)*rcp_2A;
y_cone_back = yp + t_cone_back *dOdP;
upper = select( (y_cone_back > -(float)ulp) & (y_cone_back <= g) & (g > 0.0f), t_cone_back , vfloat<M>(neg_inf));
#endif
return true;
}
/*
This function intersects the ray with the end sphere at
p1. We already clip away hits that are inside the
neighboring cone segment.
'lower' and 'upper' receive the front and back hit distance, or
pos_inf / neg_inf where the hit is rejected. With
EMBREE_BACKFACE_CULLING_CURVES defined, 'upper' is always neg_inf.
*/
__forceinline void intersectEndSphere(vbool<M>& valid,
const ConeGeometry<M>& coneR,
vfloat<M>& lower, vfloat<M>& upper)
{
/* calculate front and back hit with end sphere */
const Vec3vf<M> O1 = org - p1;
const vfloat<M> O1dO = dot(O1,dO);
const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r1));
const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) );
/* clip away front hit if it is inside next cone segment; the hit
 * must also lie beyond the end clipping plane of this cone (y > g) */
t_sph1_front = (-O1dO - rhs1)*rcp_dOdO;
const Vec3vf<M> hit_front = org + t_sph1_front*dO;
vbool<M> valid_sph1_front = h2 >= 0.0f & yp + t_sph1_front*dOdP > g & !coneR.isClippedByPlane (valid, hit_front);
lower = select(valid_sph1_front, t_sph1_front, vfloat<M>(pos_inf));
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
/* clip away back hit if it is inside next cone segment */
t_sph1_back = (-O1dO + rhs1)*rcp_dOdO;
const Vec3vf<M> hit_back = org + t_sph1_back*dO;
vbool<M> valid_sph1_back = h2 >= 0.0f & yp + t_sph1_back*dOdP > g & !coneR.isClippedByPlane (valid, hit_back);
upper = select(valid_sph1_back, t_sph1_back, vfloat<M>(neg_inf));
#else
upper = vfloat<M>(neg_inf);
#endif
}
/*
This function intersects the ray with the begin sphere at p0.
Only sphere hits that lie before the first clipping plane of the
cone (y < 0) are accepted. 'lower' and 'upper' receive the front
and back hit distance, or pos_inf / neg_inf where the hit is
rejected. With EMBREE_BACKFACE_CULLING_CURVES defined, 'upper' is
always neg_inf.
*/
__forceinline void intersectBeginSphere(const vbool<M>& valid,
vfloat<M>& lower, vfloat<M>& upper)
{
/* calculate front and back hit with begin sphere */
const Vec3vf<M> O1 = org - p0;
const vfloat<M> O1dO = dot(O1,dO);
const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r0));
const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) );
/* keep front hit only if it lies before the first clipping plane of the cone */
t_sph0_front = (-O1dO - rhs1)*rcp_dOdO;
vbool<M> valid_sph1_front = valid & h2 >= 0.0f & yp + t_sph0_front*dOdP < 0;
lower = select(valid_sph1_front, t_sph0_front, vfloat<M>(pos_inf));
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
/* keep back hit only if it lies before the first clipping plane of the cone */
t_sph0_back = (-O1dO + rhs1)*rcp_dOdO;
vbool<M> valid_sph1_back = valid & h2 >= 0.0f & yp + t_sph0_back*dOdP < 0;
upper = select(valid_sph1_back, t_sph0_back, vfloat<M>(neg_inf));
#else
upper = vfloat<M>(neg_inf);
#endif
}
/*
This function calculates the geometry normal of some cone hit.
For a given hit point h (relative to p0) with a cone
starting at p0 with radius w0 and ending at p1 with
radius w1 one normally calculates the geometry normal by
first calculating the parametric u hit location along the
cone:
u = dot(h,dP)/dP^2
Using this value one can now directly calculate the
geometry normal by bending the connection vector (h-u*dP)
from hit to projected hit with some cone dependent value
dw/sqrt(dP^2) * normalize(dP):
Ng = normalize(h-u*dP) - dw/length(dP) * normalize(dP)
The length of the vector (h-u*dP) can also get calculated
by interpolating the radii as w0+u*dw which yields:
Ng = (h-u*dP)/(w0+u*dw) - dw/dP^2 * dP
Multiplying with (w0+u*dw) yield a scaled Ng':
Ng' = (h-u*dP) - (w0+u*dw)*dw/dP^2*dP
Inserting the definition of w0 and dw and refactoring
yield a further scaled Ng'':
Ng'' = (dP^2 - dr^2) (h-q) - (r0+u*dr)*dr*dP
Now inserting the definition of u gives and multiplying
with the denominator yields:
Ng''' = (dP^2-dr^2)*(dP^2*h-dot(h,dP)*dP) - (dP^2*r0+dot(h,dP)*dr)*dr*dP
Factoring out, cancelling terms, dividing by dP^2, and
factoring again yields finally:
Ng'''' = (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr)
The mask front_hit selects per lane between the cached front and
back cone hit (ignored with EMBREE_BACKFACE_CULLING_CURVES, where
only the front hit exists).
*/
__forceinline Vec3vf<M> Ng_cone(const vbool<M>& front_hit) const
{
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
const vfloat<M> t = select(front_hit, t_cone_front, t_cone_back);
const Vec3vf<M> h = O + t*dO;
return g*h-dP*y;
#else
const Vec3vf<M> h = O + t_cone_front*dO;
return g*h-dP*y_cone_front;
#endif
}
/* compute geometry normal of sphere hit as the difference
* vector from hit point to sphere center */
__forceinline Vec3vf<M> Ng_sphere1(const vbool<M>& front_hit) const
{
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
const vfloat<M> t_sph1 = select(front_hit, t_sph1_front, t_sph1_back);
return org+t_sph1*dO-p1;
#else
return org+t_sph1_front*dO-p1;
#endif
}
/* same as Ng_sphere1, but for the begin sphere at p0 */
__forceinline Vec3vf<M> Ng_sphere0(const vbool<M>& front_hit) const
{
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
const vfloat<M> t_sph0 = select(front_hit, t_sph0_front, t_sph0_back);
return org+t_sph0*dO-p0;
#else
return org+t_sph0_front*dO-p0;
#endif
}
/*
This function calculates the u coordinate of a
hit. Therefore we use the hit distance y (which is zero
at the first cone clipping plane) and divide by distance
g between the clipping planes.
*/
__forceinline vfloat<M> u_cone(const vbool<M>& front_hit) const
{
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
return clamp(y*rcp(g));
#else
return clamp(y_cone_front*rcp(g));
#endif
}
private:
Vec3vf<M> org; // ray origin
Vec3vf<M> O; // ray origin relative to p0
Vec3vf<M> dO; // ray direction
vfloat<M> dOdO; // passed in by the caller (sqr(ray_dir) in intersectConeSphere)
vfloat<M> rcp_dOdO; // passed in by the caller (rcp(dOdO) in intersectConeSphere)
vfloat<M> OdP; // dot(dP,O)
vfloat<M> dOdP; // dot(dP,dO)
/* for ray/cone intersection */
private:
vfloat<M> yp; // OdP + r0dr, the y value at the ray origin
vfloat<M> y_cone_front; // written by intersectCone
vfloat<M> t_cone_front; // written by intersectCone
#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
vfloat<M> y_cone_back; // written by intersectCone
vfloat<M> t_cone_back; // written by intersectCone
#endif
/* for ray/sphere intersection */
private:
vfloat<M> t_sph1_front; // written by intersectEndSphere
vfloat<M> t_sph0_front; // written by intersectBeginSphere
#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
vfloat<M> t_sph1_back; // written by intersectEndSphere
vfloat<M> t_sph0_back; // written by intersectBeginSphere
#endif
};
/*
Intersects a ray with M round linear curve segments, each built from
the cone between v0 and v1, the end sphere at v1 and, for segments
whose left neighbor vL has x == pos_inf, also the begin sphere at v0.

valid_i: lanes to process
ray_org_in, ray_dir: ray origin and direction
ray_tnear: lower bound of the accepted hit distance
ray_tfar: functor returning the upper bound of the accepted hit
distance; it is called again for the second hit, i.e.
after the epilog ran for the first hit
v0, v1: start and end point of the segment (radius in w)
vL, vR: left and right neighbor points (radius in w)
epilog: called with a lane mask and a RoundLineIntersectorHitM;
its bool result is returned to the caller

Returns false if no lane produced a hit candidate, otherwise the
(or-combined) result of the epilog invocations.
*/
template<int M, typename Epilog, typename ray_tfar_func>
static __forceinline bool intersectConeSphere(const vbool<M>& valid_i,
const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar,
const Vec4vf<M>& v0, const Vec4vf<M>& v1,
const Vec4vf<M>& vL, const Vec4vf<M>& vR,
const Epilog& epilog)
{
vbool<M> valid = valid_i;
/* move ray origin closer to make calculations numerically stable;
 * all hit distances below are relative to the moved origin and dt
 * is added back when comparing against tnear/tfar and when
 * reporting the hit */
const vfloat<M> dOdO = sqr(ray_dir);
const vfloat<M> rcp_dOdO = rcp(dOdO);
const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO;
const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir;
/* intersect with cone from v0 to v1 */
vfloat<M> t_cone_lower, t_cone_upper;
ConeGeometryIntersector<M> cone (ray_org, ray_dir, dOdO, rcp_dOdO, v0, v1);
vbool<M> validCone = valid;
cone.intersectCone(validCone, t_cone_lower, t_cone_upper);
valid &= (validCone | (cone.g <= 0.0f)); // if cone is entirely in sphere end - check sphere
if (unlikely(none(valid)))
return false;
/* cone hits inside the neighboring capped cones are inside the geometry and thus ignored */
const ConeGeometry<M> coneL (v0, vL);
const ConeGeometry<M> coneR (v1, vR);
/* NOTE: the clipping of cone hits against the neighbors is only compiled in when backface culling is disabled */
#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
const Vec3vf<M> hit_lower = ray_org + t_cone_lower*ray_dir;
const Vec3vf<M> hit_upper = ray_org + t_cone_upper*ray_dir;
t_cone_lower = select (!coneL.isInsideCappedCone (validCone, hit_lower) & !coneR.isInsideCappedCone (validCone, hit_lower), t_cone_lower, vfloat<M>(pos_inf));
t_cone_upper = select (!coneL.isInsideCappedCone (validCone, hit_upper) & !coneR.isInsideCappedCone (validCone, hit_upper), t_cone_upper, vfloat<M>(neg_inf));
#endif
/* intersect ending sphere */
vfloat<M> t_sph1_lower, t_sph1_upper;
vfloat<M> t_sph0_lower = vfloat<M>(pos_inf);
vfloat<M> t_sph0_upper = vfloat<M>(neg_inf);
cone.intersectEndSphere(valid, coneR, t_sph1_lower, t_sph1_upper);
/* the begin sphere is only intersected for lanes whose left neighbor has x == pos_inf */
const vbool<M> isBeginPoint = valid & (vL[0] == vfloat<M>(pos_inf));
if (unlikely(any(isBeginPoint))) {
cone.intersectBeginSphere (isBeginPoint, t_sph0_lower, t_sph0_upper);
}
/* CSG union of cone and end sphere */
vfloat<M> t_sph_lower = min(t_sph0_lower, t_sph1_lower);
vfloat<M> t_cone_sphere_lower = min(t_cone_lower, t_sph_lower);
#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
vfloat<M> t_sph_upper = max(t_sph0_upper, t_sph1_upper);
vfloat<M> t_cone_sphere_upper = max(t_cone_upper, t_sph_upper);
/* filter out hits that are not in tnear/tfar range */
const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
const vbool<M> valid_upper = valid & ray_tnear <= dt+t_cone_sphere_upper & dt+t_cone_sphere_upper <= ray_tfar() & t_cone_sphere_upper != vfloat<M>(neg_inf);
/* check if there is a first hit */
const vbool<M> valid_first = valid_lower | valid_upper;
if (unlikely(none(valid_first)))
return false;
/* construct first hit: the lower hit where it is valid, otherwise
 * the upper hit; which surface was hit is recovered by comparing
 * the chosen distance against the individual hit distances */
const vfloat<M> t_first = select(valid_lower, t_cone_sphere_lower, t_cone_sphere_upper);
const vbool<M> cone_hit_first = t_first == t_cone_lower | t_first == t_cone_upper;
const vbool<M> sph0_hit_first = t_first == t_sph0_lower | t_first == t_sph0_upper;
const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
/* invoke intersection filter for first hit */
RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first);
const bool is_hit_first = epilog(valid_first, hit);
/* check for possible second hits before potentially accepted hit;
 * a second hit exists only where both the lower and the upper hit
 * were valid, and ray_tfar() is evaluated again here */
const vfloat<M> t_second = t_cone_sphere_upper;
const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_cone_sphere_upper <= ray_tfar());
if (unlikely(none(valid_second)))
return is_hit_first;
/* invoke intersection filter for second hit */
const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper;
const vbool<M> sph0_hit_second = t_second == t_sph0_lower | t_second == t_sph0_upper;
const Vec3vf<M> Ng_second = select(cone_hit_second, cone.Ng_cone(false), select (sph0_hit_second, cone.Ng_sphere0(false), cone.Ng_sphere1(false)));
const vfloat<M> u_second = select(cone_hit_second, cone.u_cone(false), select (sph0_hit_second, vfloat<M>(zero), vfloat<M>(one)));
hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
const bool is_hit_second = epilog(valid_second, hit);
return is_hit_first | is_hit_second;
#else
/* with backface culling only the front (lower) hit is considered */
/* filter out hits that are not in tnear/tfar range */
const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
/* check if there is a valid hit */
if (unlikely(none(valid_lower)))
return false;
/* construct first hit */
const vbool<M> cone_hit_first = t_cone_sphere_lower == t_cone_lower | t_cone_sphere_lower == t_cone_upper;
const vbool<M> sph0_hit_first = t_cone_sphere_lower == t_sph0_lower | t_cone_sphere_lower == t_sph0_upper;
const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
/* invoke intersection filter for first hit */
RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_cone_sphere_lower,Ng_first);
const bool is_hit_first = epilog(valid_lower, hit);
return is_hit_first;
#endif
}
} // end namespace __roundline_internal
/*! Single-ray front end of the round linear curve intersector for M segments. */
template<int M>
struct RoundLinearCurveIntersector1
{
  typedef CurvePrecalculations1 Precalculations;

  /*! Functor that reads ray.tfar on every call and returns it as an M-wide vector. */
  template<typename Ray>
  struct ray_tfar
  {
    Ray& ray;

    __forceinline ray_tfar(Ray& ray_in) : ray(ray_in) {}

    __forceinline vfloat<M> operator() () const
    {
      return ray.tfar;
    }
  };

  /*! Broadcasts the ray to M lanes, enlarges the radii of all four control
   *  points via enlargeRadiusToMinWidth and runs the cone/sphere test. The
   *  result of intersectConeSphere is returned unchanged. */
  template<typename Ray, typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      Ray& ray,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const Vec4vf<M>& vLi, const Vec4vf<M>& vRi,
                                      const Epilog& epilog)
  {
    /* ray data replicated across all lanes */
    const Vec3vf<M> origin(ray.org.x, ray.org.y, ray.org.z);
    const Vec3vf<M> direction(ray.dir.x, ray.dir.y, ray.dir.z);
    const vfloat<M> tnear(ray.tnear());

    /* control points with adjusted radii */
    const Vec4vf<M> q0 = enlargeRadiusToMinWidth<M>(context,geom,origin,v0i);
    const Vec4vf<M> q1 = enlargeRadiusToMinWidth<M>(context,geom,origin,v1i);
    const Vec4vf<M> qL = enlargeRadiusToMinWidth<M>(context,geom,origin,vLi);
    const Vec4vf<M> qR = enlargeRadiusToMinWidth<M>(context,geom,origin,vRi);

    const ray_tfar<Ray> tfar(ray);
    return __roundline_internal::intersectConeSphere<M>(valid_i,origin,direction,tnear,tfar,q0,q1,qL,qR,epilog);
  }
};
/*! Front end of the round linear curve intersector for ray k of a K-wide ray packet. */
template<int M, int K>
struct RoundLinearCurveIntersectorK
{
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Functor that reads ray.tfar[k] on every call and returns it as an M-wide vector. */
  struct ray_tfar
  {
    RayK<K>& ray;
    size_t k;

    __forceinline ray_tfar(RayK<K>& ray_in, size_t k_in) : ray(ray_in), k(k_in) {}

    __forceinline vfloat<M> operator() () const
    {
      return ray.tfar[k];
    }
  };

  /*! Broadcasts ray k to M lanes, enlarges the radii of all four control
   *  points via enlargeRadiusToMinWidth and runs the cone/sphere test. The
   *  result of intersectConeSphere is returned unchanged. */
  template<typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid_i,
                                      RayK<K>& ray, size_t k,
                                      RayQueryContext* context,
                                      const LineSegments* geom,
                                      const Precalculations& pre,
                                      const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
                                      const Vec4vf<M>& vLi, const Vec4vf<M>& vRi,
                                      const Epilog& epilog)
  {
    /* data of ray k replicated across all lanes */
    const Vec3vf<M> origin(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
    const Vec3vf<M> direction(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
    const vfloat<M> tnear = ray.tnear()[k];

    /* control points with adjusted radii */
    const Vec4vf<M> q0 = enlargeRadiusToMinWidth<M>(context,geom,origin,v0i);
    const Vec4vf<M> q1 = enlargeRadiusToMinWidth<M>(context,geom,origin,v1i);
    const Vec4vf<M> qL = enlargeRadiusToMinWidth<M>(context,geom,origin,vLi);
    const Vec4vf<M> qR = enlargeRadiusToMinWidth<M>(context,geom,origin,vRi);

    const ray_tfar tfar(ray,k);
    return __roundline_internal::intersectConeSphere<M>(valid_i,origin,direction,tnear,tfar,q0,q1,qL,qR,epilog);
  }
};
}
}

View File

@@ -0,0 +1,123 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "roundline_intersector.h"
#include "intersector_epilog.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for LineMi<M> blocks treated as round linear curves. */
template<int M, bool filter>
struct RoundLinearCurveMiIntersector1
{
  using Primitive = LineMi<M>;
  using Precalculations = CurvePrecalculations1;

  /*! Intersects the ray with the M segments of the block and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    /* gather segment end points and their left/right neighbors */
    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom);

    const vbool<M> valid = line.valid();
    const Intersect1EpilogM<M,filter> epilog(ray,context,line.geomID(),line.primID());
    RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,epilog);
  }

  /*! Tests whether the ray is occluded by one of the M segments of the block. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom);

    const vbool<M> valid = line.valid();
    const Occluded1EpilogM<M,filter> epilog(ray,context,line.geomID(),line.primID());
    return RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,epilog);
  }

  /*! Point query; delegates to the PrimitivePointQuery1 helper for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    typedef PrimitivePointQuery1<Primitive> QueryImpl;
    return QueryImpl::pointQuery(query, context, line);
  }
};
/*! Single-ray intersector for LineMi<M> blocks treated as round linear
 *  curves; the control points are gathered at the time of the ray. */
template<int M, bool filter>
struct RoundLinearCurveMiMBIntersector1
{
  using Primitive = LineMi<M>;
  using Precalculations = CurvePrecalculations1;

  /*! Intersects the ray with the M segments of the block and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    /* gather segment end points and their left/right neighbors at ray.time() */
    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom,ray.time());

    const vbool<M> valid = line.valid();
    const Intersect1EpilogM<M,filter> epilog(ray,context,line.geomID(),line.primID());
    RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,epilog);
  }

  /*! Tests whether the ray is occluded by one of the M segments of the block. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom,ray.time());

    const vbool<M> valid = line.valid();
    const Occluded1EpilogM<M,filter> epilog(ray,context,line.geomID(),line.primID());
    return RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,epilog);
  }

  /*! Point query; delegates to the PrimitivePointQuery1 helper for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
  {
    typedef PrimitivePointQuery1<Primitive> QueryImpl;
    return QueryImpl::pointQuery(query, context, line);
  }
};
/*! Intersector for one ray of a K-wide packet against LineMi<M> blocks
 *  treated as round linear curves. */
template<int M, int K, bool filter>
struct RoundLinearCurveMiIntersectorK
{
  using Primitive = LineMi<M>;
  using Precalculations = CurvePrecalculationsK<K>;

  /*! Intersects ray k with the M segments of the block and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    /* gather segment end points and their left/right neighbors */
    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom);

    const vbool<M> valid = line.valid();
    const Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
    RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,epilog);
  }

  /*! Tests whether ray k is occluded by one of the M segments of the block. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> v0;
    Vec4vf<M> v1;
    Vec4vf<M> vL;
    Vec4vf<M> vR;
    line.gather(v0,v1,vL,vR,geom);

    const vbool<M> valid = line.valid();
    const Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
    return RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,epilog);
  }
};
/*! Intersector for ray k of a K-wide ray packet against the M round linear
 *  curve segments referenced by a LineMi<M> primitive. This variant gathers
 *  the segment vertices for the time of ray k (ray.time()[k]). */
template<int M, int K, bool filter>
struct RoundLinearCurveMiMBIntersectorK
{
  typedef LineMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with all valid segments; hits go through an Intersect1KEpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(normal.trav_prims,1,1,1);
    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> p0, p1, pL, pR;
    line.gather(p0,p1,pL,pR,segments,ray.time()[k]);

    const vbool<M> active = line.valid();
    RoundLinearCurveIntersectorK<M,K>::intersect(
      active,ray,k,context,segments,pre,p0,p1,pL,pR,
      Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }

  /*! Occlusion test of ray k against all valid segments; hits go through an Occluded1KEpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& line)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const LineSegments* segments = context->scene->get<LineSegments>(line.geomID());

    Vec4vf<M> p0, p1, pL, pR;
    line.gather(p0,p1,pL,pR,segments,ray.time()[k]);

    const vbool<M> active = line.valid();
    return RoundLinearCurveIntersectorK<M,K>::intersect(
      active,ray,k,context,segments,pre,p0,p1,pL,pR,
      Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID()));
  }
};
}
}

View File

@@ -0,0 +1,235 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/scene_points.h"
#include "curve_intersector_precalculations.h"
namespace embree
{
namespace isa
{
/*! Hit record for M sphere intersection candidates. Stores a hit distance
 *  and a geometry normal per lane; the uv coordinates are always (0,0). */
template<int M>
struct SphereIntersectorHitM
{
  /*! leaves both members uninitialized */
  __forceinline SphereIntersectorHitM() {}

  /*! stores the per-lane hit distances and geometry normals */
  __forceinline SphereIntersectorHitM(const vfloat<M>& t_in, const Vec3vf<M>& Ng_in)
    : vt(t_in), vNg(Ng_in) {}

  /*! nothing to finalize for sphere hits */
  __forceinline void finalize() {}

  /*! uv of lane i (constant zero) */
  __forceinline Vec2f uv(const size_t i) const {
    return Vec2f(0.0f, 0.0f);
  }

  /*! uv of all lanes (constant zero) */
  __forceinline Vec2vf<M> uv() const {
    return Vec2vf<M>(0.0f, 0.0f);
  }

  /*! hit distance of lane i */
  __forceinline float t(const size_t i) const {
    return vt[i];
  }

  /*! hit distances of all lanes */
  __forceinline vfloat<M> t() const {
    return vt;
  }

  /*! geometry normal of lane i, assembled from the SoA components */
  __forceinline Vec3fa Ng(const size_t i) const {
    return Vec3fa(vNg.x[i],
                  vNg.y[i],
                  vNg.z[i]);
  }

  /*! geometry normals of all lanes */
  __forceinline Vec3vf<M> Ng() const {
    return vNg;
  }

public:
  vfloat<M> vt;   //!< per-lane hit distance
  Vec3vf<M> vNg;  //!< per-lane geometry normal
};
/*! Scalar specialization of the sphere hit record: a single hit distance
 *  and geometry normal; the uv coordinates are always (0,0). */
template<>
struct SphereIntersectorHitM<1>
{
  /*! leaves both members uninitialized */
  __forceinline SphereIntersectorHitM() {}

  /*! stores the hit distance and geometry normal */
  __forceinline SphereIntersectorHitM(const float& t_in, const Vec3f& Ng_in)
    : vt(t_in), vNg(Ng_in) {}

  /*! nothing to finalize for sphere hits */
  __forceinline void finalize() {}

  /*! uv of the hit (constant zero) */
  __forceinline Vec2f uv() const {
    return Vec2f(0.0f, 0.0f);
  }

  /*! hit distance */
  __forceinline float t() const {
    return vt;
  }

  /*! geometry normal */
  __forceinline Vec3f Ng() const {
    return vNg;
  }

public:
  float vt;   //!< hit distance
  Vec3f vNg;  //!< geometry normal
};
/*! Intersects a single ray with M spheres at once. Each sphere is given as
 *  a Vec4vf<M> whose xyz is the center and whose w is the radius. */
template<int M>
struct SphereIntersector1
{
typedef CurvePrecalculations1 Precalculations;
/*! Core ray/sphere test.
 *  \param valid_i lanes (spheres) to test
 *  \param ray     the ray; note that the template parameter named Ray hides the Ray type here
 *  \param pre     precalculations (not used in this function)
 *  \param v0      sphere centers (xyz) and radii (w)
 *  \param epilog  callable invoked as epilog(valid_mask, hit) for each batch of candidate hits
 *  \return true if any epilog invocation returned true */
template<typename Ray, typename Epilog>
static __forceinline bool intersect(
const vbool<M>& valid_i, Ray& ray,
const Precalculations& pre, const Vec4vf<M>& v0, const Epilog& epilog)
{
vbool<M> valid = valid_i;
/* rd2 = rcp(dot(dir,dir)); used to express distances in units of the ray parameter t */
const vfloat<M> rd2 = rcp(dot(ray.dir, ray.dir));
const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
const Vec3vf<M> center = v0.xyz();
const vfloat<M> radius = v0.w;
/* projC0 is the ray parameter of the point on the ray line closest to the
   sphere center; perp is the vector from that point to the center */
const Vec3vf<M> c0 = center - ray_org;
const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
const Vec3vf<M> perp = c0 - projC0 * ray_dir;
const vfloat<M> l2 = dot(perp, perp);
const vfloat<M> r2 = radius * radius;
/* the ray line misses the sphere if its closest distance exceeds the radius */
valid &= (l2 <= r2);
if (unlikely(none(valid)))
return false;
/* td is half the chord length in units of t; front/back are the two
   intersections of the ray line with the sphere */
const vfloat<M> td = sqrt((r2 - l2) * rd2);
const vfloat<M> t_front = projC0 - td;
const vfloat<M> t_back = projC0 + td;
const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar);
/* check if there is a first hit; with EMBREE_BACKFACE_CULLING_SPHERES only
   the front intersection is considered */
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
const vbool<M> valid_first = valid_front;
#else
const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar);
const vbool<M> valid_first = valid_front | valid_back;
#endif
if (unlikely(none(valid_first)))
return false;
/* construct first hit: the front intersection where it is valid, otherwise
   the back intersection. Ng = (+-td)*dir - perp equals the unnormalized
   vector from the sphere center to the hit point */
const vfloat<M> td_front = -td;
const vfloat<M> td_back = +td;
const vfloat<M> t_first = select(valid_front, t_front, t_back);
const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
SphereIntersectorHitM<M> hit(t_first, Ng_first);
/* invoke intersection filter for first hit */
const bool is_hit_first = epilog(valid_first, hit);
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
return is_hit_first;
#else
/* check for possible second hits before potentially accepted hit: lanes
   where both intersections were in range also report the back intersection.
   t_back is compared against ray.tfar again here -- presumably because the
   epilog above may have changed ray.tfar (TODO confirm against the epilog) */
const vfloat<M> t_second = t_back;
const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar);
if (unlikely(none(valid_second)))
return is_hit_first;
/* invoke intersection filter for second hit */
const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
hit = SphereIntersectorHitM<M> (t_second, Ng_second);
const bool is_hit_second = epilog(valid_second, hit);
return is_hit_first | is_hit_second;
#endif
}
/*! Entry point used with point geometry: passes the sphere data through
 *  enlargeRadiusToMinWidth() and then runs the core test above. */
template<typename Epilog>
static __forceinline bool intersect(
const vbool<M>& valid_i, Ray& ray, RayQueryContext* context, const Points* geom,
const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog)
{
const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i);
return intersect(valid_i,ray,pre,v0,epilog);
}
};
/*! Intersects ray k of a K-wide ray packet with M spheres at once. Each
 *  sphere is given as a Vec4vf<M> whose xyz is the center and whose w is the
 *  radius. */
template<int M, int K>
struct SphereIntersectorK
{
typedef CurvePrecalculationsK<K> Precalculations;
/*! Ray/sphere test for ray k.
 *  \param valid_i lanes (spheres) to test
 *  \param ray     ray packet; only lane k is read
 *  \param k       index of the ray inside the packet
 *  \param context,geom passed to enlargeRadiusToMinWidth()
 *  \param pre     precalculations (not used in this function)
 *  \param v0i     sphere centers (xyz) and radii (w) before radius enlargement
 *  \param epilog  callable invoked as epilog(valid_mask, hit) for each batch of candidate hits
 *  \return true if any epilog invocation returned true */
template<typename Epilog>
static __forceinline bool intersect(const vbool<M>& valid_i,
RayK<K>& ray, size_t k,
RayQueryContext* context,
const Points* geom,
const Precalculations& pre,
const Vec4vf<M>& v0i,
const Epilog& epilog)
{
vbool<M> valid = valid_i;
/* broadcast origin and direction of ray k to all M lanes */
const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
/* rd2 = rcp(dot(dir,dir)); used to express distances in units of the ray parameter t */
const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i);
const Vec3vf<M> center = v0.xyz();
const vfloat<M> radius = v0.w;
/* projC0 is the ray parameter of the point on the ray line closest to the
   sphere center; perp is the vector from that point to the center */
const Vec3vf<M> c0 = center - ray_org;
const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
const Vec3vf<M> perp = c0 - projC0 * ray_dir;
const vfloat<M> l2 = dot(perp, perp);
const vfloat<M> r2 = radius * radius;
/* the ray line misses the sphere if its closest distance exceeds the radius */
valid &= (l2 <= r2);
if (unlikely(none(valid)))
return false;
/* td is half the chord length in units of t; front/back are the two
   intersections of the ray line with the sphere */
const vfloat<M> td = sqrt((r2 - l2) * rd2);
const vfloat<M> t_front = projC0 - td;
const vfloat<M> t_back = projC0 + td;
const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]);
/* check if there is a first hit; with EMBREE_BACKFACE_CULLING_SPHERES only
   the front intersection is considered */
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
const vbool<M> valid_first = valid_front;
#else
const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]);
const vbool<M> valid_first = valid_front | valid_back;
#endif
if (unlikely(none(valid_first)))
return false;
/* construct first hit: the front intersection where it is valid, otherwise
   the back intersection. Ng = (+-td)*dir - perp equals the unnormalized
   vector from the sphere center to the hit point */
const vfloat<M> td_front = -td;
const vfloat<M> td_back = +td;
const vfloat<M> t_first = select(valid_front, t_front, t_back);
const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
SphereIntersectorHitM<M> hit(t_first, Ng_first);
/* invoke intersection filter for first hit */
const bool is_hit_first = epilog(valid_first, hit);
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
return is_hit_first;
#else
/* check for possible second hits before potentially accepted hit: lanes
   where both intersections were in range also report the back intersection.
   t_back is compared against ray.tfar[k] again here -- presumably because the
   epilog above may have changed it (TODO confirm against the epilog) */
const vfloat<M> t_second = t_back;
const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]);
if (unlikely(none(valid_second)))
return is_hit_first;
/* invoke intersection filter for second hit */
const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
hit = SphereIntersectorHitM<M> (t_second, Ng_second);
const bool is_hit_second = epilog(valid_second, hit);
return is_hit_first | is_hit_second;
#endif
}
};
} // namespace isa
} // namespace embree

View File

@@ -0,0 +1,156 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "intersector_epilog.h"
#include "pointi.h"
#include "sphere_intersector.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for the M sphere points referenced by a
 *  PointMi<M> primitive. Sphere data is gathered without a time argument. */
template<int M, bool filter>
struct SphereMiIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with all valid spheres; hits go through an Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& sphere)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points);

    const vbool<M> active = sphere.valid();
    const Intersect1EpilogM<M, filter> epilog(ray, context, sphere.geomID(), sphere.primID());
    SphereIntersector1<M>::intersect(active, ray, context, points, pre, centerRadius, epilog);
  }

  /*! Occlusion test against all valid spheres; hits go through an Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& sphere)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points);

    const vbool<M> active = sphere.valid();
    const Occluded1EpilogM<M, filter> epilog(ray, context, sphere.geomID(), sphere.primID());
    return SphereIntersector1<M>::intersect(active, ray, context, points, pre, centerRadius, epilog);
  }

  /*! Point queries are delegated to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query,
                                       PointQueryContext* context,
                                       const Primitive& sphere)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
  }
};
/*! Single-ray intersector for the M sphere points referenced by a
 *  PointMi<M> primitive. This variant gathers the sphere data for the time
 *  stored in the ray (ray.time()). */
template<int M, bool filter>
struct SphereMiMBIntersector1
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculations1 Precalculations;

  /*! Intersects the ray with all valid spheres; hits go through an Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre,
                                      RayHit& ray,
                                      RayQueryContext* context,
                                      const Primitive& sphere)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points, ray.time());

    const vbool<M> active = sphere.valid();
    const Intersect1EpilogM<M, filter> epilog(ray, context, sphere.geomID(), sphere.primID());
    SphereIntersector1<M>::intersect(active, ray, context, points, pre, centerRadius, epilog);
  }

  /*! Occlusion test against all valid spheres; hits go through an Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre,
                                     Ray& ray,
                                     RayQueryContext* context,
                                     const Primitive& sphere)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points, ray.time());

    const vbool<M> active = sphere.valid();
    const Occluded1EpilogM<M, filter> epilog(ray, context, sphere.geomID(), sphere.primID());
    return SphereIntersector1<M>::intersect(active, ray, context, points, pre, centerRadius, epilog);
  }

  /*! Point queries are delegated to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query,
                                       PointQueryContext* context,
                                       const Primitive& sphere)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
  }
};
/*! Intersector for ray k of a K-wide ray packet against the M sphere points
 *  referenced by a PointMi<M> primitive. Sphere data is gathered without a
 *  time argument. */
template<int M, int K, bool filter>
struct SphereMiIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with all valid spheres; hits go through an Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& sphere)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points);

    const vbool<M> active = sphere.valid();
    const Intersect1KEpilogM<M, K, filter> epilog(ray, k, context, sphere.geomID(), sphere.primID());
    SphereIntersectorK<M, K>::intersect(active, ray, k, context, points, pre, centerRadius, epilog);
  }

  /*! Occlusion test of ray k against all valid spheres; hits go through an Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& sphere)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points);

    const vbool<M> active = sphere.valid();
    const Occluded1KEpilogM<M, K, filter> epilog(ray, k, context, sphere.geomID(), sphere.primID());
    return SphereIntersectorK<M, K>::intersect(active, ray, k, context, points, pre, centerRadius, epilog);
  }
};
/*! Intersector for ray k of a K-wide ray packet against the M sphere points
 *  referenced by a PointMi<M> primitive. This variant gathers the sphere
 *  data for the time of ray k (ray.time()[k]). */
template<int M, int K, bool filter>
struct SphereMiMBIntersectorK
{
  typedef PointMi<M> Primitive;
  typedef CurvePrecalculationsK<K> Precalculations;

  /*! Intersects ray k with all valid spheres; hits go through an Intersect1KEpilogM. */
  static __forceinline void intersect(
    const Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& sphere)
  {
    STAT3(normal.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points, ray.time()[k]);

    const vbool<M> active = sphere.valid();
    const Intersect1KEpilogM<M, K, filter> epilog(ray, k, context, sphere.geomID(), sphere.primID());
    SphereIntersectorK<M, K>::intersect(active, ray, k, context, points, pre, centerRadius, epilog);
  }

  /*! Occlusion test of ray k against all valid spheres; hits go through an Occluded1KEpilogM. */
  static __forceinline bool occluded(
    const Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& sphere)
  {
    STAT3(shadow.trav_prims, 1, 1, 1);
    const Points* points = context->scene->get<Points>(sphere.geomID());

    Vec4vf<M> centerRadius;
    sphere.gather(centerRadius, points, ray.time()[k]);

    const vbool<M> active = sphere.valid();
    const Occluded1KEpilogM<M, K, filter> epilog(ray, k, context, sphere.geomID(), sphere.primID());
    return SphereIntersectorK<M, K>::intersect(active, ray, k, context, points, pre, centerRadius, epilog);
  }
};
} // namespace isa
} // namespace embree

View File

@@ -0,0 +1,38 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../geometry/primitive.h"
#include "../subdiv/subdivpatch1base.h"
namespace embree
{
/*! Subdivision patch primitive: a SubdivPatch1Base that additionally
 *  carries a primitive type descriptor. */
struct __aligned(64) SubdivPatch1 : public SubdivPatch1Base
{
  /*! primitive type descriptor; the member functions are only declared here */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /*! constructor for cached subdiv patch; forwards every argument
   *  unchanged to the SubdivPatch1Base constructor */
  SubdivPatch1 (const unsigned int gID,
                const unsigned int pID,
                const unsigned int subPatch,
                const SubdivMesh* const mesh,
                const size_t time,
                const Vec2f uv[4],
                const float edge_level[4],
                const int subdiv[4],
                const int simd_width)
    : SubdivPatch1Base(gID,pID,subPatch,mesh,time,uv,edge_level,subdiv,simd_width)
  {
  }
};
}

View File

@@ -0,0 +1,237 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "subdivpatch1.h"
#include "grid_soa.h"
#include "grid_soa_intersector1.h"
#include "grid_soa_intersector_packet.h"
#include "../common/ray.h"
namespace embree
{
namespace isa
{
/*! Thin wrapper around the single-ray precalculation type T; it only
 *  forwards the constructor arguments to T. */
template<typename T>
class SubdivPatch1Precalculations : public T
{
public:
  __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr)
    : T(ray,ptr)
  {
  }
};
/*! Thin wrapper around the packet precalculation type T; it only forwards
 *  the constructor arguments to T. */
template<int K, typename T>
class SubdivPatch1PrecalculationsK : public T
{
public:
  __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray)
    : T(valid,ray)
  {
  }
};
/*! Single-ray intersector for subdivision patches stored as GridSOA.
 *  Leaves with ty == 0 are intersected through GridSOAIntersector1; any
 *  other ty is treated as a lazy node and resolved by processLazyNode(). */
class SubdivPatch1Intersector1
{
public:
  typedef GridSOA Primitive;
  typedef SubdivPatch1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;

  /*! Records the grid in the precalculations and outputs the grid's root
   *  node for time step 0 in lazy_node. Always returns false. */
  static __forceinline bool processLazyNode(Precalculations& pre, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    pre.grid = const_cast<Primitive*>(prim);
    lazy_node = prim->root(0);
    return false;
  }

  /*! Intersect a ray with the primitive. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
      return;
    }
    processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Same as above; the additional ty0 argument is ignored. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
  }

  /*! Test if the ray is occluded by the primitive */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
    }
    return processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Same as above; the additional ty0 argument is ignored. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
  }

  /*! Point queries are not implemented for subdivision patches: asserts
   *  in debug builds and otherwise returns false. */
  template<int N>
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    // TODO: PointQuery implement
    assert(false && "not implemented");
    return false;
  }

  /*! Same as above; the additional ty0 argument is ignored. */
  template<int N>
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
  }
};
/*! Single-ray intersector for subdivision patches with multiple time steps.
 *  Leaves with ty == 0 are intersected through GridSOAMBIntersector1; any
 *  other ty is treated as a lazy node and resolved by processLazyNode(). */
class SubdivPatch1MBIntersector1
{
public:
  typedef SubdivPatch1 Primitive;
  typedef GridSOAMBIntersector1::Precalculations Precalculations;

  /*! Resolves a lazy patch node: fetches the GridSOA referenced by the
   *  patch, selects the time segment for ray.time() (stored in pre.itime,
   *  with the fractional part in pre.ftime), outputs the grid root of that
   *  segment in lazy_node and records the grid in pre. Always returns false. */
  static __forceinline bool processLazyNode(Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim_i, size_t& lazy_node)
  {
    Primitive* prim = (Primitive*) prim_i;
    GridSOA* grid = (GridSOA*) prim->root_ref.get();
    pre.itime = getTimeSegment(ray.time(), float(grid->time_steps-1), pre.ftime);
    lazy_node = grid->root(pre.itime);
    pre.grid = grid;
    return false;
  }

  /*! Intersect a ray with the primitive. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node);
    else                 processLazyNode(pre,ray,context,prim,lazy_node);
  }

  /*! Same as above; the additional ty0 argument is ignored. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) {
    intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
  }

  /*! Test if the ray is occluded by the primitive */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node);
    else                 return processLazyNode(pre,ray,context,prim,lazy_node);
  }

  /*! Same as above; the additional ty0 argument is ignored. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) {
    return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
  }

  /*! Point queries are not implemented for subdivision patches: asserts
   *  in debug builds and otherwise returns false. */
  template<int N>
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    // TODO: PointQuery implement
    assert(false && "not implemented");
    return false;
  }

  /*! Same as above; the additional ty0 argument is ignored.
   *  The 'robust' template parameter appears in no function parameter and
   *  therefore cannot be deduced; without a default this overload could
   *  never be selected by an unqualified call. It is defaulted (and unused)
   *  so that deduction from TravPointQuery<N> succeeds while callers that
   *  spell out both template arguments keep compiling. */
  template<int N, bool robust = false>
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) {
    return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
  }
};
/*! Packet intersector (K rays) for subdivision patches stored as GridSOA.
 *  Leaves with ty == 0 are intersected through GridSOAIntersectorK<K>; any
 *  other ty is treated as a lazy node and resolved by processLazyNode(). */
template <int K>
struct SubdivPatch1IntersectorK
{
  typedef GridSOA Primitive;
  typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;

  /*! Records the grid in the precalculations and outputs the grid's root
   *  node for time step 0 in lazy_node. Always returns false. */
  static __forceinline bool processLazyNode(Precalculations& pre, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  {
    pre.grid = const_cast<Primitive*>(prim);
    lazy_node = prim->root(0);
    return false;
  }

  /*! Intersects all valid rays of the packet with the primitive. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
      return;
    }
    processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Occlusion test for all valid rays of the packet. In the lazy-node
   *  case the bool result of processLazyNode() is converted to vbool<K>. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
    }
    return processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Intersects ray k of the packet with the primitive. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
      return;
    }
    processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Occlusion test for ray k of the packet. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
    }
    return processLazyNode(pre,context,prim,lazy_node);
  }
};
/* convenience aliases of SubdivPatch1IntersectorK for packet sizes 4, 8 and 16 */
typedef SubdivPatch1IntersectorK<4> SubdivPatch1Intersector4;
typedef SubdivPatch1IntersectorK<8> SubdivPatch1Intersector8;
typedef SubdivPatch1IntersectorK<16> SubdivPatch1Intersector16;
/*! Packet intersector (K rays) for subdivision patches with multiple time
 *  steps. Leaves with ty == 0 are intersected through
 *  GridSOAMBIntersectorK<K>; any other ty is treated as a lazy node and
 *  resolved by processLazyNode(). */
template <int K>
struct SubdivPatch1MBIntersectorK
{
  typedef SubdivPatch1 Primitive;
  /* the GridSOAMBIntersectorK<K> precalculations are wrapped rather than used directly */
  typedef SubdivPatch1PrecalculationsK<K,typename GridSOAMBIntersectorK<K>::Precalculations> Precalculations;

  /*! Fetches the GridSOA referenced by the patch, records it in the
   *  precalculations and outputs grid->troot in lazy_node. Always returns false. */
  static __forceinline bool processLazyNode(Precalculations& pre, RayQueryContext* context, const Primitive* prim_i, size_t& lazy_node)
  {
    Primitive* patch = const_cast<Primitive*>(prim_i);
    GridSOA* soa = (GridSOA*) patch->root_ref.get();
    lazy_node = soa->troot;
    pre.grid = soa;
    return false;
  }

  /*! Intersects all valid rays of the packet with the primitive. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      GridSOAMBIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
      return;
    }
    processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Occlusion test for all valid rays of the packet. In the lazy-node
   *  case the bool result of processLazyNode() is converted to vbool<K>. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      return GridSOAMBIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
    }
    return processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Intersects ray k of the packet with the primitive. */
  template<int N, bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
      return;
    }
    processLazyNode(pre,context,prim,lazy_node);
  }

  /*! Occlusion test for ray k of the packet. */
  template<int N, bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    if (likely(ty == 0)) {
      return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
    }
    return processLazyNode(pre,context,prim,lazy_node);
  }
};
/* convenience aliases of SubdivPatch1MBIntersectorK for packet sizes 4, 8 and 16 */
typedef SubdivPatch1MBIntersectorK<4> SubdivPatch1MBIntersector4;
typedef SubdivPatch1MBIntersectorK<8> SubdivPatch1MBIntersector8;
typedef SubdivPatch1MBIntersectorK<16> SubdivPatch1MBIntersector16;
}
}

View File

@@ -0,0 +1,520 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/ray.h"
#include "../common/scene_grid_mesh.h"
#include "../bvh/bvh.h"
namespace embree
{
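/* References a subgrid (a 3x3 vertex neighborhood, i.e. up to 2x2 quads) of a grid mesh primitive by its x/y position and geometry/primitive IDs */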
struct SubGrid
{
/* Interface to query information about the subgrid primitive type
   (derives from PrimitiveType; the member functions are only declared here) */
struct Type : public PrimitiveType
{
const char* name() const;
size_t sizeActive(const char* This) const;
size_t sizeTotal(const char* This) const;
size_t getBytes(const char* This) const;
};
static Type type;
public:
/* primitive supports multiple time segments */
static const bool singleTimeSegment = false;
/* Returns the maximum number of primitives stored per block (always 1);
   blocks() divides by this value */
static __forceinline size_t max_size() { return 1; }
/* Number of primitive blocks needed to hold N primitives, i.e. N divided by max_size() rounded up */
static __forceinline size_t blocks(size_t N)
{
  const size_t perBlock = max_size();
  return (N + perBlock - 1) / perBlock;
}
public:
/* Default constructor; does not initialize any member */
__forceinline SubGrid() { }
/* Construction from the subgrid's x/y position inside the grid and the geometry/primitive IDs */
__forceinline SubGrid(const unsigned int x,
                      const unsigned int y,
                      const unsigned int geomID,
                      const unsigned int primID)
  : _x(x),
    _y(y),
    _geomID(geomID),
    _primID(primID)
{
}
/* True if bit 15 of _x is set; the gather functions then use a column step of 0 for the third vertex column */
__forceinline bool invalid3x3X() const
{
  const unsigned int xBits = (unsigned int)_x;
  return (xBits & (1<<15)) != 0;
}
/* True if bit 15 of _y is set; the gather functions then use a row step of 0 for the third vertex row */
__forceinline bool invalid3x3Y() const
{
  const unsigned int yBits = (unsigned int)_y;
  return (yBits & (1<<15)) != 0;
}
/* Gather the quads.
   Loads the 3x3 vertex neighborhood that starts at (x(),y()) inside grid g
   and combines it into four quads. vtxRC denotes the vertex R rows and C
   columns away from the first vertex; consecutive rows are g.lineVtxOffset
   vertices apart. Each output p0..p3 is produced by one transpose() of four
   loaded vertices into separate x/y/z vectors. */
__forceinline void gather(Vec3vf4& p0,
Vec3vf4& p1,
Vec3vf4& p2,
Vec3vf4& p3,
const GridMesh* const mesh,
const GridMesh::Grid &g) const
{
/* first quad always valid */
const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
const size_t vtxID01 = vtxID00 + 1;
const vfloat4 vtx00 = vfloat4::loadu(mesh->vertexPtr(vtxID00));
const vfloat4 vtx01 = vfloat4::loadu(mesh->vertexPtr(vtxID01));
const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
const vfloat4 vtx10 = vfloat4::loadu(mesh->vertexPtr(vtxID10));
const vfloat4 vtx11 = vfloat4::loadu(mesh->vertexPtr(vtxID11));
/* deltaX => vtx02, vtx12 */
/* with invalid3x3X() the step is 0, so the third column re-reads the second one */
const size_t deltaX = invalid3x3X() ? 0 : 1;
const size_t vtxID02 = vtxID01 + deltaX;
const vfloat4 vtx02 = vfloat4::loadu(mesh->vertexPtr(vtxID02));
const size_t vtxID12 = vtxID11 + deltaX;
const vfloat4 vtx12 = vfloat4::loadu(mesh->vertexPtr(vtxID12));
/* deltaY => vtx20, vtx21 */
/* with invalid3x3Y() the step is 0, so the third row re-reads the second one */
const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
const size_t vtxID20 = vtxID10 + deltaY;
const size_t vtxID21 = vtxID11 + deltaY;
const vfloat4 vtx20 = vfloat4::loadu(mesh->vertexPtr(vtxID20));
const vfloat4 vtx21 = vfloat4::loadu(mesh->vertexPtr(vtxID21));
/* deltaX/deltaY => vtx22 */
const size_t vtxID22 = vtxID11 + deltaX + deltaY;
const vfloat4 vtx22 = vfloat4::loadu(mesh->vertexPtr(vtxID22));
/* p0..p3 are built from the vertex groups (00,01,11,10), (01,02,12,11),
   (11,12,22,21) and (10,11,21,20) respectively */
transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
}
/* Loads the vertex at 'offset' for the time steps itime and itime+1 and
   interpolates between the two with weight ftime */
template<typename T>
__forceinline vfloat4 getVertexMB(const GridMesh* const mesh, const size_t offset, const size_t itime, const float ftime) const
{
  const T vtxT0 = T::loadu(mesh->vertexPtr(offset,itime+0));
  const T vtxT1 = T::loadu(mesh->vertexPtr(offset,itime+1));
  return lerp(vtxT0,vtxT1,ftime);
}
/* Gather the quads for a given time.
   Same vertex addressing as gather(), but every vertex is fetched through
   getVertexMB(), i.e. interpolated between time steps itime and itime+1
   with weight ftime. vtxRC denotes the vertex R rows and C columns away
   from the first vertex; consecutive rows are g.lineVtxOffset vertices apart. */
__forceinline void gatherMB(Vec3vf4& p0,
Vec3vf4& p1,
Vec3vf4& p2,
Vec3vf4& p3,
const GridMesh* const mesh,
const GridMesh::Grid &g,
const size_t itime,
const float ftime) const
{
/* first quad always valid */
const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
const size_t vtxID01 = vtxID00 + 1;
const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime);
const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime);
const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime);
const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime);
/* deltaX => vtx02, vtx12 */
/* with invalid3x3X() the step is 0, so the third column re-reads the second one */
const size_t deltaX = invalid3x3X() ? 0 : 1;
const size_t vtxID02 = vtxID01 + deltaX;
const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime);
const size_t vtxID12 = vtxID11 + deltaX;
const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime);
/* deltaY => vtx20, vtx21 */
/* with invalid3x3Y() the step is 0, so the third row re-reads the second one */
const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
const size_t vtxID20 = vtxID10 + deltaY;
const size_t vtxID21 = vtxID11 + deltaY;
const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime);
const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime);
/* deltaX/deltaY => vtx22 */
const size_t vtxID22 = vtxID11 + deltaX + deltaY;
const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime);
/* p0..p3 are built from the vertex groups (00,01,11,10), (01,02,12,11),
   (11,12,22,21) and (10,11,21,20) respectively */
transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
}
/* Gather the quads; looks up the grid mesh and grid of this subgrid in the scene
   and forwards to the mesh/grid based gather() */
__forceinline void gather(Vec3vf4& p0,
                          Vec3vf4& p1,
                          Vec3vf4& p2,
                          Vec3vf4& p3,
                          const Scene *const scene) const
{
  const GridMesh* const gridMesh = scene->get<GridMesh>(geomID());
  const GridMesh::Grid& grid = gridMesh->grid(primID());
  gather(p0,p1,p2,p3,gridMesh,grid);
}
/* Gather the quads in the motion blur case; looks up the grid mesh and grid of this
   subgrid in the scene and forwards to the mesh/grid based gatherMB() */
__forceinline void gatherMB(Vec3vf4& p0,
                            Vec3vf4& p1,
                            Vec3vf4& p2,
                            Vec3vf4& p3,
                            const Scene *const scene,
                            const size_t itime,
                            const float ftime) const
{
  const GridMesh* const gridMesh = scene->get<GridMesh>(geomID());
  const GridMesh::Grid& grid = gridMesh->grid(primID());
  gatherMB(p0,p1,p2,p3,gridMesh,grid,itime,ftime);
}
/* Gather the quads into a flat array.
   Looks up the grid mesh and grid of this subgrid in the scene, loads the
   3x3 vertex neighborhood starting at (x(),y()) and writes four quads of
   four vertices each to vtx[0..15]. vtxRC denotes the vertex R rows and C
   columns away from the first vertex.
   NOTE(review): the last two quads are written in the order (10,11,21,20),
   (11,12,22,21), which is the reverse of the p3/p2 assignment in the
   Vec3vf4 gather() -- confirm callers expect this ordering. */
__forceinline void gather(Vec3fa vtx[16], const Scene *const scene) const
{
const GridMesh* mesh = scene->get<GridMesh>(geomID());
const GridMesh::Grid &g = mesh->grid(primID());
/* first quad always valid */
const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
const size_t vtxID01 = vtxID00 + 1;
const Vec3fa vtx00 = Vec3fa::loadu(mesh->vertexPtr(vtxID00));
const Vec3fa vtx01 = Vec3fa::loadu(mesh->vertexPtr(vtxID01));
const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
const Vec3fa vtx10 = Vec3fa::loadu(mesh->vertexPtr(vtxID10));
const Vec3fa vtx11 = Vec3fa::loadu(mesh->vertexPtr(vtxID11));
/* deltaX => vtx02, vtx12 */
/* with invalid3x3X() the step is 0, so the third column re-reads the second one */
const size_t deltaX = invalid3x3X() ? 0 : 1;
const size_t vtxID02 = vtxID01 + deltaX;
const Vec3fa vtx02 = Vec3fa::loadu(mesh->vertexPtr(vtxID02));
const size_t vtxID12 = vtxID11 + deltaX;
const Vec3fa vtx12 = Vec3fa::loadu(mesh->vertexPtr(vtxID12));
/* deltaY => vtx20, vtx21 */
/* with invalid3x3Y() the step is 0, so the third row re-reads the second one */
const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
const size_t vtxID20 = vtxID10 + deltaY;
const size_t vtxID21 = vtxID11 + deltaY;
const Vec3fa vtx20 = Vec3fa::loadu(mesh->vertexPtr(vtxID20));
const Vec3fa vtx21 = Vec3fa::loadu(mesh->vertexPtr(vtxID21));
/* deltaX/deltaY => vtx22 */
const size_t vtxID22 = vtxID11 + deltaX + deltaY;
const Vec3fa vtx22 = Vec3fa::loadu(mesh->vertexPtr(vtxID22));
/* one quad (four vertices) per line */
vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
}
/* Gather the quads into a flat array for a given time.
   Looks up the grid mesh and grid of this subgrid in the scene, fetches the
   3x3 vertex neighborhood starting at (x(),y()) through getVertexMB()
   (interpolating time steps itime and itime+1 with weight ftime) and writes
   four quads of four vertices each to vtx[0..15]. vtxRC denotes the vertex
   R rows and C columns away from the first vertex. */
__forceinline void gatherMB(vfloat4 vtx[16], const Scene *const scene, const size_t itime, const float ftime) const
{
const GridMesh* mesh = scene->get<GridMesh>(geomID());
const GridMesh::Grid &g = mesh->grid(primID());
/* first quad always valid */
const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
const size_t vtxID01 = vtxID00 + 1;
const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime);
const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime);
const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime);
const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime);
/* deltaX => vtx02, vtx12 */
/* with invalid3x3X() the step is 0, so the third column re-reads the second one */
const size_t deltaX = invalid3x3X() ? 0 : 1;
const size_t vtxID02 = vtxID01 + deltaX;
const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime);
const size_t vtxID12 = vtxID11 + deltaX;
const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime);
/* deltaY => vtx20, vtx21 */
/* with invalid3x3Y() the step is 0, so the third row re-reads the second one */
const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
const size_t vtxID20 = vtxID10 + deltaY;
const size_t vtxID21 = vtxID11 + deltaY;
const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime);
const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime);
/* deltaX/deltaY => vtx22 */
const size_t vtxID22 = vtxID11 + deltaX + deltaY;
const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime);
/* one quad (four vertices) per line */
vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
}
/*! Bounds of the subgrid at time step itime.
 *  Not implemented: the function only invokes FATAL("not implemented yet");
 *  the empty box is returned solely to satisfy the signature. */
__forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
{
  BBox3fa result(empty);
  FATAL("not implemented yet");
  return result;
}
/*! Linear bounds over one time segment, built from the bounds at
 *  time steps itime and itime+1 (see bounds(), which is still a stub). */
__forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime)
{
  return LBBox3fa(bounds(scene,itime),bounds(scene,itime+1));
}
/*! Linear bounds for a time step out of numTimeSteps.
 *  Not implemented: only invokes FATAL("not implemented yet"). */
__forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
{
  LBBox3fa result(empty);
  FATAL("not implemented yet");
  return result;
}
/*! Linear bounds over an arbitrary time range.
 *  Not implemented: only invokes FATAL("not implemented yet"). */
__forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
{
  LBBox3fa result(empty);
  FATAL("not implemented yet");
  return result;
}
/*! Debug output of a subgrid: start coordinates, IDs and the two
 *  invalid3x3 flags (printed as 0/1), enclosed in one pair of parentheses. */
friend embree_ostream operator<<(embree_ostream cout, const SubGrid& sg) {
  return cout << "SubGrid " << " ( x = " << sg.x()
              << ", y = " << sg.y()
              << ", geomID = " << sg.geomID()
              << ", primID = " << sg.primID()
              << ", invalid3x3X() " << (int)sg.invalid3x3X()
              << ", invalid3x3Y() " << (int)sg.invalid3x3Y()
              << " )"; /* close the parenthesis opened before "x =" */
}
/*! geometry ID of the grid mesh this subgrid belongs to */
__forceinline unsigned int geomID() const
{
  return _geomID;
}
/*! primitive (grid) ID inside the mesh */
__forceinline unsigned int primID() const
{
  return _primID;
}
/*! x start coordinate: the low 15 bits of _x (bit 15 is masked off) */
__forceinline unsigned int x() const
{
  return static_cast<unsigned int>(_x) & 0x7fff;
}
/*! y start coordinate: the low 15 bits of _y (bit 15 is masked off) */
__forceinline unsigned int y() const
{
  return static_cast<unsigned int>(_y) & 0x7fff;
}
private:
unsigned short _x;
unsigned short _y;
unsigned int _geomID; // geometry ID of mesh
unsigned int _primID; // primitive ID of primitive inside mesh
};
/*! Compact identifier of one subgrid slot: two 16 bit coordinates
 *  plus the ID of the primitive the subgrid belongs to. */
struct SubGridID {
  unsigned short x;
  unsigned short y;
  unsigned int primID;

  /*! leaves all members uninitialized */
  __forceinline SubGridID() {}

  /*! x and y are narrowed to unsigned short, i.e. only their low 16 bits are kept */
  __forceinline SubGridID(const unsigned int x, const unsigned int y, const unsigned int primID)
    : x(static_cast<unsigned short>(x)),
      y(static_cast<unsigned short>(y)),
      primID(primID)
  {}
};
/*! QuantizedBaseNode as large subgrid leaf: up to N subgrids of one grid
 *  geometry whose bounds are stored in a single quantized node. Unused
 *  slots carry the primID marker (unsigned int)-1. */
template<int N>
struct SubGridQBVHN
{
  /* Virtual interface to query information about the subgrid leaf type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /*! Returns the index of the first unused slot, or N if every slot is used. */
  __forceinline size_t size() const
  {
    for (size_t i=0;i<N;i++)
      if (primID(i) == (unsigned int)-1) return i;
    return N;
  }

  /*! Marks every slot as unused and clears the quantized node. */
  __forceinline void clear() {
    for (size_t i=0;i<N;i++)
      subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
    qnode.clear();
  }

  /* Default constructor, performs no initialization */
  __forceinline SubGridQBVHN() { }

  /*! Construction from subgrid coordinates, primitive IDs and bounds.
   *  Only the first `items` entries of x, y, primID and subGridBounds are
   *  read; `items` must not exceed N. The remaining slots stay unused. */
  __forceinline SubGridQBVHN(const unsigned int x[N],
                             const unsigned int y[N],
                             const unsigned int primID[N],
                             const BBox3fa * const subGridBounds,
                             const unsigned int geomID,
                             const unsigned int items)
  {
    /* more than N items would write past subgridIDs[] and the node's children */
    assert(items <= (unsigned int)N);
    clear();
    _geomID = geomID;

    /* collect the full precision bounds first, then quantize them in one go */
    __aligned(64) typename BVHN<N>::AABBNode node;
    node.clear();
    for (size_t i=0;i<items;i++)
    {
      subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
      node.setBounds(i,subGridBounds[i]);
    }
    qnode.init_dim(node);
  }

  /*! geometry ID shared by all subgrids of this leaf */
  __forceinline unsigned int geomID() const { return _geomID; }
  /*! primitive ID of slot i, (unsigned int)-1 for an unused slot */
  __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
  /*! stored x coordinate of slot i */
  __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
  /*! stored y coordinate of slot i */
  __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }

  /*! Builds the SubGrid stored in slot i; the slot must be in use. */
  __forceinline SubGrid subgrid(const size_t i) const {
    assert(i < N);
    assert(primID(i) != (unsigned int)-1);
    return SubGrid(x(i),y(i),geomID(),primID(i));
  }

public:
  SubGridID subgridIDs[N];
  typename BVHN<N>::QuantizedBaseNode qnode;
  unsigned int _geomID; // geometry ID of mesh

  /*! Debug output: all N slots, the geometry ID and the dequantized bounds. */
  friend embree_ostream operator<<(embree_ostream cout, const SubGridQBVHN& sg) {
    cout << "SubGridQBVHN " << embree_endl;
    for (size_t i=0;i<N;i++)
      cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
    cout << "geomID " << sg._geomID << embree_endl;
    cout << "lowerX " << sg.qnode.dequantizeLowerX() << embree_endl;
    cout << "upperX " << sg.qnode.dequantizeUpperX() << embree_endl;
    cout << "lowerY " << sg.qnode.dequantizeLowerY() << embree_endl;
    cout << "upperY " << sg.qnode.dequantizeUpperY() << embree_endl;
    cout << "lowerZ " << sg.qnode.dequantizeLowerZ() << embree_endl;
    cout << "upperZ " << sg.qnode.dequantizeUpperZ() << embree_endl;
    return cout;
  }
};
/* out-of-class definition of the static type descriptor, one per instantiation of N */
template<int N>
typename SubGridQBVHN<N>::Type SubGridQBVHN<N>::type;
/* shorthand names for the N = 4 and N = 8 instantiations */
typedef SubGridQBVHN<4> SubGridQBVH4;
typedef SubGridQBVHN<8> SubGridQBVH8;
/*! Motion-blur variant of the large subgrid leaf: up to N subgrids of one
 *  grid geometry with two sets of quantized bounds (qnode.node0 and
 *  qnode.node1) plus a time offset/scale used by adjustTime(). Unused
 *  slots carry the primID marker (unsigned int)-1. */
template<int N>
struct SubGridMBQBVHN
{
  /* Virtual interface to query information about the subgrid leaf type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /*! Returns the index of the first unused slot, or N if every slot is used. */
  __forceinline size_t size() const
  {
    for (size_t i=0;i<N;i++)
      if (primID(i) == (unsigned int)-1) return i;
    return N;
  }

  /*! Marks every slot as unused and clears the quantized node. */
  __forceinline void clear() {
    for (size_t i=0;i<N;i++)
      subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
    qnode.clear();
  }

  /* Default constructor, performs no initialization */
  __forceinline SubGridMBQBVHN() { }

  /*! Construction from subgrid coordinates, primitive IDs and two sets of
   *  bounds (subGridBounds0 -> node0, subGridBounds1 -> node1).
   *  Only the first `items` entries of the arrays are read; `items` must
   *  not exceed N. toffset/tscale are stored for adjustTime(). */
  __forceinline SubGridMBQBVHN(const unsigned int x[N],
                               const unsigned int y[N],
                               const unsigned int primID[N],
                               const BBox3fa * const subGridBounds0,
                               const BBox3fa * const subGridBounds1,
                               const unsigned int geomID,
                               const float toffset,
                               const float tscale,
                               const unsigned int items)
  {
    /* more than N items would write past subgridIDs[] and the nodes' children */
    assert(items <= (unsigned int)N);
    clear();
    _geomID = geomID;
    time_offset = toffset;
    time_scale = tscale;

    /* collect the full precision bounds first, then quantize both nodes */
    __aligned(64) typename BVHN<N>::AABBNode node0,node1;
    node0.clear();
    node1.clear();
    for (size_t i=0;i<items;i++)
    {
      subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
      node0.setBounds(i,subGridBounds0[i]);
      node1.setBounds(i,subGridBounds1[i]);
    }
    qnode.node0.init_dim(node0);
    qnode.node1.init_dim(node1);
  }

  /*! geometry ID shared by all subgrids of this leaf */
  __forceinline unsigned int geomID() const { return _geomID; }
  /*! primitive ID of slot i, (unsigned int)-1 for an unused slot */
  __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
  /*! stored x coordinate of slot i */
  __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
  /*! stored y coordinate of slot i */
  __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }

  /*! Builds the SubGrid stored in slot i; the slot must be in use. */
  __forceinline SubGrid subgrid(const size_t i) const {
    assert(i < N);
    assert(primID(i) != (unsigned int)-1);
    return SubGrid(x(i),y(i),geomID(),primID(i));
  }

  /*! Maps a time t to time_scale * (t - time_offset). */
  __forceinline float adjustTime(const float t) const { return time_scale * (t-time_offset); }

  /*! Packet version of adjustTime(), applied to every lane of t. */
  template<int K>
  __forceinline vfloat<K> adjustTime(const vfloat<K> &t) const { return time_scale * (t-time_offset); }

public:
  SubGridID subgridIDs[N];
  typename BVHN<N>::QuantizedBaseNodeMB qnode;
  float time_offset;
  float time_scale;
  unsigned int _geomID; // geometry ID of mesh

  /*! Debug output: all N slots, geometry ID, time mapping and the
   *  dequantized bounds of node0 followed by those of node1. */
  friend embree_ostream operator<<(embree_ostream cout, const SubGridMBQBVHN& sg) {
    cout << "SubGridMBQBVHN " << embree_endl;
    for (size_t i=0;i<N;i++)
      cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
    cout << "geomID " << sg._geomID << embree_endl;
    cout << "time_offset " << sg.time_offset << embree_endl;
    cout << "time_scale " << sg.time_scale << embree_endl;
    /* bounds of node0 */
    cout << "lowerX " << sg.qnode.node0.dequantizeLowerX() << embree_endl;
    cout << "upperX " << sg.qnode.node0.dequantizeUpperX() << embree_endl;
    cout << "lowerY " << sg.qnode.node0.dequantizeLowerY() << embree_endl;
    cout << "upperY " << sg.qnode.node0.dequantizeUpperY() << embree_endl;
    cout << "lowerZ " << sg.qnode.node0.dequantizeLowerZ() << embree_endl;
    cout << "upperZ " << sg.qnode.node0.dequantizeUpperZ() << embree_endl;
    /* bounds of node1 */
    cout << "lowerX " << sg.qnode.node1.dequantizeLowerX() << embree_endl;
    cout << "upperX " << sg.qnode.node1.dequantizeUpperX() << embree_endl;
    cout << "lowerY " << sg.qnode.node1.dequantizeLowerY() << embree_endl;
    cout << "upperY " << sg.qnode.node1.dequantizeUpperY() << embree_endl;
    cout << "lowerZ " << sg.qnode.node1.dequantizeLowerZ() << embree_endl;
    cout << "upperZ " << sg.qnode.node1.dequantizeUpperZ() << embree_endl;
    return cout;
  }
};
}

View File

@@ -0,0 +1,515 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "subgrid.h"
#include "subgrid_intersector_moeller.h"
#include "subgrid_intersector_pluecker.h"
namespace embree
{
namespace isa
{
// =======================================================================================
// =================================== SubGridIntersectors ===============================
// =======================================================================================
/*! Single-ray intersector for SubGridQBVHN leaves based on the
 *  Moeller-Trumbore quad test. A leaf is first tested against the child
 *  boxes of its quantized node; only subgrids whose box is reported by
 *  that test are gathered and intersected. */
template<int N, bool filter>
struct SubGridIntersector1Moeller
{
  typedef SubGridQBVHN<N> Primitive;
  typedef SubGridQuadMIntersector1MoellerTrumbore<4,filter> Precalculations;

  /*! Intersects the ray with the quads of one subgrid. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Tests whether the ray is occluded by the quads of one subgrid. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Forwards the point query to the geometry's own pointQuery after
   *  storing the subgrid's geomID/primID in the context. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(point_query.trav_prims,1,1,1);
    AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
    assert(accel);
    context->geomID = subgrid.geomID();
    context->primID = subgrid.primID();
    return accel->pointQuery(query, context);
  }

  /*! Intersects the ray with `num` leaves starting at prim. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
#if defined(__AVX__)
      STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
#endif
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        /* skip boxes that start behind the current ray.tfar, which may
           have changed since the box test */
        if (unlikely(dist[ID] > ray.tfar)) continue;
        intersect(pre,ray,context,prim[i].subgrid(ID));
      }
    }
  }

  /*! Returns true as soon as any subgrid of the `num` leaves occludes the ray. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        if (occluded(pre,ray,context,prim[i].subgrid(ID)))
          return true;
      }
    }
    return false;
  }

  /*! Runs the point query on every subgrid whose box passes the sphere or
   *  AABB test (chosen by context->query_type). Returns true if any of the
   *  per-subgrid queries returned true. */
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    bool changed = false;
    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask;
      if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
        mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
      } else {
        mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
      }
      /* record the hit-box statistic, as SubGridIntersector1Pluecker does */
#if defined(__AVX__)
      STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1);
#endif
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
        changed |= pointQuery(query, context, prim[i].subgrid(ID));
      }
    }
    return changed;
  }
};
/*! Single-ray intersector for SubGridQBVHN leaves based on the Pluecker
 *  quad test. A leaf is first tested against the child boxes of its
 *  quantized node; only subgrids whose box is reported by that test are
 *  gathered and intersected. */
template<int N, bool filter>
struct SubGridIntersector1Pluecker
{
  typedef SubGridQBVHN<N> Primitive;
  typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;

  /*! Intersects the ray with the quads of one subgrid. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Tests whether the ray is occluded by the quads of one subgrid. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Forwards the point query to the geometry's own pointQuery after
   *  storing the subgrid's geomID/primID in the context. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(point_query.trav_prims,1,1,1);
    AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
    /* same check as in SubGridIntersector1Moeller before dereferencing */
    assert(accel);
    context->geomID = subgrid.geomID();
    context->primID = subgrid.primID();
    return accel->pointQuery(query, context);
  }

  /*! Intersects the ray with `num` leaves starting at prim. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
#if defined(__AVX__)
      STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
#endif
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        /* skip boxes that start behind the current ray.tfar, which may
           have changed since the box test */
        if (unlikely(dist[ID] > ray.tfar)) continue;
        intersect(pre,ray,context,prim[i].subgrid(ID));
      }
    }
  }

  /*! Returns true as soon as any subgrid of the `num` leaves occludes the ray. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        if (occluded(pre,ray,context,prim[i].subgrid(ID)))
          return true;
      }
    }
    return false;
  }

  /*! Runs the point query on every subgrid whose box passes the sphere or
   *  AABB test (chosen by context->query_type). Returns true if any of the
   *  per-subgrid queries returned true. */
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    bool changed = false;
    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask;
      if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
        mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
      } else {
        mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
      }
#if defined(__AVX__)
      STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1);
#endif
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
        changed |= pointQuery(query, context, prim[i].subgrid(ID));
      }
    }
    return changed;
  }
};
/*! Ray-packet (K rays) intersector for SubGridQBVHN leaves based on the
 *  Moeller-Trumbore quad test. Provides packet entry points (all active
 *  rays at once) and single-ray entry points (ray k of the packet). */
template<int N, int K, bool filter>
struct SubGridIntersectorKMoeller
{
  typedef SubGridQBVHN<N> Primitive;
  typedef SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> Precalculations;

  /*! Intersects the active rays with the four quads of one subgrid, one quad at a time. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    Vec3fa vtx[16];
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    subgrid.gather(vtx,context->scene);
    for (unsigned int i=0; i<4; i++)
    {
      /* corners of quad i, broadcast to all K lanes */
      const Vec3vf<K> p0 = vtx[i*4+0];
      const Vec3vf<K> p1 = vtx[i*4+1];
      const Vec3vf<K> p2 = vtx[i*4+2];
      const Vec3vf<K> p3 = vtx[i*4+3];
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
    }
  }

  /*! Occlusion test of the active rays against one subgrid.
   *  Returns !valid0, where valid0 starts as valid_i and is handed to the
   *  occlusion epilog (presumably the epilog clears the lanes it finds
   *  occluded - the epilog is defined outside this struct). */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    vbool<K> valid0 = valid_i;
    Vec3fa vtx[16];
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    subgrid.gather(vtx,context->scene);
    for (unsigned int i=0; i<4; i++)
    {
      /* corners of quad i, broadcast to all K lanes */
      const Vec3vf<K> p0 = vtx[i*4+0];
      const Vec3vf<K> p1 = vtx[i*4+1];
      const Vec3vf<K> p2 = vtx[i*4+2];
      const Vec3vf<K> p3 = vtx[i*4+3];
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i));
      if (none(valid0)) break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with the quads of one subgrid. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Tests whether ray k of the packet is occluded by one subgrid. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Packet intersection with `num` leaves: every valid child box is
   *  tested against the packet, and the subgrid is intersected if any
   *  active ray hits the box. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
    for (size_t j=0;j<num;j++)
    {
      size_t m_valid = movemask(prim[j].qnode.validMask());
      vfloat<K> dist;
      while(m_valid)
      {
        const size_t i = bscf(m_valid);
        if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
        intersect(valid,pre,ray,context,prim[j].subgrid(i));
      }
    }
  }

  /*! Packet occlusion test with `num` leaves. Returns !valid0, where
   *  valid0 holds the initially valid rays not yet found occluded. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
    vbool<K> valid0 = valid;
    for (size_t j=0;j<num;j++)
    {
      size_t m_valid = movemask(prim[j].qnode.validMask());
      vfloat<K> dist;
      while(m_valid)
      {
        const size_t i = bscf(m_valid);
        if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
        valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
        /* every ray is occluded: leave both loops instead of box-testing
           the remaining leaves with an empty ray mask */
        if (none(valid0)) return !valid0;
      }
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with `num` leaves. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        /* skip boxes that start behind the current tfar of ray k */
        if (unlikely(dist[ID] > ray.tfar[k])) continue;
        intersect(pre,ray,k,context,prim[i].subgrid(ID));
      }
    }
  }

  /*! Returns true as soon as any subgrid of the `num` leaves occludes ray k. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
          return true;
      }
    }
    return false;
  }
};
/*! Ray-packet (K rays) intersector for SubGridQBVHN leaves based on the
 *  Pluecker quad test. Provides packet entry points (all active rays at
 *  once) and single-ray entry points (ray k of the packet). */
template<int N, int K, bool filter>
struct SubGridIntersectorKPluecker
{
  typedef SubGridQBVHN<N> Primitive;
  typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;

  /*! Intersects the active rays with the four quads of one subgrid, one quad at a time. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    Vec3fa vtx[16];
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    subgrid.gather(vtx,context->scene);
    for (unsigned int i=0; i<4; i++)
    {
      /* corners of quad i, broadcast to all K lanes */
      const Vec3vf<K> p0 = vtx[i*4+0];
      const Vec3vf<K> p1 = vtx[i*4+1];
      const Vec3vf<K> p2 = vtx[i*4+2];
      const Vec3vf<K> p3 = vtx[i*4+3];
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
    }
  }

  /*! Occlusion test of the active rays against one subgrid.
   *  Returns !valid0, where valid0 starts as valid_i and is handed to the
   *  occlusion epilog (presumably the epilog clears the lanes it finds
   *  occluded - the epilog is defined outside this struct). */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    vbool<K> valid0 = valid_i;
    Vec3fa vtx[16];
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    subgrid.gather(vtx,context->scene);
    for (unsigned int i=0; i<4; i++)
    {
      /* corners of quad i, broadcast to all K lanes */
      const Vec3vf<K> p0 = vtx[i*4+0];
      const Vec3vf<K> p1 = vtx[i*4+1];
      const Vec3vf<K> p2 = vtx[i*4+2];
      const Vec3vf<K> p3 = vtx[i*4+3];
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      pre.occludedK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i));
      if (none(valid0)) break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with the quads of one subgrid. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Tests whether ray k of the packet is occluded by one subgrid. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &g = mesh->grid(subgrid.primID());

    Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
    return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
  }

  /*! Packet intersection with `num` leaves: every valid child box is
   *  tested against the packet, and the subgrid is intersected if any
   *  active ray hits the box. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
    for (size_t j=0;j<num;j++)
    {
      size_t m_valid = movemask(prim[j].qnode.validMask());
      vfloat<K> dist;
      while(m_valid)
      {
        const size_t i = bscf(m_valid);
        if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
        intersect(valid,pre,ray,context,prim[j].subgrid(i));
      }
    }
  }

  /*! Packet occlusion test with `num` leaves. Returns !valid0, where
   *  valid0 holds the initially valid rays not yet found occluded. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
    vbool<K> valid0 = valid;
    for (size_t j=0;j<num;j++)
    {
      size_t m_valid = movemask(prim[j].qnode.validMask());
      vfloat<K> dist;
      while(m_valid)
      {
        const size_t i = bscf(m_valid);
        if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
        valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
        /* every ray is occluded: leave both loops instead of box-testing
           the remaining leaves with an empty ray mask */
        if (none(valid0)) return !valid0;
      }
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with `num` leaves. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        /* skip boxes that start behind the current tfar of ray k */
        if (unlikely(dist[ID] > ray.tfar[k])) continue;
        intersect(pre,ray,k,context,prim[i].subgrid(ID));
      }
    }
  }

  /*! Returns true as soon as any subgrid of the `num` leaves occludes ray k. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> isec1;

    for (size_t i=0;i<num;i++)
    {
      vfloat<N> dist;
      size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
      while(mask != 0)
      {
        const size_t ID = bscf(mask);
        assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));

        if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
          return true;
      }
    }
    return false;
  }
};
}
}

View File

@@ -0,0 +1,382 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "subgrid.h"
#include "quad_intersector_moeller.h"
namespace embree
{
namespace isa
{
/* ----------------------------- */
/* -- single ray intersectors -- */
/* ----------------------------- */
/*! Converts the per-quad U,V of a hit into U,V across the entire grid.
 *  stepX/stepY give, per lane, the offset of the lane's quad relative to
 *  the subgrid start (subgrid.x(), subgrid.y()). The quad offset is
 *  multiplied by hit.absDen before being added, and the sum is divided
 *  by (resX-1) resp. (resY-1). */
template<int M>
__forceinline void interpolateUV(MoellerTrumboreHitM<M,UVIdentity<M>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY)
{
  /* per-lane quad coordinates inside the grid */
  const vint<M> quadX = vint<M>((int)subgrid.x()) + stepX;
  const vint<M> quadY = vint<M>((int)subgrid.y()) + stepY;

  /* reciprocal of the number of quads per row/column */
  const float rcpQuadsX = rcp((float)((int)g.resX-1));
  const float rcpQuadsY = rcp((float)((int)g.resY-1));

  hit.U = (hit.U + (vfloat<M>)quadX * hit.absDen) * rcpQuadsX;
  hit.V = (hit.V + (vfloat<M>)quadY * hit.absDen) * rcpQuadsY;
}
template<int M, bool filter>
struct SubGridQuadMIntersector1MoellerTrumbore;

/*! Intersects M quads of a subgrid with a single ray. Each quad
 *  (v0,v1,v2,v3) is tested as the two triangles (v0,v1,v3) and
 *  (v2,v3,v1); the U,V of every hit are remapped to the entire grid
 *  before the epilog is run. */
template<int M, bool filter>
struct SubGridQuadMIntersector1MoellerTrumbore
{
  __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}

  __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}

  /*! Runs the intersection epilog for every triangle hit. */
  __forceinline void intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    /* per-lane quad offsets inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> mapUV;
    MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV);
    MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
    Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());

    /* first triangle (v0,v1,v3): U,V can be used as they are */
    if (intersector.intersect(ray,v0,v1,v3,mapUV,hit))
    {
      interpolateUV<M>(hit,g,subgrid,quadStepX,quadStepY);
      epilog(hit.valid,hit);
    }

    /* second triangle (v2,v3,v1): mirror U,V into the quad's frame first */
    if (intersector.intersect(ray,v2,v3,v1,mapUV,hit))
    {
      hit.U = hit.absDen - hit.U;
      hit.V = hit.absDen - hit.V;
      interpolateUV<M>(hit,g,subgrid,quadStepX,quadStepY);
      epilog(hit.valid,hit);
    }
  }

  /*! Returns true as soon as the occlusion epilog accepts a triangle hit. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                              const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    /* per-lane quad offsets inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> mapUV;
    MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV);
    MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
    Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID());

    /* first triangle (v0,v1,v3): U,V can be used as they are */
    if (intersector.intersect(ray,v0,v1,v3,mapUV,hit))
    {
      interpolateUV<M>(hit,g,subgrid,quadStepX,quadStepY);
      if (epilog(hit.valid,hit))
        return true;
    }

    /* second triangle (v2,v3,v1): mirror U,V into the quad's frame first */
    if (intersector.intersect(ray,v2,v3,v1,mapUV,hit))
    {
      hit.U = hit.absDen - hit.U;
      hit.V = hit.absDen - hit.V;
      interpolateUV<M>(hit,g,subgrid,quadStepX,quadStepY);
      if (epilog(hit.valid,hit))
        return true;
    }
    return false;
  }
};
#if defined (__AVX__)
/*! Intersects 4 quads with 1 ray using AVX.
 *  Both triangles of every quad are tested in one 8-wide call: lanes 0-3
 *  hold the first triangle (v0,v1,v3) of each quad, lanes 4-7 the second
 *  triangle, which starts at v2. */
template<bool filter>
struct SubGridQuadMIntersector1MoellerTrumbore<4,filter>
{
  __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}

  __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}

  /*! Tests all 8 triangles and runs `epilog` on the hits.
   *  Returns the epilog's result, or false if no triangle was hit. */
  template<typename Epilog>
  __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
  {
    const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
    /* upper lanes test (v2,v1,v3): v1/v3 can be shared by both halves,
       at the price of a reversed winding in the upper lanes */
    const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
    const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
    /* upper lanes test (v2,v3,v1): the winding is kept */
    const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
    const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
    UVIdentity<8> mapUV;
    MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV);
    MoellerTrumboreIntersector1<8> intersector(ray,nullptr);

    /* selects the upper four lanes, i.e. the second triangle of each quad */
    const vbool8 flags(0,0,0,0,1,1,1,1);
    if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit)))
    {
      /* bring U,V of the second triangle into the quad's frame; the
         mapping depends on which vertex order was tested above */
#if !defined(EMBREE_BACKFACE_CULLING)
      /* (v2,v1,v3): U and V swap roles and the normal points the other way */
      const vfloat8 U = select(flags,hit.absDen - hit.V,hit.U);
      const vfloat8 V = select(flags,hit.absDen - hit.U,hit.V);
      hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
#else
      /* (v2,v3,v1): same mapping as the generic (non-AVX) path, no normal flip */
      const vfloat8 U = select(flags,hit.absDen - hit.U,hit.U);
      const vfloat8 V = select(flags,hit.absDen - hit.V,hit.V);
#endif
      hit.U = U;
      hit.V = V;

      /* correct U,V interpolation across the entire grid */
      interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
      if (unlikely(epilog(hit.valid,hit)))
        return true;
    }
    return false;
  }

  /*! Intersection entry point: runs the 8-wide test with the intersection epilog. */
  __forceinline bool intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                               const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
  }

  /*! Occlusion entry point: runs the 8-wide test with the occlusion epilog. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                              const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
  }
};
#endif
// ============================================================================================================================
// ============================================================================================================================
// ============================================================================================================================
/* ----------------------------- */
/* -- ray packet intersectors -- */
/* ----------------------------- */
/*! Rescales the U,V of a packet hit on quad i of a subgrid so that they
    address the whole grid. Only the lanes set in 'valid' are modified. */
template<int K>
__forceinline void interpolateUV(const vbool<K>& valid, MoellerTrumboreHitK<K,UVIdentity<K>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const unsigned int i)
{
  /* bit 0 of i is the quad's x offset inside the subgrid, the remaining bits its y offset */
  const unsigned int quadX = subgrid.x() + (unsigned int)(i % 2);
  const unsigned int quadY = subgrid.y() + (unsigned int)(i >>1);

  /* reciprocal of the number of quads per grid row / column */
  const float rcpQuadsX = rcp((float)(int)(g.resX-1));
  const float rcpQuadsY = rcp((float)(int)(g.resY-1));

  /* U,V are still scaled by absDen here, so the quad offset is scaled by it as well */
  const vfloat<K> gridU = (hit.U + vfloat<K>((float)quadX) * hit.absDen) * rcpQuadsX;
  const vfloat<K> gridV = (hit.V + vfloat<K>((float)quadY) * hit.absDen) * rcpQuadsY;
  hit.U = select(valid,gridU,hit.U);
  hit.V = select(valid,gridV,hit.V);
}
/*! Shared Moeller-Trumbore code of the subgrid packet intersectors.
    M is the width of the single-ray quad tests, K the width of the ray
    packet. The 'filter' parameter is not referenced inside this base. */
template<int M, int K, bool filter>
struct SubGridQuadMIntersectorKMoellerTrumboreBase
{
  /*! Nothing is precalculated; both arguments are ignored. */
  __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}

  /*! Intersects the rays enabled in 'valid' with quad i of the subgrid. The
      quad is tested as the two triangles (v0,v1,v3) and (v2,v3,v1); for each
      triangle that is hit by some ray the U,V are converted to grid
      coordinates and the epilog is invoked with the hit mask.
      Returns true if any ray hit either triangle. */
  template<typename Epilog>
  __forceinline bool intersectK(const vbool<K>& valid,
                                RayK<K>& ray,
                                const Vec3vf<K>& v0,
                                const Vec3vf<K>& v1,
                                const Vec3vf<K>& v2,
                                const Vec3vf<K>& v3,
                                const GridMesh::Grid &g,
                                const SubGrid &subgrid,
                                const unsigned int i,
                                const Epilog& epilog) const
  {
    UVIdentity<K> mapUV;
    MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV);
    MoellerTrumboreIntersectorK<M,K> intersector;

    /* first triangle */
    const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit);
    if (any(valid0))
    {
      interpolateUV(valid0,hit,g,subgrid,i);
      epilog(valid0,hit);
    }

    /* second triangle, the same hit record is reused */
    const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit);
    if (any(valid1))
    {
      /* mirror U,V so they are measured from v0 like those of the first triangle */
      hit.U = hit.absDen - hit.U;
      hit.V = hit.absDen - hit.V;
      interpolateUV(valid1,hit,g,subgrid,i);
      epilog(valid1,hit);
    }
    return any(valid0|valid1);
  }

  /*! Occlusion variant of intersectK(). Every ray that geometrically hits a
      triangle is removed from the set of pending rays; the return value of
      the epilog is not consulted for that. Returns true once no ray of
      'valid' is pending any more. */
  template<typename Epilog>
  __forceinline bool occludedK(const vbool<K>& valid,
                               RayK<K>& ray,
                               const Vec3vf<K>& v0,
                               const Vec3vf<K>& v1,
                               const Vec3vf<K>& v2,
                               const Vec3vf<K>& v3,
                               const GridMesh::Grid &g,
                               const SubGrid &subgrid,
                               const unsigned int i,
                               const Epilog& epilog) const
  {
    UVIdentity<K> mapUV;
    MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV);
    MoellerTrumboreIntersectorK<M,K> intersector;
    vbool<K> valid_final = valid;

    /* first triangle */
    const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit);
    if (any(valid0))
    {
      interpolateUV(valid0,hit,g,subgrid,i);
      epilog(valid0,hit);
      valid_final &= !valid0;
    }
    /* the second triangle can be skipped when nothing is pending */
    if (none(valid_final)) return true;

    /* second triangle */
    const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit);
    if (any(valid1))
    {
      hit.U = hit.absDen - hit.U;
      hit.V = hit.absDen - hit.V;
      interpolateUV(valid1,hit,g,subgrid,i);
      epilog(valid1,hit);
      valid_final &= !valid1;
    }
    return none(valid_final);
  }

  /*! Intersects ray k of the packet with M triangles given by their
      vertices and stores the result in 'hit'. The edges are formed as
      e1 = v0-v1 and e2 = v2-v0 before calling intersectEdge().
      Returns the result of intersectEdge(). */
  static __forceinline bool intersect1(RayK<K>& ray,
                                       size_t k,
                                       const Vec3vf<M>& v0,
                                       const Vec3vf<M>& v1,
                                       const Vec3vf<M>& v2,
                                       MoellerTrumboreHitM<M,UVIdentity<M>> &hit)
  {
    const Vec3vf<M> e1 = v0-v1;
    const Vec3vf<M> e2 = v2-v0;
    /* the intersector width follows M so that it matches the Vec3vf<M> and
       MoellerTrumboreHitM<M> arguments for every M, not only for M == 8 */
    MoellerTrumboreIntersectorK<M,K> intersector;
    UVIdentity<M> mapUV;
    return intersector.intersectEdge(ray,k,v0,e1,e2,mapUV,hit);
  }
};
/*! Subgrid quad intersector for ray packets (Moeller-Trumbore). Adds the
    queries for a single ray k of the packet on top of the shared base. */
template<int M, int K, bool filter>
struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>
{
  __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
    : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}

  /*! Intersects ray k with the M quads of the subgrid and lets the
      Intersect1KEpilogMU epilog process the hits of each triangle. */
  __forceinline void intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    MoellerTrumboreHitM<M,UVIdentity<M>> triHit(uvMap);
    Intersect1KEpilogMU<M,K,filter> commitHit(ray,k,context,subgrid.geomID(),subgrid.primID());
    MoellerTrumboreIntersectorK<M,K> isect;

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,k,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }

    /* triangle (v2,v3,v1): U,V are mirrored before the grid conversion */
    if (isect.intersect(ray,k,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.absDen - triHit.U;
      triHit.V = triHit.absDen - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }
  }

  /*! Occlusion test of ray k against the M quads of the subgrid. Returns
      true as soon as the Occluded1KEpilogMU epilog reports true for one of
      the two triangles, false otherwise. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    MoellerTrumboreHitM<M,UVIdentity<M>> triHit(uvMap);
    Occluded1KEpilogMU<M,K,filter> shadowTest(ray,k,context,subgrid.geomID(),subgrid.primID());
    MoellerTrumboreIntersectorK<M,K> isect;

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,k,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }

    /* triangle (v2,v3,v1) */
    if (isect.intersect(ray,k,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.absDen - triHit.U;
      triHit.V = triHit.absDen - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }
    return false;
  }
};
#if defined (__AVX__)
/*! Intersects 4 quads with 1 ray using AVX. Both triangles of every quad are
    tested in one 8-wide pass: the lanes selected by 'flags' carry the
    triangle that starts at v2, the other lanes the one that starts at v0. */
template<int K, bool filter>
struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>
{
  __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
    : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}

  /*! Intersects ray k with the four quads and hands the hit to 'epilog'.
      Returns true only if a triangle was hit and the epilog returned true. */
  template<typename Epilog>
  __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                                const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const
  {
    const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
    /* v1 and v3 are shared: the triangles are (v0,v1,v3) and (v2,v1,v3) */
    const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
    const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
    /* the second triangle is passed as (v2,v3,v1) */
    const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
    const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
    const vbool8 flags(0,0,0,0,1,1,1,1);
    UVIdentity<8> mapUV;
    MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV);
    if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit))
    {
      /* map the U,V of the v2 triangle back into the parametrization of the quad */
#if !defined(EMBREE_BACKFACE_CULLING)
      /* (v2,v1,v3): U and V trade places, and because this vertex order is
         the reverse of (v2,v3,v1) the geometry normal has to be negated */
      const vfloat8 U = select(flags,hit.absDen - hit.V,hit.U);
      const vfloat8 V = select(flags,hit.absDen - hit.U,hit.V);
      hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
#else
      /* (v2,v3,v1): U and V are only mirrored and the normal is left alone,
         exactly as the generic (non-AVX) intersect1 does for this vertex order */
      const vfloat8 U = select(flags,hit.absDen - hit.U,hit.U);
      const vfloat8 V = select(flags,hit.absDen - hit.V,hit.V);
#endif
      hit.U = U;
      hit.V = V;
      /* correct U,V interpolation across the entire grid */
      interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
      if (unlikely(epilog(hit.valid,hit)))
        return true;
    }
    return false;
  }

  /*! Hit query for ray k; forwards to the epilog-templated overload with an
      Intersect1KEpilogMU epilog. */
  __forceinline bool intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
  }

  /*! Occlusion query for ray k; forwards to the epilog-templated overload
      with an Occluded1KEpilogMU epilog. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
  }
};
#endif
}
}

View File

@@ -0,0 +1,367 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "subgrid.h"
#include "quad_intersector_moeller.h"
#include "quad_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! Rescales the U,V of a single-ray Pluecker hit on the quads of a subgrid so
    that they address the whole grid. stepX/stepY give, per lane, the offset
    of the lane's quad inside the subgrid. */
template<int M>
__forceinline void interpolateUV(PlueckerHitM<M,UVIdentity<M>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY)
{
  /* per-lane quad position inside the grid */
  const vint<M> subgridX((int)subgrid.x());
  const vint<M> subgridY((int)subgrid.y());
  const vint<M> quadX(subgridX + stepX);
  const vint<M> quadY(subgridY + stepY);

  /* reciprocal of the number of quads per grid row / column */
  const float rcpQuadsX = rcp((float)((int)g.resX-1));
  const float rcpQuadsY = rcp((float)((int)g.resY-1));

  /* U,V are still scaled by UVW here, so the quad offset is scaled by it as well */
  hit.U = (hit.U + vfloat<M>(quadX) * hit.UVW) * rcpQuadsX;
  hit.V = (hit.V + vfloat<M>(quadY) * hit.UVW) * rcpQuadsY;
}
template<int M, bool filter>
struct SubGridQuadMIntersector1Pluecker;
/*! Intersects the M quads of a subgrid with one ray using the Pluecker
    triangle test. Each quad is handled as the triangles (v0,v1,v3) and
    (v2,v3,v1). */
template<int M, bool filter>
struct SubGridQuadMIntersector1Pluecker
{
  __forceinline SubGridQuadMIntersector1Pluecker() {}

  /*! Nothing is precalculated; both arguments are ignored. */
  __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}

  /*! Hit query: runs the Intersect1EpilogMU epilog for every triangle set
      that reports a hit. */
  __forceinline void intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                               const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    PlueckerHitM<M,UVIdentity<M>> triHit(uvMap);
    PlueckerIntersector1<M> isect(ray,nullptr);
    Intersect1EpilogMU<M,filter> commitHit(ray,context,subgrid.geomID(),subgrid.primID());

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }

    /* triangle (v2,v3,v1): U,V are mirrored before the grid conversion */
    if (isect.intersect(ray,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }
  }

  /*! Occlusion query: returns true as soon as the Occluded1EpilogMU epilog
      reports true for one of the two triangle sets, false otherwise. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
                              const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    PlueckerHitM<M,UVIdentity<M>> triHit(uvMap);
    PlueckerIntersector1<M> isect(ray,nullptr);
    Occluded1EpilogMU<M,filter> shadowTest(ray,context,subgrid.geomID(),subgrid.primID());

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }

    /* triangle (v2,v3,v1) */
    if (isect.intersect(ray,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }
    return false;
  }
};
#if defined (__AVX__)
/*! Intersects 4 quads with 1 ray using AVX. Both triangles of every quad are
    tested in one 8-wide pass: the lanes selected by 'flags' carry the
    triangle that starts at v2, the other lanes the one that starts at v0. */
template<bool filter>
struct SubGridQuadMIntersector1Pluecker<4,filter>
{
  __forceinline SubGridQuadMIntersector1Pluecker() {}

  /*! Nothing is precalculated; both arguments are ignored. */
  __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}

  /*! Intersects the ray with the four quads and hands the hit to 'epilog'.
      Returns true only if a triangle was hit and the epilog returned true. */
  template<typename Epilog>
  __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
  {
    const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
    /* v1 and v3 are shared: the triangles are (v0,v1,v3) and (v2,v1,v3) */
    const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
    const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
    /* the second triangle is passed as (v2,v3,v1) */
    const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
    const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
    UVIdentity<8> mapUV;
    PlueckerHitM<8,UVIdentity<8>> hit(mapUV);
    PlueckerIntersector1<8> intersector(ray,nullptr);
    const vbool8 flags(0,0,0,0,1,1,1,1);
    if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit)))
    {
      /* map the U,V of the v2 triangle back into the parametrization of the quad */
#if !defined(EMBREE_BACKFACE_CULLING)
      /* (v2,v1,v3): U and V trade places, and because this vertex order is
         the reverse of (v2,v3,v1) the geometry normal has to be negated */
      const vfloat8 U = select(flags,hit.UVW - hit.V,hit.U);
      const vfloat8 V = select(flags,hit.UVW - hit.U,hit.V);
      hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
#else
      /* (v2,v3,v1): U and V are only mirrored and the normal is left alone,
         exactly as the generic (non-AVX) struct does for this vertex order */
      const vfloat8 U = select(flags,hit.UVW - hit.U,hit.U);
      const vfloat8 V = select(flags,hit.UVW - hit.V,hit.V);
#endif
      hit.U = U;
      hit.V = V;
      /* correct U,V interpolation across the entire grid */
      interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
      if (unlikely(epilog(hit.valid,hit)))
        return true;
    }
    return false;
  }

  /*! Hit query; forwards to the epilog-templated overload with an
      Intersect1EpilogMU epilog. */
  __forceinline bool intersect(RayHit& ray, RayQueryContext* context,
                               const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                               const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
  }

  /*! Occlusion query; forwards to the epilog-templated overload with an
      Occluded1EpilogMU epilog. */
  __forceinline bool occluded(Ray& ray, RayQueryContext* context,
                              const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                              const GridMesh::Grid &g, const SubGrid& subgrid) const
  {
    return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
  }
};
#endif
/* ----------------------------- */
/* -- ray packet intersectors -- */
/* ----------------------------- */
/*! Rescales the U,V of a packet Pluecker hit on quad i of a subgrid so that
    they address the whole grid. Only the lanes set in 'valid' are modified. */
template<int K>
__forceinline void interpolateUV(const vbool<K>& valid, PlueckerHitK<K,UVIdentity<K>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const unsigned int i)
{
  /* bit 0 of i is the quad's x offset inside the subgrid, the remaining bits its y offset */
  const unsigned int quadX = subgrid.x() + (unsigned int)(i % 2);
  const unsigned int quadY = subgrid.y() + (unsigned int)(i >>1);

  /* reciprocal of the number of quads per grid row / column */
  const float rcpQuadsX = rcp((float)(int)(g.resX-1));
  const float rcpQuadsY = rcp((float)(int)(g.resY-1));

  /* U,V are still scaled by UVW here, so the quad offset is scaled by it as well */
  const vfloat<K> gridU = (hit.U + vfloat<K>((float)quadX) * hit.UVW) * rcpQuadsX;
  const vfloat<K> gridV = (hit.V + vfloat<K>((float)quadY) * hit.UVW) * rcpQuadsY;
  hit.U = select(valid,gridU,hit.U);
  hit.V = select(valid,gridV,hit.V);
}
/*! Shared Pluecker code of the subgrid packet intersectors. K is the width
    of the ray packet; the 'filter' parameter is not referenced inside this
    base. */
template<int M, int K, bool filter>
struct SubGridQuadMIntersectorKPlueckerBase
{
  /*! Nothing is precalculated; both arguments are ignored. */
  __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}

  /*! Intersects the rays enabled in 'valid' with quad i of the subgrid,
      tested as the triangles (v0,v1,v3) and (v2,v3,v1). For every triangle
      that is hit by some ray the U,V are converted to grid coordinates and
      the epilog is invoked. Returns true if any ray hit either triangle. */
  template<typename Epilog>
  __forceinline bool intersectK(const vbool<K>& valid,
                                RayK<K>& ray,
                                const Vec3vf<K>& v0,
                                const Vec3vf<K>& v1,
                                const Vec3vf<K>& v2,
                                const Vec3vf<K>& v3,
                                const GridMesh::Grid &g,
                                const SubGrid &subgrid,
                                const unsigned int i,
                                const Epilog& epilog) const
  {
    UVIdentity<K> uvMap;
    PlueckerHitK<K,UVIdentity<K>> triHit(uvMap);
    PlueckerIntersectorK<M,K> isect;

    /* first triangle */
    const vbool<K> hitFirst = isect.intersectK(valid,ray,v0,v1,v3,uvMap,triHit);
    if (any(hitFirst))
    {
      interpolateUV(hitFirst,triHit,g,subgrid,i);
      epilog(hitFirst,triHit);
    }

    /* second triangle, the same hit record is reused */
    const vbool<K> hitSecond = isect.intersectK(valid,ray,v2,v3,v1,uvMap,triHit);
    if (any(hitSecond))
    {
      /* mirror U,V so they are measured from v0 like those of the first triangle */
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV(hitSecond,triHit,g,subgrid,i);
      epilog(hitSecond,triHit);
    }
    return any(hitFirst|hitSecond);
  }

  /*! Occlusion variant of intersectK(). Every ray that geometrically hits a
      triangle is removed from the set of pending rays; the return value of
      the epilog is not consulted for that. Returns true once no ray of
      'valid' is pending any more. */
  template<typename Epilog>
  __forceinline bool occludedK(const vbool<K>& valid,
                               RayK<K>& ray,
                               const Vec3vf<K>& v0,
                               const Vec3vf<K>& v1,
                               const Vec3vf<K>& v2,
                               const Vec3vf<K>& v3,
                               const GridMesh::Grid &g,
                               const SubGrid &subgrid,
                               const unsigned int i,
                               const Epilog& epilog) const
  {
    UVIdentity<K> uvMap;
    PlueckerHitK<K,UVIdentity<K>> triHit(uvMap);
    PlueckerIntersectorK<M,K> isect;
    vbool<K> pending = valid;

    /* first triangle */
    const vbool<K> hitFirst = isect.intersectK(valid,ray,v0,v1,v3,uvMap,triHit);
    if (any(hitFirst))
    {
      interpolateUV(hitFirst,triHit,g,subgrid,i);
      epilog(hitFirst,triHit);
      pending &= !hitFirst;
    }
    /* the second triangle can be skipped when nothing is pending */
    if (none(pending))
      return true;

    /* second triangle */
    const vbool<K> hitSecond = isect.intersectK(valid,ray,v2,v3,v1,uvMap,triHit);
    if (any(hitSecond))
    {
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV(hitSecond,triHit,g,subgrid,i);
      epilog(hitSecond,triHit);
      pending &= !hitSecond;
    }
    return none(pending);
  }
};
/*! Subgrid quad intersector for ray packets (Pluecker). Adds the queries for
    a single ray k of the packet on top of the shared base. */
template<int M, int K, bool filter>
struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter>
{
  __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
    : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}

  /*! Intersects ray k with the M quads of the subgrid and lets the
      Intersect1KEpilogMU epilog process the hits of each triangle. */
  __forceinline void intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    PlueckerHitM<M,UVIdentity<M>> triHit(uvMap);
    Intersect1KEpilogMU<M,K,filter> commitHit(ray,k,context,subgrid.geomID(),subgrid.primID());
    PlueckerIntersectorK<M,K> isect;

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,k,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }

    /* triangle (v2,v3,v1): U,V are mirrored before the grid conversion */
    if (isect.intersect(ray,k,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      commitHit(triHit.valid,triHit);
    }
  }

  /*! Occlusion test of ray k against the M quads of the subgrid. Returns
      true as soon as the Occluded1KEpilogMU epilog reports true for one of
      the two triangles, false otherwise. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    /* per-lane x and y offsets of the quads inside the subgrid */
    const vint<M> quadStepX(0,1,1,0);
    const vint<M> quadStepY(0,0,1,1);

    UVIdentity<M> uvMap;
    PlueckerHitM<M,UVIdentity<M>> triHit(uvMap);
    Occluded1KEpilogMU<M,K,filter> shadowTest(ray,k,context,subgrid.geomID(),subgrid.primID());
    PlueckerIntersectorK<M,K> isect;

    /* triangle (v0,v1,v3) */
    if (isect.intersect(ray,k,v0,v1,v3,uvMap,triHit))
    {
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }

    /* triangle (v2,v3,v1) */
    if (isect.intersect(ray,k,v2,v3,v1,uvMap,triHit))
    {
      triHit.U = triHit.UVW - triHit.U;
      triHit.V = triHit.UVW - triHit.V;
      interpolateUV<M>(triHit,g,subgrid,quadStepX,quadStepY);
      if (shadowTest(triHit.valid,triHit))
        return true;
    }
    return false;
  }
};
#if defined (__AVX__)
/*! Intersects 4 quads with 1 ray using AVX. Both triangles of every quad are
    tested in one 8-wide pass: the lanes selected by 'flags' carry the
    triangle that starts at v2, the other lanes the one that starts at v0. */
template<int K, bool filter>
struct SubGridQuadMIntersectorKPluecker<4,K,filter> : public SubGridQuadMIntersectorKPlueckerBase<4,K,filter>
{
  __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
    : SubGridQuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}

  /*! Intersects ray k with the four quads and hands the hit to 'epilog'.
      Returns true only if a triangle was hit and the epilog returned true. */
  template<typename Epilog>
  __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
                                const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const
  {
    const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
#if !defined(EMBREE_BACKFACE_CULLING)
    /* v1 and v3 are shared: the triangles are (v0,v1,v3) and (v2,v1,v3) */
    const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
    const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
#else
    /* the second triangle is passed as (v2,v3,v1) */
    const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
    const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
#endif
    UVIdentity<8> mapUV;
    PlueckerHitM<8,UVIdentity<8>> hit(mapUV);
    PlueckerIntersectorK<8,K> intersector;
    const vbool8 flags(0,0,0,0,1,1,1,1);
    if (unlikely(intersector.intersect(ray,k,vtx0,vtx1,vtx2,mapUV,hit)))
    {
      /* map the U,V of the v2 triangle back into the parametrization of the quad */
#if !defined(EMBREE_BACKFACE_CULLING)
      /* (v2,v1,v3): U and V trade places, and because this vertex order is
         the reverse of (v2,v3,v1) the geometry normal has to be negated */
      const vfloat8 U = select(flags,hit.UVW - hit.V,hit.U);
      const vfloat8 V = select(flags,hit.UVW - hit.U,hit.V);
      hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
#else
      /* (v2,v3,v1): U and V are only mirrored and the normal is left alone,
         exactly as the generic (non-AVX) intersect1 does for this vertex order */
      const vfloat8 U = select(flags,hit.UVW - hit.U,hit.U);
      const vfloat8 V = select(flags,hit.UVW - hit.V,hit.V);
#endif
      hit.U = U;
      hit.V = V;
      /* correct U,V interpolation across the entire grid */
      interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
      if (unlikely(epilog(hit.valid,hit)))
        return true;
    }
    return false;
  }

  /*! Hit query for ray k; forwards to the epilog-templated overload with an
      Intersect1KEpilogMU epilog. */
  __forceinline bool intersect1(RayHitK<K>& ray, size_t k, RayQueryContext* context,
                                const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
  }

  /*! Occlusion query for ray k; forwards to the epilog-templated overload
      with an Occluded1KEpilogMU epilog. */
  __forceinline bool occluded1(RayK<K>& ray, size_t k, RayQueryContext* context,
                               const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
  {
    return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
  }
};
#endif
}
}

View File

@@ -0,0 +1,236 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "subgrid_intersector.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for SubGridMBQBVHN<N> leaves. The subgrid vertices
    are gathered for the time of the ray (gatherMB) and then handed to the
    Pluecker quad intersector. */
template<int N, bool filter>
struct SubGridMBIntersector1Pluecker
{
  typedef SubGridMBQBVHN<N> Primitive;
  typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;

  /*! Intersects the ray with a single subgrid. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* gridMesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &grid = gridMesh->grid(subgrid.primID());

    /* time segment index and the fractional time written by timeSegment() */
    float segmentTime;
    const int segment = gridMesh->timeSegment(ray.time(), segmentTime);

    Vec3vf4 q0,q1,q2,q3;
    subgrid.gatherMB(q0,q1,q2,q3,context->scene,segment,segmentTime);
    pre.intersect(ray,context,q0,q1,q2,q3,grid,subgrid);
  }

  /*! Tests whether the ray is occluded by a single subgrid. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* gridMesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &grid = gridMesh->grid(subgrid.primID());

    /* time segment index and the fractional time written by timeSegment() */
    float segmentTime;
    const int segment = gridMesh->timeSegment(ray.time(), segmentTime);

    Vec3vf4 q0,q1,q2,q3;
    subgrid.gatherMB(q0,q1,q2,q3,context->scene,segment,segmentTime);
    return pre.occluded(ray,context,q0,q1,q2,q3,grid,subgrid);
  }

  /*! Point query on a single subgrid, delegated to PrimitivePointQuery1. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid);
  }

  /*! Intersects the ray with 'num' leaf primitives: the quantized node of
      every primitive is tested first and only the subgrids whose box is hit
      no farther than ray.tfar are intersected. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> nodeTest;
    for (size_t p=0; p<num; p++)
    {
      vfloat<N> boxDist;
      const float time = prim[p].adjustTime(ray.time());
      assert(time <= 1.0f);
      size_t boxMask = nodeTest.intersect(&prim[p].qnode,tray,time,boxDist);
#if defined(__AVX__)
      STAT3(normal.trav_hit_boxes[popcnt(boxMask)],1,1,1);
#endif
      while (boxMask != 0)
      {
        const size_t child = bscf(boxMask);
        /* skip boxes that start behind the current closest hit */
        if (unlikely(boxDist[child] > ray.tfar))
          continue;
        intersect(pre,ray,context,prim[p].subgrid(child));
      }
    }
  }

  /*! Occlusion test against 'num' leaf primitives; returns true at the
      first subgrid that reports occlusion. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> nodeTest;
    for (size_t p=0; p<num; p++)
    {
      const float time = prim[p].adjustTime(ray.time());
      assert(time <= 1.0f);
      vfloat<N> boxDist;
      size_t boxMask = nodeTest.intersect(&prim[p].qnode,tray,time,boxDist);
      while (boxMask != 0)
      {
        const size_t child = bscf(boxMask);
        if (occluded(pre,ray,context,prim[p].subgrid(child)))
          return true;
      }
    }
    return false;
  }

  /*! Point queries over leaf primitives are not implemented: asserts in
      debug builds and returns false. */
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
  {
    assert(false && "not implemented");
    return false;
  }
};
/*! Ray-packet intersector for SubGridMBQBVHN<N> leaves. Subgrids are always
    intersected one ray at a time: the vertices are gathered for the time of
    that ray (gatherMB) and handed to the Pluecker quad intersector. */
template<int N, int K, bool filter>
struct SubGridMBIntersectorKPluecker
{
  typedef SubGridMBQBVHN<N> Primitive;
  typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;

  /*! Intersects every ray enabled in valid_i with the subgrid. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    size_t lanes = movemask(valid_i);
    while (lanes)
    {
      const size_t lane = bscf(lanes);
      intersect(pre,ray,lane,context,subgrid);
    }
  }

  /*! Occlusion test of every ray enabled in valid_i against the subgrid.
      The returned mask is the negation of the set of enabled rays that were
      not found occluded. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const SubGrid& subgrid)
  {
    vbool<K> unoccluded = valid_i;
    size_t lanes = movemask(valid_i);
    while (lanes)
    {
      const size_t lane = bscf(lanes);
      if (occluded(pre,ray,lane,context,subgrid))
        clear(unoccluded,lane);
    }
    return !unoccluded;
  }

  /*! Intersects ray k of the packet with the subgrid. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(normal.trav_prims,1,1,1);
    const GridMesh* gridMesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &grid = gridMesh->grid(subgrid.primID());

    /* time segments are computed for the whole packet, only lane k is used */
    vfloat<K> segmentTime;
    const vint<K> segment = gridMesh->timeSegment<K>(ray.time(), segmentTime);

    Vec3vf4 q0,q1,q2,q3;
    subgrid.gatherMB(q0,q1,q2,q3,context->scene,segment[k],segmentTime[k]);
    pre.intersect1(ray,k,context,q0,q1,q2,q3,grid,subgrid);
  }

  /*! Tests whether ray k of the packet is occluded by the subgrid. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const SubGrid& subgrid)
  {
    STAT3(shadow.trav_prims,1,1,1);
    const GridMesh* gridMesh = context->scene->get<GridMesh>(subgrid.geomID());
    const GridMesh::Grid &grid = gridMesh->grid(subgrid.primID());

    /* time segments are computed for the whole packet, only lane k is used */
    vfloat<K> segmentTime;
    const vint<K> segment = gridMesh->timeSegment<K>(ray.time(), segmentTime);

    Vec3vf4 q0,q1,q2,q3;
    subgrid.gatherMB(q0,q1,q2,q3,context->scene,segment[k],segmentTime[k]);
    return pre.occluded1(ray,k,context,q0,q1,q2,q3,grid,subgrid);
  }

  /*! Packet traversal of 'num' leaf primitives: a subgrid is intersected
      (with all rays of 'valid') when at least one valid ray hits its
      quantized box. */
  template<bool robust>
  static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> nodeTest;
    for (size_t p=0; p<num; p++)
    {
      size_t children = movemask(prim[p].qnode.validMask());
      const vfloat<K> time = prim[p].template adjustTime<K>(ray.time());
      vfloat<K> boxDist;
      while (children)
      {
        const size_t child = bscf(children);
        if (none(valid & nodeTest.intersectK(&prim[p].qnode,child,tray,time,boxDist)))
          continue;
        intersect(valid,pre,ray,context,prim[p].subgrid(child));
      }
    }
  }

  /*! Packet occlusion traversal of 'num' leaf primitives. Rays found
      occluded are removed from the active set; the result is the negation of
      the set of rays that are still active at the end. */
  template<bool robust>
  static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersectorK<N,K,robust> nodeTest;
    vbool<K> active = valid;
    for (size_t p=0; p<num; p++)
    {
      size_t children = movemask(prim[p].qnode.validMask());
      const vfloat<K> time = prim[p].template adjustTime<K>(ray.time());
      vfloat<K> boxDist;
      while (children)
      {
        const size_t child = bscf(children);
        if (none(active & nodeTest.intersectK(&prim[p].qnode,child,tray,time,boxDist)))
          continue;
        active &= !occluded(active,pre,ray,context,prim[p].subgrid(child));
        /* leaves only the loop over the children of this primitive */
        if (none(active))
          break;
      }
    }
    return !active;
  }

  /*! Single-ray traversal (ray k of the packet) of 'num' leaf primitives;
      subgrids whose box is hit farther away than ray.tfar[k] are skipped. */
  template<bool robust>
  static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> nodeTest;
    for (size_t p=0; p<num; p++)
    {
      vfloat<N> boxDist;
      const float time = prim[p].adjustTime(ray.time()[k]);
      assert(time <= 1.0f);
      size_t boxMask = nodeTest.intersect(&prim[p].qnode,tray,time,boxDist);
      while (boxMask != 0)
      {
        const size_t child = bscf(boxMask);
        if (unlikely(boxDist[child] > ray.tfar[k]))
          continue;
        intersect(pre,ray,k,context,prim[p].subgrid(child));
      }
    }
  }

  /*! Single-ray occlusion traversal (ray k of the packet); returns true at
      the first subgrid that reports occlusion. */
  template<bool robust>
  static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node)
  {
    BVHNQuantizedBaseNodeIntersector1<N,robust> nodeTest;
    for (size_t p=0; p<num; p++)
    {
      vfloat<N> boxDist;
      const float time = prim[p].adjustTime(ray.time()[k]);
      assert(time <= 1.0f);
      size_t boxMask = nodeTest.intersect(&prim[p].qnode,tray,time,boxDist);
      while (boxMask != 0)
      {
        const size_t child = bscf(boxMask);
        if (occluded(pre,ray,k,context,prim[p].subgrid(child)))
          return true;
      }
    }
    return false;
  }
};
}
}

View File

@@ -0,0 +1,162 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/* Block of up to M triangles in precalculated form. For every triangle
   the base vertex v0 and the two edge vectors e1 = v0-v1 and e2 = v2-v0
   are stored together with its geometry and primitive ID. Unused slots
   carry the geometry ID -1. */
template<int M>
struct TriangleM
{
public:
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* Capacity of one block */
  static __forceinline size_t max_size() { return M; }

  /* Number of blocks needed to store N primitives (rounded up) */
  static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }

public:

  /* Leaves all members uninitialized */
  __forceinline TriangleM() {}

  /* Builds the block from the three vertices of each triangle and the IDs */
  __forceinline TriangleM(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
    : v0(v0), e1(v0-v1), e2(v2-v0), geomIDs(geomIDs), primIDs(primIDs) {}

  /* Mask of the slots that hold a triangle (geometry ID differs from -1) */
  __forceinline vbool<M> valid() const
  {
    const vuint<M> unusedID(-1);
    return geomIDs != unusedID;
  }

  /* Tells whether slot i holds a triangle */
  __forceinline bool valid(const size_t i) const
  {
    assert(i<M);
    return geomIDs[i] != -1;
  }

  /* Index of the first unused slot, i.e. the number of leading valid triangles */
  __forceinline size_t size() const { return bsf(~movemask(valid())); }

  /* Access to the geometry IDs */
  __forceinline vuint<M>& geomID() { return geomIDs; }
  __forceinline const vuint<M>& geomID() const { return geomIDs; }
  __forceinline unsigned int geomID(const size_t i) const
  {
    assert(i<M);
    return geomIDs[i];
  }

  /* Access to the primitive IDs */
  __forceinline vuint<M>& primID() { return primIDs; }
  __forceinline const vuint<M>& primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const
  {
    assert(i<M);
    return primIDs[i];
  }

  /* Bounding box over all valid triangles of the block */
  __forceinline BBox3fa bounds() const
  {
    /* recover the three vertices from base vertex and edges */
    Vec3vf<M> a = v0;
    Vec3vf<M> b = v0-e1;
    Vec3vf<M> c = v0+e2;
    Vec3vf<M> lo = min(a,b,c);
    Vec3vf<M> hi = max(a,b,c);

    /* unused slots must not contribute: give them an empty interval */
    vbool<M> active = valid();
    lo.x = select(active,lo.x,vfloat<M>(pos_inf));
    lo.y = select(active,lo.y,vfloat<M>(pos_inf));
    lo.z = select(active,lo.z,vfloat<M>(pos_inf));
    hi.x = select(active,hi.x,vfloat<M>(neg_inf));
    hi.y = select(active,hi.y,vfloat<M>(neg_inf));
    hi.z = select(active,hi.z,vfloat<M>(neg_inf));

    return BBox3fa(Vec3fa(reduce_min(lo.x),reduce_min(lo.y),reduce_min(lo.z)),
                   Vec3fa(reduce_max(hi.x),reduce_max(hi.y),reduce_max(hi.z)));
  }

  /* Copies src to dst member by member using the store_nt functions */
  __forceinline static void store_nt(TriangleM* dst, const TriangleM& src)
  {
    /* base vertex */
    vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
    vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
    vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
    /* first edge */
    vfloat<M>::store_nt(&dst->e1.x,src.e1.x);
    vfloat<M>::store_nt(&dst->e1.y,src.e1.y);
    vfloat<M>::store_nt(&dst->e1.z,src.e1.z);
    /* second edge */
    vfloat<M>::store_nt(&dst->e2.x,src.e2.x);
    vfloat<M>::store_nt(&dst->e2.y,src.e2.y);
    vfloat<M>::store_nt(&dst->e2.z,src.e2.z);
    /* IDs */
    vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
    vuint<M>::store_nt(&dst->primIDs,src.primIDs);
  }

  /* Fills the block with up to M triangles taken from prims[begin..end);
     'begin' is advanced past the consumed primitive references */
  __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
  {
    vuint<M> geomLanes = -1;
    vuint<M> primLanes = -1;
    Vec3vf<M> vtx0 = zero;
    Vec3vf<M> vtx1 = zero;
    Vec3vf<M> vtx2 = zero;

    size_t slot = 0;
    while (slot<M && begin<end)
    {
      const PrimRef& ref = prims[begin];
      const unsigned refGeomID = ref.geomID();
      const unsigned refPrimID = ref.primID();
      const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(refGeomID);
      const TriangleMesh::Triangle& tri = mesh->triangle(refPrimID);
      const Vec3fa& p0 = mesh->vertex(tri.v[0]);
      const Vec3fa& p1 = mesh->vertex(tri.v[1]);
      const Vec3fa& p2 = mesh->vertex(tri.v[2]);

      geomLanes[slot] = refGeomID;
      primLanes[slot] = refPrimID;
      vtx0.x[slot] = p0.x; vtx0.y[slot] = p0.y; vtx0.z[slot] = p0.z;
      vtx1.x[slot] = p1.x; vtx1.y[slot] = p1.y; vtx1.z[slot] = p1.z;
      vtx2.x[slot] = p2.x; vtx2.y[slot] = p2.y; vtx2.z[slot] = p2.z;

      slot++;
      begin++;
    }
    TriangleM::store_nt(this,TriangleM(vtx0,vtx1,vtx2,geomLanes,primLanes));
  }

  /* Re-reads the vertices of all stored triangles from 'mesh', rewrites the
     block and returns the bounding box of the re-read triangles */
  __forceinline BBox3fa update(TriangleMesh* mesh)
  {
    BBox3fa box = empty;
    vuint<M> geomLanes = -1;
    vuint<M> primLanes = -1;
    Vec3vf<M> vtx0 = zero;
    Vec3vf<M> vtx1 = zero;
    Vec3vf<M> vtx2 = zero;

    for (size_t slot=0; slot<M; slot++)
    {
      /* the first unused slot ends the block */
      if (unlikely(geomID(slot) == -1))
        break;

      const unsigned slotGeomID = geomID(slot);
      const unsigned slotPrimID = primID(slot);
      const TriangleMesh::Triangle& tri = mesh->triangle(slotPrimID);
      const Vec3fa p0 = mesh->vertex(tri.v[0]);
      const Vec3fa p1 = mesh->vertex(tri.v[1]);
      const Vec3fa p2 = mesh->vertex(tri.v[2]);
      box.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));

      geomLanes[slot] = slotGeomID;
      primLanes[slot] = slotPrimID;
      vtx0.x[slot] = p0.x; vtx0.y[slot] = p0.y; vtx0.z[slot] = p0.z;
      vtx1.x[slot] = p1.x; vtx1.y[slot] = p1.y; vtx1.z[slot] = p1.z;
      vtx2.x[slot] = p2.x; vtx2.y[slot] = p2.y; vtx2.z[slot] = p2.z;
    }
    TriangleM::store_nt(this,TriangleM(vtx0,vtx1,vtx2,geomLanes,primLanes));
    return box;
  }

public:
  Vec3vf<M> v0;      // base vertex of the triangles
  Vec3vf<M> e1;      // 1st edge of the triangles (v0-v1)
  Vec3vf<M> e2;      // 2nd edge of the triangles (v2-v0)
private:
  vuint<M> geomIDs;  // geometry IDs
  vuint<M> primIDs;  // primitive IDs
};
/* out-of-class definition of the static data member `type` of TriangleM<M> */
template<int M>
typename TriangleM<M>::Type TriangleM<M>::type;
/* shorthand for the block holding 4 triangles */
typedef TriangleM<4> Triangle4;
}

View File

@@ -0,0 +1,96 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "triangle_intersector_moeller.h"
namespace embree
{
namespace isa
{
/*! Single-ray intersector for a block of M triangles, forwarding to the
 *  Moeller-Trumbore edge test with an identity UV mapping. */
template<int M, bool filter>
struct TriangleMIntersector1Moeller
{
  using Primitive = TriangleM<M>;
  using Precalculations = MoellerTrumboreIntersector1<M>;

  /*! Intersects the ray with the M triangles; the epilog updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const TriangleM<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);
    auto mapUV = UVIdentity<M>();
    const Intersect1EpilogM<M,filter> epilog(ray,context,tri.geomID(),tri.primID());
    pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,mapUV,epilog);
  }

  /*! Returns whether the ray is occluded by one of the M triangles. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const TriangleM<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);
    auto mapUV = UVIdentity<M>();
    const Occluded1EpilogM<M,filter> epilog(ray,context,tri.geomID(),tri.primID());
    return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,mapUV,epilog);
  }

  /*! Forwards a point query to the generic per-primitive implementation. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Packet intersector: tests K rays against a block of M triangles with
 *  the Moeller-Trumbore edge test and an identity UV mapping. */
template<int M, int K, bool filter>
struct TriangleMIntersectorKMoeller
{
  using Primitive = TriangleM<M>;
  using Precalculations = MoellerTrumboreIntersectorK<M,K>;

  /*! Intersects the K rays with each valid triangle of the block in turn. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const TriangleM<M>& tri)
  {
    STAT_USER(0,TriangleM<M>::max_size());
    /* the loop ends at the first invalid triangle slot */
    for (size_t i=0; i<TriangleM<M>::max_size() && tri.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      /* replicate triangle i across all K ray lanes */
      const Vec3vf<K> base  = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> edge1 = broadcast<vfloat<K>>(tri.e1,i);
      const Vec3vf<K> edge2 = broadcast<vfloat<K>>(tri.e2,i);
      pre.intersectEdgeK(valid_i,ray,base,edge1,edge2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
    }
  }

  /*! Returns, per ray, whether it is occluded by any of the M triangles. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const TriangleM<M>& tri)
  {
    /* rays still to be tested; the epilog receives this mask by reference */
    vbool<K> pending = valid_i;
    for (size_t i=0; i<TriangleM<M>::max_size() && tri.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(pending),K);
      const Vec3vf<K> base  = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> edge1 = broadcast<vfloat<K>>(tri.e1,i);
      const Vec3vf<K> edge2 = broadcast<vfloat<K>>(tri.e2,i);
      pre.intersectEdgeK(pending,ray,base,edge1,edge2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(pending,ray,context,tri.geomID(),tri.primID(),i));
      if (none(pending)) break;
    }
    return !pending;
  }

  /*! Intersects ray k of the packet with the M triangles and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const TriangleM<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);
    auto mapUV = UVIdentity<M>();
    const Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,tri.geomID(),tri.primID());
    pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,mapUV,epilog);
  }

  /*! Returns whether ray k of the packet is occluded by one of the M triangles. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const TriangleM<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);
    auto mapUV = UVIdentity<M>();
    const Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,tri.geomID(),tri.primID());
    return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,mapUV,epilog);
  }
};
}
}

View File

@@ -0,0 +1,525 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "intersector_epilog.h"
/*! This intersector implements a modified version of the Moeller
* Trumbore intersector from the paper "Fast, Minimum Storage
* Ray-Triangle Intersection". In contrast to the paper we
* precalculate some factors and factor the calculations differently
* to allow precalculating the cross product e1 x e2. The resulting
* algorithm is similar to the fastest one of the paper "Optimizing
* Ray-Triangle Intersection via Automated Search". */
namespace embree
{
namespace isa
{
/*! Hit record for one ray tested against M triangles. The intersector
 *  stores U, V and T still scaled by absDen; finalize() performs the
 *  division and applies the UV mapper. */
template<int M, typename UVMapper>
struct MoellerTrumboreHitM
{
  /*! Creates a record that only carries the UV mapper. */
  __forceinline MoellerTrumboreHitM(const UVMapper& mapUV) : mapUV(mapUV) {}

  /*! Creates a record from the raw results of the intersection test. */
  __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng, const UVMapper& mapUV)
    : U(U), V(V), T(T), absDen(absDen), mapUV(mapUV), valid(valid), vNg(Ng) {}

  /*! Converts the scaled values into u, v and t and runs the UV mapper. */
  __forceinline void finalize()
  {
    const vfloat<M> invAbsDen = rcp(absDen);
    vu = U * invAbsDen;
    vv = V * invAbsDen;
    vt = T * invAbsDen;
    mapUV(vu,vv,vNg);
  }

  /* vector accessors */
  __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); }
  __forceinline vfloat<M> t () const { return vt; }
  __forceinline Vec3vf<M> Ng() const { return vNg; }

  /* per-lane accessors */
  __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }

public:
  /* raw values, still multiplied by absDen */
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> T;
  vfloat<M> absDen;
  UVMapper mapUV;

public:
  /* lane mask and the values filled in by finalize() */
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
  Vec3vf<M> vNg;
};
/*! Single-ray Moeller-Trumbore intersector that tests M triangles at once.
 *  With early_out == true the core routine returns false as soon as no
 *  lane is valid any more; with early_out == false it always writes the
 *  hit record and returns whether any lane is valid. */
template<int M, bool early_out = true>
struct MoellerTrumboreIntersector1
{
__forceinline MoellerTrumboreIntersector1() {}
/* neither argument is used by this constructor */
__forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {}
/*! Core test. Takes the base vertex, the two edges and the normal tri_Ng
 *  (the callers in this struct pass cross(tri_e2,tri_e1)). On success the
 *  hit record is constructed in place with U, V and T still scaled by
 *  absDen; the division happens later in the hit's finalize(). */
template<typename UVMapper>
__forceinline bool intersect(const vbool<M>& valid0,
Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const Vec3vf<M>& tri_Ng,
const UVMapper& mapUV,
MoellerTrumboreHitM<M,UVMapper>& hit) const
{
/* calculate denominator */
vbool<M> valid = valid0;
const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
const Vec3vf<M> R = cross(C,D);
const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
const vfloat<M> absDen = abs(den);
const vfloat<M> sgnDen = signmsk(den);
/* perform edge tests */
/* the bits of sgnDen are XOR-ed in so U and V can be compared against absDen */
const vfloat<M> U = asFloat(asInt(dot(R,Vec3vf<M>(tri_e2))) ^ asInt(sgnDen));
const vfloat<M> V = asFloat(asInt(dot(R,Vec3vf<M>(tri_e1))) ^ asInt(sgnDen));
/* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
valid &= (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#else
valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#endif
if (likely(early_out && none(valid))) return false;
/* perform depth test */
/* tnear and tfar are multiplied by absDen instead of dividing T */
const vfloat<M> T = asFloat(asInt(dot(Vec3vf<M>(tri_Ng),C)) ^ asInt(sgnDen));
valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar));
if (likely(early_out && none(valid))) return false;
/* update hit information */
new (&hit) MoellerTrumboreHitM<M,UVMapper>(valid,U,V,T,absDen,tri_Ng,mapUV);
return early_out || any(valid);
}
/*! Edge-based test with a caller-supplied lane mask; computes the normal
 *  from the two edges and forwards to the core test. */
template<typename UVMapper>
__forceinline bool intersectEdge(const vbool<M>& valid,
Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const UVMapper& mapUV,
MoellerTrumboreHitM<M,UVMapper>& hit) const
{
const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1);
return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit);
}
/*! Edge-based test that starts with all M lanes enabled. */
template<typename UVMapper>
__forceinline bool intersectEdge(Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const UVMapper& mapUV,
MoellerTrumboreHitM<M,UVMapper>& hit) const
{
vbool<M> valid = true;
const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1);
return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit);
}
/*! Vertex-based test: derives e1 = v0-v1 and e2 = v2-v0, then runs the
 *  edge-based test with all lanes enabled. */
template<typename UVMapper>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
MoellerTrumboreHitM<M,UVMapper>& hit) const
{
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v2-v0;
return intersectEdge(ray,v0,e1,e2,mapUV,hit);
}
/*! Vertex-based test with a caller-supplied lane mask. */
template<typename UVMapper>
__forceinline bool intersect(const vbool<M>& valid,
Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
MoellerTrumboreHitM<M,UVMapper>& hit) const
{
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v2-v0;
return intersectEdge(valid,ray,v0,e1,e2,mapUV,hit);
}
/*! Edge-based test followed by the epilog; returns the epilog's result,
 *  or false when the test itself reports no hit. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersectEdge(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& e1,
const Vec3vf<M>& e2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
MoellerTrumboreHitM<M,UVMapper> hit(mapUV);
if (likely(intersectEdge(ray,v0,e1,e2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test followed by the epilog. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
MoellerTrumboreHitM<M,UVMapper> hit(mapUV);
if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test followed by the epilog, using UVIdentity<M> as mapper. */
template<typename Epilog>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const Epilog& epilog) const
{
auto mapUV = UVIdentity<M>();
MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV);
if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test with a caller-supplied lane mask, followed by the epilog. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersect(const vbool<M>& valid,
Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
MoellerTrumboreHitM<M,UVMapper> hit(mapUV);
if (likely(intersect(valid,ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
};
/*! Hit record for K rays tested against a single triangle. U, V and T
 *  are kept scaled by absDen; calling the record performs the division
 *  and applies the UV mapper. */
template<int K, typename UVMapper>
struct MoellerTrumboreHitK
{
  /*! Creates a record that only references the UV mapper. */
  __forceinline MoellerTrumboreHitK(const UVMapper& mapUV) : mapUV(mapUV) {}

  /*! Creates a record from the raw results of the intersection test. */
  __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng, const UVMapper& mapUV)
    : U(U), V(V), T(T), absDen(absDen), Ng(Ng), mapUV(mapUV) {}

  /*! Returns the tuple (u, v, t, Ng) after normalization and UV mapping. */
  __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
  {
    const vfloat<K> invAbsDen = rcp(absDen);
    vfloat<K> u = U * invAbsDen;
    vfloat<K> v = V * invAbsDen;
    const vfloat<K> t = T * invAbsDen;
    /* the mapper may modify u, v and the normal, so it works on copies */
    Vec3vf<K> normal = Ng;
    mapUV(u,v,normal);
    return std::make_tuple(u,v,t,normal);
  }

  vfloat<K> U;
  vfloat<K> V;
  const vfloat<K> T;
  const vfloat<K> absDen;
  const Vec3vf<K> Ng;
  /* held by reference: the mapper object must outlive this record */
  const UVMapper& mapUV;
};
/*! Moeller-Trumbore intersector for ray packets of width K.
 *
 *  Two families of entry points are provided:
 *   - intersectK / intersectEdgeK test all K rays against ONE triangle
 *     whose data has been broadcast to K lanes;
 *   - intersect / intersectEdge (taking an index k) test the k'th ray of
 *     the packet against M triangles at once.
 *  "Edge" variants take (v0, e1, e2) with e1 = v0-v1 and e2 = v2-v0; the
 *  other variants take the three vertices and derive the edges. */
template<int M, int K>
struct MoellerTrumboreIntersectorK
{
  __forceinline MoellerTrumboreIntersectorK() {}

  /* neither argument is used by this constructor */
  __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}

  /*! Intersects K rays with one of M triangles.
   *  Core packet test on explicit ray data. Returns the mask of rays that
   *  hit; the hit record is only constructed when that mask is non-empty. */
  template<typename UVMapper>
  __forceinline vbool<K> intersectK(const vbool<K>& valid0,
                                    //RayK<K>& ray,
                                    const Vec3vf<K>& ray_org,
                                    const Vec3vf<K>& ray_dir,
                                    const vfloat<K>& ray_tnear,
                                    const vfloat<K>& ray_tfar,
                                    const Vec3vf<K>& tri_v0,
                                    const Vec3vf<K>& tri_e1,
                                    const Vec3vf<K>& tri_e2,
                                    const Vec3vf<K>& tri_Ng,
                                    const UVMapper& mapUV,
                                    MoellerTrumboreHitK<K,UVMapper> &hit) const
  {
    /* calculate denominator */
    vbool<K> valid = valid0;
    const Vec3vf<K> C = tri_v0 - ray_org;
    const Vec3vf<K> R = cross(C,ray_dir);
    const vfloat<K> den = dot(tri_Ng,ray_dir);
    const vfloat<K> absDen = abs(den);
    const vfloat<K> sgnDen = signmsk(den);

    /* test against edge p2 p0 */
    const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
    valid &= U >= 0.0f;
    if (likely(none(valid))) return false;

    /* test against edge p0 p1 */
    const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
    valid &= V >= 0.0f;
    if (likely(none(valid))) return false;

    /* test against edge p1 p2 */
    const vfloat<K> W = absDen-U-V;
    valid &= W >= 0.0f;
    if (likely(none(valid))) return false;

    /* perform depth test */
    const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
    valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
    if (unlikely(none(valid))) return false;

    /* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
    valid &= den < vfloat<K>(zero);
    if (unlikely(none(valid))) return false;
#else
    valid &= den != vfloat<K>(zero);
    if (unlikely(none(valid))) return false;
#endif

    /* calculate hit information */
    new (&hit) MoellerTrumboreHitK<K,UVMapper>(U,V,T,absDen,tri_Ng,mapUV);
    return valid;
  }

  /*! Intersects K rays with one of M triangles.
   *  Vertex-based variant that fills a caller-provided hit record. */
  template<typename UVMapper>
  __forceinline vbool<K> intersectK(const vbool<K>& valid0,
                                    RayK<K>& ray,
                                    const Vec3vf<K>& tri_v0,
                                    const Vec3vf<K>& tri_v1,
                                    const Vec3vf<K>& tri_v2,
                                    const UVMapper& mapUV,
                                    MoellerTrumboreHitK<K,UVMapper> &hit) const
  {
    const Vec3vf<K> e1 = tri_v0-tri_v1;
    const Vec3vf<K> e2 = tri_v2-tri_v0;
    const Vec3vf<K> Ng = cross(e2,e1);
    return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit);
  }

  /*! Intersects K rays with one of M triangles.
   *  Vertex-based variant that hands the result to the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline vbool<K> intersectK(const vbool<K>& valid0,
                                    RayK<K>& ray,
                                    const Vec3vf<K>& tri_v0,
                                    const Vec3vf<K>& tri_v1,
                                    const Vec3vf<K>& tri_v2,
                                    const UVMapper& mapUV,
                                    const Epilog& epilog) const
  {
    /* the hit must be parameterized on the caller's mapper type, otherwise
       the core routine cannot be called with any mapper but UVIdentity<K> */
    MoellerTrumboreHitK<K,UVMapper> hit(mapUV);
    const Vec3vf<K> e1 = tri_v0-tri_v1;
    const Vec3vf<K> e2 = tri_v2-tri_v0;
    const Vec3vf<K> Ng = cross(e2,e1);
    const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit);
    return epilog(valid,hit);
  }

  /*! Vertex-based packet variant with epilog, using UVIdentity<K> as mapper. */
  template<typename Epilog>
  __forceinline vbool<K> intersectK(const vbool<K>& valid0,
                                    RayK<K>& ray,
                                    const Vec3vf<K>& tri_v0,
                                    const Vec3vf<K>& tri_v1,
                                    const Vec3vf<K>& tri_v2,
                                    const Epilog& epilog) const
  {
    UVIdentity<K> mapUV;
    MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV);
    const Vec3vf<K> e1 = tri_v0-tri_v1;
    const Vec3vf<K> e2 = tri_v2-tri_v0;
    const Vec3vf<K> Ng = cross(e2,e1);
    const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit);
    return epilog(valid,hit);
  }

  /*! Intersects K rays with one of M triangles.
   *  Edge-based variant that hands the result to the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
                                        RayK<K>& ray,
                                        const Vec3vf<K>& tri_v0,
                                        const Vec3vf<K>& tri_e1,
                                        const Vec3vf<K>& tri_e2,
                                        const UVMapper& mapUV,
                                        const Epilog& epilog) const
  {
    /* parameterized on UVMapper for the same reason as in intersectK above */
    MoellerTrumboreHitK<K,UVMapper> hit(mapUV);
    const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
    const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit);
    return epilog(valid,hit);
  }

  /*! Intersect k'th ray from ray packet of size K with M triangles.
   *  Edge-based core test; returns true and constructs the hit record
   *  when at least one of the M lanes passes all tests. */
  template<typename UVMapper>
  __forceinline bool intersectEdge(RayK<K>& ray,
                                   size_t k,
                                   const Vec3vf<M>& tri_v0,
                                   const Vec3vf<M>& tri_e1,
                                   const Vec3vf<M>& tri_e2,
                                   const UVMapper& mapUV,
                                   MoellerTrumboreHitM<M,UVMapper>& hit) const
  {
    /* calculate denominator */
    typedef Vec3vf<M> Vec3vfM;
    const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
    const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
    const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
    const Vec3vfM C = Vec3vfM(tri_v0) - O;
    const Vec3vfM R = cross(C,D);
    const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
    const vfloat<M> absDen = abs(den);
    const vfloat<M> sgnDen = signmsk(den);

    /* perform edge tests */
    const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
    const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;

    /* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
    vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#else
    vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#endif
    if (likely(none(valid))) return false;

    /* perform depth test */
    const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
    valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
    if (likely(none(valid))) return false;

    /* calculate hit information */
    new (&hit) MoellerTrumboreHitM<M,UVMapper>(valid,U,V,T,absDen,tri_Ng,mapUV);
    return true;
  }

  /*! Edge-based test of the k'th ray that additionally restricts the hit
   *  to lanes whose time_range contains the ray time (lower inclusive,
   *  upper exclusive). */
  template<typename UVMapper>
  __forceinline bool intersectEdge(RayK<K>& ray,
                                   size_t k,
                                   const BBox<vfloat<M>>& time_range,
                                   const Vec3vf<M>& tri_v0,
                                   const Vec3vf<M>& tri_e1,
                                   const Vec3vf<M>& tri_e2,
                                   const UVMapper& mapUV,
                                   MoellerTrumboreHitM<M,UVMapper>& hit) const
  {
    /* the inputs are already edges, so the edge-based test must be used;
       the vertex-based intersect() would derive edges from them again */
    if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,mapUV,hit)))
    {
      /* NOTE(review): ray.time is indexed like a data member here while
         tnear is read through tnear(); confirm against RayK's declaration */
      hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
      hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
      return any(hit.valid);
    }
    return false;
  }

  /*! Vertex-based test of the k'th ray; derives the edges and forwards. */
  template<typename UVMapper>
  __forceinline bool intersect(RayK<K>& ray,
                               size_t k,
                               const Vec3vf<M>& v0,
                               const Vec3vf<M>& v1,
                               const Vec3vf<M>& v2,
                               const UVMapper& mapUV,
                               MoellerTrumboreHitM<M,UVMapper>& hit) const
  {
    const Vec3vf<M> e1 = v0-v1;
    const Vec3vf<M> e2 = v2-v0;
    return intersectEdge(ray,k,v0,e1,e2,mapUV,hit);
  }

  /*! Edge-based test of the k'th ray followed by the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline bool intersectEdge(RayK<K>& ray,
                                   size_t k,
                                   const Vec3vf<M>& tri_v0,
                                   const Vec3vf<M>& tri_e1,
                                   const Vec3vf<M>& tri_e2,
                                   const UVMapper& mapUV,
                                   const Epilog& epilog) const
  {
    MoellerTrumboreHitM<M,UVMapper> hit(mapUV);
    if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,mapUV,hit))) return epilog(hit.valid,hit);
    return false;
  }

  /*! Time-restricted edge-based test of the k'th ray followed by the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline bool intersectEdge(RayK<K>& ray,
                                   size_t k,
                                   const BBox<vfloat<M>>& time_range,
                                   const Vec3vf<M>& tri_v0,
                                   const Vec3vf<M>& tri_e1,
                                   const Vec3vf<M>& tri_e2,
                                   const UVMapper& mapUV,
                                   const Epilog& epilog) const
  {
    MoellerTrumboreHitM<M,UVMapper> hit(mapUV);
    if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,mapUV,hit))) return epilog(hit.valid,hit);
    return false;
  }

  /*! Vertex-based test of the k'th ray followed by the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline bool intersect(RayK<K>& ray,
                               size_t k,
                               const Vec3vf<M>& v0,
                               const Vec3vf<M>& v1,
                               const Vec3vf<M>& v2,
                               const UVMapper& mapUV,
                               const Epilog& epilog) const
  {
    const Vec3vf<M> e1 = v0-v1;
    const Vec3vf<M> e2 = v2-v0;
    return intersectEdge(ray,k,v0,e1,e2,mapUV,epilog);
  }

  /*! Vertex-based test of the k'th ray with epilog, using UVIdentity<M>. */
  template<typename Epilog>
  __forceinline bool intersect(RayK<K>& ray,
                               size_t k,
                               const Vec3vf<M>& v0,
                               const Vec3vf<M>& v1,
                               const Vec3vf<M>& v2,
                               const Epilog& epilog) const
  {
    return intersect(ray,k,v0,v1,v2,UVIdentity<M>(),epilog);
  }

  /*! Time-restricted vertex-based test of the k'th ray followed by the epilog. */
  template<typename UVMapper, typename Epilog>
  __forceinline bool intersect(RayK<K>& ray,
                               size_t k,
                               const BBox<vfloat<M>>& time_range,
                               const Vec3vf<M>& v0,
                               const Vec3vf<M>& v1,
                               const Vec3vf<M>& v2,
                               const UVMapper& mapUV,
                               const Epilog& epilog) const
  {
    const Vec3vf<M> e1 = v0-v1;
    const Vec3vf<M> e2 = v2-v0;
    return intersectEdge(ray,k,time_range,v0,e1,e2,mapUV,epilog);
  }
};
}
}

View File

@@ -0,0 +1,407 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "trianglev.h"
#include "trianglev_mb.h"
#include "intersector_epilog.h"
/*! Modified Pluecker ray/triangle intersector. The test first shifts
* the ray origin into the origin of the coordinate system and then
* uses Pluecker coordinates for the intersection. Due to the shift,
* the Pluecker coordinate calculation simplifies and the tests get
* numerically stable. The edge equations are watertight along the
* edge for neighboring triangles. */
namespace embree
{
namespace isa
{
/*! Hit record for one ray tested against M triangles with the Pluecker
 *  test. U, V and UVW are stored unnormalized; finalize() derives the
 *  clamped u/v coordinates and applies the UV mapper. */
template<int M, typename UVMapper>
struct PlueckerHitM
{
  /*! Creates a record that only references the UV mapper. */
  __forceinline PlueckerHitM(const UVMapper& mapUV) : mapUV(mapUV) {}

  /*! Creates a record from the raw results of the intersection test. */
  __forceinline PlueckerHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV)
    : U(U), V(V), UVW(UVW), mapUV(mapUV), valid(valid), vt(t), vNg(Ng) {}

  /*! Computes u = U/UVW and v = V/UVW, each clamped to at most 1. */
  __forceinline void finalize()
  {
    /* lanes with |UVW| below min_rcp_input use a reciprocal of zero */
    const vbool<M> tooSmall = abs(UVW) < min_rcp_input;
    const vfloat<M> invUVW = select(tooSmall,vfloat<M>(0.0f),rcp(UVW));
    vu = min(U * invUVW,1.0f);
    vv = min(V * invUVW,1.0f);
    mapUV(vu,vv,vNg);
  }

  /* vector accessors */
  __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); }
  __forceinline vfloat<M> t () const { return vt; }
  __forceinline Vec3vf<M> Ng() const { return vNg; }

  /* per-lane accessors */
  __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }

public:
  /* unnormalized edge-test results */
  vfloat<M> U;
  vfloat<M> V;
  vfloat<M> UVW;
  /* held by reference: the mapper object must outlive this record */
  const UVMapper& mapUV;

public:
  /* lane mask, finalized coordinates, distance and normal */
  vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
  Vec3vf<M> vNg;
};
/*! Single-ray Pluecker intersector that tests M triangles at once.
 *  With early_out == true the core routine returns false as soon as no
 *  lane is valid any more; with early_out == false it always writes the
 *  hit record and returns whether any lane is valid. */
template<int M, bool early_out = true>
struct PlueckerIntersector1
{
__forceinline PlueckerIntersector1() {}
/* neither argument is used by this constructor */
__forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {}
/*! Core test on three vertex vectors with a caller-supplied lane mask.
 *  On success the hit record is constructed in place with the
 *  unnormalized U, V, UVW, the distance t and the normal Ng. */
template<typename UVMapper>
__forceinline bool intersect(const vbool<M>& valid0,
Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
PlueckerHitM<M,UVMapper>& hit) const
{
vbool<M> valid = valid0;
/* calculate vertices relative to ray origin */
const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
const Vec3vf<M> v0 = tri_v0-O;
const Vec3vf<M> v1 = tri_v1-O;
const Vec3vf<M> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<M> e0 = v2-v0;
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v1-v2;
/* perform edge tests */
const vfloat<M> U = dot(cross(e0,v2+v0),D);
const vfloat<M> V = dot(cross(e1,v0+v1),D);
const vfloat<M> W = dot(cross(e2,v1+v2),D);
const vfloat<M> UVW = U+V+W;
/* tolerance of one ulp relative to |UVW| for the sign tests below */
const vfloat<M> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
/* with culling only lanes where U, V and W are all <= eps pass */
valid &= max(U,V,W) <= eps;
#else
/* otherwise U, V and W must all be >= -eps or all be <= eps */
valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(early_out && none(valid))) return false;
/* calculate geometry normal and denominator */
const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<M> den = twice(dot(Ng,D));
/* perform depth test */
const vfloat<M> T = twice(dot(v0,Ng));
const vfloat<M> t = rcp(den)*T;
valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
/* lanes with a zero denominator are rejected after the division */
valid &= den != vfloat<M>(zero);
if (unlikely(early_out && none(valid))) return false;
/* update hit information */
new (&hit) PlueckerHitM<M,UVMapper>(valid,U,V,UVW,t,Ng,mapUV);
return early_out || any(valid);
}
/*! Same as the core test; despite the name the three vectors are
 *  passed on unchanged as vertices. */
template<typename UVMapper>
__forceinline bool intersectEdge(const vbool<M>& valid,
Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
PlueckerHitM<M,UVMapper>& hit) const
{
return intersect(valid,ray,tri_v0,tri_v1,tri_v2,mapUV,hit);
}
/*! Runs the core test with all M lanes enabled. */
template<typename UVMapper>
__forceinline bool intersectEdge(Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
PlueckerHitM<M,UVMapper>& hit) const
{
vbool<M> valid = true;
return intersect(valid,ray,tri_v0,tri_v1,tri_v2,mapUV,hit);
}
/*! Vertex-based test with all lanes enabled; forwards to intersectEdge. */
template<typename UVMapper>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
PlueckerHitM<M,UVMapper>& hit) const
{
return intersectEdge(ray,tri_v0,tri_v1,tri_v2,mapUV,hit);
}
/*! Test followed by the epilog.
 *  NOTE(review): the parameters are named e1/e2 but are forwarded to an
 *  overload that treats all three vectors as vertices; confirm that
 *  callers pass vertices here. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersectEdge(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& e1,
const Vec3vf<M>& e2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
PlueckerHitM<M,UVMapper> hit(mapUV);
if (likely(intersectEdge(ray,v0,e1,e2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test followed by the epilog; returns the epilog's
 *  result, or false when the test reports no hit. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
PlueckerHitM<M,UVMapper> hit(mapUV);
if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test followed by the epilog, using UVIdentity<M> as mapper. */
template<typename Epilog>
__forceinline bool intersect(Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const Epilog& epilog) const
{
auto mapUV = UVIdentity<M>();
PlueckerHitM<M,UVIdentity<M>> hit(mapUV);
if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
/*! Vertex-based test with a caller-supplied lane mask, followed by the epilog. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersect(const vbool<M>& valid,
Ray& ray,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
PlueckerHitM<M,UVMapper> hit(mapUV);
if (likely(intersect(valid,ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit);
return false;
}
};
/*! Hit record for K rays tested against a single triangle with the
 *  Pluecker test. Calling the record derives the clamped u/v coordinates
 *  and applies the UV mapper. */
template<int K, typename UVMapper>
struct PlueckerHitK
{
  /*! Creates a record that only references the UV mapper. */
  __forceinline PlueckerHitK(const UVMapper& mapUV) : mapUV(mapUV) {}

  /*! Creates a record from the raw results of the intersection test. */
  __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV)
    : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {}

  /*! Returns the tuple (u, v, t, Ng) after normalization and UV mapping. */
  __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
  {
    /* lanes with |UVW| below min_rcp_input use a reciprocal of zero */
    const vbool<K> tooSmall = abs(UVW) < min_rcp_input;
    const vfloat<K> invUVW = select(tooSmall,vfloat<K>(0.0f),rcp(UVW));
    vfloat<K> u = min(U * invUVW,1.0f);
    vfloat<K> v = min(V * invUVW,1.0f);
    /* the mapper may modify u, v and the normal, so it works on copies */
    Vec3vf<K> normal = Ng;
    mapUV(u,v,normal);
    return std::make_tuple(u,v,t,normal);
  }

  vfloat<K> U;
  vfloat<K> V;
  const vfloat<K> UVW;
  const vfloat<K> t;
  const Vec3vf<K> Ng;
  /* held by reference: the mapper object must outlive this record */
  const UVMapper& mapUV;
};
/*! Pluecker intersector for ray packets of width K. intersectK tests all
 *  K rays against one triangle; intersect (taking an index k) tests the
 *  k'th ray of the packet against M triangles at once. */
template<int M, int K>
struct PlueckerIntersectorK
{
__forceinline PlueckerIntersectorK() {}
/* neither argument is used by this constructor */
__forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
/*! Intersects K rays with one of M triangles. */
/* Returns the mask of rays that hit; the hit record is only constructed
   when that mask is non-empty. */
template<typename UVMapper>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const UVMapper& mapUV,
PlueckerHitK<K,UVMapper> &hit) const
{
/* calculate vertices relative to ray origin */
vbool<K> valid = valid0;
const Vec3vf<K> O = ray.org;
const Vec3vf<K> D = ray.dir;
const Vec3vf<K> v0 = tri_v0-O;
const Vec3vf<K> v1 = tri_v1-O;
const Vec3vf<K> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<K> e0 = v2-v0;
const Vec3vf<K> e1 = v0-v1;
const Vec3vf<K> e2 = v1-v2;
/* perform edge tests */
const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
const vfloat<K> UVW = U+V+W;
/* tolerance of one ulp relative to |UVW| for the sign tests below */
const vfloat<K> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
valid &= max(U,V,W) <= eps;
#else
valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(none(valid))) return valid;
/* calculate geometry normal and denominator */
const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
/* perform depth test */
const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
const vfloat<K> t = rcp(den)*T;
valid &= ray.tnear() <= t & t <= ray.tfar;
/* lanes with a zero denominator are rejected after the division */
valid &= den != vfloat<K>(zero);
if (unlikely(none(valid))) return valid;
/* calculate hit information */
new (&hit) PlueckerHitK<K,UVMapper>(U,V,UVW,t,Ng,mapUV);
return valid;
}
/*! Packet test followed by the epilog, using UVIdentity<K> as mapper. */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const Epilog& epilog) const
{
UVIdentity<K> mapUV;
PlueckerHitK<K,UVIdentity<K>> hit(mapUV);
const vbool<K> valid = intersectK(valid0,ray,tri_v0,tri_v1,tri_v2,mapUV,hit);
return epilog(valid,hit);
}
/*! Packet test followed by the epilog with a caller-supplied UV mapper. */
template<typename UVMapper, typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
PlueckerHitK<K,UVMapper> hit(mapUV);
const vbool<K> valid = intersectK(valid0,ray,tri_v0,tri_v1,tri_v2,mapUV,hit);
return epilog(valid,hit);
}
/*! Intersect k'th ray from ray packet of size K with M triangles. */
/* Returns true and constructs the hit record when at least one of the
   M lanes passes all tests. */
template<typename UVMapper>
__forceinline bool intersect(RayK<K>& ray, size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
PlueckerHitM<M,UVMapper> &hit) const
{
/* calculate vertices relative to ray origin */
const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
const Vec3vf<M> v0 = tri_v0-O;
const Vec3vf<M> v1 = tri_v1-O;
const Vec3vf<M> v2 = tri_v2-O;
/* calculate triangle edges */
const Vec3vf<M> e0 = v2-v0;
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v1-v2;
/* perform edge tests */
const vfloat<M> U = dot(cross(e0,v2+v0),D);
const vfloat<M> V = dot(cross(e1,v0+v1),D);
const vfloat<M> W = dot(cross(e2,v1+v2),D);
const vfloat<M> UVW = U+V+W;
/* tolerance of one ulp relative to |UVW| for the sign tests below */
const vfloat<M> eps = float(ulp)*abs(UVW);
#if defined(EMBREE_BACKFACE_CULLING)
vbool<M> valid = max(U,V,W) <= eps;
#else
vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
#endif
if (unlikely(none(valid))) return false;
/* calculate geometry normal and denominator */
const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
const vfloat<M> den = twice(dot(Ng,D));
/* perform depth test */
const vfloat<M> T = twice(dot(v0,Ng));
const vfloat<M> t = rcp(den)*T;
valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
if (unlikely(none(valid))) return false;
/* avoid division by 0 */
valid &= den != vfloat<M>(zero);
if (unlikely(none(valid))) return false;
/* update hit information */
new (&hit) PlueckerHitM<M,UVMapper>(valid,U,V,UVW,t,Ng,mapUV);
return true;
}
/*! Test of the k'th ray followed by the epilog; returns the epilog's
 *  result, or false when the test reports no hit. */
template<typename UVMapper, typename Epilog>
__forceinline bool intersect(RayK<K>& ray, size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const UVMapper& mapUV,
const Epilog& epilog) const
{
PlueckerHitM<M,UVMapper> hit(mapUV);
if (intersect(ray,k,tri_v0,tri_v1,tri_v2,mapUV,hit))
return epilog(hit.valid,hit);
return false;
}
/*! Test of the k'th ray followed by the epilog, using UVIdentity<M> as mapper. */
template<typename Epilog>
__forceinline bool intersect(RayK<K>& ray, size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_v1,
const Vec3vf<M>& tri_v2,
const Epilog& epilog) const
{
UVIdentity<M> mapUV;
PlueckerHitM<M,UVIdentity<M>> hit(mapUV);
if (intersect(ray,k,tri_v0,tri_v1,tri_v2,mapUV,hit))
return epilog(hit.valid,hit);
return false;
}
};
}
}

View File

@@ -0,0 +1,418 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "intersector_epilog.h"
/*! This intersector implements a modified version of Woop's ray-triangle intersection test. */
namespace embree
{
namespace isa
{
/*! Hit record for one ray tested against M triangles with the Woop
 *  test. T is already the final distance; U and V are multiplied by
 *  inv_det in finalize(). */
template<int M>
struct WoopHitM
{
  __forceinline WoopHitM() {}

  /*! Creates a record from the raw results of the intersection test. */
  __forceinline WoopHitM(const vbool<M>& valid,
                         const vfloat<M>& U,
                         const vfloat<M>& V,
                         const vfloat<M>& T,
                         const vfloat<M>& inv_det,
                         const Vec3vf<M>& Ng)
    : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {}

  /*! Fills vu, vv and vt from the stored raw values. */
  __forceinline void finalize()
  {
    vu = U*inv_det;
    vv = V*inv_det;
    vt = T;
  }

  /* per-lane accessors */
  __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  __forceinline float t (const size_t i) const { return vt[i]; }
  __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }

private:
  /* raw results of the test, consumed by finalize() */
  const vfloat<M> U;
  const vfloat<M> V;
  const vfloat<M> T;
  const vfloat<M> inv_det;

public:
  /* lane mask, finalized coordinates, distance and normal */
  const vbool<M> valid;
  vfloat<M> vu;
  vfloat<M> vv;
  vfloat<M> vt;
  Vec3vf<M> vNg;
};
/*! Per-ray data for the Woop test: the axis permutation (kx,ky,kz), the
 *  ray origin permuted into that order, and the shear/scale constants S. */
template<int M>
struct WoopPrecalculations1
{
  unsigned int kx;
  unsigned int ky;
  unsigned int kz;
  Vec3vf<M> org;
  Vec3fa S;

  __forceinline WoopPrecalculations1() {}

  /*! Derives the permutation and shear constants from the ray; ptr is unused. */
  __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr)
  {
    /* kz is the axis where |dir| is largest; kx and ky follow cyclically */
    kz = maxDim(abs(ray.dir));
    kx = (kz+1) % 3;
    ky = (kx+1) % 3;

    const float dirZ = ray.dir[kz];
    const float invDirZ = rcp(dirZ);

    /* kx and ky trade places when the dominant component is negative */
    if (dirZ < 0.0f) std::swap(kx,ky);

    /* shear in x and y, scale in z */
    S.x = ray.dir[kx] * invDirZ;
    S.y = ray.dir[ky] * invDirZ;
    S.z = invDirZ;

    org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]);
  }
};
/*! Single-ray intersector that tests M triangles at once in the permuted,
 *  sheared frame described by WoopPrecalculations1. */
template<int M>
struct WoopIntersector1
{
  typedef WoopPrecalculations1<M> Precalculations;

  __forceinline WoopIntersector1() {}

  __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {}

  /*! Core test. Lanes disabled in valid0 stay disabled. Returns false when
   *  no lane survives; otherwise constructs the result in place in `hit`
   *  and returns true. */
  static __forceinline bool intersect(const vbool<M>& valid0,
                                      Ray& ray,
                                      const Precalculations& pre,
                                      const Vec3vf<M>& tri_v0,
                                      const Vec3vf<M>& tri_v1,
                                      const Vec3vf<M>& tri_v2,
                                      WoopHitM<M>& hit)
  {
    vbool<M> active = valid0;

    /* permute vertex components to (kx,ky,kz) order and subtract the ray origin */
    const Vec3vf<M> rayOrg(pre.org.x,pre.org.y,pre.org.z);
    const Vec3vf<M> relA = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - rayOrg;
    const Vec3vf<M> relB = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - rayOrg;
    const Vec3vf<M> relC = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - rayOrg;

    /* shear x and y of every vertex by its z component */
    const vfloat<M> ax = nmadd(relA.z,pre.S.x,relA.x);
    const vfloat<M> ay = nmadd(relA.z,pre.S.y,relA.y);
    const vfloat<M> bx = nmadd(relB.z,pre.S.x,relB.x);
    const vfloat<M> by = nmadd(relB.z,pre.S.y,relB.y);
    const vfloat<M> cx = nmadd(relC.z,pre.S.x,relC.x);
    const vfloat<M> cy = nmadd(relC.z,pre.S.y,relC.y);

    /* the six products whose pairwise differences are the scaled barycentrics */
    const vfloat<M> cxby = cx*by;
    const vfloat<M> cybx = cy*bx;
    const vfloat<M> axcy = ax*cy;
    const vfloat<M> aycx = ay*cx;
    const vfloat<M> bxay = bx*ay;
    const vfloat<M> byax = by*ax;

    /* keep a lane when all three differences are >= 0 or all three are <= 0;
       the AVX-512 branch presumably expresses the same test with mask-taking ge/le */
#if !defined(__AVX512F__)
    const vbool<M> allNonNegative = (cxby >= cybx) & (axcy >= aycx) & (bxay >= byax);
    const vbool<M> allNonPositive = (cxby <= cybx) & (axcy <= aycx) & (bxay <= byax);
#else
    const vbool<M> allNonNegative = ge(ge(cxby >= cybx,axcy,aycx),bxay,byax);
    const vbool<M> allNonPositive = le(le(cxby <= cybx,axcy,aycx),bxay,byax);
#endif
    active &= allNonNegative | allNonPositive;
    if (likely(none(active))) return false;

    const vfloat<M> U = cxby-cybx;
    const vfloat<M> V = axcy-aycx;
    const vfloat<M> W = bxay-byax;

    /* a zero determinant cannot be inverted: drop those lanes */
    const vfloat<M> det = U+V+W;
    active &= det != 0.0f;
    const vfloat<M> inv_det = rcp(det);

    /* scaled z of each vertex, combined with the barycentrics into the hit distance */
    const vfloat<M> az = pre.S.z * relA.z;
    const vfloat<M> bz = pre.S.z * relB.z;
    const vfloat<M> cz = pre.S.z * relC.z;
    const vfloat<M> T = madd(U,az,madd(V,bz,W*cz));
    const vfloat<M> t = T * inv_det;

    /* depth test: tnear < t <= tfar */
    active &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar));
    if (likely(none(active))) return false;

    /* geometry normal from the unpermuted input vertices */
    const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1);

    /* the already divided distance t is stored; U and V remain unscaled
       and are multiplied by inv_det in WoopHitM::finalize() */
    new (&hit) WoopHitM<M>(active,U,V,t,inv_det,tri_Ng);
    return true;
  }

  /*! Same as above with every lane enabled. */
  static __forceinline bool intersect(Ray& ray,
                                      const Precalculations& pre,
                                      const Vec3vf<M>& v0,
                                      const Vec3vf<M>& v1,
                                      const Vec3vf<M>& v2,
                                      WoopHitM<M>& hit)
  {
    vbool<M> everyLane = true;
    return intersect(everyLane,ray,pre,v0,v1,v2,hit);
  }

  /*! Runs the all-lanes test and forwards a successful result to the epilog. */
  template<typename Epilog>
  static __forceinline bool intersect(Ray& ray,
                                      const Precalculations& pre,
                                      const Vec3vf<M>& v0,
                                      const Vec3vf<M>& v1,
                                      const Vec3vf<M>& v2,
                                      const Epilog& epilog)
  {
    WoopHitM<M> hit;
    const bool found = intersect(ray,pre,v0,v1,v2,hit);
    if (likely(found))
      return epilog(hit.valid,hit);
    return false;
  }

  /*! Runs the masked test and forwards a successful result to the epilog. */
  template<typename Epilog>
  static __forceinline bool intersect(const vbool<M>& valid,
                                      Ray& ray,
                                      const Precalculations& pre,
                                      const Vec3vf<M>& v0,
                                      const Vec3vf<M>& v1,
                                      const Vec3vf<M>& v2,
                                      const Epilog& epilog)
  {
    WoopHitM<M> hit;
    const bool found = intersect(valid,ray,pre,v0,v1,v2,hit);
    if (likely(found))
      return epilog(hit.valid,hit);
    return false;
  }
};
#if 0
/*! Hit record for K rays against a single triangle.
 *  NOTE: this struct lies inside an `#if 0` region and is therefore not compiled.
 *  It stores unnormalized U, V, T together with absDen; operator() divides
 *  all three by absDen (via rcp) and returns (u, v, t, Ng) as a tuple. */
template<int K>
struct WoopHitK
{
__forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
: U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
/* Normalizes the stored values with the reciprocal of absDen and packs them into a tuple */
__forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
{
const vfloat<K> rcpAbsDen = rcp(absDen);
const vfloat<K> t = T * rcpAbsDen;
const vfloat<K> u = U * rcpAbsDen;
const vfloat<K> v = V * rcpAbsDen;
return std::make_tuple(u,v,t,Ng);
}
private:
/* values captured at construction time, all still scaled by absDen */
const vfloat<K> U;
const vfloat<K> V;
const vfloat<K> T;
const vfloat<K> absDen;
const Vec3vf<K> Ng;
};
/*! Packet (K rays) intersector.
 *  NOTE: this struct lies inside an `#if 0` region and is therefore not compiled.
 *  The body tests edges via cross/dot products against the ray direction rather
 *  than using the sheared-frame test of WoopIntersector1.
 *  NOTE(review): several members below look inconsistent with WoopHitM as declared
 *  in this file (see the individual notes) - confirm before re-enabling. */
template<int M, int K>
struct WoopIntersectorK
{
/* the constructor ignores both arguments */
__forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
/*! Intersects K rays with one of M triangles. */
/* Takes the ray as separate components plus the triangle as (v0, e1, e2, Ng).
   Lanes are removed from `valid` by each test in turn; as soon as no lane is
   left the function returns `false`, which is converted to the vbool<K> return type. */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
//RayK<K>& ray,
const Vec3vf<K>& ray_org,
const Vec3vf<K>& ray_dir,
const vfloat<K>& ray_tnear,
const vfloat<K>& ray_tfar,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_e1,
const Vec3vf<K>& tri_e2,
const Vec3vf<K>& tri_Ng,
const Epilog& epilog) const
{
/* calculate denominator */
vbool<K> valid = valid0;
const Vec3vf<K> C = tri_v0 - ray_org;
const Vec3vf<K> R = cross(C,ray_dir);
const vfloat<K> den = dot(tri_Ng,ray_dir);
const vfloat<K> absDen = abs(den);
const vfloat<K> sgnDen = signmsk(den);
/* test against edge p2 p0 */
/* XOR with sgnDen presumably flips the sign so the value can be compared against abs(den) */
const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
valid &= U >= 0.0f;
if (likely(none(valid))) return false;
/* test against edge p0 p1 */
const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
valid &= V >= 0.0f;
if (likely(none(valid))) return false;
/* test against edge p1 p2 */
const vfloat<K> W = absDen-U-V;
valid &= W >= 0.0f;
if (likely(none(valid))) return false;
/* perform depth test */
/* both sides are scaled by absDen, so no division is needed here */
const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
if (unlikely(none(valid))) return false;
/* perform backface culling */
/* with culling enabled only den < 0 is accepted; otherwise only den == 0 is rejected */
#if defined(EMBREE_BACKFACE_CULLING)
valid &= den < vfloat<K>(zero);
if (unlikely(none(valid))) return false;
#else
valid &= den != vfloat<K>(zero);
if (unlikely(none(valid))) return false;
#endif
/* calculate hit information */
WoopHitK<K> hit(U,V,T,absDen,tri_Ng);
return epilog(valid,hit);
}
/*! Intersects K rays with one of M triangles. */
/* Vertex form: derives e1 = v0-v1, e2 = v2-v0 and Ng = cross(e2,e1), then forwards */
template<typename Epilog>
__forceinline vbool<K> intersectK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_v1,
const Vec3vf<K>& tri_v2,
const Epilog& epilog) const
{
const Vec3vf<K> e1 = tri_v0-tri_v1;
const Vec3vf<K> e2 = tri_v2-tri_v0;
const Vec3vf<K> Ng = cross(e2,e1);
return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
}
/*! Intersects K rays with one of M triangles. */
/* Edge form: the caller supplies e1 and e2; only the normal is computed here */
template<typename Epilog>
__forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
RayK<K>& ray,
const Vec3vf<K>& tri_v0,
const Vec3vf<K>& tri_e1,
const Vec3vf<K>& tri_e2,
const Epilog& epilog) const
{
const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
}
/*! Intersect k'th ray from ray packet of size K with M triangles. */
/* Returns false when no triangle lane survives; otherwise constructs `hit` in place and returns true */
__forceinline bool intersectEdge(RayK<K>& ray,
size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
WoopHitM<M>& hit) const
{
/* calculate denominator */
typedef Vec3vf<M> Vec3vfM;
const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
/* broadcast origin and direction of ray k across the M triangle lanes */
const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
const Vec3vfM C = Vec3vfM(tri_v0) - O;
const Vec3vfM R = cross(C,D);
const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
const vfloat<M> absDen = abs(den);
const vfloat<M> sgnDen = signmsk(den);
/* perform edge tests */
const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
/* perform backface culling */
#if defined(EMBREE_BACKFACE_CULLING)
vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#else
vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
#endif
if (likely(none(valid))) return false;
/* perform depth test */
const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
if (likely(none(valid))) return false;
/* calculate hit information */
/* NOTE(review): WoopHitM::finalize() multiplies U and V by its fifth constructor
   argument and copies T unchanged; passing absDen (not its reciprocal) and the
   unnormalized T here looks inconsistent with that - confirm before enabling. */
new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng);
return true;
}
/* Time-range variant: additionally requires time_range.lower <= ray.time[k] < time_range.upper */
__forceinline bool intersectEdge(RayK<K>& ray,
size_t k,
const BBox<vfloat<M>>& time_range,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
WoopHitM<M>& hit) const
{
/* NOTE(review): the only six-argument `intersect` in this struct is the Epilog
   template that expects vertices and an epilog; this call passes edges and a
   hit record, so intersectEdge(...) was probably intended - confirm. */
if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
{
/* NOTE(review): WoopHitM::valid is declared const in this file, so these
   compound assignments would not compile as written - confirm. */
hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
return any(hit.valid);
}
return false;
}
/* Epilog wrapper around the edge-form test */
template<typename Epilog>
__forceinline bool intersectEdge(RayK<K>& ray,
size_t k,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const Epilog& epilog) const
{
WoopHitM<M> hit;
if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
return false;
}
/* Epilog wrapper around the time-range edge-form test */
template<typename Epilog>
__forceinline bool intersectEdge(RayK<K>& ray,
size_t k,
const BBox<vfloat<M>>& time_range,
const Vec3vf<M>& tri_v0,
const Vec3vf<M>& tri_e1,
const Vec3vf<M>& tri_e2,
const Epilog& epilog) const
{
WoopHitM<M> hit;
if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
return false;
}
/* Vertex form for ray k: derives e1 = v0-v1 and e2 = v2-v0, then uses the edge form */
template<typename Epilog>
__forceinline bool intersect(RayK<K>& ray,
size_t k,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const Epilog& epilog) const
{
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v2-v0;
return intersectEdge(ray,k,v0,e1,e2,epilog);
}
/* Vertex form for ray k with an additional time-range restriction */
template<typename Epilog>
__forceinline bool intersect(RayK<K>& ray,
size_t k,
const BBox<vfloat<M>>& time_range,
const Vec3vf<M>& v0,
const Vec3vf<M>& v1,
const Vec3vf<M>& v2,
const Epilog& epilog) const
{
const Vec3vf<M> e1 = v0-v1;
const Vec3vf<M> e2 = v2-v0;
return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
}
};
#endif
}
}

View File

@@ -0,0 +1,132 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "primitive.h"
namespace embree
{
namespace isa
{
/*! Overlap tests between two triangles, in 2D and in 3D. */
struct TriangleTriangleIntersector
{
  /*! Evaluates pa0 + (pa1-pa0)*da0/(da0-da1). Callers in this struct only
   *  invoke it when da0 != da1. */
  __forceinline static float T(float pa0, float pa1, float da0, float da1) {
    const float span = pa1-pa0;
    return pa0 + span*da0/(da0-da1);
  }

  /*! Sign test of det(p-a0, a0-a1); true when the determinant is >= 0. */
  __forceinline static bool point_line_side(const Vec2f& p, const Vec2f& a0, const Vec2f& a1) {
    const Vec2f toPoint = p-a0;
    const Vec2f along = a0-a1;
    return det(toPoint,along) >= 0.0f;
  }

  /*! True when p yields the same side result for all three edges ab, bc, ca. */
  __forceinline static bool point_inside_triangle(const Vec2f& p, const Vec2f& a, const Vec2f& b, const Vec2f& c)
  {
    const bool sideAB = point_line_side(p,a,b);
    const bool sideBC = point_line_side(p,b,c);
    const bool sideCA = point_line_side(p,c,a);
    if (sideAB != sideBC) return false;
    return sideAB == sideCA;
  }

  /*! Two segments are reported as crossing when the endpoints of each one
   *  give different side results with respect to the other. */
  __forceinline static bool intersect_line_line(const Vec2f& a0, const Vec2f& a1, const Vec2f& b0, const Vec2f& b1)
  {
    const bool bSplitByA = point_line_side(b0,a0,a1) != point_line_side(b1,a0,a1);
    if (!bSplitByA) return false;
    const bool aSplitByB = point_line_side(a0,b0,b1) != point_line_side(a1,b0,b1);
    return aSplitByB;
  }

  /*! 2D test: any edge pair crossing, or one triangle fully inside the other. */
  __forceinline static bool intersect_triangle_triangle (const Vec2f& a0, const Vec2f& a1, const Vec2f& a2,
                                                         const Vec2f& b0, const Vec2f& b1, const Vec2f& b2)
  {
    const Vec2f* const cornersA[3] = { &a0, &a1, &a2 };
    const Vec2f* const cornersB[3] = { &b0, &b1, &b2 };

    /* all nine edge/edge combinations: (a0a1,a1a2,a2a0) x (b0b1,b1b2,b2b0) */
    for (size_t i=0; i<3; i++)
    {
      const size_t iNext = (i+1)%3;
      for (size_t j=0; j<3; j++)
      {
        const size_t jNext = (j+1)%3;
        if (intersect_line_line(*cornersA[i],*cornersA[iNext],*cornersB[j],*cornersB[jNext]))
          return true;
      }
    }

    /* no edges cross: check full containment in either direction */
    const bool a_in_b = point_inside_triangle(a0,b0,b1,b2) && point_inside_triangle(a1,b0,b1,b2) && point_inside_triangle(a2,b0,b1,b2);
    if (a_in_b) return true;

    const bool b_in_a = point_inside_triangle(b0,a0,a1,a2) && point_inside_triangle(b1,a0,a1,a2) && point_inside_triangle(b2,a0,a1,a2);
    return b_in_a;
  }

  /*! 3D test. Rejects early when one triangle lies entirely on one side of
   *  the other's plane (with tolerance eps), falls back to the 2D test when
   *  either triangle lies within eps of the other's plane, and otherwise
   *  compares the two intervals obtained along D = cross(Na,Nb). */
  static bool intersect_triangle_triangle (const Vec3fa& a0, const Vec3fa& a1, const Vec3fa& a2,
                                           const Vec3fa& b0, const Vec3fa& b1, const Vec3fa& b2)
  {
    const float eps = 1E-5f;

    /* plane of each triangle as (normal, offset); the normals are not normalized */
    const Vec3fa Na = cross(a1-a0,a2-a0);
    const float Ca = dot(Na,a0);
    const Vec3fa Nb = cross(b1-b0,b2-b0);
    const float Cb = dot(Nb,b0);

    /* signed plane-B values of the vertices of A */
    const float da0 = dot(Nb,a0)-Cb;
    const float da1 = dot(Nb,a1)-Cb;
    const float da2 = dot(Nb,a2)-Cb;
    if (max(da0,da1,da2) < -eps || min(da0,da1,da2) > +eps) return false;
    //CSTAT(bvh_collide_prim_intersections4++);

    /* signed plane-A values of the vertices of B */
    const float db0 = dot(Na,b0)-Ca;
    const float db1 = dot(Na,b1)-Ca;
    const float db2 = dot(Na,b2)-Ca;
    if (max(db0,db1,db2) < -eps || min(db0,db1,db2) > +eps) return false;
    //CSTAT(bvh_collide_prim_intersections5++);

    const bool aWithinPlaneB = std::fabs(da0) < eps && std::fabs(da1) < eps && std::fabs(da2) < eps;
    const bool bWithinPlaneA = std::fabs(db0) < eps && std::fabs(db1) < eps && std::fabs(db2) < eps;
    if (unlikely(aWithinPlaneB || bWithinPlaneA))
    {
      /* drop the axis chosen by maxDim(Na) and run the 2D test on the other two */
      const size_t dz = maxDim(Na);
      const size_t dx = (dz+1)%3;
      const size_t dy = (dx+1)%3;
      const Vec2f A0(a0[dx],a0[dy]);
      const Vec2f A1(a1[dx],a1[dy]);
      const Vec2f A2(a2[dx],a2[dy]);
      const Vec2f B0(b0[dx],b0[dy]);
      const Vec2f B1(b1[dx],b1[dy]);
      const Vec2f B2(b2[dx],b2[dy]);
      return intersect_triangle_triangle(A0,A1,A2,B0,B1,B2);
    }

    /* position of every vertex along D */
    const Vec3fa D = cross(Na,Nb);
    const float pa0 = dot(D,a0);
    const float pa1 = dot(D,a1);
    const float pa2 = dot(D,a2);
    const float pb0 = dot(D,b0);
    const float pb1 = dot(D,b1);
    const float pb2 = dot(D,b2);

    /* extends `range` by T(...) of an edge whose end values d0,d1 bracket zero and differ */
    auto addCrossing = [] (BBox1f& range, float p0, float p1, float d0, float d1)
    {
      const bool bracketsZero = min(d0,d1) <= 0.0f && max(d0,d1) >= 0.0f;
      if (bracketsZero && abs(d0-d1) > 0.0f)
        range.extend(T(p0,p1,d0,d1));
    };

    BBox1f ba = empty;
    addCrossing(ba,pa0,pa1,da0,da1);
    addCrossing(ba,pa1,pa2,da1,da2);
    addCrossing(ba,pa2,pa0,da2,da0);

    BBox1f bb = empty;
    addCrossing(bb,pb0,pb1,db0,db1);
    addCrossing(bb,pb1,pb2,db1,db2);
    addCrossing(bb,pb2,pb0,db2,db0);

    return conjoint(ba,bb);
  }
};
}
}

View File

@@ -0,0 +1,442 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
#include "../common/scene.h"
namespace embree
{
/* Stores up to M triangles of an indexed face set. Unless
   EMBREE_COMPACT_POLYS is defined, per-vertex offsets are stored next to
   the geometry and primitive IDs. */
template <int M>
struct TriangleMi
{
  /* Virtual interface to query information about the triangle type */
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* primitive supports multiple time segments */
  static const bool singleTimeSegment = false;

  /* Capacity of one block: M triangles */
  static __forceinline size_t max_size() { return M; }

  /* Number of blocks needed for N primitives (N divided by the capacity, rounded up) */
  static __forceinline size_t blocks(size_t N)
  {
    const size_t capacity = max_size();
    return (N+capacity-1)/capacity;
  }

public:

  /* Default constructor; leaves all members untouched */
  __forceinline TriangleMi() { }

  /* Construction from vertex offsets and IDs; the offsets are ignored in
     the EMBREE_COMPACT_POLYS build */
  __forceinline TriangleMi(const vuint<M>& v0,
                           const vuint<M>& v1,
                           const vuint<M>& v2,
                           const vuint<M>& geomIDs,
                           const vuint<M>& primIDs)
#if defined(EMBREE_COMPACT_POLYS)
    : geomIDs(geomIDs), primIDs(primIDs) {}
#else
    : v0_(v0), v1_(v1), v2_(v2), geomIDs(geomIDs), primIDs(primIDs) {}
#endif

  /* Mask of slots whose primitive ID differs from -1 */
  __forceinline vbool<M> valid() const
  {
    return primIDs != vuint<M>(-1);
  }

  /* Whether slot i holds a triangle (primitive ID differs from -1) */
  __forceinline bool valid(const size_t i) const
  {
    assert(i<M);
    return primIDs[i] != -1;
  }

  /* bsf of the inverted validity mask (presumably the index of the first empty slot) */
  __forceinline size_t size() const
  {
    const auto occupied = movemask(valid());
    return bsf(~occupied);
  }

  /* Geometry IDs, all slots or a single slot */
  __forceinline vuint<M> geomID() const { return geomIDs; }
  __forceinline unsigned int geomID(const size_t i) const
  {
    assert(i<M);
    return geomIDs[i];
  }

  /* Primitive IDs, all slots or a single slot */
  __forceinline vuint<M> primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const
  {
    assert(i<M);
    return primIDs[i];
  }

  /* Union of the mesh-provided bounds of every leading valid triangle at time step itime */
  __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
  {
    BBox3fa box = empty;
    for (size_t i=0; i<M; i++)
    {
      if (!valid(i)) break;
      const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
      box.extend(mesh->bounds(primID(i),itime));
    }
    return box;
  }

  /* Linear bounds built from the bounds at time steps itime and itime+1 */
  __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime)
  {
    const BBox3fa atStart = bounds(scene,itime+0);
    const BBox3fa atEnd   = bounds(scene,itime+1);
    return LBBox3fa(atStart,atEnd);
  }

  /* Linear bounds merged from the per-primitive linear bounds of the mesh */
  __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
  {
    LBBox3fa merged = empty;
    for (size_t i=0; i<M; i++)
    {
      if (!valid(i)) break;
      const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
      merged.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
    }
    return merged;
  }

  /* Same, for an explicit time range */
  __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
  {
    LBBox3fa merged = empty;
    for (size_t i=0; i<M; i++)
    {
      if (!valid(i)) break;
      const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
      merged.extend(mesh->linearBounds(primID(i), time_range));
    }
    return merged;
  }

  /* Non-temporal store of every member of src into dst */
  __forceinline static void store_nt(TriangleMi* dst, const TriangleMi& src)
  {
#if !defined(EMBREE_COMPACT_POLYS)
    vuint<M>::store_nt(&dst->v0_,src.v0_);
    vuint<M>::store_nt(&dst->v1_,src.v1_);
    vuint<M>::store_nt(&dst->v2_,src.v2_);
#endif
    vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
    vuint<M>::store_nt(&dst->primIDs,src.primIDs);
  }

  /* Fills the block from prims[begin..end), advancing begin by the number
     of primitives consumed. Remaining slots get primitive ID -1, the
     geometry ID of slot 0 and three copies of slot 0's first vertex offset. */
  template<typename PrimRefT>
  __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
  {
    vuint<M> offs0 = zero, offs1 = zero, offs2 = zero;
    vuint<M> geomVec = -1, primVec = -1;

    for (size_t i=0; i<M; i++)
    {
      if (begin<end)
      {
        const PrimRefT& ref = prims[begin];
        geomVec[i] = ref.geomID();
        primVec[i] = ref.primID();
#if !defined(EMBREE_COMPACT_POLYS)
        const TriangleMesh* mesh = scene->get<TriangleMesh>(ref.geomID());
        const TriangleMesh::Triangle& tri = mesh->triangle(ref.primID());
        /* vertex indices are converted to offsets in units of 4 bytes */
        unsigned int int_stride = mesh->vertices0.getStride()/4;
        offs0[i] = tri.v[0] * int_stride;
        offs1[i] = tri.v[1] * int_stride;
        offs2[i] = tri.v[2] * int_stride;
#endif
        begin++;
      }
      else
      {
        /* padding slot; slot 0 is expected to have been filled already */
        assert(i);
        if (likely(i > 0)) {
          geomVec[i] = geomVec[0];
          primVec[i] = -1;
          offs0[i] = offs0[0];
          offs1[i] = offs0[0];
          offs2[i] = offs0[0];
        }
      }
    }

    new (this) TriangleMi(offs0,offs1,offs2,geomVec,primVec); // FIXME: use non temporal store
  }

  /* fill() followed by the linear bounds for time step itime */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  {
    fill(prims, begin, end, scene);
    return linearBounds(scene, itime);
  }

  /* fill() followed by the linear bounds for time_range */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  {
    fill(prims, begin, end, scene);
    return linearBounds(scene, time_range);
  }

  /* Recomputes the bounds of the stored triangles from the current vertices
     of `mesh`, stopping at the first slot whose primitive ID is -1 */
  __forceinline BBox3fa update(TriangleMesh* mesh)
  {
    BBox3fa box = empty;
    for (size_t i=0; i<M; i++)
    {
      const unsigned int id = primID(i);
      if (id == -1) break;

      const TriangleMesh::Triangle& tri = mesh->triangle(id);
      const Vec3fa p0 = mesh->vertex(tri.v[0]);
      const Vec3fa p1 = mesh->vertex(tri.v[1]);
      const Vec3fa p2 = mesh->vertex(tri.v[2]);
      box.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
    }
    return box;
  }

protected:
#if !defined(EMBREE_COMPACT_POLYS)
  vuint<M> v0_;         // 4 byte offset of 1st vertex
  vuint<M> v1_;         // 4 byte offset of 2nd vertex
  vuint<M> v2_;         // 4 byte offset of 3rd vertex
#endif
  vuint<M> geomIDs;     // geometry ID of mesh
  vuint<M> primIDs;     // primitive ID of primitive inside mesh
};
namespace isa
{
/*! ISA-specific extension of embree::TriangleMi that adds vertex loading
 *  and gathering on top of the stored IDs and offsets. */
template<int M>
struct TriangleMi : public embree::TriangleMi<M>
{
#if !defined(EMBREE_COMPACT_POLYS)
  using embree::TriangleMi<M>::v0_;
  using embree::TriangleMi<M>::v1_;
  using embree::TriangleMi<M>::v2_;
#endif
  using embree::TriangleMi<M>::geomIDs;
  using embree::TriangleMi<M>::primIDs;
  using embree::TriangleMi<M>::geomID;
  using embree::TriangleMi<M>::primID;
  using embree::TriangleMi<M>::valid;

  /*! Loads vertex `vid` of the triangle in slot `index` */
  template<int vid>
  __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
  {
#if defined(EMBREE_COMPACT_POLYS)
    /* look the vertex up through the mesh's index buffer */
    const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
    const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
    return (Vec3f) mesh->vertices[0][tri.v[vid]];
#else
    /* use the stored offset into the scene's per-geometry float array */
    const vuint<M>& offsets = getVertexOffset<vid>();
    const float* base = scene->vertices[geomID(index)];
    return (Vec3f&) base[offsets[index]];
#endif
  }

  /*! Loads vertex `vid` at time steps itime and itime+1 and interpolates with ftime */
  template<int vid, typename T>
  __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
  {
#if defined(EMBREE_COMPACT_POLYS)
    const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
    const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
    const Vec3fa atT0 = mesh->vertices[itime+0][tri.v[vid]];
    const Vec3fa atT1 = mesh->vertices[itime+1][tri.v[vid]];
#else
    const vuint<M>& offsets = getVertexOffset<vid>();
    const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
    const float* base0 = (const float*) mesh->vertexPtr(0,itime+0);
    const float* base1 = (const float*) mesh->vertexPtr(0,itime+1);
    const Vec3fa atT0 = Vec3fa::loadu(base0+offsets[index]);
    const Vec3fa atT1 = Vec3fa::loadu(base1+offsets[index]);
#endif
    const Vec3<T> p0(atT0.x,atT0.y,atT0.z);
    const Vec3<T> p1(atT1.x,atT1.y,atT1.z);
    return lerp(p0,p1,ftime);
  }

  /*! Per-ray variant: each active ray lane uses its own time step itime[i].
   *  Lanes that are not set in `valid` are left unassigned in p0/p1. */
  template<int vid, int K, typename T>
  __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
  {
    Vec3<T> p0, p1;
    const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));

    /* visit every set bit of the mask, clearing it after use */
    size_t pending = movemask(valid);
    while (pending)
    {
      const size_t i = bsf(pending);
#if defined(EMBREE_COMPACT_POLYS)
      const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
      const Vec3fa atT0 = mesh->vertices[itime[i]+0][tri.v[vid]];
      const Vec3fa atT1 = mesh->vertices[itime[i]+1][tri.v[vid]];
#else
      const vuint<M>& offsets = getVertexOffset<vid>();
      const float* base0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
      const float* base1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
      const Vec3fa atT0 = Vec3fa::loadu(base0+offsets[index]);
      const Vec3fa atT1 = Vec3fa::loadu(base1+offsets[index]);
#endif
      p0.x[i] = atT0.x; p0.y[i] = atT0.y; p0.z[i] = atT0.z;
      p1.x[i] = atT1.x; p1.y[i] = atT1.y; p1.z[i] = atT1.z;
      pending = btc(pending,i);
    }
    return (T(one)-ftime)*p0 + ftime*p1;
  }

  /*! Three vertices of one triangle, each loaded as a 4-wide float vector */
  struct Triangle {
    vfloat4 v0,v1,v2;
  };

#if defined(EMBREE_COMPACT_POLYS)

  /*! Loads slot i through the mesh's index buffer; an empty slot
   *  (primitive ID -1) yields three zero vectors */
  __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
  {
    const unsigned int geom = geomIDs[i];
    const unsigned int prim = primIDs[i];
    if (unlikely(prim == -1)) return { zero, zero, zero };

    const TriangleMesh* mesh = scene->get<TriangleMesh>(geom);
    const TriangleMesh::Triangle& tri = mesh->triangle(prim);
    const vfloat4 p0 = (vfloat4) mesh->vertices0[tri.v[0]];
    const vfloat4 p1 = (vfloat4) mesh->vertices0[tri.v[1]];
    const vfloat4 p2 = (vfloat4) mesh->vertices0[tri.v[2]];
    return { p0, p1, p2 };
  }

  /*! Same for time step itime of the given mesh */
  __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
  {
    const unsigned int prim = primIDs[i];
    if (unlikely(prim == -1)) return { zero, zero, zero };

    const TriangleMesh::Triangle& tri = mesh->triangle(prim);
    const vfloat4 p0 = (vfloat4) mesh->vertices[itime][tri.v[0]];
    const vfloat4 p1 = (vfloat4) mesh->vertices[itime][tri.v[1]];
    const vfloat4 p2 = (vfloat4) mesh->vertices[itime][tri.v[2]];
    return { p0, p1, p2 };
  }

#else

  /*! Loads slot i via the stored offsets into the scene's per-geometry float array */
  __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
  {
    const float* base = scene->vertices[geomID(i)];
    const vfloat4 p0 = vfloat4::loadu(base + v0_[i]);
    const vfloat4 p1 = vfloat4::loadu(base + v1_[i]);
    const vfloat4 p2 = vfloat4::loadu(base + v2_[i]);
    return { p0, p1, p2 };
  }

  /*! Same for time step itime of the given mesh */
  __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
  {
    const float* base = (const float*) mesh->vertexPtr(0,itime);
    const vfloat4 p0 = vfloat4::loadu(base + v0_[i]);
    const vfloat4 p1 = vfloat4::loadu(base + v1_[i]);
    const vfloat4 p2 = vfloat4::loadu(base + v2_[i]);
    return { p0, p1, p2 };
  }

#endif

  /* Gather the triangles */
  __forceinline void gather(Vec3vf<M>& p0, Vec3vf<M>& p1, Vec3vf<M>& p2, const Scene* const scene) const;

  /*! Gathers the vertices of the triangle in slot `index` for K rays with
   *  per-ray times. When every active ray falls into the same time step as
   *  the first active one, the single-time-step loader is used; otherwise
   *  the per-lane loader runs. */
  template<int K>
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
  __noinline
#else
  __forceinline
#endif
  void gather(const vbool<K>& valid,
              Vec3vf<K>& p0,
              Vec3vf<K>& p1,
              Vec3vf<K>& p2,
              const size_t index,
              const Scene* const scene,
              const vfloat<K>& time) const
  {
    const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));

    vfloat<K> ftime;
    const vint<K> itime = mesh->timeSegment<K>(time, ftime);

    const size_t first = bsf(movemask(valid));
    if (likely(all(valid,itime[first] == itime)))
    {
      p0 = getVertex<0>(index, scene, itime[first], ftime);
      p1 = getVertex<1>(index, scene, itime[first], ftime);
      p2 = getVertex<2>(index, scene, itime[first], ftime);
    }
    else
    {
      p0 = getVertex<0,K>(valid, index, scene, itime, ftime);
      p1 = getVertex<1,K>(valid, index, scene, itime, ftime);
      p2 = getVertex<2,K>(valid, index, scene, itime, ftime);
    }
  }

  /*! Gathers all slots at time step itime of `mesh` (defined per M outside the struct) */
  __forceinline void gather(Vec3vf<M>& p0,
                            Vec3vf<M>& p1,
                            Vec3vf<M>& p2,
                            const TriangleMesh* mesh,
                            const Scene *const scene,
                            const int itime) const;

  /*! Gathers all slots at a continuous time (defined per M outside the struct) */
  __forceinline void gather(Vec3vf<M>& p0,
                            Vec3vf<M>& p1,
                            Vec3vf<M>& p2,
                            const Scene *const scene,
                            const float time) const;

#if !defined(EMBREE_COMPACT_POLYS)
  /*! Selects the stored offset vector for vertex N (defined per M outside the struct) */
  template<int N> const vuint<M>& getVertexOffset() const;
#endif
};
#if !defined(EMBREE_COMPACT_POLYS)
/* getVertexOffset<N> for M=4: N selects one of the three stored offset vectors */
template<> template<>
__forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<0>() const
{
  return v0_;
}

template<> template<>
__forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<1>() const
{
  return v1_;
}

template<> template<>
__forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<2>() const
{
  return v2_;
}
#endif
/* Loads the four slots one by one and transposes them so that p0/p1/p2 each
   hold the x, y and z components of one vertex across all four triangles. */
template<>
__forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
                                         Vec3vf4& p1,
                                         Vec3vf4& p2,
                                         const Scene* const scene) const
{
  const Triangle slot[4] = {
    loadTriangle(0,scene),
    loadTriangle(1,scene),
    loadTriangle(2,scene),
    loadTriangle(3,scene)
  };

  transpose(slot[0].v0,slot[1].v0,slot[2].v0,slot[3].v0,p0.x,p0.y,p0.z);
  transpose(slot[0].v1,slot[1].v1,slot[2].v1,slot[3].v1,p1.x,p1.y,p1.z);
  transpose(slot[0].v2,slot[1].v2,slot[2].v2,slot[3].v2,p2.x,p2.y,p2.z);
}
/* Same as the scene-only gather, but reads time step itime of the given
   mesh. The scene argument is not used by this body. */
template<>
__forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
                                         Vec3vf4& p1,
                                         Vec3vf4& p2,
                                         const TriangleMesh* mesh,
                                         const Scene *const scene,
                                         const int itime) const
{
  const Triangle slot[4] = {
    loadTriangle(0,itime,mesh),
    loadTriangle(1,itime,mesh),
    loadTriangle(2,itime,mesh),
    loadTriangle(3,itime,mesh)
  };

  transpose(slot[0].v0,slot[1].v0,slot[2].v0,slot[3].v0,p0.x,p0.y,p0.z);
  transpose(slot[0].v1,slot[1].v1,slot[2].v1,slot[3].v1,p1.x,p1.y,p1.z);
  transpose(slot[0].v2,slot[1].v2,slot[2].v2,slot[3].v2,p2.x,p2.y,p2.z);
}
/* Gathers the four triangles at a continuous time: the two time steps
   enclosing `time` are gathered and blended with the fractional part
   returned by timeSegment. */
template<>
__forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
                                         Vec3vf4& p1,
                                         Vec3vf4& p2,
                                         const Scene *const scene,
                                         const float time) const
{
  /* slot 0 selects the mesh: in mblur mode all geometries are identical */
  const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(0));

  float ftime;
  const int itime = mesh->timeSegment(time, ftime);

  Vec3vf4 lo0,lo1,lo2;
  gather(lo0,lo1,lo2,mesh,scene,itime);

  Vec3vf4 hi0,hi1,hi2;
  gather(hi0,hi1,hi2,mesh,scene,itime+1);

  p0 = lerp(lo0,hi0,vfloat4(ftime));
  p1 = lerp(lo1,hi1,vfloat4(ftime));
  p2 = lerp(lo2,hi2,vfloat4(ftime));
}
}
/* Out-of-class definition of the static `type` member declared inside TriangleMi<M> */
template<int M>
typename TriangleMi<M>::Type TriangleMi<M>::type;
/* Alias for the 4-wide instantiation */
typedef TriangleMi<4> Triangle4i;
}

View File

@@ -0,0 +1,336 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "trianglei.h"
#include "triangle_intersector_moeller.h"
#include "triangle_intersector_pluecker.h"
namespace embree
{
namespace isa
{
/*! Intersects M indexed triangles with 1 ray, using the Moeller-Trumbore intersector */
template<int M, bool filter>
struct TriangleMiIntersector1Moeller
{
  typedef TriangleMi<M> Primitive;
  typedef MoellerTrumboreIntersector1<M> Precalculations;

  /*! Gathers the block's vertices and runs the intersector with the closest-hit epilog */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    /* named copies of the ID vectors stay alive for the whole call */
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    pre.intersect(ray,p0,p1,p2,Intersect1EpilogM<M,filter>(ray,context,geomIDs,primIDs));
  }

  /*! Gathers the block's vertices and runs the intersector with the occlusion epilog */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    return pre.intersect(ray,p0,p1,p2,Occluded1EpilogM<M,filter>(ray,context,geomIDs,primIDs));
  }

  /*! Forwards to the generic per-primitive point query */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersects M indexed triangles with K rays, using the Moeller-Trumbore intersector */
template<int M, int K, bool filter>
struct TriangleMiIntersectorKMoeller
{
  typedef TriangleMi<M> Primitive;
  typedef MoellerTrumboreIntersectorK<M,K> Precalculations;

  /*! Packet closest-hit: every leading valid triangle of the block is
   *  tested against all rays enabled in valid_i */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    const Scene* scene = context->scene;
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
      const Vec3vf<K> p0 = tri.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = tri.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = tri.template getVertex<2>(i,scene);
      pre.intersectK(valid_i,ray,p0,p1,p2,IntersectKEpilogM<M,K,filter>(ray,context,geomIDs,primIDs,i));
    }
  }

  /*! Packet occlusion: `remaining` is handed to both the intersector and
   *  the epilog; the loop stops once it is empty and its complement is
   *  returned */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    vbool<K> remaining = valid_i;
    const Scene* scene = context->scene;
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
      const Vec3vf<K> p0 = tri.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = tri.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = tri.template getVertex<2>(i,scene);
      pre.intersectK(remaining,ray,p0,p1,p2,OccludedKEpilogM<M,K,filter>(remaining,ray,context,geomIDs,primIDs,i));
      if (none(remaining)) break;
    }
    return !remaining;
  }

  /*! Closest-hit for the k'th ray of the packet against all M triangles at once */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    pre.intersect(ray,k,p0,p1,p2,Intersect1KEpilogM<M,K,filter>(ray,k,context,geomIDs,primIDs));
  }

  /*! Occlusion for the k'th ray of the packet against all M triangles at once */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    return pre.intersect(ray,k,p0,p1,p2,Occluded1KEpilogM<M,K,filter>(ray,k,context,geomIDs,primIDs));
  }
};
/*! Intersects M indexed triangles with 1 ray, using the Pluecker intersector */
template<int M, bool filter>
struct TriangleMiIntersector1Pluecker
{
  typedef TriangleMi<M> Primitive;
  typedef PlueckerIntersector1<M> Precalculations;

  /*! Gathers the block's vertices and runs the intersector with the closest-hit epilog */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    /* named copies of the ID vectors stay alive for the whole call */
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    pre.intersect(ray,p0,p1,p2,Intersect1EpilogM<M,filter>(ray,context,geomIDs,primIDs));
  }

  /*! Gathers the block's vertices and runs the intersector with the occlusion epilog */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    return pre.intersect(ray,p0,p1,p2,Occluded1EpilogM<M,filter>(ray,context,geomIDs,primIDs));
  }

  /*! Forwards to the generic per-primitive point query */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersects M indexed triangles with K rays, using the Pluecker intersector */
template<int M, int K, bool filter>
struct TriangleMiIntersectorKPluecker
{
  typedef TriangleMi<M> Primitive;
  typedef PlueckerIntersectorK<M,K> Precalculations;

  /*! Packet closest-hit: every leading valid triangle of the block is
   *  tested against all rays enabled in valid_i */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    const Scene* scene = context->scene;
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
      const Vec3vf<K> p0 = tri.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = tri.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = tri.template getVertex<2>(i,scene);
      pre.intersectK(valid_i,ray,p0,p1,p2,IntersectKEpilogM<M,K,filter>(ray,context,geomIDs,primIDs,i));
    }
  }

  /*! Packet occlusion: `remaining` is handed to both the intersector and
   *  the epilog; the loop stops once it is empty and its complement is
   *  returned */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    vbool<K> remaining = valid_i;
    const Scene* scene = context->scene;
    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
      const Vec3vf<K> p0 = tri.template getVertex<0>(i,scene);
      const Vec3vf<K> p1 = tri.template getVertex<1>(i,scene);
      const Vec3vf<K> p2 = tri.template getVertex<2>(i,scene);
      pre.intersectK(remaining,ray,p0,p1,p2,OccludedKEpilogM<M,K,filter>(remaining,ray,context,geomIDs,primIDs,i));
      if (none(remaining)) break;
    }
    return !remaining;
  }

  /*! Closest-hit for the k'th ray of the packet against all M triangles at once */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    pre.intersect(ray,k,p0,p1,p2,Intersect1KEpilogM<M,K,filter>(ray,k,context,geomIDs,primIDs));
  }

  /*! Occlusion for the k'th ray of the packet against all M triangles at once */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0, p1, p2;
    tri.gather(p0,p1,p2,context->scene);

    const vuint<M> geomIDs = tri.geomID();
    const vuint<M> primIDs = tri.primID();
    return pre.intersect(ray,k,p0,p1,p2,Occluded1KEpilogM<M,K,filter>(ray,k,context,geomIDs,primIDs));
  }
};
/*! Intersector for a block of M indexed motion blur triangles (TriangleMi) and 1 ray, built on MoellerTrumboreIntersector1 */
template<int M, bool filter>
struct TriangleMiMBIntersector1Moeller
{
  typedef TriangleMi<M> Primitive;
  typedef MoellerTrumboreIntersector1<M> Precalculations;

  /*! Gathers the vertices of the block for the time of the ray and intersects them; hits are reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time());

    pre.intersect(ray,p0,p1,p2,
                  Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Gathers the vertices of the block for the time of the ray and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time());

    return pre.intersect(ray,p0,p1,p2,
                         Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersector for a block of M indexed motion blur triangles (TriangleMi) and K rays, built on MoellerTrumboreIntersectorK */
template<int M, int K, bool filter>
struct TriangleMiMBIntersectorKMoeller
{
  typedef TriangleMi<M> Primitive;
  typedef MoellerTrumboreIntersectorK<M,K> Precalculations;

  /*! Intersects the K rays selected by valid_i with the triangles of the block, one triangle per iteration. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    /* the loop ends at the first invalid slot */
    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);

      /* vertices of triangle i, gathered with the per-ray times */
      Vec3vf<K> p0;
      Vec3vf<K> p1;
      Vec3vf<K> p2;
      tri.template gather<K>(valid_i,p0,p1,p2,i,context->scene,ray.time());

      pre.intersectK(valid_i,ray,p0,p1,p2,
                     IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
    }
  }

  /*! Tests the K rays selected by valid_i against the triangles of the block and returns the complement of the mask that remains after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    /* working mask handed to the intersector and to the occlusion epilog */
    vbool<K> active = valid_i;

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(active),K);

      /* vertices of triangle i, gathered with the per-ray times */
      Vec3vf<K> p0;
      Vec3vf<K> p1;
      Vec3vf<K> p2;
      tri.template gather<K>(valid_i,p0,p1,p2,i,context->scene,ray.time());

      pre.intersectK(active,ray,p0,p1,p2,
                     OccludedKEpilogM<M,K,filter>(active,ray,context,tri.geomID(),tri.primID(),i));

      /* nothing left to test */
      if (none(active)) break;
    }
    return !active;
  }

  /*! Intersects ray k of the packet with all M triangles, gathered for the time of that ray; hits are reported through Intersect1KEpilogM. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time()[k]);

    pre.intersect(ray,k,p0,p1,p2,
                  Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }

  /*! Tests ray k of the packet against all M triangles, gathered for the time of that ray, and returns the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time()[k]);

    return pre.intersect(ray,k,p0,p1,p2,
                         Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }
};
/*! Intersector for a block of M indexed motion blur triangles (TriangleMi) and 1 ray, built on PlueckerIntersector1 */
template<int M, bool filter>
struct TriangleMiMBIntersector1Pluecker
{
  typedef TriangleMi<M> Primitive;
  typedef PlueckerIntersector1<M> Precalculations;

  /*! Gathers the vertices of the block for the time of the ray and intersects them; hits are reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time());

    pre.intersect(ray,p0,p1,p2,
                  Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Gathers the vertices of the block for the time of the ray and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time());

    return pre.intersect(ray,p0,p1,p2,
                         Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersector for a block of M indexed motion blur triangles (TriangleMi) and K rays, built on PlueckerIntersectorK */
template<int M, int K, bool filter>
struct TriangleMiMBIntersectorKPluecker
{
  typedef TriangleMi<M> Primitive;
  typedef PlueckerIntersectorK<M,K> Precalculations;

  /*! Intersects the K rays selected by valid_i with the triangles of the block, one triangle per iteration. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    /* the loop ends at the first invalid slot */
    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);

      /* vertices of triangle i, gathered with the per-ray times */
      Vec3vf<K> p0;
      Vec3vf<K> p1;
      Vec3vf<K> p2;
      tri.template gather<K>(valid_i,p0,p1,p2,i,context->scene,ray.time());

      pre.intersectK(valid_i,ray,p0,p1,p2,
                     IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
    }
  }

  /*! Tests the K rays selected by valid_i against the triangles of the block and returns the complement of the mask that remains after testing. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    /* working mask handed to the intersector and to the occlusion epilog */
    vbool<K> active = valid_i;

    for (size_t i=0; i<Primitive::max_size() && tri.valid(i); i++)
    {
      STAT3(shadow.trav_prims,1,popcnt(active),K);

      /* vertices of triangle i, gathered with the per-ray times */
      Vec3vf<K> p0;
      Vec3vf<K> p1;
      Vec3vf<K> p2;
      tri.template gather<K>(valid_i,p0,p1,p2,i,context->scene,ray.time());

      pre.intersectK(active,ray,p0,p1,p2,
                     OccludedKEpilogM<M,K,filter>(active,ray,context,tri.geomID(),tri.primID(),i));

      /* nothing left to test */
      if (none(active)) break;
    }
    return !active;
  }

  /*! Intersects ray k of the packet with all M triangles, gathered for the time of that ray; hits are reported through Intersect1KEpilogM. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time()[k]);

    pre.intersect(ray,k,p0,p1,p2,
                  Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }

  /*! Tests ray k of the packet against all M triangles, gathered for the time of that ray, and returns the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const TriangleMi<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    Vec3vf<M> p0;
    Vec3vf<M> p1;
    Vec3vf<M> p2;
    tri.gather(p0,p1,p2,context->scene,ray.time()[k]);

    return pre.intersect(ray,k,p0,p1,p2,
                         Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }
};
}
}

View File

@@ -0,0 +1,157 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/* Block of M triangles whose vertices are stored in struct of array layout */
template <int M>
struct TriangleMv
{
public:
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* Maximum number of triangles a block can store */
  static __forceinline size_t max_size() { return M; }

  /* Number of blocks required for N primitives (N divided by max_size(), rounded up) */
  static __forceinline size_t blocks(size_t N) {
    return (N+max_size()-1)/max_size();
  }

public:

  /* Default constructor, leaves all members uninitialized */
  __forceinline TriangleMv() {}

  /* Construction from the three vertex sets and the IDs */
  __forceinline TriangleMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
    : v0(v0), v1(v1), v2(v2), geomIDs(geomIDs), primIDs(primIDs) {}

  /* Mask of valid triangles: a slot is valid when its geometry ID differs from -1 */
  __forceinline vbool<M> valid() const {
    return geomIDs != vuint<M>(-1);
  }

  /* True if triangle i is valid */
  __forceinline bool valid(const size_t i) const {
    assert(i<M);
    return geomIDs[i] != -1;
  }

  /* Number of stored triangles */
  __forceinline size_t size() const {
    return bsf(~movemask(valid()));
  }

  /* Geometry IDs of the block, or of triangle i */
  __forceinline       vuint<M>& geomID()       { return geomIDs; }
  __forceinline const vuint<M>& geomID() const { return geomIDs; }
  __forceinline unsigned int geomID(const size_t i) const {
    assert(i<M);
    return geomIDs[i];
  }

  /* Primitive IDs of the block, or of triangle i */
  __forceinline       vuint<M>& primID()       { return primIDs; }
  __forceinline const vuint<M>& primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const {
    assert(i<M);
    return primIDs[i];
  }

  /* Bounding box over the valid triangles of the block */
  __forceinline BBox3fa bounds() const
  {
    const vbool<M> active = valid();
    const vfloat<M> plus_inf(pos_inf);
    const vfloat<M> minus_inf(neg_inf);

    /* invalid lanes are replaced by +inf so that they never win the minimum */
    Vec3vf<M> lo = min(v0,v1,v2);
    lo.x = select(active,lo.x,plus_inf);
    lo.y = select(active,lo.y,plus_inf);
    lo.z = select(active,lo.z,plus_inf);

    /* invalid lanes are replaced by -inf so that they never win the maximum */
    Vec3vf<M> hi = max(v0,v1,v2);
    hi.x = select(active,hi.x,minus_inf);
    hi.y = select(active,hi.y,minus_inf);
    hi.z = select(active,hi.z,minus_inf);

    const Vec3fa lower(reduce_min(lo.x),reduce_min(lo.y),reduce_min(lo.z));
    const Vec3fa upper(reduce_max(hi.x),reduce_max(hi.y),reduce_max(hi.z));
    return BBox3fa(lower,upper);
  }

  /* Copies src to dst using the store_nt operation of each member vector */
  __forceinline static void store_nt(TriangleMv* dst, const TriangleMv& src)
  {
    /* first vertex */
    vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
    vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
    vfloat<M>::store_nt(&dst->v0.z,src.v0.z);

    /* second vertex */
    vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
    vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
    vfloat<M>::store_nt(&dst->v1.z,src.v1.z);

    /* third vertex */
    vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
    vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
    vfloat<M>::store_nt(&dst->v2.z,src.v2.z);

    /* IDs */
    vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
    vuint<M>::store_nt(&dst->primIDs,src.primIDs);
  }

  /* Fills the block with up to M triangles taken from prims[begin..end) and advances begin by the number of triangles consumed */
  __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
  {
    /* slots that receive no triangle keep ID -1 and zero vertices */
    vuint<M> geom_ids = -1;
    vuint<M> prim_ids = -1;
    Vec3vf<M> a = zero;
    Vec3vf<M> b = zero;
    Vec3vf<M> c = zero;

    size_t slot = 0;
    while (slot<M && begin<end)
    {
      const PrimRef& ref = prims[begin];
      const unsigned gid = ref.geomID();
      const unsigned pid = ref.primID();

      /* look up the triangle and its three vertices in the mesh */
      const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(gid);
      const TriangleMesh::Triangle& tri = mesh->triangle(pid);
      const Vec3fa& p0 = mesh->vertex(tri.v[0]);
      const Vec3fa& p1 = mesh->vertex(tri.v[1]);
      const Vec3fa& p2 = mesh->vertex(tri.v[2]);

      geom_ids[slot] = gid;
      prim_ids[slot] = pid;
      a.x[slot] = p0.x; a.y[slot] = p0.y; a.z[slot] = p0.z;
      b.x[slot] = p1.x; b.y[slot] = p1.y; b.z[slot] = p1.z;
      c.x[slot] = p2.x; c.y[slot] = p2.y; c.z[slot] = p2.z;

      slot++;
      begin++;
    }
    TriangleMv::store_nt(this,TriangleMv(a,b,c,geom_ids,prim_ids));
  }

  /* Re-reads the vertices of the stored triangles from mesh, rebuilds the block in place and returns the bounds of the re-read vertices */
  __forceinline BBox3fa update(TriangleMesh* mesh)
  {
    BBox3fa box = empty;
    vuint<M> geom_ids = -1;
    vuint<M> prim_ids = -1;
    Vec3vf<M> a = zero;
    Vec3vf<M> b = zero;
    Vec3vf<M> c = zero;

    /* the first slot with primitive ID -1 ends the block */
    for (size_t i=0; i<M && primID(i) != -1; i++)
    {
      const unsigned gid = geomID(i);
      const unsigned pid = primID(i);

      const TriangleMesh::Triangle& tri = mesh->triangle(pid);
      const Vec3fa p0 = mesh->vertex(tri.v[0]);
      const Vec3fa p1 = mesh->vertex(tri.v[1]);
      const Vec3fa p2 = mesh->vertex(tri.v[2]);
      box.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));

      geom_ids[i] = gid;
      prim_ids[i] = pid;
      a.x[i] = p0.x; a.y[i] = p0.y; a.z[i] = p0.z;
      b.x[i] = p1.x; b.y[i] = p1.y; b.z[i] = p1.z;
      c.x[i] = p2.x; c.y[i] = p2.y; c.z[i] = p2.z;
    }

    /* overwrite this block with the refreshed data */
    new (this) TriangleMv(a,b,c,geom_ids,prim_ids);
    return box;
  }

public:
  Vec3vf<M> v0;      // 1st vertex of the triangles
  Vec3vf<M> v1;      // 2nd vertex of the triangles
  Vec3vf<M> v2;      // 3rd vertex of the triangles
private:
  vuint<M> geomIDs;  // geometry ID
  vuint<M> primIDs;  // primitive ID
};
/* Definition of the static type descriptor of TriangleMv<M> */
template<int M>
typename TriangleMv<M>::Type TriangleMv<M>::type;

/* Block of 4 triangles */
using Triangle4v = TriangleMv<4>;
}

View File

@@ -0,0 +1,206 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "triangle_intersector_pluecker.h"
#include "triangle_intersector_moeller.h"
#include "triangle_intersector_woop.h"
namespace embree
{
namespace isa
{
/*! Intersector for a block of M triangles with stored vertices (TriangleMv) and 1 ray, built on MoellerTrumboreIntersector1 */
template<int M, bool filter>
struct TriangleMvIntersector1Moeller
{
  typedef TriangleMv<M> Primitive;
  typedef MoellerTrumboreIntersector1<M> Precalculations;

  /*! Intersects the ray with the M triangles of the block; hits are reported through Intersect1EpilogM.
   *  No UVIdentity mapper is passed here, unlike in the Pluecker variant of this intersector. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    pre.intersect(ray,p0,p1,p2,
                  Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Tests the ray against the M triangles of the block and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    return pre.intersect(ray,p0,p1,p2,
                         Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersector for a block of M triangles with stored vertices (TriangleMv) and 1 ray, built on WoopIntersector1 with WoopPrecalculations1 */
template<int M, bool filter>
struct TriangleMvIntersector1Woop
{
  typedef TriangleMv<M> Primitive;
  typedef WoopIntersector1<M> intersec;
  typedef WoopPrecalculations1<M> Precalculations;

  /*! Intersects the ray with the M triangles of the block; hits are reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    intersec::intersect(ray,pre,p0,p1,p2,
                        Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Tests the ray against the M triangles of the block and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    return intersec::intersect(ray,pre,p0,p1,p2,
                               Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersector for a block of M triangles with stored vertices (TriangleMv) and K rays, built on MoellerTrumboreIntersectorK */
template<int M, int K, bool filter>
struct TriangleMvIntersectorKMoeller
{
  typedef TriangleMv<M> Primitive;
  typedef MoellerTrumboreIntersectorK<M,K> Precalculations;

  /*! Intersects the K rays selected by valid_i with the triangles of the block.
   *  Each valid triangle is broadcast to all K lanes and tested in turn; the loop
   *  stops at the first invalid slot. Hits are reported through IntersectKEpilogM. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    for (size_t i=0; i<M; i++)
    {
      if (!tri.valid(i)) break;
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
      const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
      pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
    }
  }

  /*! Tests the K rays selected by valid_i against the triangles of the block.
   *  valid0 starts as a copy of valid_i and is handed to both the intersector and
   *  OccludedKEpilogM; the loop ends early once it is empty. Returns !valid0. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<M; i++)
    {
      if (!tri.valid(i)) break;
      /* report the rays actually passed to the test (valid0), consistent with the
         motion blur K-ray intersectors; valid_i never shrinks and would
         overstate the active-ray count */
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
      const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
      pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
      if (none(valid0)) break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with all M triangles at once; hits are reported through Intersect1KEpilogM. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M
  }

  /*! Tests ray k of the packet against all M triangles at once and returns the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);
    return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M
  }
};
/*! Intersector for a block of M triangles with stored vertices (TriangleMv) and 1 ray, built on PlueckerIntersector1 */
template<int M, bool filter>
struct TriangleMvIntersector1Pluecker
{
  typedef TriangleMv<M> Primitive;
  typedef PlueckerIntersector1<M> Precalculations;

  /*! Intersects the ray with the M triangles of the block, passing a UVIdentity mapper; hits are reported through Intersect1EpilogM. */
  static __forceinline void intersect(Precalculations& pre, RayHit& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    pre.intersect(ray,p0,p1,p2,UVIdentity<M>(),
                  Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Tests the ray against the M triangles of the block, passing a UVIdentity mapper, and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    const Vec3vf<M>& p0 = tri.v0;
    const Vec3vf<M>& p1 = tri.v1;
    const Vec3vf<M>& p2 = tri.v2;
    return pre.intersect(ray,p0,p1,p2,UVIdentity<M>(),
                         Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersector for a block of M triangles with stored vertices (TriangleMv) and K rays, built on PlueckerIntersectorK */
template<int M, int K, bool filter>
struct TriangleMvIntersectorKPluecker
{
  typedef TriangleMv<M> Primitive;
  typedef PlueckerIntersectorK<M,K> Precalculations;

  /*! Intersects the K rays selected by valid_i with the triangles of the block.
   *  Each valid triangle is broadcast to all K lanes and tested in turn; the loop
   *  stops at the first invalid slot. Hits are reported through IntersectKEpilogM. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    for (size_t i=0; i<M; i++)
    {
      if (!tri.valid(i)) break;
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
      const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
      pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
    }
  }

  /*! Tests the K rays selected by valid_i against the triangles of the block.
   *  valid0 starts as a copy of valid_i and is handed to both the intersector and
   *  OccludedKEpilogM; the loop ends early once it is empty. Returns !valid0. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive& tri)
  {
    vbool<K> valid0 = valid_i;
    for (size_t i=0; i<M; i++)
    {
      if (!tri.valid(i)) break;
      /* report the rays actually passed to the test (valid0), consistent with the
         motion blur K-ray intersectors; valid_i never shrinks and would
         overstate the active-ray count */
      STAT3(shadow.trav_prims,1,popcnt(valid0),K);
      const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
      const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
      const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
      pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
      if (none(valid0)) break;
    }
    return !valid0;
  }

  /*! Intersects ray k of the packet with all M triangles at once; hits are reported through Intersect1KEpilogM. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(normal.trav_prims,1,1,1);
    pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }

  /*! Tests ray k of the packet against all M triangles at once and returns the result of the intersector run with Occluded1KEpilogM. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);
    return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }
};
}
}

View File

@@ -0,0 +1,200 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primitive.h"
namespace embree
{
/* Block of M motion blur triangles in struct of array layout: the vertices at t0 plus the difference vectors to t1 */
template<int M>
struct TriangleMvMB
{
public:
  struct Type : public PrimitiveType
  {
    const char* name() const;
    size_t sizeActive(const char* This) const;
    size_t sizeTotal(const char* This) const;
    size_t getBytes(const char* This) const;
  };
  static Type type;

public:

  /* primitive supports single time segments */
  static const bool singleTimeSegment = true;

  /* Maximum number of triangles a block can store */
  static __forceinline size_t max_size() { return M; }

  /* Number of blocks required for N primitives (N divided by max_size(), rounded up) */
  static __forceinline size_t blocks(size_t N) {
    return (N+max_size()-1)/max_size();
  }

public:

  /* Default constructor, leaves all members uninitialized */
  __forceinline TriangleMvMB() {}

  /* Construction from the vertices at t0 (a0,b0,c0) and at t1 (a1,b1,c1) and the IDs;
     the t0 vertices and the t1-t0 differences are what gets stored */
  __forceinline TriangleMvMB(const Vec3vf<M>& a0, const Vec3vf<M>& a1,
                             const Vec3vf<M>& b0, const Vec3vf<M>& b1,
                             const Vec3vf<M>& c0, const Vec3vf<M>& c1,
                             const vuint<M>& geomIDs, const vuint<M>& primIDs)
    : v0(a0), v1(b0), v2(c0), dv0(a1-a0), dv1(b1-b0), dv2(c1-c0), geomIDs(geomIDs), primIDs(primIDs) {}

  /* Mask of valid triangles: a slot is valid when its geometry ID differs from -1 */
  __forceinline vbool<M> valid() const {
    return geomIDs != vuint<M>(-1);
  }

  /* True if triangle i is valid */
  __forceinline bool valid(const size_t i) const {
    assert(i<M);
    return geomIDs[i] != -1;
  }

  /* Number of stored triangles */
  __forceinline size_t size() const {
    return bsf(~movemask(valid()));
  }

  /* Geometry IDs of the block, or of triangle i */
  __forceinline       vuint<M>& geomID()       { return geomIDs; }
  __forceinline const vuint<M>& geomID() const { return geomIDs; }
  __forceinline unsigned int geomID(const size_t i) const {
    assert(i<M);
    return geomIDs[i];
  }

  /* Primitive IDs of the block, or of triangle i */
  __forceinline       vuint<M>& primID()       { return primIDs; }
  __forceinline const vuint<M>& primID() const { return primIDs; }
  __forceinline unsigned int primID(const size_t i) const {
    assert(i<M);
    return primIDs[i];
  }

  /* Bounding box over the valid triangles at t0 */
  __forceinline BBox3fa bounds0() const
  {
    const vbool<M> active = valid();
    const vfloat<M> plus_inf(pos_inf);
    const vfloat<M> minus_inf(neg_inf);

    /* invalid lanes are replaced by +inf so that they never win the minimum */
    Vec3vf<M> lo = min(v0,v1,v2);
    lo.x = select(active,lo.x,plus_inf);
    lo.y = select(active,lo.y,plus_inf);
    lo.z = select(active,lo.z,plus_inf);

    /* invalid lanes are replaced by -inf so that they never win the maximum */
    Vec3vf<M> hi = max(v0,v1,v2);
    hi.x = select(active,hi.x,minus_inf);
    hi.y = select(active,hi.y,minus_inf);
    hi.z = select(active,hi.z,minus_inf);

    const Vec3fa lower(reduce_min(lo.x),reduce_min(lo.y),reduce_min(lo.z));
    const Vec3fa upper(reduce_max(hi.x),reduce_max(hi.y),reduce_max(hi.z));
    return BBox3fa(lower,upper);
  }

  /* Bounding box over the valid triangles at t1 */
  __forceinline BBox3fa bounds1() const
  {
    /* vertices at t1 are the t0 vertices plus the stored differences */
    const Vec3vf<M> q0 = v0+dv0;
    const Vec3vf<M> q1 = v1+dv1;
    const Vec3vf<M> q2 = v2+dv2;

    const vbool<M> active = valid();
    const vfloat<M> plus_inf(pos_inf);
    const vfloat<M> minus_inf(neg_inf);

    Vec3vf<M> lo = min(q0,q1,q2);
    lo.x = select(active,lo.x,plus_inf);
    lo.y = select(active,lo.y,plus_inf);
    lo.z = select(active,lo.z,plus_inf);

    Vec3vf<M> hi = max(q0,q1,q2);
    hi.x = select(active,hi.x,minus_inf);
    hi.y = select(active,hi.y,minus_inf);
    hi.z = select(active,hi.z,minus_inf);

    const Vec3fa lower(reduce_min(lo.x),reduce_min(lo.y),reduce_min(lo.z));
    const Vec3fa upper(reduce_max(hi.x),reduce_max(hi.y),reduce_max(hi.z));
    return BBox3fa(lower,upper);
  }

  /* Linear bounds of the primitive, built from the boxes at t0 and t1 */
  __forceinline LBBox3fa linearBounds() const {
    return LBBox3fa(bounds0(),bounds1());
  }

  /* Fills the block with up to M triangles from prims[begin..end), reading the vertices at
     time steps itime and itime+1; advances begin and returns the bounds of both steps */
  __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
  {
    /* slots that receive no triangle keep ID -1 and zero vertices */
    vuint<M> geom_ids = -1;
    vuint<M> prim_ids = -1;
    Vec3vf<M> a_t0 = zero;
    Vec3vf<M> b_t0 = zero;
    Vec3vf<M> c_t0 = zero;
    Vec3vf<M> a_t1 = zero;
    Vec3vf<M> b_t1 = zero;
    Vec3vf<M> c_t1 = zero;
    BBox3fa box0 = empty;
    BBox3fa box1 = empty;

    const size_t step0 = size_t(itime+0);
    const size_t step1 = size_t(itime+1);

    for (size_t i=0; i<M && begin<end; i++, begin++)
    {
      const PrimRef& ref = prims[begin];
      const unsigned gid = ref.geomID();
      const unsigned pid = ref.primID();
      const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(gid);
      const TriangleMesh::Triangle& tri = mesh->triangle(pid);

      /* fetch each vertex at both time steps and grow the matching box */
      const Vec3fa& a0 = mesh->vertex(tri.v[0],step0);
      box0.extend(a0);
      const Vec3fa& a1 = mesh->vertex(tri.v[0],step1);
      box1.extend(a1);
      const Vec3fa& b0 = mesh->vertex(tri.v[1],step0);
      box0.extend(b0);
      const Vec3fa& b1 = mesh->vertex(tri.v[1],step1);
      box1.extend(b1);
      const Vec3fa& c0 = mesh->vertex(tri.v[2],step0);
      box0.extend(c0);
      const Vec3fa& c1 = mesh->vertex(tri.v[2],step1);
      box1.extend(c1);

      geom_ids[i] = gid;
      prim_ids[i] = pid;
      a_t0.x[i] = a0.x; a_t0.y[i] = a0.y; a_t0.z[i] = a0.z;
      a_t1.x[i] = a1.x; a_t1.y[i] = a1.y; a_t1.z[i] = a1.z;
      b_t0.x[i] = b0.x; b_t0.y[i] = b0.y; b_t0.z[i] = b0.z;
      b_t1.x[i] = b1.x; b_t1.y[i] = b1.y; b_t1.z[i] = b1.z;
      c_t0.x[i] = c0.x; c_t0.y[i] = c0.y; c_t0.z[i] = c0.z;
      c_t1.x[i] = c1.x; c_t1.y[i] = c1.y; c_t1.z[i] = c1.z;
    }

    new (this) TriangleMvMB(a_t0,a_t1,b_t0,b_t1,c_t0,c_t1,geom_ids,prim_ids);
    return LBBox3fa(box0,box1);
  }

  /* Fills the block with up to M triangles from prims[begin..end) for the given time range;
     advances begin and returns the merged linear bounds reported by the meshes */
  __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
  {
    /* slots that receive no triangle keep ID -1 and zero vertices */
    vuint<M> geom_ids = -1;
    vuint<M> prim_ids = -1;
    Vec3vf<M> a_t0 = zero;
    Vec3vf<M> b_t0 = zero;
    Vec3vf<M> c_t0 = zero;
    Vec3vf<M> a_t1 = zero;
    Vec3vf<M> b_t1 = zero;
    Vec3vf<M> c_t1 = zero;
    LBBox3fa lbounds = empty;

    for (size_t i=0; i<M && begin<end; i++, begin++)
    {
      const PrimRefMB& ref = prims[begin];
      const unsigned gid = ref.geomID();
      const unsigned pid = ref.primID();
      const TriangleMesh* const mesh = scene->get<TriangleMesh>(gid);

      /* the time range is expected to cover exactly one time segment of the mesh */
      const range<int> itime_range = mesh->timeSegmentRange(time_range);
      assert(itime_range.size() == 1);
      const int ilower = itime_range.begin();
      const size_t step0 = size_t(ilower+0);
      const size_t step1 = size_t(ilower+1);

      const TriangleMesh::Triangle& tri = mesh->triangle(pid);
      lbounds.extend(mesh->linearBounds(pid, time_range));

      /* vertices at both ends of the time segment */
      const Vec3fa& a0 = mesh->vertex(tri.v[0],step0);
      const Vec3fa& a1 = mesh->vertex(tri.v[0],step1);
      const Vec3fa& b0 = mesh->vertex(tri.v[1],step0);
      const Vec3fa& b1 = mesh->vertex(tri.v[1],step1);
      const Vec3fa& c0 = mesh->vertex(tri.v[2],step0);
      const Vec3fa& c1 = mesh->vertex(tri.v[2],step1);

      /* pass each vertex pair through globalLinear together with the time interval of the segment */
      const BBox1f seg_time(mesh->timeStep(ilower),mesh->timeStep(ilower+1));
      auto a01 = globalLinear(std::make_pair(a0,a1),seg_time);
      auto b01 = globalLinear(std::make_pair(b0,b1),seg_time);
      auto c01 = globalLinear(std::make_pair(c0,c1),seg_time);

      geom_ids[i] = gid;
      prim_ids[i] = pid;
      a_t0.x[i] = a01.first .x; a_t0.y[i] = a01.first .y; a_t0.z[i] = a01.first .z;
      a_t1.x[i] = a01.second.x; a_t1.y[i] = a01.second.y; a_t1.z[i] = a01.second.z;
      b_t0.x[i] = b01.first .x; b_t0.y[i] = b01.first .y; b_t0.z[i] = b01.first .z;
      b_t1.x[i] = b01.second.x; b_t1.y[i] = b01.second.y; b_t1.z[i] = b01.second.z;
      c_t0.x[i] = c01.first .x; c_t0.y[i] = c01.first .y; c_t0.z[i] = c01.first .z;
      c_t1.x[i] = c01.second.x; c_t1.y[i] = c01.second.y; c_t1.z[i] = c01.second.z;
    }

    new (this) TriangleMvMB(a_t0,a_t1,b_t0,b_t1,c_t0,c_t1,geom_ids,prim_ids);
    return lbounds;
  }

public:
  Vec3vf<M> v0;      // 1st vertex of the triangles
  Vec3vf<M> v1;      // 2nd vertex of the triangles
  Vec3vf<M> v2;      // 3rd vertex of the triangles
  Vec3vf<M> dv0;     // difference vector between time steps t0 and t1 for first vertex
  Vec3vf<M> dv1;     // difference vector between time steps t0 and t1 for second vertex
  Vec3vf<M> dv2;     // difference vector between time steps t0 and t1 for third vertex
private:
  vuint<M> geomIDs;  // geometry ID
  vuint<M> primIDs;  // primitive ID
};
/* Definition of the static type descriptor of TriangleMvMB<M> */
template<int M>
typename TriangleMvMB<M>::Type TriangleMvMB<M>::type;

/* Block of 4 motion blur triangles */
using Triangle4vMB = TriangleMvMB<4>;
}

View File

@@ -0,0 +1,211 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "triangle.h"
#include "intersector_epilog.h"
namespace embree
{
namespace isa
{
/*! Intersector for a block of M motion blur triangles (TriangleMvMB) and 1 ray, built on MoellerTrumboreIntersector1 */
template<int M, bool filter>
struct TriangleMvMBIntersector1Moeller
{
  typedef TriangleMvMB<M> Primitive;
  typedef MoellerTrumboreIntersector1<M> Precalculations;

  /*! Evaluates the vertices at the time of the ray (v + time*dv) and intersects them; hits are reported through Intersect1EpilogM. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    /* ray time replicated into a vector for use with madd */
    const Vec3vf<M> t(ray.time());
    const Vec3vf<M> p0 = madd(t,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
    const Vec3vf<M> p1 = madd(t,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
    const Vec3vf<M> p2 = madd(t,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));

    pre.intersect(ray,p0,p1,p2,
                  Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Evaluates the vertices at the time of the ray (v + time*dv) and returns the result of the intersector run with Occluded1EpilogM. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    /* ray time replicated into a vector for use with madd */
    const Vec3vf<M> t(ray.time());
    const Vec3vf<M> p0 = madd(t,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
    const Vec3vf<M> p1 = madd(t,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
    const Vec3vf<M> p2 = madd(t,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));

    return pre.intersect(ray,p0,p1,p2,
                         Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards the point query to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
  }
};
/*! Intersects M motion blur triangles with K rays. */
template<int M, int K, bool filter>
struct TriangleMvMBIntersectorKMoeller
{
typedef TriangleMvMB<M> Primitive;
typedef MoellerTrumboreIntersectorK<M,K> Precalculations;
/*! Intersects the K rays selected by valid_i with the triangles of the block, one triangle per iteration; hits are reported through IntersectKEpilogM. */
static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
{
  /* the loop ends at the first invalid slot */
  for (size_t i=0; i<TriangleMvMB<M>::max_size() && tri.valid(i); i++)
  {
    STAT3(normal.trav_prims,1,popcnt(valid_i),K);

    /* triangle i broadcast to all lanes and evaluated at the per-ray times (v + time*dv) */
    const Vec3vf<K> t(ray.time());
    const Vec3vf<K> p0 = madd(t,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
    const Vec3vf<K> p1 = madd(t,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
    const Vec3vf<K> p2 = madd(t,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));

    pre.intersectK(valid_i,ray,p0,p1,p2,
                   IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
  }
}
/*! Test for K rays if they are occluded by any of the M triangles. */
static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
{
vbool<K> valid0 = valid_i;
for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
{
if (!tri.valid(i)) break;
STAT3(shadow.trav_prims,1,popcnt(valid0),K);
const Vec3vf<K> time(ray.time());
const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i));
const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
if (none(valid0)) break;
}
return !valid0;
}
/*! Intersect a ray with M triangles and updates the hit. */
static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const TriangleMvMB<M>& tri)
{
STAT3(normal.trav_prims,1,1,1);
const Vec3vf<M> time(ray.time()[k]);
const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));
pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
}
/*! Test if the ray is occluded by one of the M triangles. */
static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const TriangleMvMB<M>& tri)
{
STAT3(shadow.trav_prims,1,1,1);
const Vec3vf<M> time(ray.time()[k]);
const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));
return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
}
};
/*! Intersects M motion blur triangles with 1 ray */
template<int M, bool filter>
struct TriangleMvMBIntersector1Pluecker
{
  /* Static entry points for testing one ray against a TriangleMvMB<M> block
     through a PlueckerIntersector1. Vertices are evaluated from v0..v2,
     dv0..dv2 and the ray time before being handed to the intersector together
     with a UVIdentity mapper and an epilog object. */
  typedef TriangleMvMB<M> Primitive;
  typedef PlueckerIntersector1<M> Precalculations;

  /*! Intersects a ray with the M triangles and updates the hit. */
  static __forceinline void intersect(const Precalculations& pre, RayHit& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    /* ray time wrapped in a Vec3vf<M> so it can be combined with the vertex data */
    const Vec3vf<M> rayTime(ray.time());

    /* stored vertices (v0..v2) and their per-vertex deltas (dv0..dv2) */
    const Vec3vf<M> base0(tri.v0);
    const Vec3vf<M> base1(tri.v1);
    const Vec3vf<M> base2(tri.v2);
    const Vec3vf<M> delta0(tri.dv0);
    const Vec3vf<M> delta1(tri.dv1);
    const Vec3vf<M> delta2(tri.dv2);

    /* vertices at the ray time; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
    const Vec3vf<M> p0 = madd(rayTime,delta0,base0);
    const Vec3vf<M> p1 = madd(rayTime,delta1,base1);
    const Vec3vf<M> p2 = madd(rayTime,delta2,base2);

    /* mapper and epilog are built as temporaries inside the call so they live for the whole call */
    pre.intersect(ray,p0,p1,p2,UVIdentity<M>(),Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Tests if the ray is occluded by one of the M triangles. */
  static __forceinline bool occluded(const Precalculations& pre, Ray& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    /* ray time wrapped in a Vec3vf<M> so it can be combined with the vertex data */
    const Vec3vf<M> rayTime(ray.time());

    /* stored vertices (v0..v2) and their per-vertex deltas (dv0..dv2) */
    const Vec3vf<M> base0(tri.v0);
    const Vec3vf<M> base1(tri.v1);
    const Vec3vf<M> base2(tri.v2);
    const Vec3vf<M> delta0(tri.dv0);
    const Vec3vf<M> delta1(tri.dv1);
    const Vec3vf<M> delta2(tri.dv2);

    /* vertices at the ray time; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
    const Vec3vf<M> p0 = madd(rayTime,delta0,base0);
    const Vec3vf<M> p1 = madd(rayTime,delta1,base1);
    const Vec3vf<M> p2 = madd(rayTime,delta2,base2);

    /* mapper and epilog are built as temporaries inside the call so they live for the whole call */
    return pre.intersect(ray,p0,p1,p2,UVIdentity<M>(),Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID()));
  }

  /*! Forwards a point query unchanged to PrimitivePointQuery1 for this primitive type. */
  static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
  {
    typedef PrimitivePointQuery1<Primitive> QueryImpl;
    return QueryImpl::pointQuery(query, context, tri);
  }
};
/*! Intersects M motion blur triangles with K rays. */
template<int M, int K, bool filter>
struct TriangleMvMBIntersectorKPluecker
{
  /* Static entry points for testing a TriangleMvMB<M> block against ray
     packets of width K, or against one ray k of such a packet, through a
     PlueckerIntersectorK. Each entry point evaluates the vertices from
     v0..v2, dv0..dv2 and the ray time and forwards them with a UVIdentity
     mapper and an epilog object to the Precalculations. */
  typedef TriangleMvMB<M> Primitive;
  typedef PlueckerIntersectorK<M,K> Precalculations;

  /*! Packet intersection: runs the rays selected by valid_i against each valid triangle slot of tri. */
  static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    /* slots are visited in order; iteration ends at the first slot tri reports as not valid */
    for (size_t slot=0; slot<Primitive::max_size() && tri.valid(slot); slot++)
    {
      STAT3(normal.trav_prims,1,popcnt(valid_i),K);
      const Vec3vf<K> rayTime(ray.time());

      /* vertices of this slot at the ray times; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
      const Vec3vf<K> p0 = madd(rayTime,broadcast<vfloat<K>>(tri.dv0,slot),broadcast<vfloat<K>>(tri.v0,slot));
      const Vec3vf<K> p1 = madd(rayTime,broadcast<vfloat<K>>(tri.dv1,slot),broadcast<vfloat<K>>(tri.v1,slot));
      const Vec3vf<K> p2 = madd(rayTime,broadcast<vfloat<K>>(tri.dv2,slot),broadcast<vfloat<K>>(tri.v2,slot));

      /* mapper and epilog are built as temporaries inside the call so they live for the whole call */
      pre.intersectK(valid_i,ray,p0,p1,p2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),slot));
    }
  }

  /*! Packet occlusion test: returns the complement of the mask that remains after all visited slots. */
  static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    /* working copy of the input mask; it is handed to the occlusion epilog,
       which presumably clears the lanes it finds occluded */
    vbool<K> active = valid_i;

    for (size_t slot=0; slot<Primitive::max_size() && tri.valid(slot); slot++)
    {
      STAT3(shadow.trav_prims,1,popcnt(active),K);
      const Vec3vf<K> rayTime(ray.time());

      /* vertices of this slot at the ray times; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
      const Vec3vf<K> p0 = madd(rayTime,broadcast<vfloat<K>>(tri.dv0,slot),broadcast<vfloat<K>>(tri.v0,slot));
      const Vec3vf<K> p1 = madd(rayTime,broadcast<vfloat<K>>(tri.dv1,slot),broadcast<vfloat<K>>(tri.v1,slot));
      const Vec3vf<K> p2 = madd(rayTime,broadcast<vfloat<K>>(tri.dv2,slot),broadcast<vfloat<K>>(tri.v2,slot));

      pre.intersectK(active,ray,p0,p1,p2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(active,ray,context,tri.geomID(),tri.primID(),slot));

      /* nothing left to test once no lane remains in the mask */
      if (none(active)) break;
    }
    return !active;
  }

  /*! Intersects ray k of the packet with all M triangles of tri at once and updates the hit. */
  static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(normal.trav_prims,1,1,1);

    /* time of ray k wrapped in a Vec3vf<M> so it can be combined with the vertex data */
    const Vec3vf<M> rayTime(ray.time()[k]);

    /* vertices at that time; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
    const Vec3vf<M> p0 = madd(rayTime,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
    const Vec3vf<M> p1 = madd(rayTime,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
    const Vec3vf<M> p2 = madd(rayTime,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));

    pre.intersect(ray,k,p0,p1,p2,UVIdentity<M>(),Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }

  /*! Tests whether ray k of the packet is occluded by one of the M triangles of tri. */
  static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const TriangleMvMB<M>& tri)
  {
    STAT3(shadow.trav_prims,1,1,1);

    /* time of ray k wrapped in a Vec3vf<M> so it can be combined with the vertex data */
    const Vec3vf<M> rayTime(ray.time()[k]);

    /* vertices at that time; presumably madd(a,b,c) is a*b+c, i.e. v + time*dv */
    const Vec3vf<M> p0 = madd(rayTime,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0));
    const Vec3vf<M> p1 = madd(rayTime,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1));
    const Vec3vf<M> p2 = madd(rayTime,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2));

    return pre.intersect(ray,k,p0,p1,p2,UVIdentity<M>(),Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
  }
};
}
}