initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

View File

@@ -0,0 +1,361 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "linearspace2.h"
#include "linearspace3.h"
#include "quaternion.h"
#include "bbox.h"
#include "vec4.h"
namespace embree
{
#define VectorT typename L::Vector
#define ScalarT typename L::Vector::Scalar
////////////////////////////////////////////////////////////////////////////////
// Affine Space
////////////////////////////////////////////////////////////////////////////////
template<typename L>
struct AffineSpaceT
{
L l; /*< linear part of affine space */
VectorT p; /*< affine part of affine space */
////////////////////////////////////////////////////////////////////////////////
// Constructors, Assignment, Cast, Copy Operations
////////////////////////////////////////////////////////////////////////////////
__forceinline AffineSpaceT ( ) { }
__forceinline AffineSpaceT ( const AffineSpaceT& other ) { l = other.l; p = other.p; }
__forceinline AffineSpaceT ( const L & other ) { l = other ; p = VectorT(zero); }
__forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; }
__forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {}
__forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {}
template<typename L1> __forceinline AffineSpaceT( const AffineSpaceT<L1>& s ) : l(s.l), p(s.p) {}
////////////////////////////////////////////////////////////////////////////////
// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {}
__forceinline AffineSpaceT( OneTy ) : l(one), p(zero) {}
/*! return matrix for scaling */
static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); }
/*! return matrix for translation */
static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); }
/*! return matrix for rotation, only in 2D */
static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); }
/*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); }
/*! return matrix for rotation around arbitrary axis and point, only in 3D */
static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); }
/*! return matrix for looking at given point, only in 3D */
static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) {
VectorT Z = normalize(point-eye);
VectorT U = normalize(cross(up,Z));
VectorT V = normalize(cross(Z,U));
return AffineSpaceT(L(U,V,Z),eye);
}
};
// template specialization to get correct identity matrix for type AffineSpace3fa
template<>
__forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename L> __forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(-a.l,-a.p); }
template<typename L> __forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(+a.l,+a.p); }
template<typename L> __forceinline AffineSpaceT<L> rcp( const AffineSpaceT<L>& a ) { L il = rcp(a.l); return AffineSpaceT<L>(il,-(il*a.p)); }
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename L> __forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l+b.l,a.p+b.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l-b.l,a.p-b.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator *( const ScalarT & a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a*b.l,a*b.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a * rcp(b); }
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT & b ) { return a * rcp(b); }
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a * b; }
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = a * b; }
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a / b; }
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = a / b; }
template<typename L> __forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p) { return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p))); }
template<typename L> __forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v) { return xfmVector(m.l,v); }
template<typename L> __forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n) { return xfmNormal(m.l,n); }
__forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
{
BBox3fa dst = empty;
const Vec3fa p0(b.lower.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p0));
const Vec3fa p1(b.lower.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p1));
const Vec3fa p2(b.lower.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p2));
const Vec3fa p3(b.lower.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p3));
const Vec3fa p4(b.upper.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p4));
const Vec3fa p5(b.upper.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p5));
const Vec3fa p6(b.upper.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p6));
const Vec3fa p7(b.upper.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p7));
return dst;
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename L> __forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l == b.l && a.p == b.p; }
template<typename L> __forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l != b.l || a.p != b.p; }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename L> __forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s, const AffineSpaceT<L>& t, const AffineSpaceT<L>& f ) {
return AffineSpaceT<L>(select(s,t.l,f.l),select(s,t.p,f.p));
}
////////////////////////////////////////////////////////////////////////////////
// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename L> static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m) {
return cout << "{ l = " << m.l << ", p = " << m.p << " }";
}
////////////////////////////////////////////////////////////////////////////////
// Template Instantiations
////////////////////////////////////////////////////////////////////////////////
typedef AffineSpaceT<LinearSpace2f> AffineSpace2f;
typedef AffineSpaceT<LinearSpace3f> AffineSpace3f;
typedef AffineSpaceT<LinearSpace3fa> AffineSpace3fa;
typedef AffineSpaceT<LinearSpace3fx> AffineSpace3fx;
typedef AffineSpaceT<LinearSpace3ff> AffineSpace3ff;
typedef AffineSpaceT<Quaternion3f > OrthonormalSpace3f;
template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>> AffineSpace3vf4;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>> AffineSpace3vf8;
typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>> AffineSpace3vf16;
template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>> AffineSpace3vfa4;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>> AffineSpace3vfa8;
typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>> AffineSpace3vfa16;
//////////////////////////////////////////////////////////////////////////////
/// Interpolation
//////////////////////////////////////////////////////////////////////////////
template<typename T, typename R>
__forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
const AffineSpaceT<T>& M1,
const R& t)
{
return AffineSpaceT<T>(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t));
}
// slerp interprets the 16 floats of the matrix M = D * R * S as components of
// three matrizes (D, R, S) that are interpolated individually.
template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
const T& t)
{
QuaternionT<T> q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
QuaternionT<T> q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
QuaternionT<T> q = slerp(q0, q1, t);
AffineSpaceT<LinearSpace3<Vec3<T>>> S = lerp(M0, M1, t);
AffineSpaceT<LinearSpace3<Vec3<T>>> D(one);
D.p.x = S.l.vx.y;
D.p.y = S.l.vx.z;
D.p.z = S.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
AffineSpaceT<LinearSpace3<Vec3<T>>> R = LinearSpace3<Vec3<T>>(q);
return D * R * S;
}
// this is a specialized version for Vec3fa because that does
// not play along nicely with the other templated Vec3/Vec4 types
__forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
const AffineSpace3ff& M1,
const float& t)
{
Quaternion3f q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
Quaternion3f q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
Quaternion3f q = slerp(q0, q1, t);
AffineSpace3fa S = lerp(M0, M1, t);
AffineSpace3fa D(one);
D.p.x = S.l.vx.y;
D.p.y = S.l.vx.z;
D.p.z = S.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
AffineSpace3fa R = LinearSpace3fa(q);
return D * R * S;
}
__forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
{
// compute affine transform from quaternion decomposition
Quaternion3f q(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
AffineSpace3fa M = qd;
AffineSpace3fa D(one);
D.p.x = M.l.vx.y;
D.p.y = M.l.vx.z;
D.p.z = M.l.vy.z;
M.l.vx.y = 0;
M.l.vx.z = 0;
M.l.vy.z = 0;
AffineSpace3fa R = LinearSpace3fa(q);
return D * R * M;
}
__forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
{
q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
S = qd;
T.x = qd.l.vx.y;
T.y = qd.l.vx.z;
T.z = qd.l.vy.z;
S.l.vx.y = 0;
S.l.vx.z = 0;
S.l.vy.z = 0;
}
__forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
{
AffineSpace3ff M = S;
M.l.vx.w = q.i;
M.l.vy.w = q.j;
M.l.vz.w = q.k;
M.p.w = q.r;
M.l.vx.y = T.x;
M.l.vx.z = T.y;
M.l.vy.z = T.z;
return M;
}
struct __aligned(16) QuaternionDecomposition
{
float scale_x = 1.f;
float scale_y = 1.f;
float scale_z = 1.f;
float skew_xy = 0.f;
float skew_xz = 0.f;
float skew_yz = 0.f;
float shift_x = 0.f;
float shift_y = 0.f;
float shift_z = 0.f;
float quaternion_r = 1.f;
float quaternion_i = 0.f;
float quaternion_j = 0.f;
float quaternion_k = 0.f;
float translation_x = 0.f;
float translation_y = 0.f;
float translation_z = 0.f;
};
__forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
{
QuaternionDecomposition qd;
qd.scale_x = M.l.vx.x;
qd.scale_y = M.l.vy.y;
qd.scale_z = M.l.vz.z;
qd.shift_x = M.p.x;
qd.shift_y = M.p.y;
qd.shift_z = M.p.z;
qd.translation_x = M.l.vx.y;
qd.translation_y = M.l.vx.z;
qd.translation_z = M.l.vy.z;
qd.skew_xy = M.l.vy.x;
qd.skew_xz = M.l.vz.x;
qd.skew_yz = M.l.vz.y;
qd.quaternion_r = M.p.w;
qd.quaternion_i = M.l.vx.w;
qd.quaternion_j = M.l.vy.w;
qd.quaternion_k = M.l.vz.w;
return qd;
}
////////////////////////////////////////////////////////////////////////////////
/*
* ! Template Specialization for 2D: return matrix for rotation around point
* (rotation around arbitrarty vector is not meaningful in 2D)
*/
template<> __forceinline
AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r) {
return translate(+p)*AffineSpace2f(LinearSpace2f::rotate(r))*translate(-p);
}
////////////////////////////////////////////////////////////////////////////////
// Similarity Transform
//
// checks, if M is a similarity transformation, i.e if there exists a factor D
// such that for all x,y: distance(Mx, My) = D * distance(x, y)
////////////////////////////////////////////////////////////////////////////////
__forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
{
if (D) *D = 0.f;
if (abs(dot(M.l.vx, M.l.vy)) > 1e-5f) return false;
if (abs(dot(M.l.vx, M.l.vz)) > 1e-5f) return false;
if (abs(dot(M.l.vy, M.l.vz)) > 1e-5f) return false;
const float D_x = dot(M.l.vx, M.l.vx);
const float D_y = dot(M.l.vy, M.l.vy);
const float D_z = dot(M.l.vz, M.l.vz);
if (abs(D_x - D_y) > 1e-5f ||
abs(D_x - D_z) > 1e-5f ||
abs(D_y - D_z) > 1e-5f)
return false;
if (D) *D = sqrtf(D_x);
return true;
}
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
{
Vec3fa::storeu(&ptr->l.vx, source.l.vx);
Vec3fa::storeu(&ptr->l.vy, source.l.vy);
Vec3fa::storeu(&ptr->l.vz, source.l.vz);
Vec3fa::storeu(&ptr->p, source.p);
}
__forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(AffineSpace3fa* ptr)
{
AffineSpace3fa space;
space.l.vx = Vec3fa::loadu(&ptr->l.vx);
space.l.vy = Vec3fa::loadu(&ptr->l.vy);
space.l.vz = Vec3fa::loadu(&ptr->l.vz);
space.p = Vec3fa::loadu(&ptr->p);
return space;
}
#undef VectorT
#undef ScalarT
}

336
thirdparty/embree/common/math/bbox.h vendored Normal file
View File

@@ -0,0 +1,336 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
namespace embree
{
namespace internal {
template <typename T> __forceinline T divideByTwo(const T& v) { return v / T(2); }
template <> __forceinline float divideByTwo<float>(const float& v) { return v * 0.5f; }
template <> __forceinline double divideByTwo<double>(const double& v) { return v * 0.5; }
} // namespace internal
template<typename T>
struct BBox
{
T lower, upper;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline BBox ( ) { }
template<typename T1>
__forceinline BBox ( const BBox<T1>& other ) : lower(other.lower), upper(other.upper) {}
__forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; }
__forceinline BBox ( const T& v ) : lower(v), upper(v) {}
__forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {}
////////////////////////////////////////////////////////////////////////////////
/// Extending Bounds
////////////////////////////////////////////////////////////////////////////////
__forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
__forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
/*! tests if box is empty */
__forceinline bool empty() const { for (int i=0; i<T::N; i++) if (lower[i] > upper[i]) return true; return false; }
/*! computes the size of the box */
__forceinline T size() const { return upper - lower; }
/*! computes the center of the box */
__forceinline T center() const { return internal::divideByTwo<T>(lower+upper); }
/*! computes twice the center of the box */
__forceinline T center2() const { return lower+upper; }
/*! merges two boxes */
__forceinline static const BBox merge (const BBox& a, const BBox& b) {
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
}
/*! intersects two boxes */
__forceinline static const BBox intersect (const BBox& a, const BBox& b) {
return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
}
/*! enlarge box by some scaling factor */
__forceinline BBox enlarge_by(const float a) const {
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {}
__forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {}
__forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {}
__forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {}
};
template<> __forceinline bool BBox<float>::empty() const {
return lower > upper;
}
#if defined(__SSE__) || defined(__ARM_NEON)
template<> __forceinline bool BBox<Vec3fa>::empty() const {
return !all(le_mask(lower,upper));
}
template<> __forceinline bool BBox<Vec3fx>::empty() const {
return !all(le_mask(lower,upper));
}
#endif
/*! tests if box is finite */
__forceinline bool isvalid( const BBox<Vec3fa>& v ) {
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)));
}
/*! tests if box is finite and non-empty*/
__forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v ) {
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper));
}
/*! tests if box has finite entries */
__forceinline bool is_finite( const BBox<Vec3fa>& b) {
return is_finite(b.lower) && is_finite(b.upper);
}
/*! test if point contained in box */
__forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); }
/*! computes the center of the box */
template<typename T> __forceinline const T center2(const BBox<T>& box) { return box.lower + box.upper; }
template<typename T> __forceinline const T center (const BBox<T>& box) { return internal::divideByTwo<T>(center2(box)); }
/*! computes the volume of a bounding box */
__forceinline float volume ( const BBox<Vec3fa>& b ) { return reduce_mul(b.size()); }
__forceinline float safeVolume( const BBox<Vec3fa>& b ) { if (b.empty()) return 0.0f; else return volume(b); }
/*! computes the volume of a bounding box */
__forceinline float volume( const BBox<Vec3f>& b ) { return reduce_mul(b.size()); }
/*! computes the surface area of a bounding box */
template<typename T> __forceinline const T area( const BBox<Vec2<T> >& b ) { const Vec2<T> d = b.size(); return d.x*d.y; }
template<typename T> __forceinline const T halfArea( const BBox<Vec3<T> >& b ) { return halfArea(b.size()); }
template<typename T> __forceinline const T area( const BBox<Vec3<T> >& b ) { return T(2)*halfArea(b); }
__forceinline float halfArea( const BBox<Vec3fa>& b ) { return halfArea(b.size()); }
__forceinline float area( const BBox<Vec3fa>& b ) { return 2.0f*halfArea(b); }
__forceinline float halfArea( const BBox<Vec3fx>& b ) { return halfArea(b.size()); }
__forceinline float area( const BBox<Vec3fx>& b ) { return 2.0f*halfArea(b); }
template<typename Vec> __forceinline float safeArea( const BBox<Vec>& b ) { if (b.empty()) return 0.0f; else return area(b); }
template<typename T> __forceinline float expectedApproxHalfArea(const BBox<T>& box) {
return halfArea(box);
}
/*! merges bounding boxes and points */
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) { return BBox<T>(min(a.lower, b ), max(a.upper, b )); }
template<typename T> __forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) { return BBox<T>(min(a , b.lower), max(a , b.upper)); }
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper)); }
/*! Merges three boxes. */
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return merge(a,merge(b,c)); }
/*! Merges four boxes. */
template<typename T> __forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
return merge(merge(a,b),merge(c,d));
}
/*! Comparison Operators */
template<typename T> __forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) { return a.lower == b.lower && a.upper == b.upper; }
template<typename T> __forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) { return a.lower != b.lower || a.upper != b.upper; }
/*! scaling */
template<typename T> __forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
template<typename T> __forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
/*! translations */
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower+b.lower,a.upper+b.upper); }
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower-b.lower,a.upper-b.upper); }
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower+b ,a.upper+b ); }
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower-b ,a.upper-b ); }
/*! extension */
template<typename T> __forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) { return BBox<T>(a.lower-b, a.upper+b); }
/*! intersect bounding boxes */
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper)); }
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return intersect(a,intersect(b,c)); }
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) { return intersect(intersect(a,b),intersect(c,d)); }
/*! subtract bounds from each other */
template<typename T> __forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
{
c.lower = a.lower;
c.upper = min(a.upper,b.lower);
d.lower = max(a.lower,b.upper);
d.upper = a.upper;
}
/*! tests if bounding boxes (and points) are disjoint (empty intersection) */
template<typename T> __inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) { return intersect(a,b).empty(); }
template<typename T> __inline bool disjoint( const BBox<T>& a, const T& b ) { return disjoint(a,BBox<T>(b)); }
template<typename T> __inline bool disjoint( const T& a, const BBox<T>& b ) { return disjoint(BBox<T>(a),b); }
/*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
template<typename T> __inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) { return !intersect(a,b).empty(); }
template<typename T> __inline bool conjoint( const BBox<T>& a, const T& b ) { return conjoint(a,BBox<T>(b)); }
template<typename T> __inline bool conjoint( const T& a, const BBox<T>& b ) { return conjoint(BBox<T>(a),b); }
/*! subset relation */
template<typename T> __inline bool subset( const BBox<T>& a, const BBox<T>& b )
{
for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false;
for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false;
return true;
}
template<> __inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b ) {
return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
}
template<> __inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b ) {
return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
}
/*! blending */
template<typename T>
__forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t) {
return BBox<T>(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t));
}
/*! output operator */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box) {
return cout << "[" << box.lower << "; " << box.upper << "]";
}
/*! default template instantiations */
typedef BBox<float> BBox1f;
typedef BBox<Vec2f> BBox2f;
typedef BBox<Vec2fa> BBox2fa;
typedef BBox<Vec3f> BBox3f;
typedef BBox<Vec3fa> BBox3fa;
typedef BBox<Vec3fx> BBox3fx;
typedef BBox<Vec3ff> BBox3ff;
}
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined (__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined (__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
template<int N>
__forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
template<>
__forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat4>> dest;
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#if defined(__AVX__)
template<>
__forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat8>> dest;
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
(vfloat4&)bounds[4].lower,
(vfloat4&)bounds[5].lower,
(vfloat4&)bounds[6].lower,
(vfloat4&)bounds[7].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
(vfloat4&)bounds[4].upper,
(vfloat4&)bounds[5].upper,
(vfloat4&)bounds[6].upper,
(vfloat4&)bounds[7].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#endif
template<int N>
__forceinline BBox3fa merge(const BBox3fa* bounds);
template<>
__forceinline BBox3fa merge<4>(const BBox3fa* bounds)
{
const Vec3fa lower = min(min(bounds[0].lower,bounds[1].lower),
min(bounds[2].lower,bounds[3].lower));
const Vec3fa upper = max(max(bounds[0].upper,bounds[1].upper),
max(bounds[2].upper,bounds[3].upper));
return BBox3fa(lower,upper);
}
#if defined(__AVX__)
template<>
__forceinline BBox3fa merge<8>(const BBox3fa* bounds)
{
const Vec3fa lower = min(min(min(bounds[0].lower,bounds[1].lower),min(bounds[2].lower,bounds[3].lower)),
min(min(bounds[4].lower,bounds[5].lower),min(bounds[6].lower,bounds[7].lower)));
const Vec3fa upper = max(max(max(bounds[0].upper,bounds[1].upper),max(bounds[2].upper,bounds[3].upper)),
max(max(bounds[4].upper,bounds[5].upper),max(bounds[6].upper,bounds[7].upper)));
return BBox3fa(lower,upper);
}
#endif
}

47
thirdparty/embree/common/math/col3.h vendored Normal file
View File

@@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// RGB Color Class
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct Col3
{
T r, g, b;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Col3 ( ) { }
__forceinline Col3 ( const Col3& other ) { r = other.r; g = other.g; b = other.b; }
__forceinline Col3& operator=( const Col3& other ) { r = other.r; g = other.g; b = other.b; return *this; }
__forceinline explicit Col3 (const T& v) : r(v), g(v), b(v) {}
__forceinline Col3 (const T& r, const T& g, const T& b) : r(r), g(g), b(b) {}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Col3 (ZeroTy) : r(zero) , g(zero) , b(zero) {}
__forceinline Col3 (OneTy) : r(one) , g(one) , b(one) {}
__forceinline Col3 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
__forceinline Col3 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
};
/*! output operator */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
/*! default template instantiations */
typedef Col3<unsigned char> Col3uc;
typedef Col3<float > Col3f;
}

47
thirdparty/embree/common/math/col4.h vendored Normal file
View File

@@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct Col4
{
T r, g, b, a;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Col4 ( ) { }
__forceinline Col4 ( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; }
__forceinline Col4& operator=( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; }
__forceinline explicit Col4 (const T& v) : r(v), g(v), b(v), a(v) {}
__forceinline Col4 (const T& r, const T& g, const T& b, const T& a) : r(r), g(g), b(b), a(a) {}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Col4 (ZeroTy) : r(zero) , g(zero) , b(zero) , a(zero) {}
__forceinline Col4 (OneTy) : r(one) , g(one) , b(one) , a(one) {}
__forceinline Col4 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf), a(pos_inf) {}
__forceinline Col4 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf), a(neg_inf) {}
};
/*! output operator */
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col4<T>& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ", " << a.a << ")";
}
/*! default template instantiations */
typedef Col4<unsigned char> Col4uc;
typedef Col4<float > Col4f;
}

268
thirdparty/embree/common/math/color.h vendored Normal file
View File

@@ -0,0 +1,268 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "color_sycl.h"
#else
#include "constants.h"
#include "col3.h"
#include "col4.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color4
{
union {
__m128 m128;
struct { float r,g,b,a; };
};
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4 () {}
__forceinline Color4 ( const __m128 a ) : m128(a) {}
__forceinline explicit Color4 (const float v) : m128(_mm_set1_ps(v)) {}
__forceinline Color4 (const float r, const float g, const float b, const float a) : m128(_mm_set_ps(a,b,g,r)) {}
__forceinline explicit Color4 ( const Col3uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(255.0f,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
__forceinline explicit Color4 ( const Col3f& other ) { m128 = _mm_set_ps(1.0f,other.b,other.g,other.r); }
__forceinline explicit Color4 ( const Col4uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(other.a,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
__forceinline explicit Color4 ( const Col4f& other ) { m128 = _mm_set_ps(other.a,other.b,other.g,other.r); }
__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = (unsigned char)(s[3]);
}
__forceinline void set(float &f) const
{
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
__forceinline Color4( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// SSE RGB Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color
{
union {
__m128 m128;
struct { float r,g,b; };
};
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color () {}
__forceinline Color ( const __m128 a ) : m128(a) {}
__forceinline explicit Color (const float v) : m128(_mm_set1_ps(v)) {}
__forceinline Color (const float r, const float g, const float b) : m128(_mm_set_ps(0.0f,b,g,r)) {}
__forceinline Color ( const Color& other ) : m128(other.m128) {}
__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
__forceinline Color ( const Color4& other ) : m128(other.m128) {}
__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = 255;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
__forceinline Color( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a ) { return a; }
__forceinline const Color operator -( const Color& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
__forceinline const Color abs ( const Color& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
__forceinline const Color rcp ( const Color& a )
{
#if defined(__aarch64__)
__m128 reciprocal = _mm_rcp_ps(a.m128);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
return (const Color)reciprocal;
#else
#if defined(__AVX512VL__)
const Color r = _mm_rcp14_ps(a.m128);
#else
const Color r = _mm_rcp_ps(a.m128);
#endif
return _mm_add_ps(r,_mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(a, r)))); // computes r + r * (1 - a * r)
#endif //defined(__aarch64__)
}
__forceinline const Color rsqrt( const Color& a )
{
#if defined(__aarch64__)
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif //defined(__aarch64__)
}
__forceinline const Color sqrt ( const Color& a ) { return _mm_sqrt_ps(a.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a, const Color& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline const Color operator -( const Color& a, const Color& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline const Color operator *( const Color& a, const Color& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
__forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
__forceinline const Color min( const Color& a, const Color& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline const Color max( const Color& a, const Color& b ) { return _mm_max_ps(a.m128,b.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
__forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
__forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
__forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
__forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
__forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
__forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
__forceinline bool operator < ( const Color& a, const Color& b ) {
if (a.r != b.r) return a.r < b.r;
if (a.g != b.g) return a.g < b.g;
if (a.b != b.b) return a.b < b.b;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color select( bool s, const Color& t, const Color& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f, t, mask);
}
////////////////////////////////////////////////////////////////////////////////
/// Special Operators
////////////////////////////////////////////////////////////////////////////////
/*! computes luminance of a color */
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
/*! output operator */
__forceinline embree_ostream operator<<(embree_ostream cout, const Color& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}
#endif

View File

@@ -0,0 +1,219 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "constants.h"
#include "col3.h"
#include "col4.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color4
{
struct { float r,g,b,a; };
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4 () {}
//__forceinline Color4 ( const __m128 a ) : m128(a) {}
__forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
__forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
__forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
__forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
__forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
__forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}
//__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
//__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
__forceinline void set(Col3uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
}
__forceinline void set(Col4uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
d.a = (unsigned char)(clamp(a)*255.0f);
}
__forceinline void set(float &f) const
{
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
__forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
//__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
//__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// SSE RGB Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color
{
struct { float r,g,b; };
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color () {}
//__forceinline Color ( const __m128 a ) : m128(a) {}
__forceinline explicit Color (const float v) : r(v), g(v), b(v) {}
__forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {}
//__forceinline Color ( const Color& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
//__forceinline Color ( const Color4& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
#if 0
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = 255;
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {}
__forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {}
//__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
//__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a ) { return a; }
__forceinline const Color operator -( const Color& a ) { return Color(-a.r, -a.g, -a.b); }
__forceinline const Color abs ( const Color& a ) { return Color(abs(a.r), abs(a.g), abs(a.b)); }
__forceinline const Color rcp ( const Color& a ) { return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b); }
__forceinline const Color rsqrt( const Color& a ) { return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b)); }
__forceinline const Color sqrt ( const Color& a ) { return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a, const Color& b ) { return Color(a.r+b.r, a.g+b.g, a.b+b.b); }
__forceinline const Color operator -( const Color& a, const Color& b ) { return Color(a.r-b.r, a.g-b.g, a.b-b.b); }
__forceinline const Color operator *( const Color& a, const Color& b ) { return Color(a.r*b.r, a.g*b.g, a.b*b.b); }
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
__forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
__forceinline const Color min( const Color& a, const Color& b ) { return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b)); }
__forceinline const Color max( const Color& a, const Color& b ) { return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
__forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
__forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
__forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
__forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
__forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
__forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; }
__forceinline bool operator !=( const Color& a, const Color& b ) { return a.r != b.r || a.g != b.g || a.b != b.b; }
__forceinline bool operator < ( const Color& a, const Color& b ) {
if (a.r != b.r) return a.r < b.r;
if (a.g != b.g) return a.g < b.g;
if (a.b != b.b) return a.b < b.b;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color select( bool s, const Color& t, const Color& f ) {
return s ? t : f;
}
////////////////////////////////////////////////////////////////////////////////
/// Special Operators
////////////////////////////////////////////////////////////////////////////////
/*! computes luminance of a color */
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
/*! output operator */
inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}

View File

@@ -0,0 +1,8 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "constants.h"
namespace embree
{
}

View File

@@ -0,0 +1,209 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include <limits>
#define _USE_MATH_DEFINES
#include <math.h> // using cmath causes issues under Windows
#include <cfloat>
#include <climits>
namespace embree
{
static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f;
static MAYBE_UNUSED const float min_rcp_input = 1E-18f; // for abs(x) >= min_rcp_input the newton raphson rcp calculation does not fail
/* we consider floating point numbers in that range as valid input numbers */
static MAYBE_UNUSED float FLT_LARGE = 1.844E18f;
struct TrueTy {
__forceinline operator bool( ) const { return true; }
};
const constexpr TrueTy True = TrueTy();
struct FalseTy {
__forceinline operator bool( ) const { return false; }
};
const constexpr FalseTy False = FalseTy();
struct ZeroTy
{
__forceinline operator double ( ) const { return 0; }
__forceinline operator float ( ) const { return 0; }
__forceinline operator long long( ) const { return 0; }
__forceinline operator unsigned long long( ) const { return 0; }
__forceinline operator long ( ) const { return 0; }
__forceinline operator unsigned long ( ) const { return 0; }
__forceinline operator int ( ) const { return 0; }
__forceinline operator unsigned int ( ) const { return 0; }
__forceinline operator short ( ) const { return 0; }
__forceinline operator unsigned short ( ) const { return 0; }
__forceinline operator char ( ) const { return 0; }
__forceinline operator unsigned char ( ) const { return 0; }
};
const constexpr ZeroTy zero = ZeroTy();
struct OneTy
{
__forceinline operator double ( ) const { return 1; }
__forceinline operator float ( ) const { return 1; }
__forceinline operator long long( ) const { return 1; }
__forceinline operator unsigned long long( ) const { return 1; }
__forceinline operator long ( ) const { return 1; }
__forceinline operator unsigned long ( ) const { return 1; }
__forceinline operator int ( ) const { return 1; }
__forceinline operator unsigned int ( ) const { return 1; }
__forceinline operator short ( ) const { return 1; }
__forceinline operator unsigned short ( ) const { return 1; }
__forceinline operator char ( ) const { return 1; }
__forceinline operator unsigned char ( ) const { return 1; }
};
const constexpr OneTy one = OneTy();
struct NegInfTy
{
__forceinline operator double ( ) const { return -std::numeric_limits<double>::infinity(); }
__forceinline operator float ( ) const { return -std::numeric_limits<float>::infinity(); }
__forceinline operator long long( ) const { return std::numeric_limits<long long>::min(); }
__forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::min(); }
__forceinline operator long ( ) const { return std::numeric_limits<long>::min(); }
__forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::min(); }
__forceinline operator int ( ) const { return std::numeric_limits<int>::min(); }
__forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::min(); }
__forceinline operator short ( ) const { return std::numeric_limits<short>::min(); }
__forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::min(); }
__forceinline operator char ( ) const { return std::numeric_limits<char>::min(); }
__forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::min(); }
};
const constexpr NegInfTy neg_inf = NegInfTy();
struct PosInfTy
{
__forceinline operator double ( ) const { return std::numeric_limits<double>::infinity(); }
__forceinline operator float ( ) const { return std::numeric_limits<float>::infinity(); }
__forceinline operator long long( ) const { return std::numeric_limits<long long>::max(); }
__forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::max(); }
__forceinline operator long ( ) const { return std::numeric_limits<long>::max(); }
__forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::max(); }
__forceinline operator int ( ) const { return std::numeric_limits<int>::max(); }
__forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::max(); }
__forceinline operator short ( ) const { return std::numeric_limits<short>::max(); }
__forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::max(); }
__forceinline operator char ( ) const { return std::numeric_limits<char>::max(); }
__forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::max(); }
};
const constexpr PosInfTy inf = PosInfTy();
const constexpr PosInfTy pos_inf = PosInfTy();
struct NaNTy
{
__forceinline operator double( ) const { return std::numeric_limits<double>::quiet_NaN(); }
__forceinline operator float ( ) const { return std::numeric_limits<float>::quiet_NaN(); }
};
const constexpr NaNTy nan = NaNTy();
struct UlpTy
{
__forceinline operator double( ) const { return std::numeric_limits<double>::epsilon(); }
__forceinline operator float ( ) const { return std::numeric_limits<float>::epsilon(); }
};
const constexpr UlpTy ulp = UlpTy();
struct PiTy
{
__forceinline operator double( ) const { return double(M_PI); }
__forceinline operator float ( ) const { return float(M_PI); }
};
const constexpr PiTy pi = PiTy();
struct OneOverPiTy
{
__forceinline operator double( ) const { return double(M_1_PI); }
__forceinline operator float ( ) const { return float(M_1_PI); }
};
const constexpr OneOverPiTy one_over_pi = OneOverPiTy();
struct TwoPiTy
{
__forceinline operator double( ) const { return double(2.0*M_PI); }
__forceinline operator float ( ) const { return float(2.0*M_PI); }
};
const constexpr TwoPiTy two_pi = TwoPiTy();
struct OneOverTwoPiTy
{
__forceinline operator double( ) const { return double(0.5*M_1_PI); }
__forceinline operator float ( ) const { return float(0.5*M_1_PI); }
};
const constexpr OneOverTwoPiTy one_over_two_pi = OneOverTwoPiTy();
struct FourPiTy
{
__forceinline operator double( ) const { return double(4.0*M_PI); }
__forceinline operator float ( ) const { return float(4.0*M_PI); }
};
const constexpr FourPiTy four_pi = FourPiTy();
struct OneOverFourPiTy
{
__forceinline operator double( ) const { return double(0.25*M_1_PI); }
__forceinline operator float ( ) const { return float(0.25*M_1_PI); }
};
const constexpr OneOverFourPiTy one_over_four_pi = OneOverFourPiTy();
struct StepTy {
__forceinline operator double ( ) const { return 0; }
__forceinline operator float ( ) const { return 0; }
__forceinline operator long long( ) const { return 0; }
__forceinline operator unsigned long long( ) const { return 0; }
__forceinline operator long ( ) const { return 0; }
__forceinline operator unsigned long ( ) const { return 0; }
__forceinline operator int ( ) const { return 0; }
__forceinline operator unsigned int ( ) const { return 0; }
__forceinline operator short ( ) const { return 0; }
__forceinline operator unsigned short ( ) const { return 0; }
__forceinline operator char ( ) const { return 0; }
__forceinline operator unsigned char ( ) const { return 0; }
};
const constexpr StepTy step = StepTy();
struct ReverseStepTy {
};
const constexpr ReverseStepTy reverse_step = ReverseStepTy();
struct EmptyTy {
};
const constexpr EmptyTy empty = EmptyTy();
struct FullTy {
};
const constexpr FullTy full = FullTy();
struct UndefinedTy {
};
const constexpr UndefinedTy undefined = UndefinedTy();
}

468
thirdparty/embree/common/math/emath.h vendored Normal file
View File

@@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "math_sycl.h"
#else
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
#include <immintrin.h>
#endif
#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
namespace std
{
__forceinline bool isinf ( const float x ) { return _finite(x) == 0; }
__forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
__forceinline bool isfinite (const float x) { return _finite(x) != 0; }
}
#endif
#endif
namespace embree
{
__forceinline bool isvalid ( const float& v ) {
return (v > -FLT_LARGE) & (v < +FLT_LARGE);
}
__forceinline int cast_f2i(float f) {
union { float f; int i; } v; v.f = f; return v.i;
}
__forceinline float cast_i2f(int i) {
union { float f; int i; } v; v.i = i; return v.f;
}
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
__forceinline int asInt (const float& a) { return *((int*)&a); }
__forceinline float asFloat(const int& a) { return *((float*)&a); }
#if defined(__WIN32__)
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
__forceinline float sqr ( const float x ) { return x*x; }
__forceinline float rcp ( const float x )
{
#if defined(__aarch64__)
// Move scalar to vector register and do rcp.
__m128 a;
a[0] = x;
float32x4_t reciprocal = vrecpeq_f32(a);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
return reciprocal[0];
#else
const __m128 a = _mm_set_ss(x);
#if defined(__AVX512VL__)
const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a);
#else
const __m128 r = _mm_rcp_ss(a);
#endif
#if defined(__AVX2__)
return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
#else
return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
#endif
#endif //defined(__aarch64__)
}
__forceinline float signmsk ( const float x ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128i b;
a[0] = x;
b[0] = 0x80000000;
a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
return a[0];
#else
return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
#endif
}
__forceinline float xorf( const float x, const float y ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128 b;
a[0] = x;
b[0] = y;
a = _mm_xor_ps(a, b);
return a[0];
#else
return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
#endif
}
__forceinline float andf( const float x, const unsigned y ) {
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
__m128i b;
a[0] = x;
b[0] = y;
a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
return a[0];
#else
return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
#endif
}
__forceinline float rsqrt( const float x )
{
#if defined(__aarch64__)
// FP and Neon shares same vector register in arm64
__m128 a;
a[0] = x;
__m128 value = _mm_rsqrt_ps(a);
value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
return value[0];
#else
const __m128 a = _mm_set_ss(x);
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
#else
__m128 r = _mm_rsqrt_ss(a);
#endif
const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r),
_mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
return _mm_cvtss_f32(c);
#endif
}
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
__forceinline int roundf(float f) { return (int)(f + 0.5f); }
#else
__forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
__forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
#endif
__forceinline float abs ( const float x ) { return ::fabsf(x); }
__forceinline float acos ( const float x ) { return ::acosf (x); }
__forceinline float asin ( const float x ) { return ::asinf (x); }
__forceinline float atan ( const float x ) { return ::atanf (x); }
__forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
__forceinline float cos ( const float x ) { return ::cosf (x); }
__forceinline float cosh ( const float x ) { return ::coshf (x); }
__forceinline float exp ( const float x ) { return ::expf (x); }
__forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
__forceinline float log ( const float x ) { return ::logf (x); }
__forceinline float log10( const float x ) { return ::log10f(x); }
__forceinline float pow ( const float x, const float y ) { return ::powf (x, y); }
__forceinline float sin ( const float x ) { return ::sinf (x); }
__forceinline float sinh ( const float x ) { return ::sinhf (x); }
__forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
__forceinline float tan ( const float x ) { return ::tanf (x); }
__forceinline float tanh ( const float x ) { return ::tanhf (x); }
__forceinline float floor( const float x ) { return ::floorf (x); }
__forceinline float ceil ( const float x ) { return ::ceilf (x); }
__forceinline float frac ( const float x ) { return x-floor(x); }
__forceinline double abs ( const double x ) { return ::fabs(x); }
__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
__forceinline double acos ( const double x ) { return ::acos (x); }
__forceinline double asin ( const double x ) { return ::asin (x); }
__forceinline double atan ( const double x ) { return ::atan (x); }
__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
__forceinline double cos ( const double x ) { return ::cos (x); }
__forceinline double cosh ( const double x ) { return ::cosh (x); }
__forceinline double exp ( const double x ) { return ::exp (x); }
__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
__forceinline double log ( const double x ) { return ::log (x); }
__forceinline double log10( const double x ) { return ::log10(x); }
__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
__forceinline double rcp ( const double x ) { return 1.0/x; }
__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
__forceinline double sin ( const double x ) { return ::sin (x); }
__forceinline double sinh ( const double x ) { return ::sinh (x); }
__forceinline double sqr ( const double x ) { return x*x; }
__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
__forceinline double tan ( const double x ) { return ::tan (x); }
__forceinline double tanh ( const double x ) { return ::tanh (x); }
__forceinline double floor( const double x ) { return ::floor (x); }
__forceinline double ceil ( const double x ) { return ::ceil (x); }
#if defined(__aarch64__)
__forceinline float mini(float a, float b) {
// FP and Neon shares same vector register in arm64
__m128 x;
__m128 y;
x[0] = a;
y[0] = b;
x = _mm_min_ps(x, y);
return x[0];
}
#elif defined(__SSE4_1__)
__forceinline float mini(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_min_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
#if defined(__aarch64__)
__forceinline float maxi(float a, float b) {
// FP and Neon shares same vector register in arm64
__m128 x;
__m128 y;
x[0] = a;
y[0] = b;
x = _mm_max_ps(x, y);
return x[0];
}
#elif defined(__SSE4_1__)
__forceinline float maxi(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_max_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
template<typename T>
__forceinline T twice(const T& a) { return a+a; }
__forceinline int min(int a, int b) { return a<b ? a:b; }
__forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
__forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; }
__forceinline float min(float a, float b) { return a<b ? a:b; }
__forceinline double min(double a, double b) { return a<b ? a:b; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline long min(long a, long b) { return a<b ? a:b; }
#endif
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
__forceinline int max(int a, int b) { return a<b ? b:a; }
__forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
__forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; }
__forceinline float max(float a, float b) { return a<b ? b:a; }
__forceinline double max(double a, double b) { return a<b ? b:a; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline long max(long a, long b) { return a<b ? b:a; }
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
#if defined(__MACOSX__)
__forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
__forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
#endif
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
__forceinline void sincosf(float x, float *sin, float *cos) {
__sincosf(x,sin,cos);
}
#endif
#if defined(__WIN32__) || defined(__FreeBSD__)
__forceinline void sincosf(float x, float *s, float *c) {
*s = sinf(x); *c = cosf(x);
}
#endif
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
#if defined(__AVX2__)
__forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
__forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
#elif defined (__aarch64__) && defined(__clang__)
#pragma clang fp contract(fast)
__forceinline float madd ( const float a, const float b, const float c) { return a*b + c; }
__forceinline float msub ( const float a, const float b, const float c) { return a*b - c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
#pragma clang fp contract(on)
#else
__forceinline float madd ( const float a, const float b, const float c) { return a*b+c; }
__forceinline float msub ( const float a, const float b, const float c) { return a*b-c; }
__forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
__forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
#endif
/*! random functions */
template<typename T> T random() { return T(0); }
#if defined(_WIN32)
template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); }
#else
template<> __forceinline int random() { return int(rand()); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
#endif
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
#if _WIN32
__forceinline double drand48() {
return double(rand())/double(RAND_MAX);
}
__forceinline void srand48(long seed) {
return srand(seed);
}
#endif
/*! selects */
__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
}
/*! exchange */
template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
/* load/store */
template<typename Ty> struct mem;
template<> struct mem<float> {
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
};
/*! bit reverse operation */
template<class T>
__forceinline T bitReverse(const T& vin)
{
T v = vin;
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
v = ( v >> 16 ) | ( v << 16);
return v;
}
/*! bit interleave operation */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
T x = xin, y = yin, z = zin;
x = (x | (x << 16)) & 0x030000FF;
x = (x | (x << 8)) & 0x0300F00F;
x = (x | (x << 4)) & 0x030C30C3;
x = (x | (x << 2)) & 0x09249249;
y = (y | (y << 16)) & 0x030000FF;
y = (y | (y << 8)) & 0x0300F00F;
y = (y | (y << 4)) & 0x030C30C3;
y = (y | (y << 2)) & 0x09249249;
z = (z | (z << 16)) & 0x030000FF;
z = (z | (z << 8)) & 0x0300F00F;
z = (z | (z << 4)) & 0x030C30C3;
z = (z | (z << 2)) & 0x09249249;
return x | (y << 1) | (z << 2);
}
#if defined(__AVX2__) && !defined(__aarch64__)
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
return xx | yy | zz;
}
#endif
/*! bit interleave operation for 64bit data types*/
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
T x = xin & 0x1fffff;
T y = yin & 0x1fffff;
T z = zin & 0x1fffff;
x = (x | x << 32) & 0x1f00000000ffff;
x = (x | x << 16) & 0x1f0000ff0000ff;
x = (x | x << 8) & 0x100f00f00f00f00f;
x = (x | x << 4) & 0x10c30c30c30c30c3;
x = (x | x << 2) & 0x1249249249249249;
y = (y | y << 32) & 0x1f00000000ffff;
y = (y | y << 16) & 0x1f0000ff0000ff;
y = (y | y << 8) & 0x100f00f00f00f00f;
y = (y | y << 4) & 0x10c30c30c30c30c3;
y = (y | y << 2) & 0x1249249249249249;
z = (z | z << 32) & 0x1f00000000ffff;
z = (z | z << 16) & 0x1f0000ff0000ff;
z = (z | z << 8) & 0x100f00f00f00f00f;
z = (z | z << 4) & 0x10c30c30c30c30c3;
z = (z | z << 2) & 0x1249249249249249;
return x | (y << 1) | (z << 2);
}
}
#endif

161
thirdparty/embree/common/math/interval.h vendored Normal file
View File

@@ -0,0 +1,161 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
#include "bbox.h"
namespace embree
{
template<typename V>
struct Interval
{
V lower, upper;
__forceinline Interval() {}
__forceinline Interval ( const Interval& other ) { lower = other.lower; upper = other.upper; }
__forceinline Interval& operator=( const Interval& other ) { lower = other.lower; upper = other.upper; return *this; }
__forceinline Interval(const V& a) : lower(a), upper(a) {}
__forceinline Interval(const V& lower, const V& upper) : lower(lower), upper(upper) {}
__forceinline Interval(const BBox<V>& a) : lower(a.lower), upper(a.upper) {}
/*! tests if box is empty */
//__forceinline bool empty() const { return lower > upper; }
/*! computes the size of the interval */
__forceinline V size() const { return upper - lower; }
__forceinline V center() const { return 0.5f*(lower+upper); }
__forceinline const Interval& extend(const Interval& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
__forceinline const Interval& extend(const V & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
__forceinline friend Interval operator +( const Interval& a, const Interval& b ) {
return Interval(a.lower+b.lower,a.upper+b.upper);
}
__forceinline friend Interval operator -( const Interval& a, const Interval& b ) {
return Interval(a.lower-b.upper,a.upper-b.lower);
}
__forceinline friend Interval operator -( const Interval& a, const V& b ) {
return Interval(a.lower-b,a.upper-b);
}
__forceinline friend Interval operator *( const Interval& a, const Interval& b )
{
const V ll = a.lower*b.lower;
const V lu = a.lower*b.upper;
const V ul = a.upper*b.lower;
const V uu = a.upper*b.upper;
return Interval(min(ll,lu,ul,uu),max(ll,lu,ul,uu));
}
__forceinline friend Interval merge( const Interval& a, const Interval& b) {
return Interval(min(a.lower,b.lower),max(a.upper,b.upper));
}
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c) {
return merge(merge(a,b),c);
}
__forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c, const Interval& d) {
return merge(merge(a,b),merge(c,d));
}
/*! intersect bounding boxes */
__forceinline friend const Interval intersect( const Interval& a, const Interval& b ) { return Interval(max(a.lower, b.lower), min(a.upper, b.upper)); }
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c ) { return intersect(a,intersect(b,c)); }
__forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c, const Interval& d ) { return intersect(intersect(a,b),intersect(c,d)); }
friend embree_ostream operator<<(embree_ostream cout, const Interval& a) {
return cout << "[" << a.lower << ", " << a.upper << "]";
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Interval( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
__forceinline Interval( FullTy ) : lower(neg_inf), upper(pos_inf) {}
};
__forceinline bool isEmpty(const Interval<float>& v) {
return v.lower > v.upper;
}
__forceinline vboolx isEmpty(const Interval<vfloatx>& v) {
return v.lower > v.upper;
}
/*! subset relation */
template<typename T> __forceinline bool subset( const Interval<T>& a, const Interval<T>& b ) {
return (a.lower > b.lower) && (a.upper < b.upper);
}
template<typename T> __forceinline bool subset( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
return subset(a.x,b.x) && subset(a.y,b.y);
}
template<typename T> __forceinline const Vec2<Interval<T>> intersect( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
return Vec2<Interval<T>>(intersect(a.x,b.x),intersect(a.y,b.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Interval<T> select ( bool s, const Interval<T>& t, const Interval<T>& f ) {
return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}
template<typename T> __forceinline Interval<T> select ( const typename T::Bool& s, const Interval<T>& t, const Interval<T>& f ) {
return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
}
__forceinline int numRoots(const Interval<float>& p0, const Interval<float>& p1)
{
float eps = 1E-4f;
bool neg0 = p0.lower < eps; bool pos0 = p0.upper > -eps;
bool neg1 = p1.lower < eps; bool pos1 = p1.upper > -eps;
return (neg0 && pos1) || (pos0 && neg1) || (neg0 && pos0) || (neg1 && pos1);
}
typedef Interval<float> Interval1f;
typedef Vec2<Interval<float>> Interval2f;
typedef Vec3<Interval<float>> Interval3f;
inline void swap(float& a, float& b) { float tmp = a; a = b; b = tmp; }
inline Interval1f shift(const Interval1f& v, float shift) { return Interval1f(v.lower + shift, v.upper + shift); }
#define TWO_PI (2.0*M_PI)
inline Interval1f sin(Interval1f interval)
{
if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
float sinLower = sin(interval.lower);
float sinUpper = sin(interval.upper);
if (sinLower > sinUpper) swap(sinLower, sinUpper);
if (interval.lower < M_PI / 2.0 && interval.upper > M_PI / 2.0) sinUpper = 1.0;
if (interval.lower < 3.0 * M_PI / 2.0 && interval.upper > 3.0 * M_PI / 2.0) sinLower = -1.0;
return Interval1f(sinLower, sinUpper);
}
inline Interval1f cos(Interval1f interval)
{
if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
float cosLower = cos(interval.lower);
float cosUpper = cos(interval.upper);
if (cosLower > cosUpper) swap(cosLower, cosUpper);
if (interval.lower < M_PI && interval.upper > M_PI) cosLower = -1.0;
return Interval1f(cosLower, cosUpper);
}
#undef TWO_PI
}

331
thirdparty/embree/common/math/lbbox.h vendored Normal file
View File

@@ -0,0 +1,331 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bbox.h"
#include "range.h"
namespace embree
{
template<typename T>
__forceinline std::pair<T,T> globalLinear(const std::pair<T,T>& v, const BBox1f& dt)
{
const float rcp_dt_size = float(1.0f)/dt.size();
const T g0 = lerp(v.first,v.second,-dt.lower*rcp_dt_size);
const T g1 = lerp(v.first,v.second,(1.0f-dt.lower)*rcp_dt_size);
return std::make_pair(g0,g1);
}
template<typename T>
struct LBBox
{
public:
__forceinline LBBox () {}
template<typename T1>
__forceinline LBBox ( const LBBox<T1>& other )
: bounds0(other.bounds0), bounds1(other.bounds1) {}
__forceinline LBBox& operator= ( const LBBox& other ) {
bounds0 = other.bounds0; bounds1 = other.bounds1; return *this;
}
__forceinline LBBox (EmptyTy)
: bounds0(EmptyTy()), bounds1(EmptyTy()) {}
__forceinline explicit LBBox ( const BBox<T>& bounds)
: bounds0(bounds), bounds1(bounds) { }
__forceinline LBBox ( const BBox<T>& bounds0, const BBox<T>& bounds1)
: bounds0(bounds0), bounds1(bounds1) { }
LBBox ( const avector<BBox<T>>& bounds )
{
assert(bounds.size());
BBox<T> b0 = bounds.front();
BBox<T> b1 = bounds.back();
for (size_t i=1; i<bounds.size()-1; i++) {
const float f = float(i)/float(bounds.size()-1);
const BBox<T> bt = lerp(b0,b1,f);
const T dlower = min(bounds[i].lower-bt.lower,T(zero));
const T dupper = max(bounds[i].upper-bt.upper,T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range, float numTimeSegments)
{
const float lower = time_range.lower*numTimeSegments;
const float upper = time_range.upper*numTimeSegments;
const float ilowerf = floor(lower);
const float iupperf = ceil(upper);
const int ilower = (int)ilowerf;
const int iupper = (int)iupperf;
const BBox<T> blower0 = bounds(ilower);
const BBox<T> bupper1 = bounds(iupper);
if (iupper-ilower == 1) {
bounds0 = lerp(blower0, bupper1, lower-ilowerf);
bounds1 = lerp(bupper1, blower0, iupperf-upper);
return;
}
const BBox<T> blower1 = bounds(ilower+1);
const BBox<T> bupper0 = bounds(iupper-1);
BBox<T> b0 = lerp(blower0, blower1, lower-ilowerf);
BBox<T> b1 = lerp(bupper1, bupper0, iupperf-upper);
for (int i = ilower+1; i < iupper; i++)
{
const float f = (float(i)/numTimeSegments - time_range.lower) / time_range.size();
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range_in, const BBox1f& geom_time_range, float geom_time_segments)
{
/* normalize global time_range_in to local geom_time_range */
const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(),
(time_range_in.upper-geom_time_range.lower)/geom_time_range.size());
const float lower = time_range.lower*geom_time_segments;
const float upper = time_range.upper*geom_time_segments;
const float ilowerf = floor(lower);
const float iupperf = ceil(upper);
const float ilowerfc = max(0.0f,ilowerf);
const float iupperfc = min(iupperf,geom_time_segments);
const int ilowerc = (int)ilowerfc;
const int iupperc = (int)iupperfc;
assert(iupperc-ilowerc > 0);
/* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */
const int ilower_iter = max(-1,(int)ilowerf);
const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1);
const BBox<T> blower0 = bounds(ilowerc);
const BBox<T> bupper1 = bounds(iupperc);
if (iupper_iter-ilower_iter == 1) {
bounds0 = lerp(blower0, bupper1, max(0.0f,lower-ilowerfc));
bounds1 = lerp(bupper1, blower0, max(0.0f,iupperfc-upper));
return;
}
const BBox<T> blower1 = bounds(ilowerc+1);
const BBox<T> bupper0 = bounds(iupperc-1);
BBox<T> b0 = lerp(blower0, blower1, max(0.0f,lower-ilowerfc));
BBox<T> b1 = lerp(bupper1, bupper0, max(0.0f,iupperfc-upper));
for (int i = ilower_iter+1; i < iupper_iter; i++)
{
const float f = (float(i)/geom_time_segments - time_range.lower) / time_range.size();
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds of a primitive for the specified time range */
template<typename BoundsFunc>
__forceinline LBBox(const BoundsFunc& bounds, const range<int>& time_range, int numTimeSegments)
{
const int ilower = time_range.begin();
const int iupper = time_range.end();
BBox<T> b0 = bounds(ilower);
BBox<T> b1 = bounds(iupper);
if (iupper-ilower == 1)
{
bounds0 = b0;
bounds1 = b1;
return;
}
for (int i = ilower+1; i<iupper; i++)
{
const float f = float(i - time_range.begin()) / float(time_range.size());
const BBox<T> bt = lerp(b0, b1, f);
const BBox<T> bi = bounds(i);
const T dlower = min(bi.lower-bt.lower, T(zero));
const T dupper = max(bi.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
bounds0 = b0;
bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
{
const BBox3f bounds0 = lbounds.bounds0;
const BBox3f bounds1 = lbounds.bounds1;
/* normalize global target_time_range to local time_range_in */
const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
(target_time_range.upper-time_range_in.lower)/time_range_in.size());
const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
/* compute bounds at begin and end of clipped time range */
BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
/* make sure that b0 is properly bounded at time_range_in.lower */
{
const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds0.lower-bt.lower, T(zero));
const T dupper = max(bounds0.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
/* make sure that b1 is properly bounded at time_range_in.upper */
{
const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds1.lower-bt.lower, T(zero));
const T dupper = max(bounds1.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
this->bounds0 = b0;
this->bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
: LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
public:
__forceinline bool empty() const {
return bounds().empty();
}
__forceinline BBox<T> bounds () const {
return merge(bounds0,bounds1);
}
__forceinline BBox<T> interpolate( const float t ) const {
return lerp(bounds0,bounds1,t);
}
__forceinline LBBox<T> interpolate( const BBox1f& dt ) const {
return LBBox<T>(interpolate(dt.lower),interpolate(dt.upper));
}
__forceinline void extend( const LBBox& other ) {
bounds0.extend(other.bounds0);
bounds1.extend(other.bounds1);
}
__forceinline float expectedHalfArea() const;
__forceinline float expectedHalfArea(const BBox1f& dt) const {
return interpolate(dt).expectedHalfArea();
}
__forceinline float expectedApproxHalfArea() const {
return 0.5f*(halfArea(bounds0) + halfArea(bounds1));
}
/* calculates bounds for [0,1] time range from bounds in dt time range */
__forceinline LBBox global(const BBox1f& dt) const
{
const float rcp_dt_size = 1.0f/dt.size();
const BBox<T> b0 = interpolate(-dt.lower*rcp_dt_size);
const BBox<T> b1 = interpolate((1.0f-dt.lower)*rcp_dt_size);
return LBBox(b0,b1);
}
/*! Comparison Operators */
//template<typename TT> friend __forceinline bool operator==( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
//template<typename TT> friend __forceinline bool operator!=( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
friend __forceinline bool operator==( const LBBox& a, const LBBox& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
friend __forceinline bool operator!=( const LBBox& a, const LBBox& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
/*! output operator */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const LBBox& box) {
return cout << "LBBox { " << box.bounds0 << "; " << box.bounds1 << " }";
}
public:
BBox<T> bounds0, bounds1;
};
/*! tests if box is finite */
template<typename T>
__forceinline bool isvalid( const LBBox<T>& v ) {
return isvalid(v.bounds0) && isvalid(v.bounds1);
}
template<typename T>
__forceinline bool isvalid_non_empty( const LBBox<T>& v ) {
return isvalid_non_empty(v.bounds0) && isvalid_non_empty(v.bounds1);
}
template<typename T>
__forceinline T expectedArea(const T& a0, const T& a1, const T& b0, const T& b1)
{
const T da = a1-a0;
const T db = b1-b0;
return a0*b0+(a0*db+da*b0)*T(0.5f) + da*db*T(1.0f/3.0f);
}
template<> __forceinline float LBBox<Vec3fa>::expectedHalfArea() const
{
const Vec3fa d0 = bounds0.size();
const Vec3fa d1 = bounds1.size();
return reduce_add(expectedArea(Vec3fa(d0.x,d0.y,d0.z),
Vec3fa(d1.x,d1.y,d1.z),
Vec3fa(d0.y,d0.z,d0.x),
Vec3fa(d1.y,d1.z,d1.x)));
}
template<typename T>
__forceinline float expectedApproxHalfArea(const LBBox<T>& box) {
return box.expectedApproxHalfArea();
}
template<typename T>
__forceinline LBBox<T> merge(const LBBox<T>& a, const LBBox<T>& b) {
return LBBox<T>(merge(a.bounds0, b.bounds0), merge(a.bounds1, b.bounds1));
}
/*! subset relation */
template<typename T> __inline bool subset( const LBBox<T>& a, const LBBox<T>& b ) {
return subset(a.bounds0,b.bounds0) && subset(a.bounds1,b.bounds1);
}
/*! default template instantiations */
typedef LBBox<float> LBBox1f;
typedef LBBox<Vec2f> LBBox2f;
typedef LBBox<Vec3f> LBBox3f;
typedef LBBox<Vec3fa> LBBox3fa;
typedef LBBox<Vec3fx> LBBox3fx;
}

View File

@@ -0,0 +1,149 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// 2D Linear Transform (2x2 Matrix)
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct LinearSpace2
{
typedef T Vector;
typedef typename T::Scalar Scalar;
/*! default matrix constructor */
__forceinline LinearSpace2 ( ) {}
__forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
__forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }
template<typename L1> __forceinline LinearSpace2( const LinearSpace2<L1>& s ) : vx(s.vx), vy(s.vy) {}
/*! matrix construction from column vectors */
__forceinline LinearSpace2(const Vector& vx, const Vector& vy)
: vx(vx), vy(vy) {}
/*! matrix construction from row mayor data */
__forceinline LinearSpace2(const Scalar& m00, const Scalar& m01,
const Scalar& m10, const Scalar& m11)
: vx(m00,m10), vy(m01,m11) {}
/*! compute the determinant of the matrix */
__forceinline const Scalar det() const { return vx.x*vy.y - vx.y*vy.x; }
/*! compute adjoint matrix */
__forceinline const LinearSpace2 adjoint() const { return LinearSpace2(vy.y,-vy.x,-vx.y,vx.x); }
/*! compute inverse matrix */
__forceinline const LinearSpace2 inverse() const { return adjoint()/det(); }
/*! compute transposed matrix */
__forceinline const LinearSpace2 transposed() const { return LinearSpace2(vx.x,vx.y,vy.x,vy.y); }
/*! returns first row of matrix */
__forceinline Vector row0() const { return Vector(vx.x,vy.x); }
/*! returns second row of matrix */
__forceinline Vector row1() const { return Vector(vx.y,vy.y); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline LinearSpace2( ZeroTy ) : vx(zero), vy(zero) {}
__forceinline LinearSpace2( OneTy ) : vx(one, zero), vy(zero, one) {}
/*! return matrix for scaling */
static __forceinline LinearSpace2 scale(const Vector& s) {
return LinearSpace2(s.x, 0,
0 , s.y);
}
/*! return matrix for rotation */
static __forceinline LinearSpace2 rotate(const Scalar& r) {
Scalar s = sin(r), c = cos(r);
return LinearSpace2(c, -s,
s, c);
}
/*! return closest orthogonal matrix (i.e. a general rotation including reflection) */
LinearSpace2 orthogonal() const
{
LinearSpace2 m = *this;
// mirrored?
Scalar mirror(one);
if (m.det() < Scalar(zero)) {
m.vx = -m.vx;
mirror = -mirror;
}
// rotation
for (int i = 0; i < 99; i++) {
const LinearSpace2 m_next = 0.5 * (m + m.transposed().inverse());
const LinearSpace2 d = m_next - m;
m = m_next;
// norm^2 of difference small enough?
if (max(dot(d.vx, d.vx), dot(d.vy, d.vy)) < 1e-8)
break;
}
// rotation * mirror_x
return LinearSpace2(mirror*m.vx, m.vy);
}
public:
/*! the column vectors of the matrix */
Vector vx,vy;
};
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a ) { return LinearSpace2<T>(-a.vx,-a.vy); }
template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a ) { return LinearSpace2<T>(+a.vx,+a.vy); }
template<typename T> __forceinline LinearSpace2<T> rcp ( const LinearSpace2<T>& a ) { return a.inverse(); }
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx+b.vx,a.vy+b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx-b.vx,a.vy-b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator*(const typename T::Scalar & a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
template<typename T> __forceinline T operator*(const LinearSpace2<T>& a, const T & b) { return b.x*a.vx + b.y*a.vy; }
template<typename T> __forceinline LinearSpace2<T> operator*(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const typename T::Scalar & b) { return LinearSpace2<T>(a.vx/b, a.vy/b); }
template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return a * rcp(b); }
template<typename T> __forceinline LinearSpace2<T>& operator *=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a * b; }
template<typename T> __forceinline LinearSpace2<T>& operator /=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx == b.vx && a.vy == b.vy; }
template<typename T> __forceinline bool operator !=( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx != b.vx || a.vy != b.vy; }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace2<T>& m) {
return cout << "{ vx = " << m.vx << ", vy = " << m.vy << "}";
}
/*! Shortcuts for common linear spaces. */
typedef LinearSpace2<Vec2f> LinearSpace2f;
typedef LinearSpace2<Vec2fa> LinearSpace2fa;
}

View File

@@ -0,0 +1,217 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec3.h"
#include "quaternion.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// 3D Linear Transform (3x3 Matrix)
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct LinearSpace3
{
typedef T Vector;
typedef typename T::Scalar Scalar;
/*! default matrix constructor */
__forceinline LinearSpace3 ( ) {}
__forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
__forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
template<typename L1> __forceinline LinearSpace3( const LinearSpace3<L1>& s ) : vx(s.vx), vy(s.vy), vz(s.vz) {}
/*! matrix construction from column vectors */
__forceinline LinearSpace3(const Vector& vx, const Vector& vy, const Vector& vz)
: vx(vx), vy(vy), vz(vz) {}
/*! construction from quaternion */
__forceinline LinearSpace3( const QuaternionT<Scalar>& q )
: vx((q.r*q.r + q.i*q.i - q.j*q.j - q.k*q.k), 2.0f*(q.i*q.j + q.r*q.k), 2.0f*(q.i*q.k - q.r*q.j))
, vy(2.0f*(q.i*q.j - q.r*q.k), (q.r*q.r - q.i*q.i + q.j*q.j - q.k*q.k), 2.0f*(q.j*q.k + q.r*q.i))
, vz(2.0f*(q.i*q.k + q.r*q.j), 2.0f*(q.j*q.k - q.r*q.i), (q.r*q.r - q.i*q.i - q.j*q.j + q.k*q.k)) {}
/*! matrix construction from row mayor data */
__forceinline LinearSpace3(const Scalar& m00, const Scalar& m01, const Scalar& m02,
const Scalar& m10, const Scalar& m11, const Scalar& m12,
const Scalar& m20, const Scalar& m21, const Scalar& m22)
: vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {}
/*! compute the determinant of the matrix */
__forceinline const Scalar det() const { return dot(vx,cross(vy,vz)); }
/*! compute adjoint matrix */
__forceinline const LinearSpace3 adjoint() const { return LinearSpace3(cross(vy,vz),cross(vz,vx),cross(vx,vy)).transposed(); }
/*! compute inverse matrix */
__forceinline const LinearSpace3 inverse() const { return adjoint()/det(); }
/*! compute transposed matrix */
__forceinline const LinearSpace3 transposed() const { return LinearSpace3(vx.x,vx.y,vx.z,vy.x,vy.y,vy.z,vz.x,vz.y,vz.z); }
/*! returns first row of matrix */
__forceinline Vector row0() const { return Vector(vx.x,vy.x,vz.x); }
/*! returns second row of matrix */
__forceinline Vector row1() const { return Vector(vx.y,vy.y,vz.y); }
/*! returns third row of matrix */
__forceinline Vector row2() const { return Vector(vx.z,vy.z,vz.z); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline LinearSpace3( ZeroTy ) : vx(zero), vy(zero), vz(zero) {}
__forceinline LinearSpace3( OneTy ) : vx(one, zero, zero), vy(zero, one, zero), vz(zero, zero, one) {}
/*! return matrix for scaling */
static __forceinline LinearSpace3 scale(const Vector& s) {
return LinearSpace3(s.x, 0, 0,
0 , s.y, 0,
0 , 0, s.z);
}
/*! return matrix for rotation around arbitrary axis */
static __forceinline LinearSpace3 rotate(const Vector& _u, const Scalar& r) {
Vector u = normalize(_u);
Scalar s = sin(r), c = cos(r);
return LinearSpace3(u.x*u.x+(1-u.x*u.x)*c, u.x*u.y*(1-c)-u.z*s, u.x*u.z*(1-c)+u.y*s,
u.x*u.y*(1-c)+u.z*s, u.y*u.y+(1-u.y*u.y)*c, u.y*u.z*(1-c)-u.x*s,
u.x*u.z*(1-c)-u.y*s, u.y*u.z*(1-c)+u.x*s, u.z*u.z+(1-u.z*u.z)*c);
}
public:
/*! the column vectors of the matrix */
Vector vx,vy,vz;
};
#if !defined(__SYCL_DEVICE_ONLY__)
/*! compute transposed matrix */
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
}
#endif
template<typename T>
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
return xfm.transposed();
}
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a ) { return LinearSpace3<T>(-a.vx,-a.vy,-a.vz); }
template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a ) { return LinearSpace3<T>(+a.vx,+a.vy,+a.vz); }
template<typename T> __forceinline LinearSpace3<T> rcp ( const LinearSpace3<T>& a ) { return a.inverse(); }
/* constructs a coordinate frame form a normalized normal */
template<typename T> __forceinline LinearSpace3<T> frame(const T& N)
{
const T dx0(0,N.z,-N.y);
const T dx1(-N.z,0,N.x);
const T dx = normalize(select(dot(dx0,dx0) > dot(dx1,dx1),dx0,dx1));
const T dy = normalize(cross(N,dx));
return LinearSpace3<T>(dx,dy,N);
}
/* constructs a coordinate frame from a normal and approximate x-direction */
template<typename T> __forceinline LinearSpace3<T> frame(const T& N, const T& dxi)
{
if (abs(dot(dxi,N)) > 0.99f) return frame(N); // fallback in case N and dxi are very parallel
const T dx = normalize(cross(dxi,N));
const T dy = normalize(cross(N,dx));
return LinearSpace3<T>(dx,dy,N);
}
/* clamps linear space to range -1 to +1 */
template<typename T> __forceinline LinearSpace3<T> clamp(const LinearSpace3<T>& space) {
return LinearSpace3<T>(clamp(space.vx,T(-1.0f),T(1.0f)),
clamp(space.vy,T(-1.0f),T(1.0f)),
clamp(space.vz,T(-1.0f),T(1.0f)));
}
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx+b.vx,a.vy+b.vy,a.vz+b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx-b.vx,a.vy-b.vy,a.vz-b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator*(const typename T::Scalar & a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
template<typename T> __forceinline T operator*(const LinearSpace3<T>& a, const T & b) { return madd(T(b.x),a.vx,madd(T(b.y),a.vy,T(b.z)*a.vz)); }
template<typename T> __forceinline LinearSpace3<T> operator*(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const typename T::Scalar & b) { return LinearSpace3<T>(a.vx/b, a.vy/b, a.vz/b); }
template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return a * rcp(b); }
template<typename T> __forceinline LinearSpace3<T>& operator *=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a * b; }
template<typename T> __forceinline LinearSpace3<T>& operator /=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a / b; }
template<typename T> __forceinline T xfmPoint (const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
template<typename T> __forceinline T xfmVector(const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
template<typename T> __forceinline T xfmNormal(const LinearSpace3<T>& s, const T & a) { return xfmVector(s.inverse().transposed(),a); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; }
template<typename T> __forceinline bool operator !=( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz; }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline LinearSpace3<T> select ( const typename T::Scalar::Bool& s, const LinearSpace3<T>& t, const LinearSpace3<T>& f ) {
return LinearSpace3<T>(select(s,t.vx,f.vx),select(s,t.vy,f.vy),select(s,t.vz,f.vz));
}
/*! blending */
template<typename T>
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0, const LinearSpace3<T>& l1, const float t)
{
return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
lerp(l0.vy,l1.vy,t),
lerp(l0.vz,l1.vz,t));
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace3<T>& m) {
return cout << "{ vx = " << m.vx << ", vy = " << m.vy << ", vz = " << m.vz << "}";
}
/*! Shortcuts for common linear spaces. */
typedef LinearSpace3<Vec3f> LinearSpace3f;
typedef LinearSpace3<Vec3fa> LinearSpace3fa;
typedef LinearSpace3<Vec3fx> LinearSpace3fx;
typedef LinearSpace3<Vec3ff> LinearSpace3ff;
template<int N> using LinearSpace3vf = LinearSpace3<Vec3<vfloat<N>>>;
typedef LinearSpace3<Vec3<vfloat<4>>> LinearSpace3vf4;
typedef LinearSpace3<Vec3<vfloat<8>>> LinearSpace3vf8;
typedef LinearSpace3<Vec3<vfloat<16>>> LinearSpace3vf16;
/*! blending */
template<typename T, typename S>
__forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0,
const LinearSpace3<T>& l1,
const S& t)
{
return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
lerp(l0.vy,l1.vy,t),
lerp(l0.vz,l1.vz,t));
}
}

View File

@@ -0,0 +1,279 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
namespace embree
{
__forceinline bool isvalid ( const float& v ) {
return (v > -FLT_LARGE) & (v < +FLT_LARGE);
}
__forceinline int cast_f2i(float f) {
return __builtin_bit_cast(int,f);
}
__forceinline float cast_i2f(int i) {
return __builtin_bit_cast(float,i);
}
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
__forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); }
__forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); }
//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
__forceinline float sqr ( const float x ) { return x*x; }
__forceinline float rcp ( const float x ) {
return sycl::native::recip(x);
}
__forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); }
//__forceinline float signmsk ( const float x ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
//}
//__forceinline float xorf( const float x, const float y ) {
// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
//}
//__forceinline float andf( const float x, const unsigned y ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
//}
__forceinline float rsqrt( const float x ) {
return sycl::rsqrt(x);
}
//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
//__forceinline int roundf(float f) { return (int)(f + 0.5f); }
__forceinline float abs ( const float x ) { return sycl::fabs(x); }
__forceinline float acos ( const float x ) { return sycl::acos(x); }
__forceinline float asin ( const float x ) { return sycl::asin(x); }
__forceinline float atan ( const float x ) { return sycl::atan(x); }
__forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }
__forceinline float cos ( const float x ) { return sycl::cos(x); }
__forceinline float cosh ( const float x ) { return sycl::cosh(x); }
__forceinline float exp ( const float x ) { return sycl::exp(x); }
__forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }
__forceinline float log ( const float x ) { return sycl::log(x); }
__forceinline float log10( const float x ) { return sycl::log10(x); }
__forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }
__forceinline float sin ( const float x ) { return sycl::sin(x); }
__forceinline float sinh ( const float x ) { return sycl::sinh(x); }
__forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }
__forceinline float tan ( const float x ) { return sycl::tan(x); }
__forceinline float tanh ( const float x ) { return sycl::tanh(x); }
__forceinline float floor( const float x ) { return sycl::floor(x); }
__forceinline float ceil ( const float x ) { return sycl::ceil(x); }
__forceinline float frac ( const float x ) { return x-floor(x); }
//__forceinline double abs ( const double x ) { return ::fabs(x); }
//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
//__forceinline double acos ( const double x ) { return ::acos (x); }
//__forceinline double asin ( const double x ) { return ::asin (x); }
//__forceinline double atan ( const double x ) { return ::atan (x); }
//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
//__forceinline double cos ( const double x ) { return ::cos (x); }
//__forceinline double cosh ( const double x ) { return ::cosh (x); }
//__forceinline double exp ( const double x ) { return ::exp (x); }
//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
//__forceinline double log ( const double x ) { return ::log (x); }
//__forceinline double log10( const double x ) { return ::log10(x); }
//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
//__forceinline double rcp ( const double x ) { return 1.0/x; }
//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
//__forceinline double sin ( const double x ) { return ::sin (x); }
//__forceinline double sinh ( const double x ) { return ::sinh (x); }
//__forceinline double sqr ( const double x ) { return x*x; }
//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
//__forceinline double tan ( const double x ) { return ::tan (x); }
//__forceinline double tanh ( const double x ) { return ::tanh (x); }
//__forceinline double floor( const double x ) { return ::floor (x); }
//__forceinline double ceil ( const double x ) { return ::ceil (x); }
/*
#if defined(__SSE4_1__)
__forceinline float mini(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_min_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
#if defined(__SSE4_1__)
__forceinline float maxi(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_max_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
*/
template<typename T>
__forceinline T twice(const T& a) { return a+a; }
__forceinline int min(int a, int b) { return sycl::min(a,b); }
__forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }
__forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }
__forceinline float min(float a, float b) { return sycl::fmin(a,b); }
__forceinline double min(double a, double b) { return sycl::fmin(a,b); }
#if defined(__X86_64__)
__forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }
#endif
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
__forceinline int max(int a, int b) { return sycl::max(a,b); }
__forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }
__forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }
__forceinline float max(float a, float b) { return sycl::fmax(a,b); }
__forceinline double max(double a, double b) { return sycl::fmax(a,b); }
#if defined(__X86_64__)
__forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
__forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }
__forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }
__forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }
__forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }
/*! random functions */
/*
template<typename T> T random() { return T(0); }
template<> __forceinline int random() { return int(rand()); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
*/
/*! selects */
__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
}
/*! exchange */
template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
/* load/store */
template<typename Ty> struct mem;
template<> struct mem<float> {
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
};
/*! bit reverse operation */
template<class T>
__forceinline T bitReverse(const T& vin)
{
T v = vin;
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
v = ( v >> 16 ) | ( v << 16);
return v;
}
/*! bit interleave operation */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
T x = xin, y = yin, z = zin;
x = (x | (x << 16)) & 0x030000FF;
x = (x | (x << 8)) & 0x0300F00F;
x = (x | (x << 4)) & 0x030C30C3;
x = (x | (x << 2)) & 0x09249249;
y = (y | (y << 16)) & 0x030000FF;
y = (y | (y << 8)) & 0x0300F00F;
y = (y | (y << 4)) & 0x030C30C3;
y = (y | (y << 2)) & 0x09249249;
z = (z | (z << 16)) & 0x030000FF;
z = (z | (z << 8)) & 0x0300F00F;
z = (z | (z << 4)) & 0x030C30C3;
z = (z | (z << 2)) & 0x09249249;
return x | (y << 1) | (z << 2);
}
/*! bit interleave operation for 64bit data types*/
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
T x = xin & 0x1fffff;
T y = yin & 0x1fffff;
T z = zin & 0x1fffff;
x = (x | x << 32) & 0x1f00000000ffff;
x = (x | x << 16) & 0x1f0000ff0000ff;
x = (x | x << 8) & 0x100f00f00f00f00f;
x = (x | x << 4) & 0x10c30c30c30c30c3;
x = (x | x << 2) & 0x1249249249249249;
y = (y | y << 32) & 0x1f00000000ffff;
y = (y | y << 16) & 0x1f0000ff0000ff;
y = (y | y << 8) & 0x100f00f00f00f00f;
y = (y | y << 4) & 0x10c30c30c30c30c3;
y = (y | y << 2) & 0x1249249249249249;
z = (z | z << 32) & 0x1f00000000ffff;
z = (z | z << 16) & 0x1f0000ff0000ff;
z = (z | z << 8) & 0x100f00f00f00f00f;
z = (z | z << 4) & 0x10c30c30c30c30c3;
z = (z | z << 2) & 0x1249249249249249;
return x | (y << 1) | (z << 2);
}
}

39
thirdparty/embree/common/math/obbox.h vendored Normal file
View File

@@ -0,0 +1,39 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bbox.h"
#include "linearspace3.h"
namespace embree
{
/*! Oriented bounding box */
template<typename T>
struct OBBox
{
public:
__forceinline OBBox () {}
__forceinline OBBox (EmptyTy)
: space(one), bounds(empty) {}
__forceinline OBBox (const BBox<T>& bounds)
: space(one), bounds(bounds) {}
__forceinline OBBox (const LinearSpace3<T>& space, const BBox<T>& bounds)
: space(space), bounds(bounds) {}
friend embree_ostream operator<<(embree_ostream cout, const OBBox& p) {
return cout << "{ space = " << p.space << ", bounds = " << p.bounds << "}";
}
public:
LinearSpace3<T> space; //!< orthonormal transformation
BBox<T> bounds; //!< bounds in transformed space
};
typedef OBBox<Vec3f> OBBox3f;
typedef OBBox<Vec3fa> OBBox3fa;
}

View File

@@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec3.h"
#include "vec4.h"
#include "transcendental.h"
namespace embree
{
////////////////////////////////////////////////////////////////
// Quaternion Struct
////////////////////////////////////////////////////////////////
template<typename T>
struct QuaternionT
{
typedef Vec3<T> Vector;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline QuaternionT () { }
__forceinline QuaternionT ( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; }
__forceinline QuaternionT& operator=( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; return *this; }
__forceinline QuaternionT( const T& r ) : r(r), i(zero), j(zero), k(zero) {}
__forceinline explicit QuaternionT( const Vec3<T>& v ) : r(zero), i(v.x), j(v.y), k(v.z) {}
__forceinline explicit QuaternionT( const Vec4<T>& v ) : r(v.x), i(v.y), j(v.z), k(v.w) {}
__forceinline QuaternionT( const T& r, const T& i, const T& j, const T& k ) : r(r), i(i), j(j), k(k) {}
__forceinline QuaternionT( const T& r, const Vec3<T>& v ) : r(r), i(v.x), j(v.y), k(v.z) {}
__inline QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz );
__inline QuaternionT( const T& yaw, const T& pitch, const T& roll );
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline QuaternionT( ZeroTy ) : r(zero), i(zero), j(zero), k(zero) {}
__forceinline QuaternionT( OneTy ) : r( one), i(zero), j(zero), k(zero) {}
/*! return quaternion for rotation around arbitrary axis */
static __forceinline QuaternionT rotate(const Vec3<T>& u, const T& r) {
return QuaternionT<T>(cos(T(0.5)*r),sin(T(0.5)*r)*normalize(u));
}
/*! returns the rotation axis of the quaternion as a vector */
__forceinline Vec3<T> v( ) const { return Vec3<T>(i, j, k); }
public:
T r, i, j, k;
};
template<typename T> __forceinline QuaternionT<T> operator *( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a * b.r, a * b.i, a * b.j, a * b.k); }
template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r * b, a.i * b, a.j * b, a.k * b); }
////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a ) { return QuaternionT<T>(+a.r, +a.i, +a.j, +a.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a ) { return QuaternionT<T>(-a.r, -a.i, -a.j, -a.k); }
template<typename T> __forceinline QuaternionT<T> conj ( const QuaternionT<T>& a ) { return QuaternionT<T>(a.r, -a.i, -a.j, -a.k); }
template<typename T> __forceinline T abs ( const QuaternionT<T>& a ) { return sqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
template<typename T> __forceinline QuaternionT<T> rcp ( const QuaternionT<T>& a ) { return conj(a)*rcp(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
template<typename T> __forceinline QuaternionT<T> normalize ( const QuaternionT<T>& a ) { return a*rsqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
// evaluates a*q-r
template<typename T> __forceinline QuaternionT<T>
msub(const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(msub(a, q.r, p.r),
msub(a, q.i, p.i),
msub(a, q.j, p.j),
msub(a, q.k, p.k));
}
// evaluates a*q-r
template<typename T> __forceinline QuaternionT<T>
madd (const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(madd(a, q.r, p.r),
madd(a, q.i, p.i),
madd(a, q.j, p.j),
madd(a, q.k, p.k));
}
////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////
template<typename T> __forceinline QuaternionT<T> operator +( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a + b.r, b.i, b.j, b.k); }
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r + b, a.i, a.j, a.k); }
template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r + b.r, a.i + b.i, a.j + b.j, a.k + b.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a - b.r, -b.i, -b.j, -b.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r - b, a.i, a.j, a.k); }
template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r - b.r, a.i - b.i, a.j - b.j, a.k - b.k); }
template<typename T> __forceinline Vec3<T> operator *( const QuaternionT<T>& a, const Vec3<T> & b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const QuaternionT<T>& b ) {
return QuaternionT<T>(a.r*b.r - a.i*b.i - a.j*b.j - a.k*b.k,
a.r*b.i + a.i*b.r + a.j*b.k - a.k*b.j,
a.r*b.j - a.i*b.k + a.j*b.r + a.k*b.i,
a.r*b.k + a.i*b.j - a.j*b.i + a.k*b.r);
}
template<typename T> __forceinline QuaternionT<T> operator /( const T & a, const QuaternionT<T>& b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const T & b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a*rcp(b); }
template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const T & b ) { return a = a+b; }
template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a+b; }
template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const T & b ) { return a = a-b; }
template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a-b; }
template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const T & b ) { return a = a*b; }
template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*b; }
template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const T & b ) { return a = a*rcp(b); }
template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*rcp(b); }
template<typename T, typename M> __forceinline QuaternionT<T>
select(const M& m, const QuaternionT<T>& q, const QuaternionT<T>& p)
{
return QuaternionT<T>(select(m, q.r, p.r),
select(m, q.i, p.i),
select(m, q.j, p.j),
select(m, q.k, p.k));
}
template<typename T> __forceinline Vec3<T> xfmPoint ( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline Vec3<T> xfmVector( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline Vec3<T> xfmNormal( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
template<typename T> __forceinline T dot(const QuaternionT<T>& a, const QuaternionT<T>& b) { return a.r*b.r + a.i*b.i + a.j*b.j + a.k*b.k; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r == b.r && a.i == b.i && a.j == b.j && a.k == b.k; }
template<typename T> __forceinline bool operator !=( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r != b.r || a.i != b.i || a.j != b.j || a.k != b.k; }
////////////////////////////////////////////////////////////////////////////////
/// Orientation Functions
////////////////////////////////////////////////////////////////////////////////
template<typename T> QuaternionT<T>::QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz )
{
if ( vx.x + vy.y + vz.z >= T(zero) )
{
const T t = T(one) + (vx.x + vy.y + vz.z);
const T s = rsqrt(t)*T(0.5f);
r = t*s;
i = (vy.z - vz.y)*s;
j = (vz.x - vx.z)*s;
k = (vx.y - vy.x)*s;
}
else if ( vx.x >= max(vy.y, vz.z) )
{
const T t = (T(one) + vx.x) - (vy.y + vz.z);
const T s = rsqrt(t)*T(0.5f);
r = (vy.z - vz.y)*s;
i = t*s;
j = (vx.y + vy.x)*s;
k = (vz.x + vx.z)*s;
}
else if ( vy.y >= vz.z ) // if ( vy.y >= max(vz.z, vx.x) )
{
const T t = (T(one) + vy.y) - (vz.z + vx.x);
const T s = rsqrt(t)*T(0.5f);
r = (vz.x - vx.z)*s;
i = (vx.y + vy.x)*s;
j = t*s;
k = (vy.z + vz.y)*s;
}
else //if ( vz.z >= max(vy.y, vx.x) )
{
const T t = (T(one) + vz.z) - (vx.x + vy.y);
const T s = rsqrt(t)*T(0.5f);
r = (vx.y - vy.x)*s;
i = (vz.x + vx.z)*s;
j = (vy.z + vz.y)*s;
k = t*s;
}
}
template<typename T> QuaternionT<T>::QuaternionT( const T& yaw, const T& pitch, const T& roll )
{
const T cya = cos(yaw *T(0.5f));
const T cpi = cos(pitch*T(0.5f));
const T cro = cos(roll *T(0.5f));
const T sya = sin(yaw *T(0.5f));
const T spi = sin(pitch*T(0.5f));
const T sro = sin(roll *T(0.5f));
r = cro*cya*cpi + sro*sya*spi;
i = cro*cya*spi + sro*sya*cpi;
j = cro*sya*cpi - sro*cya*spi;
k = sro*cya*cpi - cro*sya*spi;
}
//////////////////////////////////////////////////////////////////////////////
/// Output Operators
//////////////////////////////////////////////////////////////////////////////
template<typename T> static embree_ostream operator<<(embree_ostream cout, const QuaternionT<T>& q) {
return cout << "{ r = " << q.r << ", i = " << q.i << ", j = " << q.j << ", k = " << q.k << " }";
}
/*! default template instantiations */
typedef QuaternionT<float> Quaternion3f;
typedef QuaternionT<double> Quaternion3d;
template<int N> using Quaternion3vf = QuaternionT<vfloat<N>>;
typedef QuaternionT<vfloat<4>> Quaternion3vf4;
typedef QuaternionT<vfloat<8>> Quaternion3vf8;
typedef QuaternionT<vfloat<16>> Quaternion3vf16;
//////////////////////////////////////////////////////////////////////////////
/// Interpolation
//////////////////////////////////////////////////////////////////////////////
template<typename T>
__forceinline QuaternionT<T>lerp(const QuaternionT<T>& q0,
const QuaternionT<T>& q1,
const T& factor)
{
QuaternionT<T> q;
q.r = lerp(q0.r, q1.r, factor);
q.i = lerp(q0.i, q1.i, factor);
q.j = lerp(q0.j, q1.j, factor);
q.k = lerp(q0.k, q1.k, factor);
return q;
}
template<typename T>
__forceinline QuaternionT<T> slerp(const QuaternionT<T>& q0,
const QuaternionT<T>& q1_,
const T& t)
{
T cosTheta = dot(q0, q1_);
QuaternionT<T> q1 = select(cosTheta < 0.f, -q1_, q1_);
cosTheta = select(cosTheta < 0.f, -cosTheta, cosTheta);
// spherical linear interpolation
const T phi = t * fastapprox::acos(cosTheta);
T sinPhi, cosPhi;
fastapprox::sincos(phi, sinPhi, cosPhi);
QuaternionT<T> qperp = sinPhi * normalize(msub(cosTheta, q0, q1));
QuaternionT<T> qslerp = msub(cosPhi, q0, qperp);
// regular linear interpolation as fallback
QuaternionT<T> qlerp = normalize(lerp(q0, q1, t));
return select(cosTheta > 0.9995f, qlerp, qslerp);
}
}

137
thirdparty/embree/common/math/range.h vendored Normal file
View File

@@ -0,0 +1,137 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../math/emath.h"
namespace embree
{
template<typename Ty>
struct range
{
__forceinline range() {}
__forceinline range(const Ty& begin)
: _begin(begin), _end(begin+1) {}
__forceinline range(const Ty& begin, const Ty& end)
: _begin(begin), _end(end) {}
__forceinline range(const range& other)
: _begin(other._begin), _end(other._end) {}
template<typename T1>
__forceinline range(const range<T1>& other)
: _begin(Ty(other._begin)), _end(Ty(other._end)) {}
template<typename T1>
__forceinline range& operator =(const range<T1>& other) {
_begin = other._begin;
_end = other._end;
return *this;
}
__forceinline Ty begin() const {
return _begin;
}
__forceinline Ty end() const {
return _end;
}
__forceinline range intersect(const range& r) const {
return range (max(_begin,r._begin),min(_end,r._end));
}
__forceinline Ty size() const {
return _end - _begin;
}
__forceinline bool empty() const {
return _end <= _begin;
}
__forceinline Ty center() const {
return (_begin + _end)/2;
}
__forceinline std::pair<range,range> split() const
{
const Ty _center = center();
return std::make_pair(range(_begin,_center),range(_center,_end));
}
__forceinline void split(range& left_o, range& right_o) const
{
const Ty _center = center();
left_o = range(_begin,_center);
right_o = range(_center,_end);
}
__forceinline friend bool operator< (const range& r0, const range& r1) {
return r0.size() < r1.size();
}
friend embree_ostream operator<<(embree_ostream cout, const range& r) {
return cout << "range [" << r.begin() << ", " << r.end() << "]";
}
Ty _begin, _end;
};
template<typename Ty>
range<Ty> make_range(const Ty& begin, const Ty& end) {
return range<Ty>(begin,end);
}
template<typename Ty>
struct extended_range : public range<Ty>
{
__forceinline extended_range () {}
__forceinline extended_range (const Ty& begin)
: range<Ty>(begin), _ext_end(begin+1) {}
__forceinline extended_range (const Ty& begin, const Ty& end)
: range<Ty>(begin,end), _ext_end(end) {}
__forceinline extended_range (const Ty& begin, const Ty& end, const Ty& ext_end)
: range<Ty>(begin,end), _ext_end(ext_end) {}
__forceinline Ty ext_end() const {
return _ext_end;
}
__forceinline Ty ext_size() const {
return _ext_end - range<Ty>::_begin;
}
__forceinline Ty ext_range_size() const {
return _ext_end - range<Ty>::_end;
}
__forceinline bool has_ext_range() const {
assert(_ext_end >= range<Ty>::_end);
return (_ext_end - range<Ty>::_end) > 0;
}
__forceinline void set_ext_range(const size_t ext_end){
assert(ext_end >= range<Ty>::_end);
_ext_end = ext_end;
}
__forceinline void move_right(const size_t plus){
range<Ty>::_begin += plus;
range<Ty>::_end += plus;
_ext_end += plus;
}
friend embree_ostream operator<<(embree_ostream cout, const extended_range& r) {
return cout << "extended_range [" << r.begin() << ", " << r.end() << " (" << r.ext_end() << ")]";
}
Ty _ext_end;
};
}

View File

@@ -0,0 +1,525 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
// Transcendental functions from "ispc": https://github.com/ispc/ispc/
// Most of the transcendental implementations in ispc code come from
// Solomon Boulos's "syrah": https://github.com/boulos/syrah/
#include "../simd/simd.h"
namespace embree
{
namespace fastapprox
{
template <typename T>
__forceinline T sin(const T &v)
{
static const float piOverTwoVec = 1.57079637050628662109375;
static const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
auto sinUseCos = (kMod4 == 1) | (kMod4 == 3);
auto flipSign = (kMod4 > 1);
// These coefficients are from sollya with fpminimax(sin(x)/x, [|0, 2,
// 4, 6, 8, 10|], [|single...|], [0;Pi/2]);
static const float sinC2 = -0.16666667163372039794921875;
static const float sinC4 = +8.333347737789154052734375e-3;
static const float sinC6 = -1.9842604524455964565277099609375e-4;
static const float sinC8 = +2.760012648650445044040679931640625e-6;
static const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
static const float cosC2 = -0.5;
static const float cosC4 = +4.166664183139801025390625e-2;
static const float cosC6 = -1.388833043165504932403564453125e-3;
static const float cosC8 = +2.47562347794882953166961669921875e-5;
static const float cosC10 = -2.59630184018533327616751194000244140625e-7;
auto outside = select(sinUseCos, 1., x);
auto c2 = select(sinUseCos, T(cosC2), T(sinC2));
auto c4 = select(sinUseCos, T(cosC4), T(sinC4));
auto c6 = select(sinUseCos, T(cosC6), T(sinC6));
auto c8 = select(sinUseCos, T(cosC8), T(sinC8));
auto c10 = select(sinUseCos, T(cosC10), T(sinC10));
auto x2 = x * x;
auto formula = x2 * c10 + c8;
formula = x2 * formula + c6;
formula = x2 * formula + c4;
formula = x2 * formula + c2;
formula = x2 * formula + 1.;
formula *= outside;
formula = select(flipSign, -formula, formula);
return formula;
}
template <typename T>
__forceinline T cos(const T &v)
{
static const float piOverTwoVec = 1.57079637050628662109375;
static const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
auto cosUseCos = (kMod4 == 0) | (kMod4 == 2);
auto flipSign = (kMod4 == 1) | (kMod4 == 2);
const float sinC2 = -0.16666667163372039794921875;
const float sinC4 = +8.333347737789154052734375e-3;
const float sinC6 = -1.9842604524455964565277099609375e-4;
const float sinC8 = +2.760012648650445044040679931640625e-6;
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
const float cosC2 = -0.5;
const float cosC4 = +4.166664183139801025390625e-2;
const float cosC6 = -1.388833043165504932403564453125e-3;
const float cosC8 = +2.47562347794882953166961669921875e-5;
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
auto outside = select(cosUseCos, 1., x);
auto c2 = select(cosUseCos, T(cosC2), T(sinC2));
auto c4 = select(cosUseCos, T(cosC4), T(sinC4));
auto c6 = select(cosUseCos, T(cosC6), T(sinC6));
auto c8 = select(cosUseCos, T(cosC8), T(sinC8));
auto c10 = select(cosUseCos, T(cosC10), T(sinC10));
auto x2 = x * x;
auto formula = x2 * c10 + c8;
formula = x2 * formula + c6;
formula = x2 * formula + c4;
formula = x2 * formula + c2;
formula = x2 * formula + 1.;
formula *= outside;
formula = select(flipSign, -formula, formula);
return formula;
}
template <typename T>
__forceinline void sincos(const T &v, T &sinResult, T &cosResult)
{
const float piOverTwoVec = 1.57079637050628662109375;
const float twoOverPiVec = 0.636619746685028076171875;
auto scaled = v * twoOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * piOverTwoVec;
auto kMod4 = k & 3;
auto cosUseCos = ((kMod4 == 0) | (kMod4 == 2));
auto sinUseCos = ((kMod4 == 1) | (kMod4 == 3));
auto sinFlipSign = (kMod4 > 1);
auto cosFlipSign = ((kMod4 == 1) | (kMod4 == 2));
const float oneVec = +1.;
const float sinC2 = -0.16666667163372039794921875;
const float sinC4 = +8.333347737789154052734375e-3;
const float sinC6 = -1.9842604524455964565277099609375e-4;
const float sinC8 = +2.760012648650445044040679931640625e-6;
const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
const float cosC2 = -0.5;
const float cosC4 = +4.166664183139801025390625e-2;
const float cosC6 = -1.388833043165504932403564453125e-3;
const float cosC8 = +2.47562347794882953166961669921875e-5;
const float cosC10 = -2.59630184018533327616751194000244140625e-7;
auto x2 = x * x;
auto sinFormula = x2 * sinC10 + sinC8;
auto cosFormula = x2 * cosC10 + cosC8;
sinFormula = x2 * sinFormula + sinC6;
cosFormula = x2 * cosFormula + cosC6;
sinFormula = x2 * sinFormula + sinC4;
cosFormula = x2 * cosFormula + cosC4;
sinFormula = x2 * sinFormula + sinC2;
cosFormula = x2 * cosFormula + cosC2;
sinFormula = x2 * sinFormula + oneVec;
cosFormula = x2 * cosFormula + oneVec;
sinFormula *= x;
sinResult = select(sinUseCos, cosFormula, sinFormula);
cosResult = select(cosUseCos, cosFormula, sinFormula);
sinResult = select(sinFlipSign, -sinResult, sinResult);
cosResult = select(cosFlipSign, -cosResult, cosResult);
}
template <typename T>
__forceinline T tan(const T &v)
{
const float piOverFourVec = 0.785398185253143310546875;
const float fourOverPiVec = 1.27323949337005615234375;
auto xLt0 = v < 0.;
auto y = select(xLt0, -v, v);
auto scaled = y * fourOverPiVec;
auto kReal = floor(scaled);
auto k = toInt(kReal);
auto x = y - kReal * piOverFourVec;
// If k & 1, x -= Pi/4
auto needOffset = (k & 1) != 0;
x = select(needOffset, x - piOverFourVec, x);
// If k & 3 == (0 or 3) let z = tan_In...(y) otherwise z = -cot_In0To...
auto kMod4 = k & 3;
auto useCotan = (kMod4 == 1) | (kMod4 == 2);
const float oneVec = 1.0;
const float tanC2 = +0.33333075046539306640625;
const float tanC4 = +0.13339905440807342529296875;
const float tanC6 = +5.3348250687122344970703125e-2;
const float tanC8 = +2.46033705770969390869140625e-2;
const float tanC10 = +2.892402000725269317626953125e-3;
const float tanC12 = +9.500005282461643218994140625e-3;
const float cotC2 = -0.3333333432674407958984375;
const float cotC4 = -2.222204394638538360595703125e-2;
const float cotC6 = -2.11752182804048061370849609375e-3;
const float cotC8 = -2.0846328698098659515380859375e-4;
const float cotC10 = -2.548247357481159269809722900390625e-5;
const float cotC12 = -3.5257363606433500535786151885986328125e-7;
auto x2 = x * x;
T z;
if (any(useCotan))
{
auto cotVal = x2 * cotC12 + cotC10;
cotVal = x2 * cotVal + cotC8;
cotVal = x2 * cotVal + cotC6;
cotVal = x2 * cotVal + cotC4;
cotVal = x2 * cotVal + cotC2;
cotVal = x2 * cotVal + oneVec;
// The equation is for x * cot(x) but we need -x * cot(x) for the tan part.
cotVal /= -x;
z = cotVal;
}
auto useTan = !useCotan;
if (any(useTan))
{
auto tanVal = x2 * tanC12 + tanC10;
tanVal = x2 * tanVal + tanC8;
tanVal = x2 * tanVal + tanC6;
tanVal = x2 * tanVal + tanC4;
tanVal = x2 * tanVal + tanC2;
tanVal = x2 * tanVal + oneVec;
// Equation was for tan(x)/x
tanVal *= x;
z = select(useTan, tanVal, z);
}
return select(xLt0, -z, z);
}
template <typename T>
__forceinline T asin(const T &x0)
{
auto isneg = (x0 < 0.f);
auto x = abs(x0);
auto isnan = (x > 1.f);
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
// avg error: 1.1105439e-06, max error 1.3187528e-06
auto v = 1.57079517841339111328125f +
x * (-0.21450997889041900634765625f +
x * (8.78556668758392333984375e-2f +
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375f;
v = select(v < 0.f, T(0.f), v);
v = select(isneg, -v, v);
v = select(isnan, T(cast_i2f(0x7fc00000)), v);
return v;
}
template <typename T>
__forceinline T acos(const T &v)
{
return 1.57079637050628662109375f - asin(v);
}
template <typename T>
__forceinline T atan(const T &v)
{
const float piOverTwoVec = 1.57079637050628662109375;
// atan(-x) = -atan(x) (so flip from negative to positive first)
// If x > 1 -> atan(x) = Pi/2 - atan(1/x)
auto xNeg = v < 0.f;
auto xFlipped = select(xNeg, -v, v);
auto xGt1 = xFlipped > 1.;
auto x = select(xGt1, rcpSafe(xFlipped), xFlipped);
// These coefficients approximate atan(x)/x
const float atanC0 = +0.99999988079071044921875;
const float atanC2 = -0.3333191573619842529296875;
const float atanC4 = +0.199689209461212158203125;
const float atanC6 = -0.14015688002109527587890625;
const float atanC8 = +9.905083477497100830078125e-2;
const float atanC10 = -5.93664981424808502197265625e-2;
const float atanC12 = +2.417283318936824798583984375e-2;
const float atanC14 = -4.6721356920897960662841796875e-3;
auto x2 = x * x;
auto result = x2 * atanC14 + atanC12;
result = x2 * result + atanC10;
result = x2 * result + atanC8;
result = x2 * result + atanC6;
result = x2 * result + atanC4;
result = x2 * result + atanC2;
result = x2 * result + atanC0;
result *= x;
result = select(xGt1, piOverTwoVec - result, result);
result = select(xNeg, -result, result);
return result;
}
template <typename T>
__forceinline T atan2(const T &y, const T &x)
{
const float piVec = 3.1415926536;
// atan2(y, x) =
//
// atan2(y > 0, x = +-0) -> Pi/2
// atan2(y < 0, x = +-0) -> -Pi/2
// atan2(y = +-0, x < +0) -> +-Pi
// atan2(y = +-0, x >= +0) -> +-0
//
// atan2(y >= 0, x < 0) -> Pi + atan(y/x)
// atan2(y < 0, x < 0) -> -Pi + atan(y/x)
// atan2(y, x > 0) -> atan(y/x)
//
// and then a bunch of code for dealing with infinities.
auto yOverX = y * rcpSafe(x);
auto atanArg = atan(yOverX);
auto xLt0 = x < 0.f;
auto yLt0 = y < 0.f;
auto offset = select(xLt0,
select(yLt0, T(-piVec), T(piVec)), 0.f);
return offset + atanArg;
}
template <typename T>
__forceinline T exp(const T &v)
{
const float ln2Part1 = 0.6931457519;
const float ln2Part2 = 1.4286067653e-6;
const float oneOverLn2 = 1.44269502162933349609375;
auto scaled = v * oneOverLn2;
auto kReal = floor(scaled);
auto k = toInt(kReal);
// Reduced range version of x
auto x = v - kReal * ln2Part1;
x -= kReal * ln2Part2;
// These coefficients are for e^x in [0, ln(2)]
const float one = 1.;
const float c2 = 0.4999999105930328369140625;
const float c3 = 0.166668415069580078125;
const float c4 = 4.16539050638675689697265625e-2;
const float c5 = 8.378830738365650177001953125e-3;
const float c6 = 1.304379315115511417388916015625e-3;
const float c7 = 2.7555381529964506626129150390625e-4;
auto result = x * c7 + c6;
result = x * result + c5;
result = x * result + c4;
result = x * result + c3;
result = x * result + c2;
result = x * result + one;
result = x * result + one;
// Compute 2^k (should differ for float and double, but I'll avoid
// it for now and just do floats)
const int fpbias = 127;
auto biasedN = k + fpbias;
auto overflow = kReal > fpbias;
// Minimum exponent is -126, so if k is <= -127 (k + 127 <= 0)
// we've got underflow. -127 * ln(2) -> -88.02. So the most
// negative float input that doesn't result in zero is like -88.
auto underflow = kReal <= -fpbias;
const int infBits = 0x7f800000;
biasedN <<= 23;
// Reinterpret this thing as float
auto twoToTheN = asFloat(biasedN);
// Handle both doubles and floats (hopefully eliding the copy for float)
auto elemtype2n = twoToTheN;
result *= elemtype2n;
result = select(overflow, cast_i2f(infBits), result);
result = select(underflow, 0., result);
return result;
}
// Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
// * log(2) + log(y) where y is the reduced range (usually in [1/2, 1)).
template <typename T, typename R>
__forceinline void __rangeReduceLog(const T &input,
T &reduced,
R &exponent)
{
auto intVersion = asInt(input);
// single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
// exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000
// 0x7 0xF 0x8 0x0 0x0 0x0 0x0 0x0
// non-exponent = 1000 0000 0111 1111 1111 1111 1111 1111
// = 0x8 0x0 0x7 0xF 0xF 0xF 0xF 0xF
//const int exponentMask(0x7F800000)
static const int nonexponentMask = 0x807FFFFF;
// We want the reduced version to have an exponent of -1 which is
// -1 + 127 after biasing or 126
static const int exponentNeg1 = (126l << 23);
// NOTE(boulos): We don't need to mask anything out since we know
// the sign bit has to be 0. If it's 1, we need to return infinity/nan
// anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
auto biasedExponent = intVersion >> 23; // This number is [0, 255] but it means [-127, 128]
auto offsetExponent = biasedExponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2
exponent = offsetExponent - 127; // get the real value
// Blend the offset_exponent with the original input (do this in
// int for now, until I decide if float can have & and &not)
auto blended = (intVersion & nonexponentMask) | (exponentNeg1);
reduced = asFloat(blended);
}
template <typename T> struct ExponentType { };
template <int N> struct ExponentType<vfloat_impl<N>> { typedef vint<N> Ty; };
template <> struct ExponentType<float> { typedef int Ty; };
template <typename T>
__forceinline T log(const T &v)
{
T reduced;
typename ExponentType<T>::Ty exponent;
const int nanBits = 0x7fc00000;
const int negInfBits = 0xFF800000;
const float nan = cast_i2f(nanBits);
const float negInf = cast_i2f(negInfBits);
auto useNan = v < 0.;
auto useInf = v == 0.;
auto exceptional = useNan | useInf;
const float one = 1.0;
auto patched = select(exceptional, one, v);
__rangeReduceLog(patched, reduced, exponent);
const float ln2 = 0.693147182464599609375;
auto x1 = one - reduced;
const float c1 = +0.50000095367431640625;
const float c2 = +0.33326041698455810546875;
const float c3 = +0.2519190013408660888671875;
const float c4 = +0.17541764676570892333984375;
const float c5 = +0.3424419462680816650390625;
const float c6 = -0.599632322788238525390625;
const float c7 = +1.98442304134368896484375;
const float c8 = -2.4899270534515380859375;
const float c9 = +1.7491014003753662109375;
auto result = x1 * c9 + c8;
result = x1 * result + c7;
result = x1 * result + c6;
result = x1 * result + c5;
result = x1 * result + c4;
result = x1 * result + c3;
result = x1 * result + c2;
result = x1 * result + c1;
result = x1 * result + one;
// Equation was for -(ln(red)/(1-red))
result *= -x1;
result += toFloat(exponent) * ln2;
return select(exceptional,
select(useNan, T(nan), T(negInf)),
result);
}
template <typename T>
__forceinline T pow(const T &x, const T &y)
{
auto x1 = abs(x);
auto z = exp(y * log(x1));
// Handle special cases
const float twoOver23 = 8388608.0f;
auto yInt = y == round(y);
auto yOddInt = select(yInt, asInt(abs(y) + twoOver23) << 31, 0); // set sign bit
// x == 0
z = select(x == 0.0f,
select(y < 0.0f, T(inf) | signmsk(x),
select(y == 0.0f, T(1.0f), asFloat(yOddInt) & x)), z);
// x < 0
auto xNegative = x < 0.0f;
if (any(xNegative))
{
auto z1 = z | asFloat(yOddInt);
z1 = select(yInt, z1, std::numeric_limits<float>::quiet_NaN());
z = select(xNegative, z1, z);
}
auto xFinite = isfinite(x);
auto yFinite = isfinite(y);
if (all(xFinite & yFinite))
return z;
// x finite and y infinite
z = select(andn(xFinite, yFinite),
select(x1 == 1.0f, 1.0f,
select((x1 > 1.0f) ^ (y < 0.0f), inf, T(0.0f))), z);
// x infinite
z = select(xFinite, z,
select(y == 0.0f, 1.0f,
select(y < 0.0f, T(0.0f), inf) | (asFloat(yOddInt) & x)));
return z;
}
template <typename T>
__forceinline T pow(const T &x, float y)
{
return pow(x, T(y));
}
} // namespace fastapprox
} // namespace embree

236
thirdparty/embree/common/math/vec2.h vendored Normal file
View File

@@ -0,0 +1,236 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
struct Vec2fa;
////////////////////////////////////////////////////////////////////////////////
/// Generic 2D vector Class
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct Vec2
{
enum { N = 2 };
union {
struct { T x, y; };
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
T components[N];
#endif
};
typedef T Scalar;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2( ) {}
__forceinline explicit Vec2( const T& a ) : x(a), y(a) {}
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
Vec2( const Vec2fa& other );
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2( ZeroTy ) : x(zero), y(zero) {}
__forceinline Vec2( OneTy ) : x(one), y(one) {}
__forceinline Vec2( PosInfTy ) : x(pos_inf), y(pos_inf) {}
__forceinline Vec2( NegInfTy ) : x(neg_inf), y(neg_inf) {}
#if defined(__WIN32__) && _MSC_VER == 1800 // workaround for older VS 2013 compiler
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return (&x)[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 2); return (&x)[axis]; }
#else
__forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return components[axis]; }
__forceinline T& operator [](const size_t axis ) { assert(axis < 2); return components[axis]; }
#endif
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a ) { return Vec2<T>(+a.x, +a.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a ) { return Vec2<T>(-a.x, -a.y); }
template<typename T> __forceinline Vec2<T> abs ( const Vec2<T>& a ) { return Vec2<T>(abs (a.x), abs (a.y)); }
template<typename T> __forceinline Vec2<T> rcp ( const Vec2<T>& a ) { return Vec2<T>(rcp (a.x), rcp (a.y)); }
template<typename T> __forceinline Vec2<T> rsqrt ( const Vec2<T>& a ) { return Vec2<T>(rsqrt(a.x), rsqrt(a.y)); }
template<typename T> __forceinline Vec2<T> sqrt ( const Vec2<T>& a ) { return Vec2<T>(sqrt (a.x), sqrt (a.y)); }
template<typename T> __forceinline Vec2<T> frac ( const Vec2<T>& a ) { return Vec2<T>(frac (a.x), frac (a.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x + b.x, a.y + b.y); }
template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x + b , a.y + b ); }
template<typename T> __forceinline Vec2<T> operator +( const T& a, const Vec2<T>& b ) { return Vec2<T>(a + b.x, a + b.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x - b.x, a.y - b.y); }
template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x - b , a.y - b ); }
template<typename T> __forceinline Vec2<T> operator -( const T& a, const Vec2<T>& b ) { return Vec2<T>(a - b.x, a - b.y); }
template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x * b.x, a.y * b.y); }
template<typename T> __forceinline Vec2<T> operator *( const T& a, const Vec2<T>& b ) { return Vec2<T>(a * b.x, a * b.y); }
template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x * b , a.y * b ); }
template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x / b.x, a.y / b.y); }
template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x / b , a.y / b ); }
template<typename T> __forceinline Vec2<T> operator /( const T& a, const Vec2<T>& b ) { return Vec2<T>(a / b.x, a / b.y); }
template<typename T> __forceinline Vec2<T> min(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(min(a.x, b.x), min(a.y, b.y)); }
template<typename T> __forceinline Vec2<T> max(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(max(a.x, b.x), max(a.y, b.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T> madd ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> msub ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> nmadd ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> nmsub ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> madd ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( madd(a,b.x,c.x), madd(a,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> msub ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( msub(a,b.x,c.x), msub(a,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> nmadd ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y) ); }
template<typename T> __forceinline Vec2<T> nmsub ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y) ); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T>& operator +=( Vec2<T>& a, const Vec2<T>& b ) { a.x += b.x; a.y += b.y; return a; }
template<typename T> __forceinline Vec2<T>& operator -=( Vec2<T>& a, const Vec2<T>& b ) { a.x -= b.x; a.y -= b.y; return a; }
template<typename T> __forceinline Vec2<T>& operator *=( Vec2<T>& a, const T& b ) { a.x *= b ; a.y *= b ; return a; }
template<typename T> __forceinline Vec2<T>& operator /=( Vec2<T>& a, const T& b ) { a.x /= b ; a.y /= b ; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T reduce_add( const Vec2<T>& a ) { return a.x + a.y; }
template<typename T> __forceinline T reduce_mul( const Vec2<T>& a ) { return a.x * a.y; }
template<typename T> __forceinline T reduce_min( const Vec2<T>& a ) { return min(a.x, a.y); }
template<typename T> __forceinline T reduce_max( const Vec2<T>& a ) { return max(a.x, a.y); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const Vec2<T>& a, const Vec2<T>& b ) { return a.x == b.x && a.y == b.y; }
template<typename T> __forceinline bool operator !=( const Vec2<T>& a, const Vec2<T>& b ) { return a.x != b.x || a.y != b.y; }
template<typename T> __forceinline bool operator < ( const Vec2<T>& a, const Vec2<T>& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T> shift_right_1( const Vec2<T>& a ) {
return Vec2<T>(shift_right_1(a.x),shift_right_1(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T dot ( const Vec2<T>& a, const Vec2<T>& b ) { return madd(a.x,b.x,a.y*b.y); }
template<typename T> __forceinline Vec2<T> cross ( const Vec2<T>& a ) { return Vec2<T>(-a.y,a.x); }
template<typename T> __forceinline T length ( const Vec2<T>& a ) { return sqrt(dot(a,a)); }
template<typename T> __forceinline Vec2<T> normalize( const Vec2<T>& a ) { return a*rsqrt(dot(a,a)); }
template<typename T> __forceinline T distance ( const Vec2<T>& a, const Vec2<T>& b ) { return length(a-b); }
template<typename T> __forceinline T det ( const Vec2<T>& a, const Vec2<T>& b ) { return a.x*b.y - a.y*b.x; }
template<typename T> __forceinline Vec2<T> normalize_safe( const Vec2<T>& a ) {
const T d = dot(a,a); return select(d == T( zero ),a, a*rsqrt(d) );
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec2<T> select ( bool s, const Vec2<T>& t, const Vec2<T>& f ) {
return Vec2<T>(select(s,t.x,f.x),select(s,t.y,f.y));
}
template<typename T> __forceinline Vec2<T> select ( const Vec2<bool>& s, const Vec2<T>& t, const Vec2<T>& f ) {
return Vec2<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y));
}
template<typename T> __forceinline Vec2<T> select ( const typename T::Bool& s, const Vec2<T>& t, const Vec2<T>& f ) {
return Vec2<T>(select(s,t.x,f.x),select(s,t.y,f.y));
}
template<typename T>
__forceinline Vec2<T> lerp(const Vec2<T>& v0, const Vec2<T>& v1, const T& t) {
return madd(Vec2<T>(T(1.0f)-t),v0,t*v1);
}
template<typename T> __forceinline int maxDim ( const Vec2<T>& a )
{
const Vec2<T> b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec2<T>& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
////////////////////////////////////////////////////////////////////////////////
/// Default template instantiations
////////////////////////////////////////////////////////////////////////////////
typedef Vec2<bool > Vec2b;
typedef Vec2<int > Vec2i;
typedef Vec2<float> Vec2f;
}
#include "vec2fa.h"
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined(__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#if defined(__SSE__) || defined(__ARM_NEON)
template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
#if defined(__AVX__)
template<> __forceinline Vec2<vfloat8>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
}

325
thirdparty/embree/common/math/vec2fa.h vendored Normal file
View File

@@ -0,0 +1,325 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec2fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec2fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec2fa
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 2 };
union {
__m128 m128;
struct { float x,y,az,aw; };
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ) {}
__forceinline Vec2fa( const __m128 a ) : m128(a) {}
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
__forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }
__forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}
__forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}
__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec2fa load( const void* const a ) {
return Vec2fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
}
static __forceinline Vec2fa loadu( const void* const a ) {
return Vec2fa(_mm_and_ps(_mm_loadu_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
}
static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
_mm_storeu_ps((float*)ptr,v);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec2fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
__forceinline Vec2fa operator -( const Vec2fa& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
__forceinline Vec2fa abs ( const Vec2fa& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
__forceinline Vec2fa sign ( const Vec2fa& a ) {
return blendv_ps(Vec2fa(one), -Vec2fa(one), _mm_cmplt_ps (a,Vec2fa(zero)));
}
__forceinline Vec2fa rcp ( const Vec2fa& a )
{
#if defined(__aarch64__)
__m128 reciprocal = _mm_rcp_ps(a.m128);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
return (const Vec2fa)reciprocal;
#else
#if defined(__AVX512VL__)
const Vec2fa r = _mm_rcp14_ps(a.m128);
#else
const Vec2fa r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec2fa h_n = _mm_fnmadd_ps(a, r, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
const Vec2fa res = _mm_fmadd_ps(r, h_n, r); // Then compute r + r * h_n
#else
const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r)); // First, compute 1 - a * r (which will be very close to 0)
const Vec2fa res = _mm_add_ps(r,_mm_mul_ps(r, h_n)); // Then compute r + r * h_n
#endif
return res;
#endif //defined(__aarch64__)
}
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a,a); }
__forceinline Vec2fa rsqrt( const Vec2fa& a )
{
#if defined(__aarch64__)
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif
}
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
return blendv_ps(a, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec2fa log ( const Vec2fa& a ) {
return Vec2fa(logf(a.x),logf(a.y));
}
__forceinline Vec2fa exp ( const Vec2fa& a ) {
return Vec2fa(expf(a.x),expf(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
return Vec2fa(powf(a.x,b),powf(a.y,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__AVX2__)
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
#else
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b+c;}
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b-c; }
#endif
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
__forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
__forceinline float reduce_min(const Vec2fa& v) { return min(v.x,v.y); }
__forceinline float reduce_max(const Vec2fa& v) { return max(v.x,v.y); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 3) == 3; }
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
}
#else
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return reduce_add(a*b);
}
#endif
__forceinline Vec2fa cross ( const Vec2fa& a ) {
return Vec2fa(-a.y,a.x);
}
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f, t, mask);
}
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec2fa& a )
{
const Vec2fa b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
#if defined(__aarch64__)
//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
#elif defined (__SSE4_1__)
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
#else
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(ceilf (a.x),ceilf (a.y)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
typedef Vec2fa Vec2fa_t;
}
#endif

View File

@@ -0,0 +1,270 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
struct Vec3fa;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec2fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec2fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 2 };
struct { float x,y; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ) {}
//__forceinline Vec2fa( const __m128 a ) : m128(a) {}
explicit Vec2fa(const Vec3fa& a);
__forceinline explicit Vec2fa( const vfloat<4>& a ) {
x = a[0];
y = a[1];
}
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2fa ( const Vec2fa& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2fa& other ) { x = other.x; y = other.y; return *this; }
__forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}
__forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}
//__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec2fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline Vec2fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline void storeu ( void* a, const Vec2fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {}
__forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {}
__forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
__forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
//__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
//__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
__forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); }
__forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
__forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
__forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); }
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
return Vec2fa(x,y);
}
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec2fa log ( const Vec2fa& a ) {
return Vec2fa(sycl::log(a.x),sycl::log(a.y));
}
__forceinline Vec2fa exp ( const Vec2fa& a ) {
return Vec2fa(sycl::exp(a.x),sycl::exp(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); }
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); }
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); }
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); }
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y));
}
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
return Vec2fa(powf(a.x,b),powf(a.y,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); }
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); }
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
__forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
__forceinline float reduce_min(const Vec2fa& v) { return sycl::fmin(v.x,v.y); }
__forceinline float reduce_max(const Vec2fa& v) { return sycl::fmax(v.x,v.y); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return a.x == b.x && a.y == b.y; }
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return a.x != b.x || a.y != b.y; }
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return reduce_add(a*b);
}
__forceinline Vec2fa cross ( const Vec2fa& a ) {
return Vec2fa(-a.y,a.x);
}
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
return Vec2fa(s ? t.x : f.x, s ? t.y : f.y);
}
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec2fa& a )
{
const Vec2fa b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(sycl::trunc(a.x),sycl::trunc(a.y)); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(sycl::floor(a.x),sycl::floor(a.y)); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(sycl::ceil (a.x),sycl::ceil (a.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
/*template<>
__forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
{
v = 0;
const unsigned int lid = get_sub_group_local_id();
if (lid == 0) v = a.x;
if (lid == 1) v = a.y;
}*/
typedef Vec2fa Vec2fa_t;
}

357
thirdparty/embree/common/math/vec3.h vendored Normal file
View File

@@ -0,0 +1,357 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
namespace embree
{
struct Vec3fa;
////////////////////////////////////////////////////////////////////////////////
/// Generic 3D vector Class
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct Vec3
{
enum { N = 3 };
union {
struct {
T x, y, z;
};
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
T components[N];
#endif
};
typedef T Scalar;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3( ) {}
__forceinline explicit Vec3( const T& a ) : x(a), y(a), z(a) {}
__forceinline Vec3( const T& x, const T& y, const T& z ) : x(x), y(y), z(z) {}
__forceinline Vec3( const Vec3& other ) { x = other.x; y = other.y; z = other.z; }
__forceinline Vec3( const Vec3fa& other );
template<typename T1> __forceinline Vec3( const Vec3<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)) {}
template<typename T1> __forceinline Vec3& operator =(const Vec3<T1>& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3( ZeroTy ) : x(zero), y(zero), z(zero) {}
__forceinline Vec3( OneTy ) : x(one), y(one), z(one) {}
__forceinline Vec3( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf) {}
__forceinline Vec3( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf) {}
#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
__forceinline const T& operator []( const size_t axis ) const { assert(axis < 3); return (&x)[axis]; }
__forceinline T& operator []( const size_t axis ) { assert(axis < 3); return (&x)[axis]; }
#else
__forceinline const T& operator [](const size_t axis) const { assert(axis < 3); return components[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 3); return components[axis]; }
#endif
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a ) { return Vec3<T>(+a.x, +a.y, +a.z); }
template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a ) { return Vec3<T>(-a.x, -a.y, -a.z); }
template<typename T> __forceinline Vec3<T> abs ( const Vec3<T>& a ) { return Vec3<T>(abs (a.x), abs (a.y), abs (a.z)); }
template<typename T> __forceinline Vec3<T> rcp ( const Vec3<T>& a ) { return Vec3<T>(rcp (a.x), rcp (a.y), rcp (a.z)); }
template<typename T> __forceinline Vec3<T> rsqrt ( const Vec3<T>& a ) { return Vec3<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z)); }
template<typename T> __forceinline Vec3<T> sqrt ( const Vec3<T>& a ) { return Vec3<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z)); }
template<typename T> __forceinline Vec3<T> zero_fix( const Vec3<T>& a )
{
return Vec3<T>(select(abs(a.x)<min_rcp_input,T(min_rcp_input),a.x),
select(abs(a.y)<min_rcp_input,T(min_rcp_input),a.y),
select(abs(a.z)<min_rcp_input,T(min_rcp_input),a.z));
}
template<typename T> __forceinline Vec3<T> rcp_safe(const Vec3<T>& a) { return rcp(zero_fix(a)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x + b.x, a.y + b.y, a.z + b.z); }
template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x - b.x, a.y - b.y, a.z - b.z); }
template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x * b.x, a.y * b.y, a.z * b.z); }
template<typename T> __forceinline Vec3<T> operator *( const T& a, const Vec3<T>& b ) { return Vec3<T>(a * b.x, a * b.y, a * b.z); }
template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x * b , a.y * b , a.z * b ); }
template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x / b , a.y / b , a.z / b ); }
template<typename T> __forceinline Vec3<T> operator /( const T& a, const Vec3<T>& b ) { return Vec3<T>(a / b.x, a / b.y, a / b.z); }
template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x / b.x, a.y / b.y, a.z / b.z); }
template<typename T> __forceinline Vec3<T> min(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); }
template<typename T> __forceinline Vec3<T> max(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); }
template<typename T> __forceinline Vec3<T> operator >>( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x >> b, a.y >> b, a.z >> b); }
template<typename T> __forceinline Vec3<T> operator <<( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x << b, a.y << b, a.z << b); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T> madd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> msub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> nmadd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z));}
template<typename T> __forceinline Vec3<T> nmsub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> madd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> msub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z)); }
template<typename T> __forceinline Vec3<T> nmadd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z));}
template<typename T> __forceinline Vec3<T> nmsub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const T b ) { a.x += b; a.y += b; a.z += b; return a; }
template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const Vec3<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; return a; }
template<typename T> __forceinline Vec3<T>& operator -=( Vec3<T>& a, const Vec3<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; return a; }
template<typename T> __forceinline Vec3<T>& operator *=( Vec3<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; return a; }
template<typename T> __forceinline Vec3<T>& operator /=( Vec3<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T reduce_add( const Vec3<T>& a ) { return a.x + a.y + a.z; }
template<typename T> __forceinline T reduce_mul( const Vec3<T>& a ) { return a.x * a.y * a.z; }
template<typename T> __forceinline T reduce_min( const Vec3<T>& a ) { return min(a.x, a.y, a.z); }
template<typename T> __forceinline T reduce_max( const Vec3<T>& a ) { return max(a.x, a.y, a.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const Vec3<T>& a, const Vec3<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
template<typename T> __forceinline bool operator !=( const Vec3<T>& a, const Vec3<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
template<typename T> __forceinline bool operator < ( const Vec3<T>& a, const Vec3<T>& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T> shift_right_1( const Vec3<T>& a ) {
return Vec3<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<T> select ( bool s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
}
template<typename T> __forceinline Vec3<T> select ( const Vec3<bool>& s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z));
}
template<typename T> __forceinline Vec3<T> select ( const typename T::Bool& s, const Vec3<T>& t, const Vec3<T>& f ) {
return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
}
template<typename T>
__forceinline Vec3<T> lerp(const Vec3<T>& v0, const Vec3<T>& v1, const T& t) {
return madd(Vec3<T>(T(1.0f)-t),v0,t*v1);
}
template<typename T> __forceinline int maxDim ( const Vec3<T>& a )
{
const Vec3<T> b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec3<bool> eq_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x==b.x,a.y==b.y,a.z==b.z); }
template<typename T> __forceinline Vec3<bool> neq_mask(const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x!=b.x,a.y!=b.y,a.z!=b.z); }
template<typename T> __forceinline Vec3<bool> lt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x< b.x,a.y< b.y,a.z< b.z); }
template<typename T> __forceinline Vec3<bool> le_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x<=b.x,a.y<=b.y,a.z<=b.z); }
template<typename T> __forceinline Vec3<bool> gt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x> b.x,a.y> b.y,a.z> b.z); }
template<typename T> __forceinline Vec3<bool> ge_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x>=b.x,a.y>=b.y,a.z>=b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T sqr ( const Vec3<T>& a ) { return dot(a,a); }
template<typename T> __forceinline T dot ( const Vec3<T>& a, const Vec3<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,a.z*b.z)); }
template<typename T> __forceinline T length ( const Vec3<T>& a ) { return sqrt(sqr(a)); }
template<typename T> __forceinline T rcp_length( const Vec3<T>& a ) { return rsqrt(sqr(a)); }
template<typename T> __forceinline Vec3<T> normalize( const Vec3<T>& a ) { return a*rsqrt(sqr(a)); }
template<typename T> __forceinline T distance ( const Vec3<T>& a, const Vec3<T>& b ) { return length(a-b); }
template<typename T> __forceinline Vec3<T> cross ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); }
template<typename T> __forceinline Vec3<T> stable_triangle_normal( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c )
{
const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x;
const T bc_x = b.z*c.y, bc_y = b.x*c.z, bc_z = b.y*c.x;
const Vec3<T> cross_ab(msub(a.y,b.z,ab_x), msub(a.z,b.x,ab_y), msub(a.x,b.y,ab_z));
const Vec3<T> cross_bc(msub(b.y,c.z,bc_x), msub(b.z,c.x,bc_y), msub(b.x,c.y,bc_z));
const auto sx = abs(ab_x) < abs(bc_x);
const auto sy = abs(ab_y) < abs(bc_y);
const auto sz = abs(ab_z) < abs(bc_z);
return Vec3<T>(select(sx,cross_ab.x,cross_bc.x),
select(sy,cross_ab.y,cross_bc.y),
select(sz,cross_ab.z,cross_bc.z));
}
template<typename T> __forceinline T sum ( const Vec3<T>& a ) { return a.x+a.y+a.z; }
template<typename T> __forceinline T halfArea ( const Vec3<T>& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
template<typename T> __forceinline T area ( const Vec3<T>& d ) { return 2.0f*halfArea(d); }
template<typename T> __forceinline Vec3<T> normalize_safe( const Vec3<T>& a ) {
const T d = dot(a,a); return select(d == T( zero ), a , a*rsqrt(d) );
}
template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& P, const Vec3<T>& Q0, const Vec3<T>& Q1)
{
const Vec3<T> N = cross(P-Q0,Q1-Q0);
const Vec3<T> D = Q1-Q0;
return dot(N,N)*rcp(dot(D,D));
}
template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& PmQ0, const Vec3<T>& Q1mQ0)
{
const Vec3<T> N = cross(PmQ0,Q1mQ0);
const Vec3<T> D = Q1mQ0;
return dot(N,N)*rcp(dot(D,D));
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3<T>& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
typedef Vec3<bool > Vec3b;
typedef Vec3<int > Vec3i;
typedef Vec3<float> Vec3f;
}
#include "vec3ba.h"
#include "vec3ia.h"
#include "vec3fa.h"
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined(__AVX__)
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
template<typename Out, typename In>
__forceinline Vec3<Out> broadcast(const Vec3<In>& a, const size_t k) {
return Vec3<Out>(Out(a.x[k]), Out(a.y[k]), Out(a.z[k]));
}
template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
#if !defined(__SYCL_DEVICE_ONLY__)
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
template<>
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
}
#endif
#if defined(__SSE__) || defined(__ARM_NEON)
template<>
__forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]));
}
template<int i0, int i1, int i2, int i3>
__forceinline Vec3<vfloat4> shuffle(const Vec3<vfloat4>& b) {
return Vec3<vfloat4>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
}
#endif
#if defined(__AVX__)
template<>
__forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
template<>
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
}
template<>
__forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat8>(const Vec3<vfloat8>& a, const size_t k) {
return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
}
template<int i0, int i1, int i2, int i3>
__forceinline Vec3<vfloat8> shuffle(const Vec3<vfloat8>& b) {
return Vec3<vfloat8>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
#endif
#else
#if defined(__SSE__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#endif
}

127
thirdparty/embree/common/math/vec3ba.h vendored Normal file
View File

@@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ba_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ba Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ba
{
ALIGNED_STRUCT_(16);
union {
__m128 m128;
struct { int x,y,z; };
};
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( ) {}
__forceinline Vec3ba( const __m128 input ) : m128(input) {}
__forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {}
__forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; }
__forceinline explicit Vec3ba( bool a )
: m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
__forceinline Vec3ba( bool a, bool b, bool c)
: m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
__forceinline operator const __m128&() const { return m128; }
__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a.m128, Vec3ba(embree::True)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a.m128, b.m128); }
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a.m128, b.m128); }
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a.m128, b.m128); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) == 7;
}
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) != 7;
}
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
__forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) == 0x7; }
__forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) != 0x0; }
__forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x7; }
__forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) != 0x0; }
__forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x0; }
__forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a) & 0x7; }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
}
#endif

View File

@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ba Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ba
{
//ALIGNED_STRUCT_(16);
struct { bool x,y,z; };
typedef bool Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( ) {}
//__forceinline Vec3ba( const __m128 input ) : m128(input) {}
__forceinline Vec3ba( const Vec3ba& other ) : x(other.x), y(other.y), z(other.z) {}
__forceinline Vec3ba& operator =(const Vec3ba& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3ba( bool a ) : x(a), y(a), z(a) {}
__forceinline Vec3ba( bool a, bool b, bool c) : x(a), y(b), z(c) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {}
__forceinline Vec3ba( TrueTy ) : x(true), y(true), z(true) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
//__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
//__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator !( const Vec3ba& a ) { return Vec3ba(!a.x,!a.y,!a.z); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x & b.x, a.y & b.y, a.z & b.z); }
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x | b.x, a.y | b.y, a.z | b.z); }
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
return a.x == b.x && a.y == b.y && a.z == b.z;
}
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
return a.x != b.x || a.y != b.y || a.z != b.z;
}
/*
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
__forceinline bool reduce_and( const Vec3ba& a ) { return a.x & a.y & a.z; }
__forceinline bool reduce_or ( const Vec3ba& a ) { return a.x | a.y | a.z; }
__forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); }
__forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); }
__forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
return cout;
}
}

791
thirdparty/embree/common/math/vec3fa.h vendored Normal file
View File

@@ -0,0 +1,791 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fa
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
union {
__m128 m128;
struct { float x,y,z; };
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ) {}
__forceinline Vec3fa( const __m128 a ) : m128(a) {}
__forceinline Vec3fa ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
//__forceinline Vec3fa& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fa ( const Vec3fa& other ) { m128 = other.m128; }
__forceinline Vec3fa& operator =( const Vec3fa& other ) { m128 = other.m128; return *this; }
__forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {}
__forceinline Vec3fa( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
__forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
__forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
__forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fa load( const void* const a ) {
#if defined(__aarch64__)
__m128 t = _mm_load_ps((float*)a);
t[3] = 0.0f;
return Vec3fa(t);
#else
return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
#endif
}
static __forceinline Vec3fa loadu( const void* const a ) {
return Vec3fa(_mm_loadu_ps((float*)a));
}
static __forceinline void storeu ( void* ptr, const Vec3fa& v ) {
_mm_storeu_ps((float*)ptr,v.m128);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec3fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
__forceinline Vec3fa operator -( const Vec3fa& a ) {
#if defined(__aarch64__)
return vnegq_f32(a.m128);
#else
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
#endif
}
__forceinline Vec3fa abs ( const Vec3fa& a ) {
#if defined(__aarch64__)
return _mm_abs_ps(a.m128);
#else
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
#endif
}
__forceinline Vec3fa sign ( const Vec3fa& a ) {
return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128));
}
__forceinline Vec3fa rcp ( const Vec3fa& a )
{
#if defined(__aarch64__)
return vdivq_f32(vdupq_n_f32(1.0f),a.m128);
#else
#if defined(__AVX512VL__)
const Vec3fa r = _mm_rcp14_ps(a.m128);
#else
const Vec3fa r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec3fa h_n = _mm_fnmadd_ps(a.m128, r.m128, vfloat4(1.0)); // First, compute 1 - a * r (which will be very close to 0)
const Vec3fa res = _mm_fmadd_ps(r.m128, h_n.m128, r.m128); // Then compute r + r * h_n
#else
const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128)); // First, compute 1 - a * r (which will be very close to 0)
const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128)); // Then compute r + r * h_n
#endif
return res;
#endif //defined(__aarch64__)
}
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); }
__forceinline Vec3fa rsqrt( const Vec3fa& a )
{
#if defined(__aarch64__)
__m128 r = _mm_rsqrt_ps(a.m128);
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
return r;
#else
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
#endif
}
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fa log ( const Vec3fa& a ) {
return Vec3fa(logf(a.x),logf(a.y),logf(a.z));
}
__forceinline Vec3fa exp ( const Vec3fa& a ) {
return Vec3fa(expf(a.x),expf(a.y),expf(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__AVX2__) || defined(__ARM_NEON)
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;}
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
#endif
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
#if defined(__aarch64__)
__forceinline float reduce_add(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = 0.0f;
return vaddvq_f32(t);
}
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = t[2];
return vminvq_f32(t);
}
__forceinline float reduce_max(const Vec3fa& v) {
float32x4_t t = v.m128;
t[3] = t[2];
return vmaxvq_f32(t);
}
#else
__forceinline float reduce_add(const Vec3fa& v) {
const vfloat4 a(v.m128);
const vfloat4 b = shuffle<1>(a);
const vfloat4 c = shuffle<2>(a);
return _mm_cvtss_f32(a+b+c);
}
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
__forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
#if defined(__aarch64__)
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpgt_ps (a.m128, b.m128); }
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpge_ps (a.m128, b.m128); }
#else
__forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); }
__forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); }
#endif
__forceinline bool isvalid ( const Vec3fa& v ) {
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fa& a ) {
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
}
__forceinline bool isvalid4 ( const Vec3fa& v ) {
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
}
__forceinline bool is_finite4 ( const Vec3fa& a ) {
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
}
#else
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return reduce_add(a*b);
}
#endif
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
{
vfloat4 a0 = vfloat4(a.m128);
vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
vfloat4 b1 = vfloat4(b.m128);
return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
}
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f.m128, t.m128, mask);
}
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
return blendv_ps(f.m128, t.m128, s);
}
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fa& a )
{
const Vec3fa b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
#if defined(__aarch64__)
__forceinline Vec3fa floor(const Vec3fa& a) { return vrndmq_f32(a.m128); }
__forceinline Vec3fa ceil (const Vec3fa& a) { return vrndpq_f32(a.m128); }
__forceinline Vec3fa trunc(const Vec3fa& a) { return vrndq_f32(a.m128); }
#elif defined (__SSE4_1__)
__forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
#else
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
typedef Vec3fa Vec3fa_t;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fx Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fx
{
ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
union {
__m128 m128;
struct { float x,y,z; union { int a; unsigned u; float w; }; };
};
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ) {}
__forceinline Vec3fx( const __m128 a ) : m128(a) {}
__forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {}
__forceinline operator Vec3fa () const { return Vec3fa(m128); }
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
__forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
__forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; }
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
__forceinline Vec3fx( const Vec3fa& other, const float w1) {
#if defined (__aarch64__)
m128 = other.m128; m128[3] = w1;
#elif defined (__SSE4_1__)
m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4);
#else
const vint4 mask(-1,-1,-1,0);
m128 = select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1));
#endif
}
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {}
//__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
__forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
__forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
__forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
__forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fx load( const void* const a ) {
return Vec3fx(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
}
static __forceinline Vec3fx loadu( const void* const a ) {
return Vec3fx(_mm_loadu_ps((float*)a));
}
static __forceinline void storeu ( void* ptr, const Vec3fx& v ) {
_mm_storeu_ps((float*)ptr,v.m128);
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ZeroTy ) : m128(_mm_setzero_ps()) {}
__forceinline Vec3fx( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
__forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
__forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
__forceinline Vec3fx operator -( const Vec3fx& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return _mm_xor_ps(a.m128, mask);
}
__forceinline Vec3fx abs ( const Vec3fx& a ) {
const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return _mm_and_ps(a.m128, mask);
}
__forceinline Vec3fx sign ( const Vec3fx& a ) {
return blendv_ps(Vec3fx(one).m128, (-Vec3fx(one)).m128, _mm_cmplt_ps (a.m128,Vec3fx(zero).m128));
}
__forceinline Vec3fx rcp ( const Vec3fx& a )
{
#if defined(__AVX512VL__)
const Vec3fx r = _mm_rcp14_ps(a.m128);
#else
const Vec3fx r = _mm_rcp_ps(a.m128);
#endif
#if defined(__AVX2__)
const Vec3fx res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
#else
const Vec3fx res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
//return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
#endif
return res;
}
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); }
__forceinline Vec3fx rsqrt( const Vec3fx& a )
{
#if defined(__AVX512VL__)
__m128 r = _mm_rsqrt14_ps(a.m128);
#else
__m128 r = _mm_rsqrt_ps(a.m128);
#endif
return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
}
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
}
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fx log ( const Vec3fx& a ) {
return Vec3fx(logf(a.x),logf(a.y),logf(a.z));
}
__forceinline Vec3fx exp ( const Vec3fx& a ) {
return Vec3fx(expf(a.x),expf(a.y),expf(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); }
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); }
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); }
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); }
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }
#if defined(__SSE4_1__) || defined(__aarch64__)
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__) || defined(__aarch64__)
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a.m128);
const vint4 bi = _mm_castps_si128(b.m128);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__AVX2__)
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
#else
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c;}
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; }
#endif
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec3fx& v) {
const vfloat4 a(v.m128);
const vfloat4 b = shuffle<1>(a);
const vfloat4 c = shuffle<2>(a);
return _mm_cvtss_f32(a+b+c);
}
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); }
__forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); }
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
__forceinline bool isvalid ( const Vec3fx& v ) {
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fx& a ) {
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
}
__forceinline bool isvalid4 ( const Vec3fx& v ) {
return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
}
__forceinline bool is_finite4 ( const Vec3fx& a ) {
return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE4_1__)
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
}
#else
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return reduce_add(a*b);
}
#endif
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
{
vfloat4 a0 = vfloat4(a.m128);
vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
vfloat4 b1 = vfloat4(b.m128);
return Vec3fx(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
}
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
__m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
return blendv_ps(f.m128, t.m128, mask);
}
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
return blendv_ps(f.m128, t.m128, s);
}
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fx& a )
{
const Vec3fx b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
#if defined(__aarch64__)
__forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); }
__forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); }
__forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); }
#elif defined (__SSE4_1__)
__forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
#else
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
typedef Vec3fx Vec3ff;
}
#endif

View File

@@ -0,0 +1,625 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
struct { float x,y,z, do_not_use; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ) {}
//__forceinline Vec3fa( const __m128 a ) : m128(a) {}
//__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}
__forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; }
__forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}
__forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}
__forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
__forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!
//friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
static __forceinline Vec3fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
static __forceinline void storeu ( void* a, const Vec3fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {}
__forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {}
__forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
__forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
__forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
__forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
__forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z)); }
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
__forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fa(x,y,z);
}
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fa log ( const Vec3fa& a ) {
return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fa exp ( const Vec3fa& a ) {
return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
}
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
*/
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
__forceinline bool isvalid ( const Vec3fa& v ) {
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fa& a ) {
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return reduce_add(a*b);
}
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z);
}
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fa& a )
{
const Vec3fa b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
: x(a.x), y(a.y) {}
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
: x((int)a.x), y((int)a.y), z((int)a.z) {}
typedef Vec3fa Vec3fa_t;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fx Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fx
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
struct { float x,y,z; union { int a; unsigned u; float w; }; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ) {}
//__forceinline Vec3fx( const __m128 a ) : m128(a) {}
__forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
__forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
__forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
//__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
//__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
__forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
__forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
__forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
__forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
__forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
//friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fx load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline Vec3fx loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline void storeu ( void* a, const Vec3fx& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
__forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
__forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
__forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
__forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
__forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z),sycl::native::recip(a.w)); }
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
__forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fx(x,y,z);
}
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fx log ( const Vec3fx& a ) {
return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fx exp ( const Vec3fx& a ) {
return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w));
}
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
__forceinline bool isvalid ( const Vec3fx& v ) {
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fx& a ) {
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
}
__forceinline bool isvalid4 ( const Vec3fx& v ) {
const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE;
const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE;
const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE;
const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE;
return valid_x & valid_y & valid_z & valid_w;
}
__forceinline bool is_finite4 ( const Vec3fx& v ) {
const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX;
const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX;
const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX;
const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX;
return finite_x & finite_y & finite_z & finite_w;
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return reduce_add(a*b);
}
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w);
}
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fx& a )
{
const Vec3fx b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")";
}
typedef Vec3fx Vec3ff;
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
// : x(a.x), y(a.y) {}
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
}
#if __SYCL_COMPILER_VERSION >= 20210801
namespace sycl {
template<> struct is_device_copyable<embree::Vec3fa> : std::true_type {};
template<> struct is_device_copyable<const embree::Vec3fa> : std::true_type {};
}
#endif

203
thirdparty/embree/common/math/vec3ia.h vendored Normal file
View File

@@ -0,0 +1,203 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ia_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ia Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ia
{
ALIGNED_STRUCT_(16);
union {
__m128i m128;
struct { int x,y,z; };
};
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ) {}
__forceinline Vec3ia( const __m128i a ) : m128(a) {}
__forceinline Vec3ia( const Vec3ia& other ) : m128(other.m128) {}
__forceinline Vec3ia& operator =(const Vec3ia& other) { m128 = other.m128; return *this; }
__forceinline explicit Vec3ia( const int a ) : m128(_mm_set1_epi32(a)) {}
__forceinline Vec3ia( const int x, const int y, const int z) : m128(_mm_set_epi32(z, z, y, x)) {}
__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
__forceinline operator const __m128i&() const { return m128; }
__forceinline operator __m128i&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ZeroTy ) : m128(_mm_setzero_si128()) {}
__forceinline Vec3ia( OneTy ) : m128(_mm_set1_epi32(1)) {}
__forceinline Vec3ia( PosInfTy ) : m128(_mm_set1_epi32(pos_inf)) {}
__forceinline Vec3ia( NegInfTy ) : m128(_mm_set1_epi32(neg_inf)) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a ) { return a; }
__forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); }
#if (defined(__aarch64__))
__forceinline Vec3ia abs ( const Vec3ia& a ) { return vabsq_s32(a.m128); }
#elif defined(__SSSE3__)
__forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return _mm_add_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return _mm_sub_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); }
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
#endif
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return _mm_and_si128(a.m128, b.m128); }
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return _mm_or_si128(a.m128, b.m128); }
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return _mm_xor_si128(a.m128, b.m128); }
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); }
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); }
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); }
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); }
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
#endif
__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
#if !defined(__ARM_NEON)
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
#if defined(__aarch64__) || defined(__SSE4_1__)
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
#else
return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f));
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
#if defined(__aarch64__)
__forceinline int reduce_add(const Vec3ia& v) { return vaddvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0))); }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return vminvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x7FFFFFFF))); }
__forceinline int reduce_max(const Vec3ia& v) { return vmaxvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x80000000))); }
#else
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); }
__forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) == 7; }
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) != 7; }
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); }
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); }
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); }
#if defined(__aarch64__) || defined(__SSE4_1__)
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); }
#else
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return select(lt_mask(a,b),a,b); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return select(gt_mask(a,b),a,b); }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
}
#endif

View File

@@ -0,0 +1,178 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ia Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ia
{
ALIGNED_STRUCT_(16);
struct { int x,y,z; };
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ) {}
//__forceinline Vec3ia( const __m128i a ) : m128(a) {}
__forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {}
__forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {}
__forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {}
//__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
__forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {}
__forceinline explicit Vec3ia( const Vec3fa& a );
//__forceinline operator const __m128i&() const { return m128; }
//__forceinline operator __m128i&() { return m128; }
__forceinline operator vint4() const { return vint4(x,y,z,z); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {}
__forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {}
__forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {}
__forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); }
__forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); }
__forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); }
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); }
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); }
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); }
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); }
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); }
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); }
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); }
__forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return a.x == b.x & a.y == b.y & a.z == b.z; }
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return a.x != b.x & a.y != b.y & a.z != b.z; }
/*
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
*/
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
const int x = m.x ? t.x : f.x;
const int y = m.y ? t.y : f.y;
const int z = m.z ? t.z : f.z;
return Vec3ia(x,y,z);
}
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
return cout;
}
}

266
thirdparty/embree/common/math/vec4.h vendored Normal file
View File

@@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "emath.h"
#include "vec3.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// Generic 4D vector Class
////////////////////////////////////////////////////////////////////////////////
template<typename T> struct Vec4
{
enum { N = 4 };
union {
struct { T x, y, z, w; };
#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
T components[N];
#endif
};
typedef T Scalar;
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec4( ) {}
__forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {}
__forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {}
__forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
__forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; }
__forceinline Vec4( const Vec3fx& other );
template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {}
template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
__forceinline Vec4& operator =(const Vec4& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
__forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {}
__forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {}
__forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {}
__forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {}
#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
__forceinline const T& operator [](const size_t axis) const { assert(axis < 4); return (&x)[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 4); return (&x)[axis]; }
#else
__forceinline const T& operator [](const size_t axis ) const { assert(axis < 4); return components[axis]; }
__forceinline T& operator [](const size_t axis) { assert(axis < 4); return components[axis]; }
#endif
////////////////////////////////////////////////////////////////////////////////
/// Swizzles
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); }
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); }
template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); }
template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); }
template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); }
template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); }
template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); }
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); }
template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); }
template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T> madd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> msub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> madd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> msub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); }
template<typename T> __forceinline Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; }
template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; }
template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; }
template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; }
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; }
template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; }
template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); }
template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; }
template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; }
template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
if (a.w != b.w) return a.w < b.w;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Shift Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) {
return Vec4<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z),shift_right_1(a.w));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidean Space Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline T dot ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
template<typename T> __forceinline T length ( const Vec4<T>& a ) { return sqrt(dot(a,a)); }
template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a ) { return a*rsqrt(dot(a,a)); }
template<typename T> __forceinline T distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) {
return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
}
template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) {
return Vec4<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z),select(s.w,t.w,f.w));
}
template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) {
return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
}
template<typename T>
__forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) {
return madd(Vec4<T>(T(1.0f)-t),v0,t*v1);
}
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec4<T>& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
}
////////////////////////////////////////////////////////////////////////////////
/// Default template instantiations
////////////////////////////////////////////////////////////////////////////////
typedef Vec4<bool > Vec4b;
typedef Vec4<unsigned char> Vec4uc;
typedef Vec4<int > Vec4i;
typedef Vec4<float > Vec4f;
}
#include "vec3ba.h"
#include "vec3ia.h"
#include "vec3fa.h"
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined(__SSE__) || defined(__ARM_NEON)
#include "../simd/sse.h"
#endif
#if defined __AVX__
#include "../simd/avx.h"
#endif
#if defined __AVX512F__
#include "../simd/avx512.h"
#endif
namespace embree
{
template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
#if !defined(__SYCL_DEVICE_ONLY__)
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
#endif
#else
#if defined(__SSE__)
template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#endif
}