// File: basisu_math.h #pragma once // TODO: Would prefer this in the basisu namespace, but to avoid collisions with the existing vec/matrix classes I'm placing this in "bu_math". namespace bu_math { // Cross-platform 1.0f/sqrtf(x) approximation. See https://en.wikipedia.org/wiki/Fast_inverse_square_root#cite_note-37. // Would prefer using SSE1 etc. but that would require implementing multiple versions and platform divergence (needing more testing). BASISU_FORCE_INLINE float inv_sqrt(float v) { union { float flt; uint32_t ui; } un; un.flt = v; un.ui = 0x5F1FFFF9UL - (un.ui >> 1); return 0.703952253f * un.flt * (2.38924456f - v * (un.flt * un.flt)); } inline float smoothstep(float edge0, float edge1, float x) { assert(edge1 != edge0); // Scale, and clamp x to 0..1 range x = basisu::saturate((x - edge0) / (edge1 - edge0)); return x * x * (3.0f - 2.0f * x); } template class vec : public basisu::rel_ops > { public: typedef T scalar_type; enum { num_elements = N }; inline vec() { } inline vec(basisu::eClear) { clear(); } inline vec(const vec& other) { for (uint32_t i = 0; i < N; i++) m_s[i] = other.m_s[i]; } template inline vec(const vec& other) { set(other); } template inline vec(const vec& other, T w) { *this = other; m_s[N - 1] = w; } template inline explicit vec(Args... args) { static_assert(sizeof...(args) <= N); set(args...); } inline void clear() { if (N > 4) memset(m_s, 0, sizeof(m_s)); else { for (uint32_t i = 0; i < N; i++) m_s[i] = 0; } } template inline vec& set(const vec& other) { if ((void*)this == (void*)&other) return *this; const uint32_t m = basisu::minimum(N, ON); uint32_t i; for (i = 0; i < m; i++) m_s[i] = static_cast(other[i]); for (; i < N; i++) m_s[i] = 0; return *this; } inline vec& set_component(uint32_t index, T val) { assert(index < N); m_s[index] = val; return *this; } inline vec& set_all(T val) { for (uint32_t i = 0; i < N; i++) m_s[i] = val; return *this; } template inline vec& set(Args... args) { static_assert(sizeof...(args) <= N); // Initialize using parameter pack expansion T values[] = { static_cast(args)... }; // Special case if setting with a scalar if (sizeof...(args) == 1) { set_all(values[0]); } else { // Copy the values into the vector for (std::size_t i = 0; i < sizeof...(args); ++i) { m_s[i] = values[i]; } // Zero-initialize the remaining elements (if any) if (sizeof...(args) < N) { std::fill(m_s + sizeof...(args), m_s + N, T{}); } } return *this; } inline vec& set(const T* pValues) { for (uint32_t i = 0; i < N; i++) m_s[i] = pValues[i]; return *this; } template inline vec& swizzle_set(const vec& other, uint32_t i) { return set(static_cast(other[i])); } template inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) { return set(static_cast(other[i]), static_cast(other[j])); } template inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) { return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); } template inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k, uint32_t l) { return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); } inline vec& operator=(const vec& rhs) { if (this != &rhs) { for (uint32_t i = 0; i < N; i++) m_s[i] = rhs.m_s[i]; } return *this; } template inline vec& operator=(const vec& other) { if ((void*)this == (void*)&other) return *this; uint32_t s = basisu::minimum(N, O); uint32_t i; for (i = 0; i < s; i++) m_s[i] = static_cast(other[i]); for (; i < N; i++) m_s[i] = 0; return *this; } inline bool operator==(const vec& rhs) const { for (uint32_t i = 0; i < N; i++) if (!(m_s[i] == rhs.m_s[i])) return false; return true; } inline bool operator<(const vec& rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_s[i] < rhs.m_s[i]) return true; else if (!(m_s[i] == rhs.m_s[i])) return false; } return false; } inline T operator[](uint32_t i) const { assert(i < N); return m_s[i]; } inline T& operator[](uint32_t i) { assert(i < N); return m_s[i]; } template inline uint64_t get_component_bits_as_uint() const { static_assert(index < N); static_assert((sizeof(T) == sizeof(uint16_t)) || (sizeof(T) == sizeof(uint32_t)) || (sizeof(T) == sizeof(uint64_t)), "Unsupported type"); if (sizeof(T) == sizeof(uint16_t)) return *reinterpret_cast(&m_s[index]); else if (sizeof(T) == sizeof(uint32_t)) return *reinterpret_cast(&m_s[index]); else if (sizeof(T) == sizeof(uint64_t)) return *reinterpret_cast(&m_s[index]); else { assert(0); return 0; } } inline T get_x(void) const { return m_s[0]; } inline T get_y(void) const { static_assert(N >= 2); return m_s[1]; } inline T get_z(void) const { static_assert(N >= 3); return m_s[2]; } inline T get_w(void) const { static_assert(N >= 4); return m_s[3]; } inline vec get_x_vector() const { return broadcast<0>(); } inline vec get_y_vector() const { return broadcast<1>(); } inline vec get_z_vector() const { return broadcast<2>(); } inline vec get_w_vector() const { return broadcast<3>(); } inline T get_component(uint32_t i) const { return (*this)[i]; } inline vec& set_x(T v) { m_s[0] = v; return *this; } inline vec& set_y(T v) { static_assert(N >= 2); m_s[1] = v; return *this; } inline vec& set_z(T v) { static_assert(N >= 3); m_s[2] = v; return *this; } inline vec& set_w(T v) { static_assert(N >= 4); m_s[3] = v; return *this; } inline const T* get_ptr() const { return reinterpret_cast(&m_s[0]); } inline T* get_ptr() { return reinterpret_cast(&m_s[0]); } inline vec as_point() const { vec result(*this); result[N - 1] = 1; return result; } inline vec as_dir() const { vec result(*this); result[N - 1] = 0; return result; } inline vec<2, T> select2(uint32_t i, uint32_t j) const { assert((i < N) && (j < N)); return vec<2, T>(m_s[i], m_s[j]); } inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const { assert((i < N) && (j < N) && (k < N)); return vec<3, T>(m_s[i], m_s[j], m_s[k]); } inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const { assert((i < N) && (j < N) && (k < N) && (l < N)); return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); } inline bool is_dir() const { return m_s[N - 1] == 0; } inline bool is_vector() const { return is_dir(); } inline bool is_point() const { return m_s[N - 1] == 1; } inline vec project() const { vec result(*this); if (result[N - 1]) result /= result[N - 1]; return result; } inline vec broadcast(unsigned i) const { return vec((*this)[i]); } template inline vec broadcast() const { return vec((*this)[i]); } inline vec swizzle(uint32_t i, uint32_t j) const { return vec((*this)[i], (*this)[j]); } inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const { return vec((*this)[i], (*this)[j], (*this)[k]); } inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const { return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); } inline vec operator-() const { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = -m_s[i]; return result; } inline vec operator+() const { return *this; } inline vec& operator+=(const vec& other) { for (uint32_t i = 0; i < N; i++) m_s[i] += other.m_s[i]; return *this; } inline vec& operator-=(const vec& other) { for (uint32_t i = 0; i < N; i++) m_s[i] -= other.m_s[i]; return *this; } inline vec& operator*=(const vec& other) { for (uint32_t i = 0; i < N; i++) m_s[i] *= other.m_s[i]; return *this; } inline vec& operator/=(const vec& other) { for (uint32_t i = 0; i < N; i++) m_s[i] /= other.m_s[i]; return *this; } inline vec& operator*=(T s) { for (uint32_t i = 0; i < N; i++) m_s[i] *= s; return *this; } inline vec& operator/=(T s) { for (uint32_t i = 0; i < N; i++) m_s[i] /= s; return *this; } friend inline vec operator*(const vec& lhs, T val) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] * val; return result; } friend inline vec operator*(T val, const vec& rhs) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = val * rhs.m_s[i]; return result; } friend inline vec operator/(const vec& lhs, const vec& rhs) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; return result; } friend inline vec operator/(const vec& lhs, T val) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] / val; return result; } friend inline vec operator+(const vec& lhs, const vec& rhs) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; return result; } friend inline vec operator-(const vec& lhs, const vec& rhs) { vec result; for (uint32_t i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; return result; } static inline vec<3, T> cross2(const vec& a, const vec& b) { static_assert(N >= 2); return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); } inline vec<3, T> cross2(const vec& b) const { return cross2(*this, b); } static inline vec<3, T> cross3(const vec& a, const vec& b) { static_assert(N >= 3); return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); } inline vec<3, T> cross3(const vec& b) const { return cross3(*this, b); } static inline vec<3, T> cross(const vec& a, const vec& b) { static_assert(N >= 2); if (N == 2) return cross2(a, b); else return cross3(a, b); } inline vec<3, T> cross(const vec& b) const { static_assert(N >= 2); return cross(*this, b); } inline T dot(const vec& rhs) const { return dot(*this, rhs); } inline vec dot_vector(const vec& rhs) const { return vec(dot(*this, rhs)); } static inline T dot(const vec& lhs, const vec& rhs) { T result = lhs.m_s[0] * rhs.m_s[0]; for (uint32_t i = 1; i < N; i++) result += lhs.m_s[i] * rhs.m_s[i]; return result; } inline T dot2(const vec& rhs) const { static_assert(N >= 2); return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; } inline T dot3(const vec& rhs) const { static_assert(N >= 3); return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; } inline T dot4(const vec& rhs) const { static_assert(N >= 4); return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; } inline T norm(void) const { T sum = m_s[0] * m_s[0]; for (uint32_t i = 1; i < N; i++) sum += m_s[i] * m_s[i]; return sum; } inline T length(void) const { return sqrt(norm()); } inline T squared_distance(const vec& rhs) const { T dist2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; } return dist2; } inline T squared_distance(const vec& rhs, T early_out) const { T dist2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; if (dist2 > early_out) break; } return dist2; } inline T distance(const vec& rhs) const { T dist2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; } return sqrt(dist2); } inline vec inverse() const { vec result; for (uint32_t i = 0; i < N; i++) result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; return result; } // returns squared length (norm) inline double normalize(const vec* pDefaultVec = NULL) { double n = m_s[0] * m_s[0]; for (uint32_t i = 1; i < N; i++) n += m_s[i] * m_s[i]; if (n != 0) *this *= static_cast(1.0f / sqrt(n)); else if (pDefaultVec) *this = *pDefaultVec; return n; } inline double normalize3(const vec* pDefaultVec = NULL) { static_assert(N >= 3); double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; if (n != 0) *this *= static_cast((1.0f / sqrt(n))); else if (pDefaultVec) *this = *pDefaultVec; return n; } inline vec& normalize_in_place(const vec* pDefaultVec = NULL) { normalize(pDefaultVec); return *this; } inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) { normalize3(pDefaultVec); return *this; } inline vec get_normalized(const vec* pDefaultVec = NULL) const { vec result(*this); result.normalize(pDefaultVec); return result; } inline vec get_normalized3(const vec* pDefaultVec = NULL) const { vec result(*this); result.normalize3(pDefaultVec); return result; } inline vec& clamp(T l, T h) { for (uint32_t i = 0; i < N; i++) m_s[i] = static_cast(basisu::clamp(m_s[i], l, h)); return *this; } inline vec& saturate() { return clamp(0.0f, 1.0f); } inline vec& clamp(const vec& l, const vec& h) { for (uint32_t i = 0; i < N; i++) m_s[i] = static_cast(basisu::clamp(m_s[i], l[i], h[i])); return *this; } inline bool is_within_bounds(const vec& l, const vec& h) const { for (uint32_t i = 0; i < N; i++) if ((m_s[i] < l[i]) || (m_s[i] > h[i])) return false; return true; } inline bool is_within_bounds(T l, T h) const { for (uint32_t i = 0; i < N; i++) if ((m_s[i] < l) || (m_s[i] > h)) return false; return true; } inline uint32_t get_major_axis(void) const { T m = fabs(m_s[0]); uint32_t r = 0; for (uint32_t i = 1; i < N; i++) { const T c = fabs(m_s[i]); if (c > m) { m = c; r = i; } } return r; } inline uint32_t get_minor_axis(void) const { T m = fabs(m_s[0]); uint32_t r = 0; for (uint32_t i = 1; i < N; i++) { const T c = fabs(m_s[i]); if (c < m) { m = c; r = i; } } return r; } inline void get_projection_axes(uint32_t& u, uint32_t& v) const { const int axis = get_major_axis(); if (m_s[axis] < 0.0f) { v = basisu::next_wrap(axis, N); u = basisu::next_wrap(v, N); } else { u = basisu::next_wrap(axis, N); v = basisu::next_wrap(u, N); } } inline T get_absolute_minimum(void) const { T result = fabs(m_s[0]); for (uint32_t i = 1; i < N; i++) result = basisu::minimum(result, fabs(m_s[i])); return result; } inline T get_absolute_maximum(void) const { T result = fabs(m_s[0]); for (uint32_t i = 1; i < N; i++) result = basisu::maximum(result, fabs(m_s[i])); return result; } inline T get_minimum(void) const { T result = m_s[0]; for (uint32_t i = 1; i < N; i++) result = basisu::minimum(result, m_s[i]); return result; } inline T get_maximum(void) const { T result = m_s[0]; for (uint32_t i = 1; i < N; i++) result = basisu::maximum(result, m_s[i]); return result; } inline vec& remove_unit_direction(const vec& dir) { *this -= (dot(dir) * dir); return *this; } inline vec get_remove_unit_direction(const vec& dir) const { return *this - (dot(dir) * dir); } inline bool all_less(const vec& b) const { for (uint32_t i = 0; i < N; i++) if (m_s[i] >= b.m_s[i]) return false; return true; } inline bool all_less_equal(const vec& b) const { for (uint32_t i = 0; i < N; i++) if (m_s[i] > b.m_s[i]) return false; return true; } inline bool all_greater(const vec& b) const { for (uint32_t i = 0; i < N; i++) if (m_s[i] <= b.m_s[i]) return false; return true; } inline bool all_greater_equal(const vec& b) const { for (uint32_t i = 0; i < N; i++) if (m_s[i] < b.m_s[i]) return false; return true; } inline vec negate_xyz() const { vec ret; ret[0] = -m_s[0]; if (N >= 2) ret[1] = -m_s[1]; if (N >= 3) ret[2] = -m_s[2]; for (uint32_t i = 3; i < N; i++) ret[i] = m_s[i]; return ret; } inline vec& invert() { for (uint32_t i = 0; i < N; i++) if (m_s[i] != 0.0f) m_s[i] = 1.0f / m_s[i]; return *this; } inline scalar_type perp_dot(const vec& b) const { static_assert(N == 2); return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; } inline vec perp() const { static_assert(N == 2); return vec(-m_s[1], m_s[0]); } inline vec get_floor() const { vec result; for (uint32_t i = 0; i < N; i++) result[i] = floor(m_s[i]); return result; } inline vec get_ceil() const { vec result; for (uint32_t i = 0; i < N; i++) result[i] = ceil(m_s[i]); return result; } inline T get_total() const { T res = m_s[0]; for (uint32_t i = 1; i < N; i++) res += m_s[i]; return res; } // static helper methods static inline vec mul_components(const vec& lhs, const vec& rhs) { vec result; for (uint32_t i = 0; i < N; i++) result[i] = lhs.m_s[i] * rhs.m_s[i]; return result; } static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) { vec result; for (uint32_t i = 0; i < N; i++) result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; return result; } static inline vec make_axis(uint32_t i) { vec result; result.clear(); result[i] = 1; return result; } static inline vec equals_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] == b[i]); return ret; } static inline vec not_equals_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] != b[i]); return ret; } static inline vec less_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] < b[i]); return ret; } static inline vec less_equals_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] <= b[i]); return ret; } static inline vec greater_equals_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] >= b[i]); return ret; } static inline vec greater_mask(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret[i] = (a[i] > b[i]); return ret; } static inline vec component_max(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret.m_s[i] = basisu::maximum(a.m_s[i], b.m_s[i]); return ret; } static inline vec component_min(const vec& a, const vec& b) { vec ret; for (uint32_t i = 0; i < N; i++) ret.m_s[i] = basisu::minimum(a.m_s[i], b.m_s[i]); return ret; } static inline vec lerp(const vec& a, const vec& b, float t) { vec ret; for (uint32_t i = 0; i < N; i++) ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; return ret; } static inline bool equal_tol(const vec& a, const vec& b, float t) { for (uint32_t i = 0; i < N; i++) if (!basisu::equal_tol(a.m_s[i], b.m_s[i], t)) return false; return true; } inline bool equal_tol(const vec& b, float t) const { return equal_tol(*this, b, t); } static inline vec make_random(basisu::rand& r, float l, float h) { vec result; for (uint32_t i = 0; i < N; i++) result[i] = r.frand(l, h); return result; } static inline vec make_random(basisu::rand& r, const vec& l, const vec& h) { vec result; for (uint32_t i = 0; i < N; i++) result[i] = r.frand(l[i], h[i]); return result; } void print() const { for (uint32_t c = 0; c < N; c++) printf("%3.3f ", (*this)[c]); printf("\n"); } protected: T m_s[N]; }; typedef vec<1, double> vec1D; typedef vec<2, double> vec2D; typedef vec<3, double> vec3D; typedef vec<4, double> vec4D; typedef vec<1, float> vec1F; typedef vec<2, float> vec2F; typedef basisu::vector vec2F_array; typedef vec<3, float> vec3F; typedef basisu::vector vec3F_array; typedef vec<4, float> vec4F; typedef basisu::vector vec4F_array; typedef vec<2, uint32_t> vec2U; typedef vec<3, uint32_t> vec3U; typedef vec<2, int> vec2I; typedef vec<3, int> vec3I; typedef vec<4, int> vec4I; typedef vec<2, int16_t> vec2I16; typedef vec<3, int16_t> vec3I16; inline vec2F rotate_point_2D(const vec2F& p, float rad) { float c = cosf(rad); float s = sinf(rad); float x = p[0]; float y = p[1]; return vec2F(x * c - y * s, x * s + y * c); } //-------------------------------------------------------------- // Matrix/vector cheat sheet, because confusingly, depending on how matrices are stored in memory people can use opposite definitions of "rows", "cols", etc. // See http://www.mindcontrol.org/~hplus/graphics/matrix-layout.html // // So in this simple row-major general matrix class: // matrix=[NumRows][NumCols] or [R][C], i.e. a 3x3 matrix stored in memory will appear as: R0C0, R0C1, R0C2, R1C0, R1C1, R1C2, etc. // Matrix multiplication: [R0,C0]*[R1,C1]=[R0,C1], C0 must equal R1 // // In this class: // A "row vector" type is a vector of size # of matrix cols, 1xC. It's the vector type that is used to store the matrix rows. // A "col vector" type is a vector of size # of matrix rows, Rx1. It's a vector type large enough to hold each matrix column. // // Subrow/col vectors: last component is assumed to be either 0 (a "vector") or 1 (a "point") // "subrow vector": vector/point of size # cols-1, 1x(C-1) // "subcol vector": vector/point of size # rows-1, (R-1)x1 // // D3D style: // vec*matrix, row vector on left (vec dotted against columns) // [1,4]*[4,4]=[1,4] // abcd * A B C D // A B C D // A B C D // A B C D // = e f g h // // Now confusingly, in the matrix transform method for vec*matrix below the vector's type is "col_vec", because col_vec will have the proper size for non-square matrices. But the vector on the left is written as row vector, argh. // // // OGL style: // matrix*vec, col vector on right (vec dotted against rows): // [4,4]*[4,1]=[4,1] // // A B C D * e = e // A B C D f f // A B C D g g // A B C D h h template Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) { static_assert((int)Z::num_rows == (int)X::num_rows); static_assert((int)Z::num_cols == (int)Y::num_cols); static_assert((int)X::num_cols == (int)Y::num_rows); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); for (int r = 0; r < X::num_rows; r++) for (int c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); for (uint32_t i = 1; i < X::num_cols; i++) s += lhs(r, i) * rhs(i, c); result(r, c) = s; } return result; } template Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) { static_assert((int)Z::num_rows == (int)X::num_cols); static_assert((int)Z::num_cols == (int)Y::num_cols); static_assert((int)X::num_rows == (int)Y::num_rows); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); for (int r = 0; r < X::num_cols; r++) for (int c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(0, r) * rhs(0, c); for (uint32_t i = 1; i < X::num_rows; i++) s += lhs(i, r) * rhs(i, c); result(r, c) = s; } return result; } template Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) { static_assert((int)Z::num_rows == (int)X::num_rows); static_assert((int)Z::num_cols == (int)Y::num_rows); static_assert((int)X::num_cols == (int)Y::num_cols); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); for (int r = 0; r < X::num_rows; r++) for (int c = 0; c < Y::num_rows; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); for (uint32_t i = 1; i < X::num_cols; i++) s += lhs(r, i) * rhs(c, i); result(r, c) = s; } return result; } template class matrix { public: typedef T scalar_type; enum { num_rows = R, num_cols = C }; typedef vec col_vec; typedef vec < (R > 1) ? (R - 1) : 0, T > subcol_vec; typedef vec row_vec; typedef vec < (C > 1) ? (C - 1) : 0, T > subrow_vec; inline matrix() { } inline matrix(basisu::eClear) { clear(); } inline matrix(basisu::eIdentity) { set_identity_matrix(); } inline matrix(const T* p) { set(p); } inline matrix(const matrix& other) { for (uint32_t i = 0; i < R; i++) m_rows[i] = other.m_rows[i]; } inline matrix& operator=(const matrix& rhs) { if (this != &rhs) for (uint32_t i = 0; i < R; i++) m_rows[i] = rhs.m_rows[i]; return *this; } inline matrix(T val00, T val01, T val10, T val11) { set(val00, val01, val10, val11); } inline matrix(T val00, T val01, T val10, T val11, T val20, T val21) { set(val00, val01, val10, val11, val20, val21); } inline matrix(T val00, T val01, T val02, T val10, T val11, T val12, T val20, T val21, T val22) { set(val00, val01, val02, val10, val11, val12, val20, val21, val22); } inline matrix(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23, T val30, T val31, T val32, T val33) { set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33); } inline matrix(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23) { set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23); } inline void set(const float* p) { for (uint32_t i = 0; i < R; i++) { m_rows[i].set(p); p += C; } } inline void set(T val00, T val01, T val10, T val11) { m_rows[0].set(val00, val01); if (R >= 2) { m_rows[1].set(val10, val11); for (uint32_t i = 2; i < R; i++) m_rows[i].clear(); } } inline void set(T val00, T val01, T val10, T val11, T val20, T val21) { m_rows[0].set(val00, val01); if (R >= 2) { m_rows[1].set(val10, val11); if (R >= 3) { m_rows[2].set(val20, val21); for (uint32_t i = 3; i < R; i++) m_rows[i].clear(); } } } inline void set(T val00, T val01, T val02, T val10, T val11, T val12, T val20, T val21, T val22) { m_rows[0].set(val00, val01, val02); if (R >= 2) { m_rows[1].set(val10, val11, val12); if (R >= 3) { m_rows[2].set(val20, val21, val22); for (uint32_t i = 3; i < R; i++) m_rows[i].clear(); } } } inline void set(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23, T val30, T val31, T val32, T val33) { m_rows[0].set(val00, val01, val02, val03); if (R >= 2) { m_rows[1].set(val10, val11, val12, val13); if (R >= 3) { m_rows[2].set(val20, val21, val22, val23); if (R >= 4) { m_rows[3].set(val30, val31, val32, val33); for (uint32_t i = 4; i < R; i++) m_rows[i].clear(); } } } } inline void set(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23) { m_rows[0].set(val00, val01, val02, val03); if (R >= 2) { m_rows[1].set(val10, val11, val12, val13); if (R >= 3) { m_rows[2].set(val20, val21, val22, val23); for (uint32_t i = 3; i < R; i++) m_rows[i].clear(); } } } inline uint32_t get_num_rows() const { return num_rows; } inline uint32_t get_num_cols() const { return num_cols; } inline uint32_t get_total_elements() const { return num_rows * num_cols; } inline T operator()(uint32_t r, uint32_t c) const { assert((r < R) && (c < C)); return m_rows[r][c]; } inline T& operator()(uint32_t r, uint32_t c) { assert((r < R) && (c < C)); return m_rows[r][c]; } inline const row_vec& operator[](uint32_t r) const { assert(r < R); return m_rows[r]; } inline row_vec& operator[](uint32_t r) { assert(r < R); return m_rows[r]; } inline const row_vec& get_row(uint32_t r) const { return (*this)[r]; } inline row_vec& get_row(uint32_t r) { return (*this)[r]; } inline void set_row(uint32_t r, const row_vec& v) { (*this)[r] = v; } inline col_vec get_col(uint32_t c) const { assert(c < C); col_vec result; for (uint32_t i = 0; i < R; i++) result[i] = m_rows[i][c]; return result; } inline void set_col(uint32_t c, const col_vec& col) { assert(c < C); for (uint32_t i = 0; i < R; i++) m_rows[i][c] = col[i]; } inline void set_col(uint32_t c, const subcol_vec& col) { assert(c < C); for (uint32_t i = 0; i < (R - 1); i++) m_rows[i][c] = col[i]; m_rows[R - 1][c] = 0.0f; } inline const row_vec& get_translate() const { return m_rows[R - 1]; } inline matrix& set_translate(const row_vec& r) { m_rows[R - 1] = r; return *this; } inline matrix& set_translate(const subrow_vec& r) { m_rows[R - 1] = row_vec(r).as_point(); return *this; } inline const T* get_ptr() const { return reinterpret_cast(&m_rows[0]); } inline T* get_ptr() { return reinterpret_cast(&m_rows[0]); } inline matrix& operator+=(const matrix& other) { for (uint32_t i = 0; i < R; i++) m_rows[i] += other.m_rows[i]; return *this; } inline matrix& operator-=(const matrix& other) { for (uint32_t i = 0; i < R; i++) m_rows[i] -= other.m_rows[i]; return *this; } inline matrix& operator*=(T val) { for (uint32_t i = 0; i < R; i++) m_rows[i] *= val; return *this; } inline matrix& operator/=(T val) { for (uint32_t i = 0; i < R; i++) m_rows[i] /= val; return *this; } inline matrix& operator*=(const matrix& other) { matrix result; matrix_mul_helper(result, *this, other); *this = result; return *this; } friend inline matrix operator+(const matrix& lhs, const matrix& rhs) { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = lhs.m_rows[i] + rhs.m_rows[i]; return result; } friend inline matrix operator-(const matrix& lhs, const matrix& rhs) { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = lhs.m_rows[i] - rhs.m_rows[i]; return result; } friend inline matrix operator*(const matrix& lhs, T val) { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = lhs.m_rows[i] * val; return result; } friend inline matrix operator/(const matrix& lhs, T val) { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = lhs.m_rows[i] / val; return result; } friend inline matrix operator*(T val, const matrix& rhs) { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = val * rhs.m_rows[i]; return result; } #if 0 template friend inline matrix operator*(const matrix& lhs, const matrix& rhs) { matrix result; return matrix_mul_helper(result, lhs, rhs); } #endif friend inline matrix operator*(const matrix& lhs, const matrix& rhs) { matrix result; return matrix_mul_helper(result, lhs, rhs); } friend inline row_vec operator*(const col_vec& a, const matrix& b) { return transform(a, b); } inline matrix operator+() const { return *this; } inline matrix operator-() const { matrix result; for (uint32_t i = 0; i < R; i++) result[i] = -m_rows[i]; return result; } inline matrix& clear() { for (uint32_t i = 0; i < R; i++) m_rows[i].clear(); return *this; } inline matrix& set_zero_matrix() { clear(); return *this; } inline matrix& set_identity_matrix() { for (uint32_t i = 0; i < R; i++) { m_rows[i].clear(); m_rows[i][i] = 1.0f; } return *this; } inline matrix& set_scale_matrix(float s) { clear(); for (int i = 0; i < (R - 1); i++) m_rows[i][i] = s; m_rows[R - 1][C - 1] = 1.0f; return *this; } inline matrix& set_scale_matrix(const row_vec& s) { clear(); for (uint32_t i = 0; i < R; i++) m_rows[i][i] = s[i]; return *this; } inline matrix& set_scale_matrix(float x, float y) { set_identity_matrix(); m_rows[0].set_x(x); m_rows[1].set_y(y); return *this; } inline matrix& set_scale_matrix(float x, float y, float z) { set_identity_matrix(); m_rows[0].set_x(x); m_rows[1].set_y(y); m_rows[2].set_z(z); return *this; } inline matrix& set_translate_matrix(const row_vec& s) { set_identity_matrix(); set_translate(s); return *this; } inline matrix& set_translate_matrix(float x, float y) { set_identity_matrix(); set_translate(row_vec(x, y).as_point()); return *this; } inline matrix& set_translate_matrix(float x, float y, float z) { set_identity_matrix(); set_translate(row_vec(x, y, z).as_point()); return *this; } inline matrix get_transposed() const { static_assert(R == C); matrix result; for (uint32_t i = 0; i < R; i++) for (uint32_t j = 0; j < C; j++) result.m_rows[i][j] = m_rows[j][i]; return result; } inline matrix get_transposed_nonsquare() const { matrix result; for (uint32_t i = 0; i < R; i++) for (uint32_t j = 0; j < C; j++) result[j][i] = m_rows[i][j]; return result; } inline matrix& transpose_in_place() { matrix result; for (uint32_t i = 0; i < R; i++) for (uint32_t j = 0; j < C; j++) result.m_rows[i][j] = m_rows[j][i]; *this = result; return *this; } // Frobenius Norm T get_norm() const { T result = 0; for (uint32_t i = 0; i < R; i++) for (uint32_t j = 0; j < C; j++) result += m_rows[i][j] * m_rows[i][j]; return static_cast(sqrt(result)); } inline matrix get_power(T p) const { matrix result; for (uint32_t i = 0; i < R; i++) for (uint32_t j = 0; j < C; j++) result[i][j] = static_cast(pow(m_rows[i][j], p)); return result; } inline matrix<1, R, T> numpy_dot(const matrix<1, C, T>& b) const { matrix<1, R, T> result; for (uint32_t r = 0; r < R; r++) { T sum = 0; for (uint32_t c = 0; c < C; c++) sum += m_rows[r][c] * b[0][c]; result[0][r] = static_cast(sum); } return result; } bool invert(matrix& result) const { static_assert(R == C); result.set_identity_matrix(); matrix mat(*this); for (uint32_t c = 0; c < C; c++) { uint32_t max_r = c; for (uint32_t r = c + 1; r < R; r++) if (fabs(mat[r][c]) > fabs(mat[max_r][c])) max_r = r; if (mat[max_r][c] == 0.0f) { result.set_identity_matrix(); return false; } std::swap(mat[c], mat[max_r]); std::swap(result[c], result[max_r]); result[c] /= mat[c][c]; mat[c] /= mat[c][c]; for (uint32_t row = 0; row < R; row++) { if (row != c) { const row_vec temp(mat[row][c]); mat[row] -= row_vec::mul_components(mat[c], temp); result[row] -= row_vec::mul_components(result[c], temp); } } } return true; } matrix& invert_in_place() { matrix result; invert(result); *this = result; return *this; } matrix get_inverse() const { matrix result; invert(result); return result; } T get_det() const { static_assert(R == C); return det_helper(*this, R); } bool equal_tol(const matrix& b, float tol) const { for (uint32_t r = 0; r < R; r++) if (!row_vec::equal_tol(m_rows[r], b.m_rows[r], tol)) return false; return true; } bool is_square() const { return R == C; } double get_trace() const { static_assert(is_square()); T total = 0; for (uint32_t i = 0; i < R; i++) total += (*this)(i, i); return total; } void print() const { for (uint32_t r = 0; r < R; r++) { for (uint32_t c = 0; c < C; c++) printf("%3.7f ", (*this)(r, c)); printf("\n"); } } // This method transforms a vec by a matrix (D3D-style: row vector on left). // Confusingly, note that the data type is named "col_vec", but mathematically it's actually written as a row vector (of size equal to the # matrix rows, which is why it's called a "col_vec" in this class). // 1xR * RxC = 1xC // This dots against the matrix columns. static inline row_vec transform(const col_vec& a, const matrix& b) { row_vec result(b[0] * a[0]); for (uint32_t r = 1; r < R; r++) result += b[r] * a[r]; return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left). // Last component of vec is assumed to be 1. static inline row_vec transform_point(const col_vec& a, const matrix& b) { row_vec result(0); for (int r = 0; r < (R - 1); r++) result += b[r] * a[r]; result += b[R - 1]; return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left). // Last component of vec is assumed to be 0. static inline row_vec transform_vector(const col_vec& a, const matrix& b) { row_vec result(0); for (int r = 0; r < (R - 1); r++) result += b[r] * a[r]; return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left). // Last component of vec is assumed to be 1. static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) { subcol_vec result(0); for (int r = 0; r < static_cast(R); r++) { const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; for (int c = 0; c < static_cast(C - 1); c++) result[c] += b[r][c] * s; } return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left). // Last component of vec is assumed to be 0. static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) { subcol_vec result(0); for (int r = 0; r < static_cast(R - 1); r++) { const T s = a[r]; for (int c = 0; c < static_cast(C - 1); c++) result[c] += b[r][c] * s; } return result; } // Like transform() above, but the matrix is effectively transposed before the multiply. static inline col_vec transform_transposed(const col_vec& a, const matrix& b) { static_assert(R == C); col_vec result; for (uint32_t r = 0; r < R; r++) result[r] = b[r].dot(a); return result; } // Like transform() above, but the matrix is effectively transposed before the multiply. // Last component of vec is assumed to be 0. static inline col_vec transform_vector_transposed(const col_vec& a, const matrix& b) { static_assert(R == C); col_vec result; for (uint32_t r = 0; r < R; r++) { T s = 0; for (uint32_t c = 0; c < (C - 1); c++) s += b[r][c] * a[c]; result[r] = s; } return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. // Last component of vec is assumed to be 1. static inline subcol_vec transform_point_transposed(const subcol_vec& a, const matrix& b) { static_assert(R == C); subcol_vec result(0); for (int r = 0; r < R; r++) { const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; for (int c = 0; c < (C - 1); c++) result[c] += b[c][r] * s; } return result; } // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. // Last component of vec is assumed to be 0. static inline subcol_vec transform_vector_transposed(const subcol_vec& a, const matrix& b) { static_assert(R == C); subcol_vec result(0); for (int r = 0; r < static_cast(R - 1); r++) { const T s = a[r]; for (int c = 0; c < static_cast(C - 1); c++) result[c] += b[c][r] * s; } return result; } // This method transforms a matrix by a vector (OGL style, col vector on the right). // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). // RxC * Cx1 = Rx1 // This dots against the matrix rows. static inline col_vec transform(const matrix& b, const row_vec& a) { col_vec result; for (int r = 0; r < static_cast(R); r++) result[r] = b[r].dot(a); return result; } // This method transforms a matrix by a vector (OGL style, col vector on the right), except the matrix is effectively transposed before the multiply. // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). // RxC * Cx1 = Rx1 // This dots against the matrix cols. static inline col_vec transform_transposed(const matrix& b, const row_vec& a) { static_assert(R == C); row_vec result(b[0] * a[0]); for (int r = 1; r < static_cast(R); r++) result += b[r] * a[r]; return col_vec(result); } static inline matrix& mul_components(matrix& result, const matrix& lhs, const matrix& rhs) { for (uint32_t r = 0; r < R; r++) result[r] = row_vec::mul_components(lhs[r], rhs[r]); return result; } static inline matrix& concat(matrix& lhs, const matrix& rhs) { return matrix_mul_helper(lhs, matrix(lhs), rhs); } inline matrix& concat_in_place(const matrix& rhs) { return concat(*this, rhs); } static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) { matrix temp; matrix* pResult = ((&result == &lhs) || (&result == &rhs)) ? &temp : &result; matrix_mul_helper(*pResult, lhs, rhs); if (pResult != &result) result = *pResult; return result; } static matrix make_zero_matrix() { matrix result; result.clear(); return result; } static matrix make_identity_matrix() { matrix result; result.set_identity_matrix(); return result; } static matrix make_translate_matrix(const row_vec& t) { return matrix(basisu::cIdentity).set_translate(t); } static matrix make_translate_matrix(float x, float y) { return matrix(basisu::cIdentity).set_translate_matrix(x, y); } static matrix make_translate_matrix(float x, float y, float z) { return matrix(basisu::cIdentity).set_translate_matrix(x, y, z); } static inline matrix make_scale_matrix(float s) { return matrix().set_scale_matrix(s); } static inline matrix make_scale_matrix(const row_vec& s) { return matrix().set_scale_matrix(s); } static inline matrix make_scale_matrix(float x, float y) { static_assert(R >= 3 && C >= 3); matrix result; result.set_identity_matrix(); result.m_rows[0][0] = x; result.m_rows[1][1] = y; return result; } static inline matrix make_scale_matrix(float x, float y, float z) { static_assert(R >= 4 && C >= 4); matrix result; result.set_identity_matrix(); result.m_rows[0][0] = x; result.m_rows[1][1] = y; result.m_rows[2][2] = z; return result; } // Helpers derived from Graphics Gems 1 and 2 (Matrices and Transformations, Ronald N. Goldman) static matrix make_rotate_matrix(const vec<3, T>& axis, T ang) { static_assert(R >= 3 && C >= 3); vec<3, T> norm_axis(axis.get_normalized()); double cos_a = cos(ang); double inv_cos_a = 1.0f - cos_a; double sin_a = sin(ang); const T x = norm_axis[0]; const T y = norm_axis[1]; const T z = norm_axis[2]; const double x2 = norm_axis[0] * norm_axis[0]; const double y2 = norm_axis[1] * norm_axis[1]; const double z2 = norm_axis[2] * norm_axis[2]; matrix result; result.set_identity_matrix(); result[0][0] = (T)((inv_cos_a * x2) + cos_a); result[1][0] = (T)((inv_cos_a * x * y) + (sin_a * z)); result[2][0] = (T)((inv_cos_a * x * z) - (sin_a * y)); result[0][1] = (T)((inv_cos_a * x * y) - (sin_a * z)); result[1][1] = (T)((inv_cos_a * y2) + cos_a); result[2][1] = (T)((inv_cos_a * y * z) + (sin_a * x)); result[0][2] = (T)((inv_cos_a * x * z) + (sin_a * y)); result[1][2] = (T)((inv_cos_a * y * z) - (sin_a * x)); result[2][2] = (T)((inv_cos_a * z2) + cos_a); return result; } static inline matrix make_rotate_matrix(T ang) { static_assert(R >= 2 && C >= 2); matrix ret(basisu::cIdentity); const T sin_a = static_cast(sin(ang)); const T cos_a = static_cast(cos(ang)); ret[0][0] = +cos_a; ret[0][1] = -sin_a; ret[1][0] = +sin_a; ret[1][1] = +cos_a; return ret; } static inline matrix make_rotate_matrix(uint32_t axis, T ang) { vec<3, T> axis_vec; axis_vec.clear(); axis_vec[axis] = 1.0f; return make_rotate_matrix(axis_vec, ang); } static inline matrix make_cross_product_matrix(const vec<3, scalar_type>& c) { static_assert((num_rows >= 3) && (num_cols >= 3)); matrix ret(basisu::cClear); ret[0][1] = c[2]; ret[0][2] = -c[1]; ret[1][0] = -c[2]; ret[1][2] = c[0]; ret[2][0] = c[1]; ret[2][1] = -c[0]; return ret; } static inline matrix make_reflection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) { static_assert((num_rows == 4) && (num_cols == 4)); matrix ret; assert(n.is_vector() && q.is_vector()); ret = make_identity_matrix() - 2.0f * make_tensor_product_matrix(n, n); ret.set_translate((2.0f * q.dot(n) * n).as_point()); return ret; } static inline matrix make_tensor_product_matrix(const row_vec& v, const row_vec& w) { matrix ret; for (int r = 0; r < num_rows; r++) ret[r] = row_vec::mul_components(v.broadcast(r), w); return ret; } static inline matrix make_uniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c) { static_assert((num_rows == 4) && (num_cols == 4)); assert(q.is_vector()); matrix ret; ret = c * make_identity_matrix(); ret.set_translate(((1.0f - c) * q).as_point()); return ret; } static inline matrix make_nonuniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c, const vec<4, scalar_type>& w) { static_assert((num_rows == 4) && (num_cols == 4)); assert(q.is_vector() && w.is_vector()); matrix ret; ret = make_identity_matrix() - (1.0f - c) * make_tensor_product_matrix(w, w); ret.set_translate(((1.0f - c) * q.dot(w) * w).as_point()); return ret; } // n = normal of plane, q = point on plane static inline matrix make_ortho_projection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) { assert(n.is_vector() && q.is_vector()); matrix ret; ret = make_identity_matrix() - make_tensor_product_matrix(n, n); ret.set_translate((q.dot(n) * n).as_point()); return ret; } static inline matrix make_parallel_projection(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q, const vec<4, scalar_type>& w) { assert(n.is_vector() && q.is_vector() && w.is_vector()); matrix ret; ret = make_identity_matrix() - (make_tensor_product_matrix(n, w) / (w.dot(n))); ret.set_translate(((q.dot(n) / w.dot(n)) * w).as_point()); return ret; } protected: row_vec m_rows[R]; static T det_helper(const matrix& a, uint32_t n) { // Algorithm ported from Numerical Recipes in C. T d; matrix m; if (n == 2) d = a(0, 0) * a(1, 1) - a(1, 0) * a(0, 1); else { d = 0; for (uint32_t j1 = 1; j1 <= n; j1++) { for (uint32_t i = 2; i <= n; i++) { int j2 = 1; for (uint32_t j = 1; j <= n; j++) { if (j != j1) { m(i - 2, j2 - 1) = a(i - 1, j - 1); j2++; } } } d += (((1 + j1) & 1) ? -1.0f : 1.0f) * a(1 - 1, j1 - 1) * det_helper(m, n - 1); } } return d; } }; typedef matrix<2, 2, float> matrix22F; typedef matrix<2, 2, double> matrix22D; typedef matrix<3, 3, float> matrix33F; typedef matrix<3, 3, double> matrix33D; typedef matrix<4, 4, float> matrix44F; typedef matrix<4, 4, double> matrix44D; typedef matrix<8, 8, float> matrix88F; // These helpers create good old D3D-style matrices. inline matrix44F matrix44F_make_perspective_offcenter_lh(float l, float r, float b, float t, float nz, float fz) { float two_nz = 2.0f * nz; float one_over_width = 1.0f / (r - l); float one_over_height = 1.0f / (t - b); matrix44F view_to_proj; view_to_proj[0].set(two_nz * one_over_width, 0.0f, 0.0f, 0.0f); view_to_proj[1].set(0.0f, two_nz * one_over_height, 0.0f, 0.0f); view_to_proj[2].set(-(l + r) * one_over_width, -(t + b) * one_over_height, fz / (fz - nz), 1.0f); view_to_proj[3].set(0.0f, 0.0f, -view_to_proj[2][2] * nz, 0.0f); return view_to_proj; } // fov_y: full Y field of view (radians) // aspect: viewspace width/height inline matrix44F matrix44F_make_perspective_fov_lh(float fov_y, float aspect, float nz, float fz) { double sin_fov = sin(0.5f * fov_y); double cos_fov = cos(0.5f * fov_y); float y_scale = static_cast(cos_fov / sin_fov); float x_scale = static_cast(y_scale / aspect); matrix44F view_to_proj; view_to_proj[0].set(x_scale, 0, 0, 0); view_to_proj[1].set(0, y_scale, 0, 0); view_to_proj[2].set(0, 0, fz / (fz - nz), 1); view_to_proj[3].set(0, 0, -nz * fz / (fz - nz), 0); return view_to_proj; } inline matrix44F matrix44F_make_ortho_offcenter_lh(float l, float r, float b, float t, float nz, float fz) { matrix44F view_to_proj; view_to_proj[0].set(2.0f / (r - l), 0.0f, 0.0f, 0.0f); view_to_proj[1].set(0.0f, 2.0f / (t - b), 0.0f, 0.0f); view_to_proj[2].set(0.0f, 0.0f, 1.0f / (fz - nz), 0.0f); view_to_proj[3].set((l + r) / (l - r), (t + b) / (b - t), nz / (nz - fz), 1.0f); return view_to_proj; } inline matrix44F matrix44F_make_ortho_lh(float w, float h, float nz, float fz) { return matrix44F_make_ortho_offcenter_lh(-w * .5f, w * .5f, -h * .5f, h * .5f, nz, fz); } inline matrix44F matrix44F_make_projection_to_screen_d3d(int x, int y, int w, int h, float min_z, float max_z) { matrix44F proj_to_screen; proj_to_screen[0].set(w * .5f, 0.0f, 0.0f, 0.0f); proj_to_screen[1].set(0, h * -.5f, 0.0f, 0.0f); proj_to_screen[2].set(0, 0.0f, max_z - min_z, 0.0f); proj_to_screen[3].set(x + w * .5f, y + h * .5f, min_z, 1.0f); return proj_to_screen; } inline matrix44F matrix44F_make_lookat_lh(const vec3F& camera_pos, const vec3F& look_at, const vec3F& camera_up, float camera_roll_ang_in_radians) { vec4F col2(look_at - camera_pos); assert(col2.is_vector()); if (col2.normalize() == 0.0f) col2.set(0, 0, 1, 0); vec4F col1(camera_up); assert(col1.is_vector()); if (!col2[0] && !col2[2]) col1.set(-1.0f, 0.0f, 0.0f, 0.0f); if ((col1.dot(col2)) > .9999f) col1.set(0.0f, 1.0f, 0.0f, 0.0f); vec4F col0(vec4F::cross3(col1, col2).normalize_in_place()); col1 = vec4F::cross3(col2, col0).normalize_in_place(); matrix44F rotm(matrix44F::make_identity_matrix()); rotm.set_col(0, col0); rotm.set_col(1, col1); rotm.set_col(2, col2); return matrix44F::make_translate_matrix(-camera_pos[0], -camera_pos[1], -camera_pos[2]) * rotm * matrix44F::make_rotate_matrix(2, camera_roll_ang_in_radians); } template R matrix_NxN_create_DCT() { assert(R::num_rows == R::num_cols); const uint32_t N = R::num_cols; R result; for (uint32_t k = 0; k < N; k++) { for (uint32_t n = 0; n < N; n++) { double f; if (!k) f = 1.0f / sqrt(float(N)); else f = sqrt(2.0f / float(N)) * cos((basisu::cPiD * (2.0f * float(n) + 1.0f) * float(k)) / (2.0f * float(N))); result(k, n) = static_cast(f); } } return result; } template R matrix_NxN_DCT(const R& a, const R& dct) { R temp; matrix_mul_helper(temp, dct, a); R result; matrix_mul_helper_transpose_rhs(result, temp, dct); return result; } template R matrix_NxN_IDCT(const R& b, const R& dct) { R temp; matrix_mul_helper_transpose_lhs(temp, dct, b); R result; matrix_mul_helper(result, temp, dct); return result; } template matrix matrix_kronecker_product(const X& a, const Y& b) { matrix result; for (uint32_t r = 0; r < X::num_rows; r++) { for (uint32_t c = 0; c < X::num_cols; c++) { for (uint32_t i = 0; i < Y::num_rows; i++) for (uint32_t j = 0; j < Y::num_cols; j++) result(r * Y::num_rows + i, c * Y::num_cols + j) = a(r, c) * b(i, j); } } return result; } template matrix matrix_combine_vertically(const X& a, const Y& b) { matrix result; for (uint32_t r = 0; r < X::num_rows; r++) for (uint32_t c = 0; c < X::num_cols; c++) result(r, c) = a(r, c); for (uint32_t r = 0; r < Y::num_rows; r++) for (uint32_t c = 0; c < Y::num_cols; c++) result(r + X::num_rows, c) = b(r, c); return result; } inline matrix88F get_haar8() { matrix22F haar2( 1, 1, 1, -1); matrix22F i2( 1, 0, 0, 1); matrix44F i4( 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); matrix<1, 2, float> b0; b0(0, 0) = 1; b0(0, 1) = 1; matrix<1, 2, float> b1; b1(0, 0) = 1.0f; b1(0, 1) = -1.0f; matrix<2, 4, float> haar4_0 = matrix_kronecker_product(haar2, b0); matrix<2, 4, float> haar4_1 = matrix_kronecker_product(i2, b1); matrix<4, 4, float> haar4 = matrix_combine_vertically(haar4_0, haar4_1); matrix<4, 8, float> haar8_0 = matrix_kronecker_product(haar4, b0); matrix<4, 8, float> haar8_1 = matrix_kronecker_product(i4, b1); haar8_0[2] *= sqrtf(2); haar8_0[3] *= sqrtf(2); haar8_1 *= 2.0f; matrix<8, 8, float> haar8 = matrix_combine_vertically(haar8_0, haar8_1); return haar8; } inline matrix44F get_haar4() { const float sqrt2 = 1.4142135623730951f; return matrix44F( .5f * 1, .5f * 1, .5f * 1, .5f * 1, .5f * 1, .5f * 1, .5f * -1, .5f * -1, .5f * sqrt2, .5f * -sqrt2, 0, 0, 0, 0, .5f * sqrt2, .5f * -sqrt2); } template inline matrix<2, 2, T> get_inverse_2x2(const matrix<2, 2, T>& m) { double a = m[0][0]; double b = m[0][1]; double c = m[1][0]; double d = m[1][1]; double det = a * d - b * c; if (det != 0.0f) det = 1.0f / det; matrix<2, 2, T> result; result[0][0] = static_cast(d * det); result[0][1] = static_cast(-b * det); result[1][0] = static_cast(-c * det); result[1][1] = static_cast(a * det); return result; } } // namespace bu_math namespace basisu { class tracked_stat { public: tracked_stat() { clear(); } inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } inline void update(int32_t val) { m_num++; m_total += val; m_total2 += val * val; } inline tracked_stat& operator += (uint32_t val) { update(val); return *this; } inline uint32_t get_number_of_values() { return m_num; } inline uint64_t get_total() const { return m_total; } inline uint64_t get_total2() const { return m_total2; } inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; inline float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } inline float get_variance() const { float s = get_std_dev(); return s * s; } private: uint32_t m_num; int64_t m_total; int64_t m_total2; }; class tracked_stat_dbl { public: tracked_stat_dbl() { clear(); } inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } inline void update(double val) { m_num++; m_total += val; m_total2 += val * val; } inline tracked_stat_dbl& operator += (double val) { update(val); return *this; } inline uint64_t get_number_of_values() { return m_num; } inline double get_total() const { return m_total; } inline double get_total2() const { return m_total2; } inline double get_average() const { return m_num ? m_total / (double)m_num : 0.0f; }; inline double get_std_dev() const { return m_num ? sqrt((double)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } inline double get_variance() const { double s = get_std_dev(); return s * s; } private: uint64_t m_num; double m_total; double m_total2; }; template struct stats { uint32_t m_n; FloatType m_total, m_total_sq; // total, total of squares values FloatType m_avg, m_avg_sq; // mean, mean of the squared values FloatType m_rms; // sqrt(m_avg_sq) FloatType m_std_dev, m_var; // population standard deviation and variance FloatType m_mad; // mean absolute deviation FloatType m_min, m_max, m_range; // min and max values, and max-min FloatType m_len; // length of values as a vector (Euclidean norm or L2 norm) FloatType m_coeff_of_var; // coefficient of variation (std_dev/mean), High CV: Indicates greater variability relative to the mean, meaning the data values are more spread out, // Low CV : Indicates less variability relative to the mean, meaning the data values are more consistent. FloatType m_skewness; // Skewness = 0: The data is perfectly symmetric around the mean, // Skewness > 0: The data is positively skewed (right-skewed), // Skewness < 0: The data is negatively skewed (left-skewed) // 0-.5 approx. symmetry, .5-1 moderate skew, >= 1 highly skewed FloatType m_kurtosis; // Excess Kurtosis: Kurtosis = 0: The distribution has normal kurtosis (mesokurtic) // Kurtosis > 0: The distribution is leptokurtic, with heavy tails and a sharp peak // Kurtosis < 0: The distribution is platykurtic, with light tails and a flatter peak bool m_any_zero; FloatType m_median; uint32_t m_median_index; stats() { clear(); } void clear() { m_n = 0; m_total = 0, m_total_sq = 0; m_avg = 0, m_avg_sq = 0; m_rms = 0; m_std_dev = 0, m_var = 0; m_mad = 0; m_min = BIG_FLOAT_VAL, m_max = -BIG_FLOAT_VAL; m_range = 0.0f; m_len = 0; m_coeff_of_var = 0; m_skewness = 0; m_kurtosis = 0; m_any_zero = false; m_median = 0; m_median_index = 0; } template void calc_median(uint32_t n, const T* pVals, uint32_t stride = 1) { m_median = 0; m_median_index = 0; if (!n) return; basisu::vector< std::pair > vals(n); for (uint32_t i = 0; i < n; i++) { vals[i].first = pVals[i * stride]; vals[i].second = i; } std::sort(vals.begin(), vals.end(), [](const std::pair& a, const std::pair& b) { return a.first < b.first; }); m_median = vals[n / 2].first; if ((n & 1) == 0) m_median = (m_median + vals[(n / 2) - 1].first) * .5f; m_median_index = vals[n / 2].second; } template void calc(uint32_t n, const T* pVals, uint32_t stride = 1, bool calc_median_flag = false) { clear(); if (!n) return; if (calc_median_flag) calc_median(n, pVals, stride); m_n = n; for (uint32_t i = 0; i < n; i++) { FloatType v = (FloatType)pVals[i * stride]; if (v == 0.0f) m_any_zero = true; m_total += v; m_total_sq += v * v; if (!i) { m_min = v; m_max = v; } else { m_min = minimum(m_min, v); m_max = maximum(m_max, v); } } m_range = m_max - m_min; m_len = sqrt(m_total_sq); const FloatType nd = (FloatType)n; m_avg = m_total / nd; m_avg_sq = m_total_sq / nd; m_rms = sqrt(m_avg_sq); for (uint32_t i = 0; i < n; i++) { FloatType v = (FloatType)pVals[i * stride]; FloatType d = v - m_avg; const FloatType d2 = d * d; const FloatType d3 = d2 * d; const FloatType d4 = d3 * d; m_var += d2; m_mad += fabs(d); m_skewness += d3; m_kurtosis += d4; } m_var /= nd; m_mad /= nd; m_std_dev = sqrt(m_var); m_coeff_of_var = (m_avg != 0.0f) ? (m_std_dev / fabs(m_avg)) : 0.0f; FloatType k3 = m_std_dev * m_std_dev * m_std_dev; FloatType k4 = k3 * m_std_dev; m_skewness = (k3 != 0.0f) ? ((m_skewness / nd) / k3) : 0.0f; m_kurtosis = (k4 != 0.0f) ? (((m_kurtosis / nd) / k4) - 3.0f) : 0.0f; } // Only compute average, variance and standard deviation. template void calc_simplified(uint32_t n, const T* pVals, uint32_t stride = 1) { clear(); if (!n) return; m_n = n; for (uint32_t i = 0; i < n; i++) { FloatType v = (FloatType)pVals[i * stride]; m_total += v; } const FloatType nd = (FloatType)n; m_avg = m_total / nd; for (uint32_t i = 0; i < n; i++) { FloatType v = (FloatType)pVals[i * stride]; FloatType d = v - m_avg; const FloatType d2 = d * d; m_var += d2; } m_var /= nd; m_std_dev = sqrt(m_var); } }; template struct comparative_stats { FloatType m_cov; // covariance FloatType m_pearson; // Pearson Correlation Coefficient (r) [-1,1] FloatType m_mse; // mean squared error FloatType m_rmse; // root mean squared error FloatType m_mae; // mean abs error FloatType m_rmsle; // root mean squared log error FloatType m_euclidean_dist; // euclidean distance between values as vectors FloatType m_cosine_sim; // normalized dot products of values as vectors FloatType m_min_diff, m_max_diff; // minimum/maximum abs difference between values comparative_stats() { clear(); } void clear() { m_cov = 0; m_pearson = 0; m_mse = 0; m_rmse = 0; m_mae = 0; m_rmsle = 0; m_euclidean_dist = 0; m_cosine_sim = 0; m_min_diff = 0; m_max_diff = 0; } template void calc(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats *pA_stats = nullptr, const stats *pB_stats = nullptr) { clear(); if (!n) return; stats temp_a_stats; if (!pA_stats) { pA_stats = &temp_a_stats; temp_a_stats.calc(n, pA, a_stride); } stats temp_b_stats; if (!pB_stats) { pB_stats = &temp_b_stats; temp_b_stats.calc(n, pB, b_stride); } for (uint32_t i = 0; i < n; i++) { const FloatType fa = (FloatType)pA[i * a_stride]; const FloatType fb = (FloatType)pB[i * b_stride]; if ((pA_stats->m_min >= 0.0f) && (pB_stats->m_min >= 0.0f)) { const FloatType ld = log(fa + 1.0f) - log(fb + 1.0f); m_rmsle += ld * ld; } const FloatType diff = fa - fb; const FloatType abs_diff = fabs(diff); m_mse += diff * diff; m_mae += abs_diff; m_min_diff = i ? minimum(m_min_diff, abs_diff) : abs_diff; m_max_diff = maximum(m_max_diff, abs_diff); const FloatType da = fa - pA_stats->m_avg; const FloatType db = fb - pB_stats->m_avg; m_cov += da * db; m_cosine_sim += fa * fb; } const FloatType nd = (FloatType)n; m_euclidean_dist = sqrt(m_mse); m_mse /= nd; m_rmse = sqrt(m_mse); m_mae /= nd; m_cov /= nd; FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); if (dv != 0.0f) m_pearson = m_cov / dv; if ((pA_stats->m_min >= 0.0) && (pB_stats->m_min >= 0.0f)) m_rmsle = sqrt(m_rmsle / nd); FloatType c = pA_stats->m_len * pB_stats->m_len; if (c != 0.0f) m_cosine_sim /= c; else m_cosine_sim = 0.0f; } // Only computes Pearson, cov, mse, rmse, Euclidean distance template void calc_pearson(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) { clear(); if (!n) return; stats temp_a_stats; if (!pA_stats) { pA_stats = &temp_a_stats; temp_a_stats.calc(n, pA, a_stride); } stats temp_b_stats; if (!pB_stats) { pB_stats = &temp_b_stats; temp_b_stats.calc(n, pB, b_stride); } for (uint32_t i = 0; i < n; i++) { const FloatType fa = (FloatType)pA[i * a_stride]; const FloatType fb = (FloatType)pB[i * b_stride]; const FloatType diff = fa - fb; m_mse += diff * diff; const FloatType da = fa - pA_stats->m_avg; const FloatType db = fb - pB_stats->m_avg; m_cov += da * db; } const FloatType nd = (FloatType)n; m_euclidean_dist = sqrt(m_mse); m_mse /= nd; m_rmse = sqrt(m_mse); m_cov /= nd; FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); if (dv != 0.0f) m_pearson = m_cov / dv; } // Only computes MSE, RMSE, eclidiean distance, and covariance. template void calc_simplified(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) { clear(); if (!n) return; stats temp_a_stats; if (!pA_stats) { pA_stats = &temp_a_stats; temp_a_stats.calc(n, pA, a_stride); } stats temp_b_stats; if (!pB_stats) { pB_stats = &temp_b_stats; temp_b_stats.calc(n, pB, b_stride); } for (uint32_t i = 0; i < n; i++) { const FloatType fa = (FloatType)pA[i * a_stride]; const FloatType fb = (FloatType)pB[i * b_stride]; const FloatType diff = fa - fb; m_mse += diff * diff; const FloatType da = fa - pA_stats->m_avg; const FloatType db = fb - pB_stats->m_avg; m_cov += da * db; } const FloatType nd = (FloatType)n; m_euclidean_dist = sqrt(m_mse); m_mse /= nd; m_rmse = sqrt(m_mse); m_cov /= nd; } // Only computes covariance. template void calc_cov(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) { clear(); if (!n) return; stats temp_a_stats; if (!pA_stats) { pA_stats = &temp_a_stats; temp_a_stats.calc(n, pA, a_stride); } stats temp_b_stats; if (!pB_stats) { pB_stats = &temp_b_stats; temp_b_stats.calc(n, pB, b_stride); } for (uint32_t i = 0; i < n; i++) { const FloatType fa = (FloatType)pA[i * a_stride]; const FloatType fb = (FloatType)pB[i * b_stride]; const FloatType da = fa - pA_stats->m_avg; const FloatType db = fb - pB_stats->m_avg; m_cov += da * db; } const FloatType nd = (FloatType)n; m_cov /= nd; } }; class stat_history { public: stat_history(uint32_t size) { init(size); } void init(uint32_t size) { clear(); m_samples.reserve(size); m_samples.resize(0); m_max_samples = size; } inline void clear() { m_samples.resize(0); m_max_samples = 0; } inline void update(double val) { m_samples.push_back(val); if (m_samples.size() > m_max_samples) m_samples.erase_index(0); } inline size_t size() { return m_samples.size(); } struct stats { double m_avg = 0; double m_std_dev = 0; double m_var = 0; double m_mad = 0; double m_min_val = 0; double m_max_val = 0; void clear() { basisu::clear_obj(*this); } }; inline void get_stats(stats& s) { s.clear(); if (m_samples.empty()) return; double total = 0, total2 = 0; for (size_t i = 0; i < m_samples.size(); i++) { const double v = m_samples[i]; total += v; total2 += v * v; if (!i) { s.m_min_val = v; s.m_max_val = v; } else { s.m_min_val = basisu::minimum(s.m_min_val, v); s.m_max_val = basisu::maximum(s.m_max_val, v); } } const double n = (double)m_samples.size(); s.m_avg = total / n; s.m_std_dev = sqrt((n * total2 - total * total)) / n; s.m_var = (n * total2 - total * total) / (n * n); double sc = 0; for (size_t i = 0; i < m_samples.size(); i++) { const double v = m_samples[i]; s.m_mad += fabs(v - s.m_avg); sc += basisu::square(v - s.m_avg); } sc = sqrt(sc / n); s.m_mad /= n; } private: uint32_t m_max_samples; basisu::vector m_samples; }; // bfloat16 helpers, see: // https://en.wikipedia.org/wiki/Bfloat16_floating-point_format typedef union { uint32_t u; float f; } float32_union; typedef uint16_t bfloat16; inline float bfloat16_to_float(bfloat16 bfloat16) { float32_union float_union; float_union.u = ((uint32_t)bfloat16) << 16; return float_union.f; } inline bfloat16 float_to_bfloat16(float input, bool round_flag = true) { float32_union float_union; float_union.f = input; uint32_t exponent = (float_union.u >> 23) & 0xFF; // Check if the number is denormalized in float32 (exponent == 0) if (exponent == 0) { // Handle denormalized float32 as zero in bfloat16 return 0x0000; } // Extract the top 16 bits (sign, exponent, and 7 most significant bits of the mantissa) uint32_t upperBits = float_union.u >> 16; if (round_flag) { // Check the most significant bit of the lower 16 bits for rounding uint32_t lowerBits = float_union.u & 0xFFFF; // Round to nearest or even if ((lowerBits & 0x8000) && ((lowerBits > 0x8000) || ((lowerBits == 0x8000) && (upperBits & 1))) ) { // Round up upperBits += 1; // Check for overflow in the exponent after rounding up if (((upperBits & 0x7F80) == 0x7F80) && ((upperBits & 0x007F) == 0)) { // Exponent overflow (the upper bits became all 1s) // Set the result to infinity upperBits = (upperBits & 0x8000) | 0x7F80; // Preserve the sign bit, set exponent to 0xFF, and mantissa to 0 } } } return (bfloat16)upperBits; } inline int bfloat16_get_exp(bfloat16 v) { return (int)((v >> 7) & 0xFF) - 127; } inline int bfloat16_get_mantissa(bfloat16 v) { return (v & 0x7F); } inline int bfloat16_get_sign(bfloat16 v) { return (v & 0x8000) ? -1 : 1; } inline bool bfloat16_is_nan_or_inf(bfloat16 v) { return ((v >> 7) & 0xFF) == 0xFF; } inline bool bfloat16_is_zero(bfloat16 v) { return (v & 0x7FFF) == 0; } inline bfloat16 bfloat16_init(int sign, int exp, int mant) { uint16_t res = (sign < 0) ? 0x8000 : 0; assert((exp >= -126) && (res <= 127)); res |= ((exp + 127) << 7); assert((mant >= 0) && (mant < 128)); res |= mant; return res; } } // namespace basisu