initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

View File

@@ -0,0 +1,242 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/AABBTree/AABBTreeBuilder.h>
JPH_NAMESPACE_BEGIN
uint AABBTreeBuilder::Node::GetMinDepth(const Array<Node> &inNodes) const
{
	// A leaf counts as depth 1
	if (!HasChildren())
		return 1;

	// Interior node: one more than the shallower of the two subtrees
	uint depth_left = inNodes[mChild[0]].GetMinDepth(inNodes);
	uint depth_right = inNodes[mChild[1]].GetMinDepth(inNodes);
	return 1 + min(depth_left, depth_right);
}
uint AABBTreeBuilder::Node::GetMaxDepth(const Array<Node> &inNodes) const
{
	// A leaf counts as depth 1
	if (!HasChildren())
		return 1;

	// Interior node: one more than the deeper of the two subtrees
	uint depth_left = inNodes[mChild[0]].GetMaxDepth(inNodes);
	uint depth_right = inNodes[mChild[1]].GetMaxDepth(inNodes);
	return 1 + max(depth_left, depth_right);
}
uint AABBTreeBuilder::Node::GetNodeCount(const Array<Node> &inNodes) const
{
	// A leaf is a single node
	if (!HasChildren())
		return 1;

	// Count this node plus all nodes in both subtrees
	return 1 + inNodes[mChild[0]].GetNodeCount(inNodes) + inNodes[mChild[1]].GetNodeCount(inNodes);
}
uint AABBTreeBuilder::Node::GetLeafNodeCount(const Array<Node> &inNodes) const
{
	// This node is itself a leaf
	if (!HasChildren())
		return 1;

	// Sum the leaves of both subtrees (interior nodes contribute nothing)
	return inNodes[mChild[0]].GetLeafNodeCount(inNodes) + inNodes[mChild[1]].GetLeafNodeCount(inNodes);
}
uint AABBTreeBuilder::Node::GetTriangleCountInTree(const Array<Node> &inNodes) const
{
	// Leaf: triangles are stored on the node itself
	if (!HasChildren())
		return GetTriangleCount();

	// Interior: triangles live only in the leaves of both subtrees
	return inNodes[mChild[0]].GetTriangleCountInTree(inNodes) + inNodes[mChild[1]].GetTriangleCountInTree(inNodes);
}
// Calculate the average, minimum and maximum number of triangles per leaf node.
// outAverage / outMin / outMax are pure out-parameters; they are fully overwritten here.
void AABBTreeBuilder::Node::GetTriangleCountPerNode(const Array<Node> &inNodes, float &outAverage, uint &outMin, uint &outMax) const
{
	// Seed the minimum with the maximum value of the unsigned counter so any real leaf count lowers it.
	// (The previous code seeded with INT_MAX, which is not the maximum of a uint.)
	outMin = UINT_MAX;
	outMax = 0;
	outAverage = 0;
	uint avg_divisor = 0;
	GetTriangleCountPerNodeInternal(inNodes, outAverage, avg_divisor, outMin, outMax);
	// avg_divisor is the number of leaves visited; guard against division by zero
	if (avg_divisor > 0)
		outAverage /= avg_divisor;
}
float AABBTreeBuilder::Node::CalculateSAHCost(const Array<Node> &inNodes, float inCostTraversal, float inCostLeaf) const
{
	// A degenerate root bounding box (no positive surface area) has zero cost
	float root_area = mBounds.GetSurfaceArea();
	if (!(root_area > 0.0f))
		return 0.0f;

	// Pre-divide both costs by the root surface area so the recursion only needs multiplications
	return CalculateSAHCostInternal(inNodes, inCostTraversal / root_area, inCostLeaf / root_area);
}
// Collect up to inN descendants of this node, breadth first: repeatedly replace an interior
// node in outChildren by its two children until inN entries are reached or only leaf
// (triangle) nodes remain. outChildren must be empty on entry; it stays empty for a leaf.
void AABBTreeBuilder::Node::GetNChildren(const Array<Node> &inNodes, uint inN, Array<const Node*> &outChildren) const
{
	JPH_ASSERT(outChildren.empty());

	// Check if there is anything to expand
	if (!HasChildren())
		return;

	// Start with the children of this node
	outChildren.push_back(&inNodes[mChild[0]]);
	outChildren.push_back(&inNodes[mChild[1]]);

	// next: index of the candidate to expand; all_triangles: tracks whether a full sweep
	// found nothing expandable (termination condition)
	size_t next = 0;
	bool all_triangles = true;
	while (outChildren.size() < inN)
	{
		// If we have looped over all nodes, start over with the first node again
		if (next >= outChildren.size())
		{
			// If there are only triangle nodes left, we have to terminate
			if (all_triangles)
				return;
			next = 0;
			all_triangles = true;
		}

		// Try to expand this node into its two children
		const Node *to_expand = outChildren[next];
		if (to_expand->HasChildren())
		{
			// Replace the interior node with its two children (erase shifts the rest forward,
			// so 'next' now points at the following candidate)
			outChildren.erase(outChildren.begin() + next);
			outChildren.push_back(&inNodes[to_expand->mChild[0]]);
			outChildren.push_back(&inNodes[to_expand->mChild[1]]);
			all_triangles = false;
		}
		else
		{
			++next;
		}
	}
}
float AABBTreeBuilder::Node::CalculateSAHCostInternal(const Array<Node> &inNodes, float inCostTraversalDivSurfaceArea, float inCostLeafDivSurfaceArea) const
{
	float area = mBounds.GetSurfaceArea();

	// Leaf: cost proportional to its surface area times the number of triangles to test
	if (!HasChildren())
		return inCostLeafDivSurfaceArea * area * GetTriangleCount();

	// Interior: traversal cost for this node plus the cost of both subtrees
	// (accumulated left-to-right, matching the original evaluation order)
	float cost = inCostTraversalDivSurfaceArea * area;
	cost += inNodes[mChild[0]].CalculateSAHCostInternal(inNodes, inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea);
	cost += inNodes[mChild[1]].CalculateSAHCostInternal(inNodes, inCostTraversalDivSurfaceArea, inCostLeafDivSurfaceArea);
	return cost;
}
void AABBTreeBuilder::Node::GetTriangleCountPerNodeInternal(const Array<Node> &inNodes, float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const
{
if (HasChildren())
{
inNodes[mChild[0]].GetTriangleCountPerNodeInternal(inNodes, outAverage, outAverageDivisor, outMin, outMax);
inNodes[mChild[1]].GetTriangleCountPerNodeInternal(inNodes, outAverage, outAverageDivisor, outMin, outMax);
}
else
{
outAverage += GetTriangleCount();
outAverageDivisor++;
outMin = min(outMin, GetTriangleCount());
outMax = max(outMax, GetTriangleCount());
}
}
// Construct a builder that partitions triangles using inSplitter; leaves will hold at most
// inMaxTrianglesPerLeaf triangles. The splitter is held by reference (not owned).
AABBTreeBuilder::AABBTreeBuilder(TriangleSplitter &inSplitter, uint inMaxTrianglesPerLeaf) :
	mTriangleSplitter(inSplitter),
	mMaxTrianglesPerLeaf(inMaxTrianglesPerLeaf)
{
}
// Build the tree from the splitter's full triangle range and fill outStats.
// Returns a pointer to the root node, which lives inside mNodes.
AABBTreeBuilder::Node *AABBTreeBuilder::Build(AABBTreeBuilderStats &outStats)
{
	TriangleSplitter::Range initial = mTriangleSplitter.GetInitialRange();

	// Worst case for number of nodes: 1 leaf node per triangle. At each level above, the number of nodes is half that of the level below.
	// This means that at most we'll be allocating 2x the number of triangles in nodes.
	// This reserve also means mNodes should not reallocate during BuildInternal, keeping the
	// 'root' reference (and the returned pointer) valid.
	mNodes.reserve(2 * initial.Count());
	mTriangles.reserve(initial.Count());

	// Build the tree
	Node &root = mNodes[BuildInternal(initial)];

	// Collect stats
	float avg_triangles_per_leaf;
	uint min_triangles_per_leaf, max_triangles_per_leaf;
	root.GetTriangleCountPerNode(mNodes, avg_triangles_per_leaf, min_triangles_per_leaf, max_triangles_per_leaf);
	mTriangleSplitter.GetStats(outStats.mSplitterStats);
	outStats.mSAHCost = root.CalculateSAHCost(mNodes, 1.0f, 1.0f); // Unit traversal/leaf costs
	outStats.mMinDepth = root.GetMinDepth(mNodes);
	outStats.mMaxDepth = root.GetMaxDepth(mNodes);
	outStats.mNodeCount = root.GetNodeCount(mNodes);
	outStats.mLeafNodeCount = root.GetLeafNodeCount(mNodes);
	outStats.mMaxTrianglesPerLeaf = mMaxTrianglesPerLeaf;
	outStats.mTreeMinTrianglesPerLeaf = min_triangles_per_leaf;
	outStats.mTreeMaxTrianglesPerLeaf = max_triangles_per_leaf;
	outStats.mTreeAvgTrianglesPerLeaf = avg_triangles_per_leaf;
	return &root;
}
// Recursively build the subtree for the triangle range inTriangles.
// Returns the index of the created node in mNodes.
uint AABBTreeBuilder::BuildInternal(const TriangleSplitter::Range &inTriangles)
{
	// Check if there are too many triangles left
	if (inTriangles.Count() > mMaxTrianglesPerLeaf)
	{
		// Split triangles in two batches
		TriangleSplitter::Range left, right;
		if (!mTriangleSplitter.Split(inTriangles, left, right))
		{
			// When the trace below triggers:
			//
			// This code builds a tree structure to accelerate collision detection.
			// At top level it will start with all triangles in a mesh and then divides the triangles into two batches.
			// This process repeats until the batch size is smaller than mMaxTrianglePerLeaf.
			//
			// It uses a TriangleSplitter to find a good split. When this warning triggers, the splitter was not able
			// to create a reasonable split for the triangles. This usually happens when the triangles in a batch are
			// intersecting. They could also be overlapping when projected on the 3 coordinate axis.
			//
			// To solve this issue, you could try to pass your mesh through a mesh cleaning / optimization algorithm.
			// You could also inspect the triangles that cause this issue and see if that part of the mesh can be fixed manually.
			//
			// When you do not fix this warning, the tree will be less efficient for collision detection, but it will still work.
			JPH_IF_DEBUG(Trace("AABBTreeBuilder: Doing random split for %d triangles (max per node: %u)!", (int)inTriangles.Count(), mMaxTrianglesPerLeaf);)

			// Fall back to splitting the range in half by index
			int half = inTriangles.Count() / 2;
			JPH_ASSERT(half > 0);
			left = TriangleSplitter::Range(inTriangles.mBegin, inTriangles.mBegin + half);
			right = TriangleSplitter::Range(inTriangles.mBegin + half, inTriangles.mEnd);
		}

		// Recursively build
		// Allocate the parent before recursing so its index precedes its children's,
		// but do not hold a reference across the recursive calls: the push_backs inside
		// them could relocate mNodes' storage (unless capacity was reserved by the caller).
		const uint node_index = (uint)mNodes.size();
		mNodes.push_back(Node());
		uint left_index = BuildInternal(left);
		uint right_index = BuildInternal(right);
		// Re-fetch the node reference after recursion for the reason above
		Node &node = mNodes[node_index];
		node.mChild[0] = left_index;
		node.mChild[1] = right_index;
		// Parent bounds = union of both child bounds
		node.mBounds = mNodes[node.mChild[0]].mBounds;
		node.mBounds.Encapsulate(mNodes[node.mChild[1]].mBounds);
		return node_index;
	}

	// Create leaf node
	const uint node_index = (uint)mNodes.size();
	mNodes.push_back(Node());
	Node &node = mNodes.back();
	node.mTrianglesBegin = (uint)mTriangles.size();
	node.mNumTriangles = inTriangles.mEnd - inTriangles.mBegin;

	// Copy the leaf's triangles into mTriangles (contiguously) and grow its bounds around them
	const VertexList &v = mTriangleSplitter.GetVertices();
	for (uint i = inTriangles.mBegin; i < inTriangles.mEnd; ++i)
	{
		const IndexedTriangle &t = mTriangleSplitter.GetTriangle(i);
		mTriangles.push_back(t);
		node.mBounds.Encapsulate(v, t);
	}
	return node_index;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,118 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/TriangleSplitter/TriangleSplitter.h>
#include <Jolt/Geometry/AABox.h>
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// Statistics gathered while building an AABB tree (filled in by AABBTreeBuilder::Build)
struct AABBTreeBuilderStats
{
	///@name Splitter stats
	TriangleSplitter::Stats mSplitterStats; ///< Stats returned by the triangle splitter algorithm

	///@name Tree structure
	float mSAHCost = 0.0f; ///< Surface Area Heuristic cost of this tree
	int mMinDepth = 0; ///< Minimal depth of tree (number of nodes)
	int mMaxDepth = 0; ///< Maximum depth of tree (number of nodes)
	int mNodeCount = 0; ///< Number of nodes in the tree
	int mLeafNodeCount = 0; ///< Number of leaf nodes (that contain triangles)

	///@name Configured stats
	int mMaxTrianglesPerLeaf = 0; ///< Configured max triangles per leaf

	///@name Actual stats
	int mTreeMinTrianglesPerLeaf = 0; ///< Minimal amount of triangles in a leaf
	int mTreeMaxTrianglesPerLeaf = 0; ///< Maximal amount of triangles in a leaf
	float mTreeAvgTrianglesPerLeaf = 0.0f; ///< Average amount of triangles in leaf nodes
};
/// Helper class to build an AABB tree
class JPH_EXPORT AABBTreeBuilder
{
public:
	/// A node in the tree, contains the AABox for the tree and any child nodes or triangles
	class Node
	{
	public:
		JPH_OVERRIDE_NEW_DELETE

		/// Indicates that there is no child
		static constexpr uint cInvalidNodeIndex = ~uint(0);

		/// Get number of triangles in this node
		inline uint GetTriangleCount() const { return mNumTriangles; }

		/// Check if this node has any children
		inline bool HasChildren() const { return mChild[0] != cInvalidNodeIndex || mChild[1] != cInvalidNodeIndex; }

		/// Min depth of tree
		uint GetMinDepth(const Array<Node> &inNodes) const;

		/// Max depth of tree
		uint GetMaxDepth(const Array<Node> &inNodes) const;

		/// Number of nodes in tree
		uint GetNodeCount(const Array<Node> &inNodes) const;

		/// Number of leaf nodes in tree
		uint GetLeafNodeCount(const Array<Node> &inNodes) const;

		/// Get triangle count in tree
		uint GetTriangleCountInTree(const Array<Node> &inNodes) const;

		/// Calculate min and max triangles per node
		void GetTriangleCountPerNode(const Array<Node> &inNodes, float &outAverage, uint &outMin, uint &outMax) const;

		/// Calculate the total cost of the tree using the surface area heuristic
		float CalculateSAHCost(const Array<Node> &inNodes, float inCostTraversal, float inCostLeaf) const;

		/// Recursively get children (breadth first) to get in total inN children (or less if there are no more)
		void GetNChildren(const Array<Node> &inNodes, uint inN, Array<const Node *> &outChildren) const;

		/// Bounding box
		AABox mBounds;

		/// Triangles (if no child nodes)
		uint mTrianglesBegin; // Index into mTriangles; NOTE(review): not default-initialized, only meaningful for leaf nodes
		uint mNumTriangles = 0;

		/// Child node indices (if no triangles)
		uint mChild[2] = { cInvalidNodeIndex, cInvalidNodeIndex };

	private:
		friend class AABBTreeBuilder;

		/// Recursive helper function to calculate cost of the tree
		float CalculateSAHCostInternal(const Array<Node> &inNodes, float inCostTraversalDivSurfaceArea, float inCostLeafDivSurfaceArea) const;

		/// Recursive helper function to calculate min and max triangles per node
		void GetTriangleCountPerNodeInternal(const Array<Node> &inNodes, float &outAverage, uint &outAverageDivisor, uint &outMin, uint &outMax) const;
	};

	/// Constructor
	AABBTreeBuilder(TriangleSplitter &inSplitter, uint inMaxTrianglesPerLeaf = 16);

	/// Recursively build tree, returns the root node of the tree (a pointer into GetNodes())
	Node * Build(AABBTreeBuilderStats &outStats);

	/// Get all nodes
	const Array<Node> & GetNodes() const { return mNodes; }

	/// Get all triangles
	const Array<IndexedTriangle> & GetTriangles() const { return mTriangles; }

private:
	// Build the subtree for a range of triangles, returns index of created node in mNodes
	uint BuildInternal(const TriangleSplitter::Range &inTriangles);

	TriangleSplitter & mTriangleSplitter; // Splitter that partitions triangle ranges (held by reference, not owned)
	const uint mMaxTrianglesPerLeaf; // Leaf nodes contain at most this many triangles
	Array<Node> mNodes; // All nodes of the tree; Build() returns a pointer into this array
	Array<IndexedTriangle> mTriangles; // Triangles grouped contiguously per leaf node
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,296 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/AABBTree/AABBTreeBuilder.h>
#include <Jolt/Core/ByteBuffer.h>
#include <Jolt/Geometry/IndexedTriangle.h>
JPH_NAMESPACE_BEGIN
/// Conversion algorithm that converts an AABB tree to an optimized binary buffer
template <class TriangleCodec, class NodeCodec>
class AABBTreeToBuffer
{
public:
	/// Header for the tree
	using NodeHeader = typename NodeCodec::Header;

	/// Size in bytes of the header of the tree
	static const int HeaderSize = NodeCodec::HeaderSize;

	/// Maximum number of children per node in the tree
	static const int NumChildrenPerNode = NodeCodec::NumChildrenPerNode;

	/// Header for the triangles
	using TriangleHeader = typename TriangleCodec::TriangleHeader;

	/// Size in bytes of the header for the triangles
	static const int TriangleHeaderSize = TriangleCodec::TriangleHeaderSize;

	/// Convert AABB tree. Returns false if failed.
	/// Runs in two passes that visit the tree in the same order: a first pass that only
	/// estimates the required buffer size (so mTree can be reserved exactly once), and a
	/// second pass that encodes nodes and triangle blocks and patches child offsets into
	/// their parents afterwards.
	bool Convert(const Array<IndexedTriangle> &inTriangles, const Array<AABBTreeBuilder::Node> &inNodes, const VertexList &inVertices, const AABBTreeBuilder::Node *inRoot, bool inStoreUserData, const char *&outError)
	{
		typename NodeCodec::EncodingContext node_ctx;
		typename TriangleCodec::EncodingContext tri_ctx(inVertices);

		// Child nodes out of loop so we don't constantly realloc it
		Array<const AABBTreeBuilder::Node *> child_nodes;
		child_nodes.reserve(NumChildrenPerNode);

		// First calculate how big the tree is going to be.
		// Since the tree can be huge for very large meshes, we don't want
		// to reallocate the buffer as it may cause out of memory situations.
		// This loop mimics the construction loop below.
		uint64 total_size = HeaderSize + TriangleHeaderSize;
		size_t node_count = 1; // Start with root node
		size_t to_process_max_size = 1; // Track size of queues so we can do a single reserve below
		size_t to_process_triangles_max_size = 0;
		{ // A scope to free the memory associated with to_estimate and to_estimate_triangles
			Array<const AABBTreeBuilder::Node *> to_estimate;
			Array<const AABBTreeBuilder::Node *> to_estimate_triangles;
			to_estimate.push_back(inRoot);
			for (;;)
			{
				while (!to_estimate.empty())
				{
					// Get the next node to process
					const AABBTreeBuilder::Node *node = to_estimate.back();
					to_estimate.pop_back();

					// Update total size
					node_ctx.PrepareNodeAllocate(node, total_size);
					if (node->HasChildren())
					{
						// Collect the first NumChildrenPerNode sub-nodes in the tree
						child_nodes.clear(); // Won't free the memory
						node->GetNChildren(inNodes, NumChildrenPerNode, child_nodes);

						// Increment the number of nodes we're going to store
						node_count += child_nodes.size();

						// Insert in reverse order so we estimate left child first when taking nodes from the back
						for (int idx = int(child_nodes.size()) - 1; idx >= 0; --idx)
						{
							// Store triangles in separate list so we process them last
							const AABBTreeBuilder::Node *child = child_nodes[idx];
							if (child->HasChildren())
							{
								to_estimate.push_back(child);
								to_process_max_size = max(to_estimate.size(), to_process_max_size);
							}
							else
							{
								to_estimate_triangles.push_back(child);
								to_process_triangles_max_size = max(to_estimate_triangles.size(), to_process_triangles_max_size);
							}
						}
					}
					else
					{
						// Update total size
						tri_ctx.PreparePack(&inTriangles[node->mTrianglesBegin], node->mNumTriangles, inStoreUserData, total_size);
					}
				}

				// If we've got triangles to estimate, loop again with just the triangles
				if (to_estimate_triangles.empty())
					break;
				else
					to_estimate.swap(to_estimate_triangles);
			}
		}

		// Finalize the prepare stage for the triangle context
		tri_ctx.FinalizePreparePack(total_size);

		// Reserve the buffer
		// On platforms where size_t is narrower than uint64 this detects truncation of the estimate
		if (size_t(total_size) != total_size)
		{
			outError = "AABBTreeToBuffer: Out of memory!";
			return false;
		}
		mTree.reserve(size_t(total_size));

		// Add headers
		NodeHeader *header = HeaderSize > 0? mTree.Allocate<NodeHeader>() : nullptr;
		TriangleHeader *triangle_header = TriangleHeaderSize > 0? mTree.Allocate<TriangleHeader>() : nullptr;

		// Bookkeeping for a node that has been allocated in mTree but whose offsets still
		// need to be patched into its parent once known
		struct NodeData
		{
			const AABBTreeBuilder::Node * mNode = nullptr; // Node that this entry belongs to
			Vec3 mNodeBoundsMin; // Quantized node bounds
			Vec3 mNodeBoundsMax;
			size_t mNodeStart = size_t(-1); // Start of node in mTree
			size_t mTriangleStart = size_t(-1); // Start of the triangle data in mTree
			size_t mChildNodeStart[NumChildrenPerNode]; // Start of the children of the node in mTree
			size_t mChildTrianglesStart[NumChildrenPerNode]; // Start of the triangle data in mTree
			size_t * mParentChildNodeStart = nullptr; // Where to store mNodeStart (to patch mChildNodeStart of my parent)
			size_t * mParentTrianglesStart = nullptr; // Where to store mTriangleStart (to patch mChildTrianglesStart of my parent)
			uint mNumChildren = 0; // Number of children
		};

		Array<NodeData *> to_process;
		to_process.reserve(to_process_max_size);
		Array<NodeData *> to_process_triangles;
		to_process_triangles.reserve(to_process_triangles_max_size);

		Array<NodeData> node_list;
		node_list.reserve(node_count); // Needed to ensure that array is not reallocated, so we can keep pointers in the array

		NodeData root;
		root.mNode = inRoot;
		root.mNodeBoundsMin = inRoot->mBounds.mMin;
		root.mNodeBoundsMax = inRoot->mBounds.mMax;
		node_list.push_back(root);
		to_process.push_back(&node_list.back());

		for (;;)
		{
			while (!to_process.empty())
			{
				// Get the next node to process
				NodeData *node_data = to_process.back();
				to_process.pop_back();

				// Due to quantization box could have become bigger, not smaller
				JPH_ASSERT(AABox(node_data->mNodeBoundsMin, node_data->mNodeBoundsMax).Contains(node_data->mNode->mBounds), "AABBTreeToBuffer: Bounding box became smaller!");

				// Collect the first NumChildrenPerNode sub-nodes in the tree
				child_nodes.clear(); // Won't free the memory
				node_data->mNode->GetNChildren(inNodes, NumChildrenPerNode, child_nodes);
				node_data->mNumChildren = (uint)child_nodes.size();

				// Fill in default child bounds
				Vec3 child_bounds_min[NumChildrenPerNode], child_bounds_max[NumChildrenPerNode];
				for (size_t i = 0; i < NumChildrenPerNode; ++i)
					if (i < child_nodes.size())
					{
						child_bounds_min[i] = child_nodes[i]->mBounds.mMin;
						child_bounds_max[i] = child_nodes[i]->mBounds.mMax;
					}
					else
					{
						child_bounds_min[i] = Vec3::sZero();
						child_bounds_max[i] = Vec3::sZero();
					}

				// Start a new node
				node_data->mNodeStart = node_ctx.NodeAllocate(node_data->mNode, node_data->mNodeBoundsMin, node_data->mNodeBoundsMax, child_nodes, child_bounds_min, child_bounds_max, mTree, outError);
				if (node_data->mNodeStart == size_t(-1))
					return false;

				if (node_data->mNode->HasChildren())
				{
					// Insert in reverse order so we process left child first when taking nodes from the back
					for (int idx = int(child_nodes.size()) - 1; idx >= 0; --idx)
					{
						const AABBTreeBuilder::Node *child_node = child_nodes[idx];

						// Due to quantization box could have become bigger, not smaller
						JPH_ASSERT(AABox(child_bounds_min[idx], child_bounds_max[idx]).Contains(child_node->mBounds), "AABBTreeToBuffer: Bounding box became smaller!");

						// Add child to list of nodes to be processed
						NodeData child;
						child.mNode = child_node;
						child.mNodeBoundsMin = child_bounds_min[idx];
						child.mNodeBoundsMax = child_bounds_max[idx];
						child.mParentChildNodeStart = &node_data->mChildNodeStart[idx];
						child.mParentTrianglesStart = &node_data->mChildTrianglesStart[idx];
						node_list.push_back(child);

						// Store triangles in separate list so we process them last
						if (child_node->HasChildren())
							to_process.push_back(&node_list.back());
						else
							to_process_triangles.push_back(&node_list.back());
					}
				}
				else
				{
					// Add triangles
					node_data->mTriangleStart = tri_ctx.Pack(&inTriangles[node_data->mNode->mTrianglesBegin], node_data->mNode->mNumTriangles, inStoreUserData, mTree, outError);
					if (node_data->mTriangleStart == size_t(-1))
						return false;
				}

				// Patch offset into parent
				if (node_data->mParentChildNodeStart != nullptr)
				{
					*node_data->mParentChildNodeStart = node_data->mNodeStart;
					*node_data->mParentTrianglesStart = node_data->mTriangleStart;
				}
			}

			// If we've got triangles to process, loop again with just the triangles
			if (to_process_triangles.empty())
				break;
			else
				to_process.swap(to_process_triangles);
		}

		// Assert that our reservation was correct (we don't know if we swapped the arrays or not)
		JPH_ASSERT(to_process_max_size == to_process.capacity() || to_process_triangles_max_size == to_process.capacity());
		JPH_ASSERT(to_process_max_size == to_process_triangles.capacity() || to_process_triangles_max_size == to_process_triangles.capacity());

		// Finalize all nodes
		for (NodeData &n : node_list)
			if (!node_ctx.NodeFinalize(n.mNode, n.mNodeStart, n.mNumChildren, n.mChildNodeStart, n.mChildTrianglesStart, mTree, outError))
				return false;

		// Finalize the triangles
		tri_ctx.Finalize(inVertices, triangle_header, mTree);

		// Validate that our reservations were correct
		if (node_count != node_list.size())
		{
			outError = "Internal Error: Node memory estimate was incorrect, memory corruption!";
			return false;
		}
		if (total_size != mTree.size())
		{
			outError = "Internal Error: Tree memory estimate was incorrect, memory corruption!";
			return false;
		}

		// Finalize the nodes
		return node_ctx.Finalize(header, inRoot, node_list[0].mNodeStart, node_list[0].mTriangleStart, outError);
	}

	/// Get resulting data
	inline const ByteBuffer & GetBuffer() const
	{
		return mTree;
	}

	/// Get resulting data
	inline ByteBuffer & GetBuffer()
	{
		return mTree;
	}

	/// Get header for tree
	inline const NodeHeader * GetNodeHeader() const
	{
		return mTree.Get<NodeHeader>(0);
	}

	/// Get header for triangles
	inline const TriangleHeader * GetTriangleHeader() const
	{
		return mTree.Get<TriangleHeader>(HeaderSize);
	}

	/// Get root of resulting tree
	inline const void * GetRoot() const
	{
		return mTree.Get<void>(HeaderSize + TriangleHeaderSize);
	}

private:
	ByteBuffer mTree; ///< Resulting tree structure
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,323 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/ByteBuffer.h>
#include <Jolt/Math/HalfFloat.h>
#include <Jolt/AABBTree/AABBTreeBuilder.h>
JPH_NAMESPACE_BEGIN
class NodeCodecQuadTreeHalfFloat
{
public:
/// Number of child nodes of this node
static constexpr int NumChildrenPerNode = 4;

/// Header for the tree
struct Header
{
	Float3 mRootBoundsMin;
	Float3 mRootBoundsMax;
	uint32 mRootProperties;
	uint8 mBlockIDBits; ///< Number of bits to address a triangle block
	uint8 mPadding[3] = { 0 };
};

/// Size of the header (an empty struct is always > 0 bytes so this needs a separate variable)
static constexpr int HeaderSize = sizeof(Header);

/// Stack size to use during DecodingContext::sWalkTree
static constexpr int StackSize = 128;

/// Node properties: each child's properties are packed into one uint32 — the top
/// TRIANGLE_COUNT_BITS bits hold the triangle count, the low OFFSET_BITS bits hold the
/// child node / triangle block offset shifted right by OFFSET_NON_SIGNIFICANT_BITS
enum : uint32
{
	TRIANGLE_COUNT_BITS = 4,
	TRIANGLE_COUNT_SHIFT = 28,
	TRIANGLE_COUNT_MASK = (1 << TRIANGLE_COUNT_BITS) - 1,
	OFFSET_BITS = 28,
	OFFSET_MASK = (1 << OFFSET_BITS) - 1,
	OFFSET_NON_SIGNIFICANT_BITS = 2, ///< Offsets must be 4-byte aligned so the low 2 bits are not stored
	OFFSET_NON_SIGNIFICANT_MASK = (1 << OFFSET_NON_SIGNIFICANT_BITS) - 1,
};

/// Node structure
struct Node
{
	HalfFloat mBoundsMinX[4]; ///< 4 child bounding boxes
	HalfFloat mBoundsMinY[4];
	HalfFloat mBoundsMinZ[4];
	HalfFloat mBoundsMaxX[4];
	HalfFloat mBoundsMaxY[4];
	HalfFloat mBoundsMaxZ[4];
	uint32 mNodeProperties[4]; ///< 4 child node properties
};

static_assert(sizeof(Node) == 64, "Node should be 64 bytes");
/// This class encodes and compresses quad tree nodes
class EncodingContext
{
public:
	/// Mimics the size a call to NodeAllocate() would add to the buffer
	void PrepareNodeAllocate(const AABBTreeBuilder::Node *inNode, uint64 &ioBufferSize) const
	{
		// We don't emit nodes for leafs
		if (!inNode->HasChildren())
			return;

		// Add size of node
		ioBufferSize += sizeof(Node);
	}

	/// Allocate a new node for inNode.
	/// Algorithm can modify the order of ioChildren to indicate in which order children should be compressed
	/// Algorithm can enlarge the bounding boxes of the children during compression and returns these in outChildBoundsMin, outChildBoundsMax
	/// inNodeBoundsMin, inNodeBoundsMax is the bounding box if inNode possibly widened by compressing the parent node
	/// Returns size_t(-1) on error and reports the error in outError
	size_t NodeAllocate(const AABBTreeBuilder::Node *inNode, Vec3Arg inNodeBoundsMin, Vec3Arg inNodeBoundsMax, Array<const AABBTreeBuilder::Node *> &ioChildren, Vec3 outChildBoundsMin[NumChildrenPerNode], Vec3 outChildBoundsMax[NumChildrenPerNode], ByteBuffer &ioBuffer, const char *&outError) const
	{
		// We don't emit nodes for leafs
		if (!inNode->HasChildren())
			return ioBuffer.size();

		// Remember the start of the node
		size_t node_start = ioBuffer.size();

		// Fill in bounds
		Node *node = ioBuffer.Allocate<Node>();
		for (size_t i = 0; i < 4; ++i)
		{
			if (i < ioChildren.size())
			{
				const AABBTreeBuilder::Node *this_node = ioChildren[i];

				// Copy bounding box
				// Round min down and max up so the quantized half-float box conservatively contains the original
				node->mBoundsMinX[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_NEG_INF>(this_node->mBounds.mMin.GetX());
				node->mBoundsMinY[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_NEG_INF>(this_node->mBounds.mMin.GetY());
				node->mBoundsMinZ[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_NEG_INF>(this_node->mBounds.mMin.GetZ());
				node->mBoundsMaxX[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_POS_INF>(this_node->mBounds.mMax.GetX());
				node->mBoundsMaxY[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_POS_INF>(this_node->mBounds.mMax.GetY());
				node->mBoundsMaxZ[i] = HalfFloatConversion::FromFloat<HalfFloatConversion::ROUND_TO_POS_INF>(this_node->mBounds.mMax.GetZ());

				// Store triangle count in the top bits (0 for interior children); the offset
				// in the low bits is patched in later by NodeFinalize()
				node->mNodeProperties[i] = this_node->GetTriangleCount() << TRIANGLE_COUNT_SHIFT;
				if (this_node->GetTriangleCount() >= TRIANGLE_COUNT_MASK)
				{
					outError = "NodeCodecQuadTreeHalfFloat: Too many triangles";
					return size_t(-1);
				}
			}
			else
			{
				// Make this an invalid triangle node
				node->mNodeProperties[i] = uint32(TRIANGLE_COUNT_MASK) << TRIANGLE_COUNT_SHIFT;

				// Make bounding box invalid
				node->mBoundsMinX[i] = HALF_FLT_MAX;
				node->mBoundsMinY[i] = HALF_FLT_MAX;
				node->mBoundsMinZ[i] = HALF_FLT_MAX;
				node->mBoundsMaxX[i] = HALF_FLT_MAX;
				node->mBoundsMaxY[i] = HALF_FLT_MAX;
				node->mBoundsMaxZ[i] = HALF_FLT_MAX;
			}
		}

		// Since we don't keep track of the bounding box while descending the tree, we keep the root bounds at all levels for triangle compression
		for (int i = 0; i < NumChildrenPerNode; ++i)
		{
			outChildBoundsMin[i] = inNodeBoundsMin;
			outChildBoundsMax[i] = inNodeBoundsMax;
		}

		return node_start;
	}

	/// Once all nodes have been added, this call finalizes all nodes by patching in the offsets of the child nodes (that were added after the node itself was added)
	bool NodeFinalize(const AABBTreeBuilder::Node *inNode, size_t inNodeStart, uint inNumChildren, const size_t *inChildrenNodeStart, const size_t *inChildrenTrianglesStart, ByteBuffer &ioBuffer, const char *&outError)
	{
		if (!inNode->HasChildren())
			return true;

		Node *node = ioBuffer.Get<Node>(inNodeStart);
		for (uint i = 0; i < inNumChildren; ++i)
		{
			size_t offset;
			// A non-zero property means NodeAllocate stored a triangle count, i.e. this child is a leaf
			if (node->mNodeProperties[i] != 0)
			{
				// This is a triangle block
				offset = inChildrenTrianglesStart[i];

				// Store highest block with triangles so we can count the number of bits we need
				mHighestTriangleBlock = max(mHighestTriangleBlock, offset);
			}
			else
			{
				// This is a node block
				offset = inChildrenNodeStart[i];
			}

			// Store offset of next node / triangles
			if (offset & OFFSET_NON_SIGNIFICANT_MASK)
			{
				outError = "NodeCodecQuadTreeHalfFloat: Internal Error: Offset has non-significant bits set";
				return false;
			}
			offset >>= OFFSET_NON_SIGNIFICANT_BITS;
			if (offset > OFFSET_MASK)
			{
				outError = "NodeCodecQuadTreeHalfFloat: Offset too large. Too much data.";
				return false;
			}
			node->mNodeProperties[i] |= uint32(offset);
		}
		return true;
	}

	/// Once all nodes have been finalized, this will finalize the header of the nodes
	bool Finalize(Header *outHeader, const AABBTreeBuilder::Node *inRoot, size_t inRootNodeStart, size_t inRootTrianglesStart, const char *&outError) const
	{
		// Check if we can address the root node
		size_t offset = inRoot->HasChildren()? inRootNodeStart : inRootTrianglesStart;
		if (offset & OFFSET_NON_SIGNIFICANT_MASK)
		{
			outError = "NodeCodecQuadTreeHalfFloat: Internal Error: Offset has non-significant bits set";
			return false;
		}
		offset >>= OFFSET_NON_SIGNIFICANT_BITS;
		if (offset > OFFSET_MASK)
		{
			outError = "NodeCodecQuadTreeHalfFloat: Offset too large. Too much data.";
			return false;
		}

		// If the root has triangles, we need to take that offset instead since the mHighestTriangleBlock will be zero
		size_t highest_triangle_block = inRootTrianglesStart != size_t(-1)? inRootTrianglesStart : mHighestTriangleBlock;
		highest_triangle_block >>= OFFSET_NON_SIGNIFICANT_BITS;

		inRoot->mBounds.mMin.StoreFloat3(&outHeader->mRootBoundsMin);
		inRoot->mBounds.mMax.StoreFloat3(&outHeader->mRootBoundsMax);
		outHeader->mRootProperties = uint32(offset) + (inRoot->GetTriangleCount() << TRIANGLE_COUNT_SHIFT);
		// Number of bits needed to address the highest (shifted) triangle block ID
		outHeader->mBlockIDBits = uint8(32 - CountLeadingZeros(uint32(highest_triangle_block)));
		if (inRoot->GetTriangleCount() >= TRIANGLE_COUNT_MASK)
		{
			outError = "NodeCodecQuadTreeHalfFloat: Too many triangles";
			return false;
		}
		return true;
	}

private:
	size_t mHighestTriangleBlock = 0; // Highest byte offset of any triangle block, used to size block IDs in Finalize()
};
/// This class decodes and decompresses quad tree nodes
class DecodingContext
{
public:
/// Get the amount of bits needed to store an ID to a triangle block
inline static uint sTriangleBlockIDBits(const Header *inHeader)
{
return inHeader->mBlockIDBits;
}
/// Convert a triangle block ID to the start of the triangle buffer
inline static const void * sGetTriangleBlockStart(const uint8 *inBufferStart, uint inTriangleBlockID)
{
return inBufferStart + (inTriangleBlockID << OFFSET_NON_SIGNIFICANT_BITS);
}
/// Constructor
JPH_INLINE explicit DecodingContext(const Header *inHeader)
{
// Start with the root node on the stack
mNodeStack[0] = inHeader->mRootProperties;
}
/// Walk the node tree calling the Visitor::VisitNodes for each node encountered and Visitor::VisitTriangles for each triangle encountered
template <class TriangleContext, class Visitor>
JPH_INLINE void WalkTree(const uint8 *inBufferStart, const TriangleContext &inTriangleContext, Visitor &ioVisitor)
{
    do
    {
        // Test if node contains triangles
        uint32 node_properties = mNodeStack[mTop];
        uint32 tri_count = node_properties >> TRIANGLE_COUNT_SHIFT;
        if (tri_count == 0)
        {
            // Interior node: the remaining bits are the (compressed) byte offset of the Node structure
            const Node *node = reinterpret_cast<const Node *>(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS));

            // Unpack bounds (stored as 16-bit half floats, 4 children per axis component)
#ifdef JPH_CPU_BIG_ENDIAN
            // Big endian: assemble each pair of half floats into a 32-bit lane by hand
            Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0));
            Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0));
            Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0));
            Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0));
            Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0));
            Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0));
#else
            // Little endian: load two consecutive half-float arrays at once and split with swizzles
            UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinX[0]));
            Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy);
            Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
            UVec4 bounds_minzmaxx = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinZ[0]));
            Vec4 bounds_minz = HalfFloatConversion::ToFloat(bounds_minzmaxx);
            Vec4 bounds_maxx = HalfFloatConversion::ToFloat(bounds_minzmaxx.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
            UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMaxY[0]));
            Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz);
            Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
#endif

            // Load properties for 4 children
            UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]);

            // Check which sub nodes to visit; the visitor compacts the properties of the
            // children to visit to the front and returns how many survived
            int num_results = ioVisitor.VisitNodes(bounds_minx, bounds_miny, bounds_minz, bounds_maxx, bounds_maxy, bounds_maxz, properties, mTop);

            // Push them onto the stack (the current entry at mTop is overwritten, it has already been consumed)
            JPH_ASSERT(mTop + 4 < StackSize);
            properties.StoreInt4(&mNodeStack[mTop]);
            mTop += num_results;
        }
        else if (tri_count != TRIANGLE_COUNT_MASK) // TRIANGLE_COUNT_MASK indicates a padding node, normally we shouldn't visit these nodes but when querying with a big enough box you could touch HALF_FLT_MAX (about 65K)
        {
            // Node contains triangles, do individual tests
            uint32 triangle_block_id = node_properties & OFFSET_MASK;
            const void *triangles = sGetTriangleBlockStart(inBufferStart, triangle_block_id);
            ioVisitor.VisitTriangles(inTriangleContext, triangles, tri_count, triangle_block_id);
        }

        // Check if we're done
        if (ioVisitor.ShouldAbort())
            break;

        // Fetch next node until we find one that the visitor wants to see
        do
            --mTop;
        while (mTop >= 0 && !ioVisitor.ShouldVisitNode(mTop));
    }
    while (mTop >= 0);
}
/// This can be used to have the visitor early out (ioVisitor.ShouldAbort() returns true) and later continue again (call WalkTree() again)
/// @return True when the node stack is empty, i.e. the walk cannot be resumed
bool IsDoneWalking() const
{
    return mTop < 0;
}
private:
uint32 mNodeStack[StackSize];
int mTop = 0;
};
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,555 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/RayTriangle.h>
JPH_NAMESPACE_BEGIN
/// Store vertices in 64 bits and indices in 8 bits + 8 bit of flags per triangle like this:
///
/// TriangleBlockHeader,
/// TriangleBlock (4 triangles and their flags in 16 bytes),
/// TriangleBlock...
/// [Optional] UserData (4 bytes per triangle)
///
/// Vertices are stored:
///
/// VertexData (1 vertex in 64 bits),
/// VertexData...
///
/// They're compressed relative to the bounding box as provided by the node codec.
class TriangleCodecIndexed8BitPackSOA4Flags
{
public:
    /// Global header for all triangle data, holds the dequantization transform
    class TriangleHeader
    {
    public:
        Float3 mOffset; ///< Offset of all vertices
        Float3 mScale; ///< Scale of all vertices, vertex_position = mOffset + mScale * compressed_vertex_position
    };

    /// Size of the header (an empty struct is always > 0 bytes so this needs a separate variable)
    static constexpr int TriangleHeaderSize = sizeof(TriangleHeader);

    /// If this codec could return a different offset than the current buffer size when calling Pack()
    static constexpr bool ChangesOffsetOnPack = false;

    /// Amount of bits per component
    enum EComponentData : uint32
    {
        COMPONENT_BITS = 21,
        COMPONENT_MASK = (1 << COMPONENT_BITS) - 1,
    };

    /// Packed X and Y coordinate (Y is split: its low COMPONENT_Y1_BITS bits live here, the rest in EVertexZY)
    enum EVertexXY : uint32
    {
        COMPONENT_X = 0,
        COMPONENT_Y1 = COMPONENT_BITS,
        COMPONENT_Y1_BITS = 32 - COMPONENT_BITS,
    };

    /// Packed Z and Y coordinate (holds the high COMPONENT_Y2_BITS bits of Y, the topmost bit is unused)
    enum EVertexZY : uint32
    {
        COMPONENT_Z = 0,
        COMPONENT_Y2 = COMPONENT_BITS,
        COMPONENT_Y2_BITS = 31 - COMPONENT_BITS,
    };

    /// A single packed vertex (2 x 32 bits = 3 components of COMPONENT_BITS each)
    struct VertexData
    {
        uint32 mVertexXY;
        uint32 mVertexZY;
    };

    static_assert(sizeof(VertexData) == 8, "Compiler added padding");

    /// A block of 4 triangles
    struct TriangleBlock
    {
        uint8 mIndices[3][4]; ///< 8 bit indices to triangle vertices for 4 triangles in the form mIndices[vertex][triangle] where vertex in [0, 2] and triangle in [0, 3]
        uint8 mFlags[4]; ///< Triangle flags (could contain material and active edges)
    };

    static_assert(sizeof(TriangleBlock) == 16, "Compiler added padding");

    enum ETriangleBlockHeaderFlags : uint32
    {
        OFFSET_TO_VERTICES_BITS = 29, ///< Offset from current block to start of vertices in bytes
        OFFSET_TO_VERTICES_MASK = (1 << OFFSET_TO_VERTICES_BITS) - 1,
        OFFSET_NON_SIGNIFICANT_BITS = 2, ///< The offset from the current block to the start of the vertices must be a multiple of 4 bytes
        OFFSET_NON_SIGNIFICANT_MASK = (1 << OFFSET_NON_SIGNIFICANT_BITS) - 1,
        OFFSET_TO_USERDATA_BITS = 3, ///< When user data is stored, this is the number of blocks to skip to get to the user data (0 = no user data)
        OFFSET_TO_USERDATA_MASK = (1 << OFFSET_TO_USERDATA_BITS) - 1,
    };

    /// A triangle header, will be followed by one or more TriangleBlocks
    struct TriangleBlockHeader
    {
        // The low OFFSET_TO_VERTICES_BITS bits of mFlags are the (compressed) offset to the vertex data
        const VertexData * GetVertexData() const { return reinterpret_cast<const VertexData *>(reinterpret_cast<const uint8 *>(this) + ((mFlags & OFFSET_TO_VERTICES_MASK) << OFFSET_NON_SIGNIFICANT_BITS)); }
        // Triangle blocks start immediately after this header
        const TriangleBlock * GetTriangleBlock() const { return reinterpret_cast<const TriangleBlock *>(reinterpret_cast<const uint8 *>(this) + sizeof(TriangleBlockHeader)); }
        // The top OFFSET_TO_USERDATA_BITS bits of mFlags store the number of TriangleBlocks to skip to reach the user data (0 = no user data)
        const uint32 * GetUserData() const { uint32 offset = mFlags >> OFFSET_TO_VERTICES_BITS; return offset == 0? nullptr : reinterpret_cast<const uint32 *>(GetTriangleBlock() + offset); }

        uint32 mFlags;
    };

    static_assert(sizeof(TriangleBlockHeader) == 4, "Compiler added padding");

    /// This class is used to validate that the triangle data will not be degenerate after compression
    class ValidationContext
    {
    public:
        /// Constructor
        ValidationContext(const IndexedTriangleList &inTriangles, const VertexList &inVertices) :
            mVertices(inVertices)
        {
            // Only use the referenced triangles, just like EncodingContext::Finalize does
            for (const IndexedTriangle &i : inTriangles)
                for (uint32 idx : i.mIdx)
                    mBounds.Encapsulate(Vec3(inVertices[idx]));
        }

        /// Test if a triangle will be degenerate after quantization (i.e. two or more vertices quantize to the same point)
        bool IsDegenerate(const IndexedTriangle &inTriangle) const
        {
            // Quantize the triangle in the same way as EncodingContext::Finalize does
            UVec4 quantized_vertex[3];
            Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(mBounds.GetSize(), Vec3::sReplicate(1.0e-20f));
            for (int i = 0; i < 3; ++i)
                quantized_vertex[i] = ((Vec3(mVertices[inTriangle.mIdx[i]]) - mBounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
            return quantized_vertex[0] == quantized_vertex[1] || quantized_vertex[1] == quantized_vertex[2] || quantized_vertex[0] == quantized_vertex[2];
        }

    private:
        const VertexList & mVertices;
        AABox mBounds;
    };

    /// This class is used to encode and compress triangle data into a byte buffer
    class EncodingContext
    {
    public:
        /// Indicates a vertex hasn't been seen yet in the triangle list
        static constexpr uint32 cNotFound = 0xffffffff;

        /// Construct the encoding context
        explicit EncodingContext(const VertexList &inVertices) :
            mVertexMap(inVertices.size(), cNotFound)
        {
        }

        /// Mimics the size a call to Pack() would add to the buffer.
        /// NOTE: the logic here must stay in lockstep with Pack(), it replays the same vertex-map updates.
        void PreparePack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, uint64 &ioBufferSize)
        {
            // Add triangle block header
            ioBufferSize += sizeof(TriangleBlockHeader);

            // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared)
            uint start_vertex = Clamp((int)mVertexCount - 256 + (int)inNumTriangles * 3, 0, (int)mVertexCount);

            // Pack vertices
            uint padded_triangle_count = AlignUp(inNumTriangles, 4);
            for (uint t = 0; t < padded_triangle_count; t += 4)
            {
                // Add triangle block header
                ioBufferSize += sizeof(TriangleBlock);

                for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr)
                    for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx)
                    {
                        // Fetch vertex index. Create degenerate triangles for padding triangles.
                        bool triangle_available = t + block_tri_idx < inNumTriangles;
                        uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0];

                        // Check if we've seen this vertex before and if it is in the range that we can encode
                        uint32 &vertex_index = mVertexMap[src_vertex_index];
                        if (vertex_index == cNotFound || vertex_index < start_vertex)
                        {
                            // Add vertex
                            vertex_index = mVertexCount;
                            mVertexCount++;
                        }
                    }
            }

            // Add user data
            if (inStoreUserData)
                ioBufferSize += inNumTriangles * sizeof(uint32);
        }

        /// Mimics the size the Finalize() call would add to ioBufferSize
        void FinalizePreparePack(uint64 &ioBufferSize)
        {
            // Remember where the vertices are going to start in the output buffer
            JPH_ASSERT(IsAligned(ioBufferSize, 4));
            mVerticesStartIdx = size_t(ioBufferSize);

            // Add vertices to buffer
            ioBufferSize += uint64(mVertexCount) * sizeof(VertexData);

            // Reserve the amount of memory we need for the vertices
            mVertices.reserve(mVertexCount);

            // Set vertex map back to 'not found' so Pack() can replay the same assignment order
            for (uint32 &v : mVertexMap)
                v = cNotFound;
        }

        /// Pack the triangles in inContainer to ioBuffer. This stores the mMaterialIndex of a triangle in the 8 bit flags.
        /// Returns size_t(-1) on error.
        size_t Pack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, ByteBuffer &ioBuffer, const char *&outError)
        {
            JPH_ASSERT(inNumTriangles > 0);

            // Determine position of triangles start
            size_t triangle_block_start = ioBuffer.size();

            // Allocate triangle block header
            TriangleBlockHeader *header = ioBuffer.Allocate<TriangleBlockHeader>();

            // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared)
            uint start_vertex = Clamp((int)mVertices.size() - 256 + (int)inNumTriangles * 3, 0, (int)mVertices.size());

            // Store the start vertex offset relative to TriangleBlockHeader
            size_t offset_to_vertices = mVerticesStartIdx - triangle_block_start + size_t(start_vertex) * sizeof(VertexData);
            if (offset_to_vertices & OFFSET_NON_SIGNIFICANT_MASK)
            {
                outError = "TriangleCodecIndexed8BitPackSOA4Flags: Internal Error: Offset has non-significant bits set";
                return size_t(-1);
            }
            offset_to_vertices >>= OFFSET_NON_SIGNIFICANT_BITS;
            if (offset_to_vertices > OFFSET_TO_VERTICES_MASK)
            {
                outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset to vertices doesn't fit. Too much data.";
                return size_t(-1);
            }
            header->mFlags = uint32(offset_to_vertices);

            // When we store user data we need to store the offset to the user data in TriangleBlocks
            uint padded_triangle_count = AlignUp(inNumTriangles, 4);
            if (inStoreUserData)
            {
                uint32 num_blocks = padded_triangle_count >> 2;
                JPH_ASSERT(num_blocks <= OFFSET_TO_USERDATA_MASK);
                header->mFlags |= num_blocks << OFFSET_TO_VERTICES_BITS;
            }

            // Pack vertices
            for (uint t = 0; t < padded_triangle_count; t += 4)
            {
                TriangleBlock *block = ioBuffer.Allocate<TriangleBlock>();
                for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr)
                    for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx)
                    {
                        // Fetch vertex index. Create degenerate triangles for padding triangles.
                        bool triangle_available = t + block_tri_idx < inNumTriangles;
                        uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0];

                        // Check if we've seen this vertex before and if it is in the range that we can encode
                        uint32 &vertex_index = mVertexMap[src_vertex_index];
                        if (vertex_index == cNotFound || vertex_index < start_vertex)
                        {
                            // Add vertex
                            vertex_index = (uint32)mVertices.size();
                            mVertices.push_back(src_vertex_index);
                        }

                        // Store vertex index (relative to start_vertex so it fits in 8 bits)
                        uint32 vertex_offset = vertex_index - start_vertex;
                        if (vertex_offset > 0xff)
                        {
                            outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset doesn't fit in 8 bit";
                            return size_t(-1);
                        }
                        block->mIndices[vertex_nr][block_tri_idx] = (uint8)vertex_offset;

                        // Store flags
                        uint32 flags = triangle_available? inTriangles[t + block_tri_idx].mMaterialIndex : 0;
                        if (flags > 0xff)
                        {
                            outError = "TriangleCodecIndexed8BitPackSOA4Flags: Material index doesn't fit in 8 bit";
                            return size_t(-1);
                        }
                        block->mFlags[block_tri_idx] = (uint8)flags;
                    }
            }

            // Store user data
            if (inStoreUserData)
            {
                uint32 *user_data = ioBuffer.Allocate<uint32>(inNumTriangles);
                for (uint t = 0; t < inNumTriangles; ++t)
                    user_data[t] = inTriangles[t].mUserData;
            }

            return triangle_block_start;
        }

        /// After all triangles have been packed, this finalizes the header and triangle buffer
        void Finalize(const VertexList &inVertices, TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const
        {
            // Assert that our reservations were correct
            JPH_ASSERT(mVertices.size() == mVertexCount);
            JPH_ASSERT(ioBuffer.size() == mVerticesStartIdx);

            // Check if anything to do
            if (mVertices.empty())
                return;

            // Calculate bounding box
            AABox bounds;
            for (uint32 v : mVertices)
                bounds.Encapsulate(Vec3(inVertices[v]));

            // Compress vertices (quantize each component to COMPONENT_BITS relative to the bounding box)
            VertexData *vertices = ioBuffer.Allocate<VertexData>(mVertices.size());
            Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(bounds.GetSize(), Vec3::sReplicate(1.0e-20f));
            for (uint32 v : mVertices)
            {
                UVec4 c = ((Vec3(inVertices[v]) - bounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
                JPH_ASSERT(c.GetX() <= COMPONENT_MASK);
                JPH_ASSERT(c.GetY() <= COMPONENT_MASK);
                JPH_ASSERT(c.GetZ() <= COMPONENT_MASK);

                // Y is split: low bits packed after X, high bits packed after Z
                vertices->mVertexXY = c.GetX() + (c.GetY() << COMPONENT_Y1);
                vertices->mVertexZY = c.GetZ() + ((c.GetY() >> COMPONENT_Y1_BITS) << COMPONENT_Y2);
                ++vertices;
            }

            // Store decompression information
            bounds.mMin.StoreFloat3(&ioHeader->mOffset);
            (bounds.GetSize() / Vec3::sReplicate(COMPONENT_MASK)).StoreFloat3(&ioHeader->mScale);
        }

    private:
        using VertexMap = Array<uint32>;

        uint32 mVertexCount = 0; ///< Number of vertices calculated during PreparePack
        size_t mVerticesStartIdx = 0; ///< Start of the vertices in the output buffer, calculated during PreparePack
        Array<uint32> mVertices; ///< Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence
        VertexMap mVertexMap; ///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices)
    };

    /// This class is used to decode and decompress triangle data packed by the EncodingContext
    class DecodingContext
    {
    private:
        /// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
        JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
        {
            // Get compressed data (gather with stride sizeof(VertexData) == 8)
            UVec4 c1 = UVec4::sGatherInt4<8>(&inVertices->mVertexXY, inIndex);
            UVec4 c2 = UVec4::sGatherInt4<8>(&inVertices->mVertexZY, inIndex);

            // Unpack the x y and z component (y is split across both words, see EVertexXY / EVertexZY)
            UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
            UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
            UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));

            // Convert to float
            outX = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
            outY = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
            outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
        }

        /// Private helper function to unpack 4 triangles from a triangle block
        JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
        {
            // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
            UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));
            UVec4 iv1 = indices.Expand4Byte0();
            UVec4 iv2 = indices.Expand4Byte4();
            UVec4 iv3 = indices.Expand4Byte8();

#ifdef JPH_CPU_BIG_ENDIAN
            // On big endian systems we need to reverse the bytes
            iv1 = iv1.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
            iv2 = iv2.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
            iv3 = iv3.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
#endif

            // Decompress the triangle data
            Unpack(inVertices, iv1, outX1, outY1, outZ1);
            Unpack(inVertices, iv2, outX2, outY2, outZ2);
            Unpack(inVertices, iv3, outX3, outY3, outZ3);
        }

    public:
        /// Constructor, initializes the dequantization transform from the header
        JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) :
            mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
            mOffsetY(Vec4::sReplicate(inHeader->mOffset.y)),
            mOffsetZ(Vec4::sReplicate(inHeader->mOffset.z)),
            mScaleX(Vec4::sReplicate(inHeader->mScale.x)),
            mScaleY(Vec4::sReplicate(inHeader->mScale.y)),
            mScaleZ(Vec4::sReplicate(inHeader->mScale.z))
        {
        }

        /// Unpacks triangles in the format t1v1,t1v2,t1v3, t2v1,t2v2,t2v3, ...
        JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles) const
        {
            JPH_ASSERT(inNumTriangles > 0);
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const VertexData *vertices = header->GetVertexData();
            const TriangleBlock *t = header->GetTriangleBlock();
            const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2); // Round up to whole blocks of 4
            int triangles_left = inNumTriangles;

            do
            {
                // Unpack the vertices for 4 triangles
                Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
                Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

                // Transpose it so we get normal vectors
                Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
                Mat44 v2 = Mat44(v2x, v2y, v2z, Vec4::sZero()).Transposed();
                Mat44 v3 = Mat44(v3x, v3y, v3z, Vec4::sZero()).Transposed();

                // Store triangle data (padding triangles in the last block are skipped via triangles_left)
                for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
                {
                    *outTriangles++ = v1.GetColumn3(i);
                    *outTriangles++ = v2.GetColumn3(i);
                    *outTriangles++ = v3.GetColumn3(i);
                }

                ++t;
            }
            while (t < end);
        }

        /// Tests a ray against the packed triangles
        JPH_INLINE float TestRay(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, const void *inTriangleStart, uint32 inNumTriangles, float inClosest, uint32 &outClosestTriangleIndex) const
        {
            JPH_ASSERT(inNumTriangles > 0);
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const VertexData *vertices = header->GetVertexData();
            const TriangleBlock *t = header->GetTriangleBlock();
            const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);

            Vec4 closest = Vec4::sReplicate(inClosest);
            UVec4 closest_triangle_idx = UVec4::sZero();

            UVec4 start_triangle_idx = UVec4::sZero();
            do
            {
                // Unpack the vertices for 4 triangles
                Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
                Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

                // Perform ray vs triangle test (4 triangles at a time)
                Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

                // Update closest with the smaller values
                UVec4 smaller = Vec4::sLess(distance, closest);
                closest = Vec4::sSelect(closest, distance, smaller);

                // Update triangle index with the smallest values
                UVec4 triangle_idx = start_triangle_idx + UVec4(0, 1, 2, 3);
                closest_triangle_idx = UVec4::sSelect(closest_triangle_idx, triangle_idx, smaller);

                // Next block
                ++t;
                start_triangle_idx += UVec4::sReplicate(4);
            }
            while (t < end);

            // Get the smallest component
            Vec4::sSort4(closest, closest_triangle_idx);
            outClosestTriangleIndex = closest_triangle_idx.GetX();
            return closest.GetX();
        }

        /// Decode a single triangle
        inline void GetTriangle(const void *inTriangleStart, uint32 inTriangleIdx, Vec3 &outV1, Vec3 &outV2, Vec3 &outV3) const
        {
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const VertexData *vertices = header->GetVertexData();
            const TriangleBlock *block = header->GetTriangleBlock() + (inTriangleIdx >> 2);
            uint32 block_triangle_idx = inTriangleIdx & 0b11;

            // Get the 3 vertices
            const VertexData &v1 = vertices[block->mIndices[0][block_triangle_idx]];
            const VertexData &v2 = vertices[block->mIndices[1][block_triangle_idx]];
            const VertexData &v3 = vertices[block->mIndices[2][block_triangle_idx]];

            // Pack the vertices (one vertex per lane so all 3 can be unpacked at once)
            UVec4 c1(v1.mVertexXY, v2.mVertexXY, v3.mVertexXY, 0);
            UVec4 c2(v1.mVertexZY, v2.mVertexZY, v3.mVertexZY, 0);

            // Unpack the x y and z component
            UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
            UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
            UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));

            // Convert to float
            Vec4 vx = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
            Vec4 vy = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
            Vec4 vz = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);

            // Transpose it so we get normal vectors
            Mat44 trans = Mat44(vx, vy, vz, Vec4::sZero()).Transposed();
            outV1 = trans.GetAxisX();
            outV2 = trans.GetAxisY();
            outV3 = trans.GetAxisZ();
        }

        /// Get user data for a triangle
        JPH_INLINE uint32 GetUserData(const void *inTriangleStart, uint32 inTriangleIdx) const
        {
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const uint32 *user_data = header->GetUserData();
            return user_data != nullptr? user_data[inTriangleIdx] : 0;
        }

        /// Get flags for entire triangle block
        JPH_INLINE static void sGetFlags(const void *inTriangleStart, uint32 inNumTriangles, uint8 *outTriangleFlags)
        {
            JPH_ASSERT(inNumTriangles > 0);
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const TriangleBlock *t = header->GetTriangleBlock();
            const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);

            int triangles_left = inNumTriangles;
            do
            {
                for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
                    *outTriangleFlags++ = t->mFlags[i];

                ++t;
            }
            while (t < end);
        }

        /// Get flags for a particular triangle
        JPH_INLINE static uint8 sGetFlags(const void *inTriangleStart, int inTriangleIndex)
        {
            const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
            const TriangleBlock *first_block = header->GetTriangleBlock();
            return first_block[inTriangleIndex >> 2].mFlags[inTriangleIndex & 0b11];
        }

        /// Unpacks triangles and flags, convenience function
        JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles, uint8 *outTriangleFlags) const
        {
            Unpack(inTriangleStart, inNumTriangles, outTriangles);
            sGetFlags(inTriangleStart, inNumTriangles, outTriangleFlags);
        }

    private:
        // Dequantization transform, replicated per lane for SIMD unpacking
        Vec4 mOffsetX;
        Vec4 mOffsetY;
        Vec4 mOffsetZ;
        Vec4 mScaleX;
        Vec4 mScaleY;
        Vec4 mScaleZ;
    };
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,109 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Construct a string that lists the most important configuration settings
inline const char *GetConfigurationString()
{
    // The string is assembled entirely at compile time via string literal concatenation,
    // so this just returns a pointer to a static string.
    return JPH_IF_SINGLE_PRECISION_ELSE("Single", "Double") " precision "

    // CPU architecture
#if defined(JPH_CPU_X86)
    "x86 "
#elif defined(JPH_CPU_ARM)
    "ARM "
#elif defined(JPH_CPU_RISCV)
    "RISC-V "
#elif defined(JPH_CPU_PPC)
    "PowerPC "
    #ifdef JPH_CPU_BIG_ENDIAN
        "(Big Endian) "
    #else
        "(Little Endian) "
    #endif
#elif defined(JPH_CPU_LOONGARCH)
    "LoongArch "
#elif defined(JPH_CPU_E2K)
    "E2K "
#elif defined(JPH_CPU_WASM)
    "WASM "
#else
    #error Unknown CPU architecture
#endif

    // Address size
#if JPH_CPU_ADDRESS_BITS == 64
    "64-bit "
#elif JPH_CPU_ADDRESS_BITS == 32
    "32-bit "
#endif

    // Instruction sets enabled at compile time
    "with instructions: "
#ifdef JPH_USE_NEON
    "NEON "
#endif
#ifdef JPH_USE_SSE
    "SSE2 "
#endif
#ifdef JPH_USE_SSE4_1
    "SSE4.1 "
#endif
#ifdef JPH_USE_SSE4_2
    "SSE4.2 "
#endif
#ifdef JPH_USE_AVX
    "AVX "
#endif
#ifdef JPH_USE_AVX2
    "AVX2 "
#endif
#ifdef JPH_USE_AVX512
    "AVX512 "
#endif
#ifdef JPH_USE_F16C
    "F16C "
#endif
#ifdef JPH_USE_LZCNT
    "LZCNT "
#endif
#ifdef JPH_USE_TZCNT
    "TZCNT "
#endif
#ifdef JPH_USE_FMADD
    "FMADD "
#endif

    // Build options
#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
    "(Cross Platform Deterministic) "
#endif
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
    "(FP Exceptions) "
#endif
#ifdef JPH_DEBUG_RENDERER
    "(Debug Renderer) "
#endif
#ifdef JPH_PROFILE_ENABLED
    "(Profile) "
#endif
#if defined(JPH_OBJECT_LAYER_BITS) && JPH_OBJECT_LAYER_BITS == 32
    "(32-bit ObjectLayer) "
#else
    "(16-bit ObjectLayer) "
#endif
#ifdef JPH_ENABLE_ASSERTS
    "(Assertions) "
#endif
#ifdef JPH_OBJECT_STREAM
    "(ObjectStream) "
#endif
#ifdef JPH_DEBUG
    "(Debug) "
#endif
#if defined(__cpp_rtti) && __cpp_rtti
    "(C++ RTTI) "
#endif
#if defined(__cpp_exceptions) && __cpp_exceptions
    "(C++ Exceptions) "
#endif
    ;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,94 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2022 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#ifdef JPH_USE_NEON

// Constructing NEON values
// MSVC initializes NEON vector types from two 64-bit halves, so the lane values need to be packed manually;
// other compilers accept a plain per-lane initializer list.
#ifdef JPH_COMPILER_MSVC
	#define JPH_NEON_INT32x4(v1, v2, v3, v4) { int64_t(v1) + (int64_t(v2) << 32), int64_t(v3) + (int64_t(v4) << 32) }
	#define JPH_NEON_UINT32x4(v1, v2, v3, v4) { uint64_t(v1) + (uint64_t(v2) << 32), uint64_t(v3) + (uint64_t(v4) << 32) }
	#define JPH_NEON_INT8x16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) { int64_t(v1) + (int64_t(v2) << 8) + (int64_t(v3) << 16) + (int64_t(v4) << 24) + (int64_t(v5) << 32) + (int64_t(v6) << 40) + (int64_t(v7) << 48) + (int64_t(v8) << 56), int64_t(v9) + (int64_t(v10) << 8) + (int64_t(v11) << 16) + (int64_t(v12) << 24) + (int64_t(v13) << 32) + (int64_t(v14) << 40) + (int64_t(v15) << 48) + (int64_t(v16) << 56) }
	#define JPH_NEON_UINT8x16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) { uint64_t(v1) + (uint64_t(v2) << 8) + (uint64_t(v3) << 16) + (uint64_t(v4) << 24) + (uint64_t(v5) << 32) + (uint64_t(v6) << 40) + (uint64_t(v7) << 48) + (uint64_t(v8) << 56), uint64_t(v9) + (uint64_t(v10) << 8) + (uint64_t(v11) << 16) + (uint64_t(v12) << 24) + (uint64_t(v13) << 32) + (uint64_t(v14) << 40) + (uint64_t(v15) << 48) + (uint64_t(v16) << 56) }
#else
	#define JPH_NEON_INT32x4(v1, v2, v3, v4) { v1, v2, v3, v4 }
	#define JPH_NEON_UINT32x4(v1, v2, v3, v4) { v1, v2, v3, v4 }
	#define JPH_NEON_INT8x16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) { v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 }
	#define JPH_NEON_UINT8x16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) { v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 }
#endif

// MSVC and GCC prior to version 12 don't define __builtin_shufflevector
#if defined(JPH_COMPILER_MSVC) || (defined(JPH_COMPILER_GCC) && __GNUC__ < 12)

JPH_NAMESPACE_BEGIN

// Generic shuffle vector template. Indices 0..3 select a lane from inV1, indices 4..7 a lane from inV2.
// This generic version extracts/inserts lanes one at a time; common patterns are specialized below.
template <unsigned I1, unsigned I2, unsigned I3, unsigned I4>
JPH_INLINE float32x4_t NeonShuffleFloat32x4(float32x4_t inV1, float32x4_t inV2)
{
	float32x4_t ret;
	ret = vmovq_n_f32(vgetq_lane_f32(I1 >= 4? inV2 : inV1, I1 & 0b11));
	ret = vsetq_lane_f32(vgetq_lane_f32(I2 >= 4? inV2 : inV1, I2 & 0b11), ret, 1);
	ret = vsetq_lane_f32(vgetq_lane_f32(I3 >= 4? inV2 : inV1, I3 & 0b11), ret, 2);
	ret = vsetq_lane_f32(vgetq_lane_f32(I4 >= 4? inV2 : inV1, I4 & 0b11), ret, 3);
	return ret;
}

// Specializations that map frequently used shuffles onto cheaper NEON instruction sequences

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<0, 1, 2, 2>(float32x4_t inV1, float32x4_t inV2)
{
	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 0));
}

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<0, 1, 3, 3>(float32x4_t inV1, float32x4_t inV2)
{
	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 1));
}

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<0, 1, 2, 3>(float32x4_t inV1, float32x4_t inV2)
{
	// Identity shuffle
	return inV1;
}

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<1, 0, 3, 2>(float32x4_t inV1, float32x4_t inV2)
{
	return vcombine_f32(vrev64_f32(vget_low_f32(inV1)), vrev64_f32(vget_high_f32(inV1)));
}

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<2, 2, 1, 0>(float32x4_t inV1, float32x4_t inV2)
{
	return vcombine_f32(vdup_lane_f32(vget_high_f32(inV1), 0), vrev64_f32(vget_low_f32(inV1)));
}

template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<2, 3, 0, 1>(float32x4_t inV1, float32x4_t inV2)
{
	return vcombine_f32(vget_high_f32(inV1), vget_low_f32(inV1));
}

// Used extensively by cross product
template <>
JPH_INLINE float32x4_t NeonShuffleFloat32x4<1, 2, 0, 0>(float32x4_t inV1, float32x4_t inV2)
{
	// Byte-level table lookup: select bytes of lanes 1, 2, 0, 0
	static uint8x16_t table = JPH_NEON_UINT8x16(0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03);
	return vreinterpretq_f32_u8(vqtbl1q_u8(vreinterpretq_u8_f32(inV1), table));
}

// Shuffle a vector
#define JPH_NEON_SHUFFLE_F32x4(vec1, vec2, index1, index2, index3, index4) NeonShuffleFloat32x4<index1, index2, index3, index4>(vec1, vec2)
#define JPH_NEON_SHUFFLE_U32x4(vec1, vec2, index1, index2, index3, index4) vreinterpretq_u32_f32((NeonShuffleFloat32x4<index1, index2, index3, index4>(vreinterpretq_f32_u32(vec1), vreinterpretq_f32_u32(vec2))))

JPH_NAMESPACE_END

#else

// Shuffle a vector
#define JPH_NEON_SHUFFLE_F32x4(vec1, vec2, index1, index2, index3, index4) __builtin_shufflevector(vec1, vec2, index1, index2, index3, index4)
#define JPH_NEON_SHUFFLE_U32x4(vec1, vec2, index1, index2, index3, index4) __builtin_shufflevector(vec1, vec2, index1, index2, index3, index4)

#endif

#endif // JPH_USE_NEON

View File

@@ -0,0 +1,713 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/STLAllocator.h>
#include <Jolt/Core/HashCombine.h>
#ifdef JPH_USE_STD_VECTOR
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <vector>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
template <class T, class Allocator = STLAllocator<T>> using Array = std::vector<T, Allocator>;
JPH_NAMESPACE_END
#else
JPH_NAMESPACE_BEGIN
/// Simple replacement for std::vector
///
/// Major differences:
/// - Memory is not initialized to zero (this was causing a lot of page faults when deserializing large MeshShapes / HeightFieldShapes)
/// - Iterators are simple pointers (for now)
/// - No exception safety
/// - No specialization like std::vector<bool> has
/// - Not all functions have been implemented
template <class T, class Allocator = STLAllocator<T>>
class [[nodiscard]] Array : private Allocator
{
public:
using value_type = T;
using allocator_type = Allocator;
using size_type = size_t;
using difference_type = typename Allocator::difference_type;
using pointer = T *;
using const_pointer = const T *;
using reference = T &;
using const_reference = const T &;
using const_iterator = const T *;
using iterator = T *;
/// An iterator that traverses the array in reverse order
class rev_it
{
public:
    /// Constructor (a default constructed iterator is singular, mValue is uninitialized)
    rev_it() = default;
    explicit rev_it(T *inValue) : mValue(inValue) { }

    /// Copying
    rev_it(const rev_it &) = default;
    rev_it & operator = (const rev_it &) = default;

    /// Comparison
    bool operator == (const rev_it &inRHS) const { return mValue == inRHS.mValue; }
    bool operator != (const rev_it &inRHS) const { return mValue != inRHS.mValue; }

    /// Arithmetics (advancing the reverse iterator moves the underlying pointer backwards)
    rev_it & operator ++ () { --mValue; return *this; }
    rev_it operator ++ (int) { return rev_it(mValue--); }
    rev_it & operator -- () { ++mValue; return *this; }
    rev_it operator -- (int) { return rev_it(mValue++); }

    rev_it operator + (int inValue) const { return rev_it(mValue - inValue); }
    rev_it operator - (int inValue) const { return rev_it(mValue + inValue); }

    rev_it & operator += (int inValue) { mValue -= inValue; return *this; }
    rev_it & operator -= (int inValue) { mValue += inValue; return *this; }

    /// Access
    T & operator * () const { return *mValue; }
    // operator -> must return a pointer (or an object that itself has operator ->);
    // the previous version returned T & which made expressions like it->member ill-formed
    T * operator -> () const { return mValue; }

private:
    // NOTE(review): crev_it's converting constructor reads this private member directly;
    // that requires friendship (or equivalent access) — verify against the full class definition.
    T * mValue;
};
/// A const iterator that traverses the array in reverse order
class crev_it
{
public:
    /// Constructor (a default constructed iterator is singular, mValue is uninitialized)
    crev_it() = default;
    explicit crev_it(const T *inValue) : mValue(inValue) { }

    /// Copying
    crev_it(const crev_it &) = default;
    // NOTE(review): these two members read rev_it's private mValue directly, which needs
    // friendship from rev_it — confirm against the full class definition.
    explicit crev_it(const rev_it &inValue) : mValue(inValue.mValue) { }
    crev_it & operator = (const crev_it &) = default;
    crev_it & operator = (const rev_it &inRHS) { mValue = inRHS.mValue; return *this; }

    /// Comparison
    bool operator == (const crev_it &inRHS) const { return mValue == inRHS.mValue; }
    bool operator != (const crev_it &inRHS) const { return mValue != inRHS.mValue; }

    /// Arithmetics (advancing the reverse iterator moves the underlying pointer backwards)
    crev_it & operator ++ () { --mValue; return *this; }
    crev_it operator ++ (int) { return crev_it(mValue--); }
    crev_it & operator -- () { ++mValue; return *this; }
    crev_it operator -- (int) { return crev_it(mValue++); }

    crev_it operator + (int inValue) const { return crev_it(mValue - inValue); }
    crev_it operator - (int inValue) const { return crev_it(mValue + inValue); }

    crev_it & operator += (int inValue) { mValue -= inValue; return *this; }
    crev_it & operator -= (int inValue) { mValue += inValue; return *this; }

    /// Access
    const T & operator * () const { return *mValue; }
    // operator -> must return a pointer (or an object that itself has operator ->);
    // the previous version returned const T & which made expressions like it->member ill-formed
    const T * operator -> () const { return mValue; }

private:
    const T * mValue;
};
using reverse_iterator = rev_it;
using const_reverse_iterator = crev_it;
private:
/// Move elements from one location to another
inline void move(pointer inDestination, pointer inSource, size_type inCount)
{
if constexpr (std::is_trivially_copyable<T>())
memmove(inDestination, inSource, inCount * sizeof(T));
else
{
if (inDestination < inSource)
{
for (T *destination_end = inDestination + inCount; inDestination < destination_end; ++inDestination, ++inSource)
{
new (inDestination) T(std::move(*inSource));
inSource->~T();
}
}
else
{
for (T *destination = inDestination + inCount - 1, *source = inSource + inCount - 1; destination >= inDestination; --destination, --source)
{
new (destination) T(std::move(*source));
source->~T();
}
}
}
}
/// Reallocate the data block to inNewCapacity
inline void reallocate(size_type inNewCapacity)
{
JPH_ASSERT(inNewCapacity > 0 && inNewCapacity >= mSize);
pointer ptr;
if constexpr (AllocatorHasReallocate<Allocator>::sValue)
{
// Reallocate data block
ptr = get_allocator().reallocate(mElements, mCapacity, inNewCapacity);
}
else
{
// Copy data to a new location
ptr = get_allocator().allocate(inNewCapacity);
if (mElements != nullptr)
{
move(ptr, mElements, mSize);
get_allocator().deallocate(mElements, mCapacity);
}
}
mElements = ptr;
mCapacity = inNewCapacity;
}
/// Destruct elements [inStart, inEnd - 1]
inline void destruct(size_type inStart, size_type inEnd)
{
if constexpr (!std::is_trivially_destructible<T>())
if (inStart < inEnd)
for (T *element = mElements + inStart, *element_end = mElements + inEnd; element < element_end; ++element)
element->~T();
}
public:
/// Reserve array space
inline void reserve(size_type inNewSize)
{
if (mCapacity < inNewSize)
reallocate(inNewSize);
}
/// Resize array to new length
inline void resize(size_type inNewSize)
{
destruct(inNewSize, mSize);
reserve(inNewSize);
if constexpr (!std::is_trivially_constructible<T>())
for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element)
new (element) T;
mSize = inNewSize;
}
/// Resize array to new length and initialize all elements with inValue
inline void resize(size_type inNewSize, const T &inValue)
{
JPH_ASSERT(&inValue < mElements || &inValue >= mElements + mSize, "Can't pass an element from the array to resize");
destruct(inNewSize, mSize);
reserve(inNewSize);
for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element)
new (element) T(inValue);
mSize = inNewSize;
}
/// Destruct all elements and set length to zero
inline void clear()
{
destruct(0, mSize);
mSize = 0;
}
private:
/// Grow the array by at least inAmount elements
inline void grow(size_type inAmount = 1)
{
size_type min_size = mSize + inAmount;
if (min_size > mCapacity)
{
size_type new_capacity = max(min_size, mCapacity * 2);
reserve(new_capacity);
}
}
/// Free memory
inline void free()
{
get_allocator().deallocate(mElements, mCapacity);
mElements = nullptr;
mCapacity = 0;
}
/// Destroy all elements and free memory
inline void destroy()
{
if (mElements != nullptr)
{
clear();
free();
}
}
public:
/// Replace the contents of this array with inBegin .. inEnd
template <class Iterator>
inline void assign(Iterator inBegin, Iterator inEnd)
{
clear();
reserve(size_type(std::distance(inBegin, inEnd)));
for (Iterator element = inBegin; element != inEnd; ++element)
new (&mElements[mSize++]) T(*element);
}
/// Replace the contents of this array with inList
inline void assign(std::initializer_list<T> inList)
{
clear();
reserve(size_type(inList.size()));
for (const T &v : inList)
new (&mElements[mSize++]) T(v);
}
/// Default constructor
Array() = default;
/// Constructor with allocator
explicit inline Array(const Allocator &inAllocator) :
Allocator(inAllocator)
{
}
/// Constructor with length
explicit inline Array(size_type inLength, const Allocator &inAllocator = { }) :
Allocator(inAllocator)
{
resize(inLength);
}
/// Constructor with length and value
inline Array(size_type inLength, const T &inValue, const Allocator &inAllocator = { }) :
Allocator(inAllocator)
{
resize(inLength, inValue);
}
/// Constructor from initializer list
inline Array(std::initializer_list<T> inList, const Allocator &inAllocator = { }) :
Allocator(inAllocator)
{
assign(inList);
}
/// Constructor from iterator
inline Array(const_iterator inBegin, const_iterator inEnd, const Allocator &inAllocator = { }) :
Allocator(inAllocator)
{
assign(inBegin, inEnd);
}
/// Copy constructor
inline Array(const Array<T, Allocator> &inRHS) :
Allocator(inRHS.get_allocator())
{
assign(inRHS.begin(), inRHS.end());
}
/// Move constructor
inline Array(Array<T, Allocator> &&inRHS) noexcept :
Allocator(std::move(inRHS.get_allocator())),
mSize(inRHS.mSize),
mCapacity(inRHS.mCapacity),
mElements(inRHS.mElements)
{
inRHS.mSize = 0;
inRHS.mCapacity = 0;
inRHS.mElements = nullptr;
}
/// Destruct all elements
inline ~Array()
{
destroy();
}
/// Get the allocator
inline Allocator & get_allocator()
{
return *this;
}
inline const Allocator &get_allocator() const
{
return *this;
}
/// Add element to the back of the array
inline void push_back(const T &inValue)
{
JPH_ASSERT(&inValue < mElements || &inValue >= mElements + mSize, "Can't pass an element from the array to push_back");
grow();
T *element = mElements + mSize++;
new (element) T(inValue);
}
inline void push_back(T &&inValue)
{
grow();
T *element = mElements + mSize++;
new (element) T(std::move(inValue));
}
/// Construct element at the back of the array
template <class... A>
inline T & emplace_back(A &&... inValue)
{
grow();
T *element = mElements + mSize++;
new (element) T(std::forward<A>(inValue)...);
return *element;
}
/// Remove element from the back of the array
inline void pop_back()
{
JPH_ASSERT(mSize > 0);
mElements[--mSize].~T();
}
/// Returns true if there are no elements in the array
inline bool empty() const
{
return mSize == 0;
}
/// Returns amount of elements in the array
inline size_type size() const
{
return mSize;
}
/// Returns maximum amount of elements the array can hold
inline size_type capacity() const
{
return mCapacity;
}
/// Reduce the capacity of the array to match its size
void shrink_to_fit()
{
if (mElements != nullptr)
{
if (mSize == 0)
free();
else if (mCapacity > mSize)
reallocate(mSize);
}
}
/// Swap the contents of two arrays
void swap(Array<T, Allocator> &inRHS) noexcept
{
std::swap(get_allocator(), inRHS.get_allocator());
std::swap(mSize, inRHS.mSize);
std::swap(mCapacity, inRHS.mCapacity);
std::swap(mElements, inRHS.mElements);
}
template <class Iterator>
void insert(const_iterator inPos, Iterator inBegin, Iterator inEnd)
{
size_type num_elements = size_type(std::distance(inBegin, inEnd));
if (num_elements > 0)
{
// After grow() inPos may be invalid
size_type first_element = inPos - mElements;
grow(num_elements);
T *element_begin = mElements + first_element;
T *element_end = element_begin + num_elements;
move(element_end, element_begin, mSize - first_element);
for (T *element = element_begin; element < element_end; ++element, ++inBegin)
new (element) T(*inBegin);
mSize += num_elements;
}
}
void insert(const_iterator inPos, const T &inValue)
{
JPH_ASSERT(&inValue < mElements || &inValue >= mElements + mSize, "Can't pass an element from the array to insert");
// After grow() inPos may be invalid
size_type first_element = inPos - mElements;
grow();
T *element = mElements + first_element;
move(element + 1, element, mSize - first_element);
new (element) T(inValue);
mSize++;
}
/// Remove one element from the array
iterator erase(const_iterator inIter)
{
size_type p = size_type(inIter - begin());
JPH_ASSERT(p < mSize);
mElements[p].~T();
if (p + 1 < mSize)
move(mElements + p, mElements + p + 1, mSize - p - 1);
--mSize;
return const_cast<iterator>(inIter);
}
/// Remove multiple element from the array
iterator erase(const_iterator inBegin, const_iterator inEnd)
{
size_type p = size_type(inBegin - begin());
size_type n = size_type(inEnd - inBegin);
JPH_ASSERT(inEnd <= end());
destruct(p, p + n);
if (p + n < mSize)
move(mElements + p, mElements + p + n, mSize - p - n);
mSize -= n;
return const_cast<iterator>(inBegin);
}
/// Iterators
inline const_iterator begin() const
{
return mElements;
}
inline const_iterator end() const
{
return mElements + mSize;
}
inline crev_it rbegin() const
{
return crev_it(mElements + mSize - 1);
}
inline crev_it rend() const
{
return crev_it(mElements - 1);
}
inline const_iterator cbegin() const
{
return begin();
}
inline const_iterator cend() const
{
return end();
}
inline crev_it crbegin() const
{
return rbegin();
}
inline crev_it crend() const
{
return rend();
}
inline iterator begin()
{
return mElements;
}
inline iterator end()
{
return mElements + mSize;
}
inline rev_it rbegin()
{
return rev_it(mElements + mSize - 1);
}
inline rev_it rend()
{
return rev_it(mElements - 1);
}
inline const T * data() const
{
return mElements;
}
inline T * data()
{
return mElements;
}
/// Access element
inline T & operator [] (size_type inIdx)
{
JPH_ASSERT(inIdx < mSize);
return mElements[inIdx];
}
inline const T & operator [] (size_type inIdx) const
{
JPH_ASSERT(inIdx < mSize);
return mElements[inIdx];
}
/// Access element
inline T & at(size_type inIdx)
{
JPH_ASSERT(inIdx < mSize);
return mElements[inIdx];
}
inline const T & at(size_type inIdx) const
{
JPH_ASSERT(inIdx < mSize);
return mElements[inIdx];
}
/// First element in the array
inline const T & front() const
{
JPH_ASSERT(mSize > 0);
return mElements[0];
}
inline T & front()
{
JPH_ASSERT(mSize > 0);
return mElements[0];
}
/// Last element in the array
inline const T & back() const
{
JPH_ASSERT(mSize > 0);
return mElements[mSize - 1];
}
inline T & back()
{
JPH_ASSERT(mSize > 0);
return mElements[mSize - 1];
}
/// Assignment operator
Array<T, Allocator> & operator = (const Array<T, Allocator> &inRHS)
{
if (static_cast<const void *>(this) != static_cast<const void *>(&inRHS))
assign(inRHS.begin(), inRHS.end());
return *this;
}
/// Assignment move operator
Array<T, Allocator> & operator = (Array<T, Allocator> &&inRHS) noexcept
{
if (static_cast<const void *>(this) != static_cast<const void *>(&inRHS))
{
destroy();
get_allocator() = std::move(inRHS.get_allocator());
mSize = inRHS.mSize;
mCapacity = inRHS.mCapacity;
mElements = inRHS.mElements;
inRHS.mSize = 0;
inRHS.mCapacity = 0;
inRHS.mElements = nullptr;
}
return *this;
}
/// Assignment operator
Array<T, Allocator> & operator = (std::initializer_list<T> inRHS)
{
assign(inRHS);
return *this;
}
/// Comparing arrays
bool operator == (const Array<T, Allocator> &inRHS) const
{
if (mSize != inRHS.mSize)
return false;
for (size_type i = 0; i < mSize; ++i)
if (!(mElements[i] == inRHS.mElements[i]))
return false;
return true;
}
bool operator != (const Array<T, Allocator> &inRHS) const
{
if (mSize != inRHS.mSize)
return true;
for (size_type i = 0; i < mSize; ++i)
if (mElements[i] != inRHS.mElements[i])
return true;
return false;
}
/// Get hash for this array
uint64 GetHash() const
{
// Hash length first
uint64 ret = Hash<uint32> { } (uint32(size()));
// Then hash elements
for (const T *element = mElements, *element_end = mElements + mSize; element < element_end; ++element)
HashCombine(ret, *element);
return ret;
}
private:
size_type mSize = 0;
size_type mCapacity = 0;
T * mElements = nullptr;
};
JPH_NAMESPACE_END
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat")
namespace std
{
	/// Hash support so that JPH::Array can be used as a key in std::unordered_map and friends
	template <class T, class Allocator>
	struct hash<JPH::Array<T, Allocator>>
	{
		size_t operator () (const JPH::Array<T, Allocator> &inRHS) const
		{
			// Delegate to the array's own hash and narrow it to size_t
			return static_cast<std::size_t>(inRHS.GetHash());
		}
	};
}
JPH_SUPPRESS_WARNING_POP
#endif // JPH_USE_STD_VECTOR

View File

@@ -0,0 +1,44 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <atomic>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
// Things we're using from STL
using std::atomic;
using std::memory_order;
using std::memory_order_relaxed;
using std::memory_order_acquire;
using std::memory_order_release;
using std::memory_order_acq_rel;
using std::memory_order_seq_cst;
/// Atomically compute the min(ioAtomic, inValue) and store it in ioAtomic, returns true if value was updated
template <class T>
bool AtomicMin(std::atomic<T> &ioAtomic, const T inValue, const std::memory_order inMemoryOrder = std::memory_order_seq_cst)
{
	// Retry as long as the stored value is bigger than ours; a failed CAS (including a
	// spurious failure of compare_exchange_weak) reloads the current value into
	// 'observed', so the loop condition always tests a fresh snapshot.
	for (T observed = ioAtomic.load(std::memory_order_relaxed); observed > inValue; )
		if (ioAtomic.compare_exchange_weak(observed, inValue, inMemoryOrder))
			return true;

	// Stored value was already <= inValue
	return false;
}
/// Atomically compute the max(ioAtomic, inValue) and store it in ioAtomic, returns true if value was updated
template <class T>
bool AtomicMax(std::atomic<T> &ioAtomic, const T inValue, const std::memory_order inMemoryOrder = std::memory_order_seq_cst)
{
	// Retry as long as the stored value is smaller than ours; a failed CAS (including a
	// spurious failure of compare_exchange_weak) reloads the current value into
	// 'observed', so the loop condition always tests a fresh snapshot.
	for (T observed = ioAtomic.load(std::memory_order_relaxed); observed < inValue; )
		if (ioAtomic.compare_exchange_weak(observed, inValue, inMemoryOrder))
			return true;

	// Stored value was already >= inValue
	return false;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,96 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Push a new element into a binary max-heap.
/// [inBegin, inEnd - 1) must be a valid heap. Element inEnd - 1 will be inserted into the heap. The heap will be [inBegin, inEnd) after this call.
/// inPred is a function that returns true if the first element is less than or equal to the second element.
/// See: https://en.wikipedia.org/wiki/Binary_heap
template <typename Iterator, typename Pred>
void BinaryHeapPush(Iterator inBegin, Iterator inEnd, Pred inPred)
{
	using diff_t = typename std::iterator_traits<Iterator>::difference_type;

	// The freshly added element sits at the back of the range
	diff_t child = std::distance(inBegin, inEnd) - 1;

	// Sift it up until its parent is no longer smaller (root has no parent)
	while (child > 0)
	{
		diff_t parent = (child - 1) / 2;

		// Heap property already holds here, so it holds everywhere: done
		if (!inPred(*(inBegin + parent), *(inBegin + child)))
			break;

		std::swap(*(inBegin + parent), *(inBegin + child));
		child = parent;
	}
}
/// Pop an element from a binary max-heap.
/// [inBegin, inEnd) must be a valid heap. The largest element will be removed from the heap. The heap will be [inBegin, inEnd - 1) after this call.
/// inPred is a function that returns true if the first element is less than or equal to the second element.
/// See: https://en.wikipedia.org/wiki/Binary_heap
template <typename Iterator, typename Pred>
void BinaryHeapPop(Iterator inBegin, Iterator inEnd, Pred inPred)
{
	using diff_t = typename std::iterator_traits<Iterator>::difference_type;

	// Move the root (largest element) to the back; it is no longer part of the heap
	std::swap(*(inEnd - 1), *inBegin);

	// Remaining heap size
	diff_t count = std::distance(inBegin, inEnd) - 1;

	// Sift the new root down until both of its children are smaller
	diff_t parent = 0;
	for (;;)
	{
		// First child; if it is past the end, the second child is too and we're done
		diff_t left = 2 * parent + 1;
		if (left >= count)
			break;
		diff_t right = left + 1;

		// Select the biggest of parent / left child / right child
		diff_t biggest = parent;
		if (inPred(*(inBegin + biggest), *(inBegin + left)))
			biggest = left;
		if (right < count && inPred(*(inBegin + biggest), *(inBegin + right)))
			biggest = right;

		// If the parent already is the biggest, the heap property is restored
		if (biggest == parent)
			break;

		std::swap(*(inBegin + parent), *(inBegin + biggest));
		parent = biggest;
	}
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,74 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/STLAlignedAllocator.h>
JPH_NAMESPACE_BEGIN
/// Underlying data type for ByteBuffer
using ByteBufferVector = Array<uint8, STLAlignedAllocator<uint8, JPH_CACHE_LINE_SIZE>>;
/// Simple byte buffer, aligned to a cache line
class ByteBuffer : public ByteBufferVector
{
public:
/// Align the size to a multiple of inSize, returns the length after alignment
size_t Align(size_t inSize)
{
// Assert power of 2
JPH_ASSERT(IsPowerOf2(inSize));
// Calculate new size and resize buffer
size_t s = AlignUp(size(), inSize);
resize(s, 0);
return s;
}
/// Allocate block of data of inSize elements and return the pointer
template <class Type>
Type * Allocate(size_t inSize = 1)
{
// Reserve space
size_t s = size();
resize(s + inSize * sizeof(Type));
// Get data pointer
Type *data = reinterpret_cast<Type *>(&at(s));
// Construct elements
for (Type *d = data, *d_end = data + inSize; d < d_end; ++d)
new (d) Type;
// Return pointer
return data;
}
/// Append inData to the buffer
template <class Type>
void AppendVector(const Array<Type> &inData)
{
size_t size = inData.size() * sizeof(Type);
uint8 *data = Allocate<uint8>(size);
memcpy(data, &inData[0], size);
}
/// Get object at inPosition (an offset in bytes)
template <class Type>
const Type * Get(size_t inPosition) const
{
return reinterpret_cast<const Type *>(&at(inPosition));
}
/// Get object at inPosition (an offset in bytes)
template <class Type>
Type * Get(size_t inPosition)
{
return reinterpret_cast<Type *>(&at(inPosition));
}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,38 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/Color.h>
JPH_NAMESPACE_BEGIN
// Predefined colors (RGB components; alpha defaults to 255, i.e. fully opaque)
const Color Color::sBlack(0, 0, 0);
const Color Color::sDarkRed(128, 0, 0);
const Color Color::sRed(255, 0, 0);
const Color Color::sDarkGreen(0, 128, 0);
const Color Color::sGreen(0, 255, 0);
const Color Color::sDarkBlue(0, 0, 128);
const Color Color::sBlue(0, 0, 255);
const Color Color::sYellow(255, 255, 0);
const Color Color::sPurple(255, 0, 255);
const Color Color::sCyan(0, 255, 255);
const Color Color::sOrange(255, 128, 0);
const Color Color::sDarkOrange(128, 64, 0);
const Color Color::sGrey(128, 128, 128);
const Color Color::sLightGrey(192, 192, 192);
const Color Color::sWhite(255, 255, 255);

// Table of visually distinct colors used by sGetDistinctColor.
// Generated by: http://phrogz.net/css/distinct-colors.html (this algo: https://en.wikipedia.org/wiki/Color_difference#CMC_l:c_.281984.29)
static constexpr Color sColors[] = { Color(255, 0, 0), Color(204, 143, 102), Color(226, 242, 0), Color(41, 166, 124), Color(0, 170, 255), Color(69, 38, 153), Color(153, 38, 130), Color(229, 57, 80), Color(204, 0, 0), Color(255, 170, 0), Color(85, 128, 0), Color(64, 255, 217), Color(0, 75, 140), Color(161, 115, 230), Color(242, 61, 157), Color(178, 101, 89), Color(140, 94, 0), Color(181, 217, 108), Color(64, 242, 255), Color(77, 117, 153), Color(157, 61, 242), Color(140, 0, 56), Color(127, 57, 32), Color(204, 173, 51), Color(64, 255, 64), Color(38, 145, 153), Color(0, 102, 255), Color(242, 0, 226), Color(153, 77, 107), Color(229, 92, 0), Color(140, 126, 70), Color(0, 179, 71), Color(0, 194, 242), Color(27, 0, 204), Color(230, 115, 222), Color(127, 0, 17) };
/// Return a visually distinct color for inIndex; wraps around when inIndex exceeds the table size
Color Color::sGetDistinctColor(int inIndex)
{
	JPH_ASSERT(inIndex >= 0);

	// Compute the element count from the array element itself rather than assuming
	// sizeof(uint32) == sizeof(Color), so this doesn't silently break if Color ever changes size
	return sColors[inIndex % int(sizeof(sColors) / sizeof(sColors[0]))];
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,84 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
class Color;
/// Type to use for passing arguments to a function
using ColorArg = Color;
/// Class that holds an RGBA color with 8-bits per component
class JPH_EXPORT_GCC_BUG_WORKAROUND [[nodiscard]] Color
{
public:
	/// Constructors
	Color() = default; ///< Intentionally not initialized for performance reasons
	Color(const Color &inRHS) = default;
	Color & operator = (const Color &inRHS) = default;
	explicit constexpr Color(uint32 inColor) : mU32(inColor) { } ///< Construct from the combined 32-bit value (same byte layout as the union below)
	constexpr Color(uint8 inRed, uint8 inGreen, uint8 inBlue, uint8 inAlpha = 255) : r(inRed), g(inGreen), b(inBlue), a(inAlpha) { } ///< Construct from components, alpha defaults to fully opaque
	constexpr Color(ColorArg inRHS, uint8 inAlpha) : r(inRHS.r), g(inRHS.g), b(inRHS.b), a(inAlpha) { } ///< Copy RGB from inRHS but override its alpha

	/// Comparison (compares all 4 components at once through the combined 32-bit value)
	inline bool operator == (ColorArg inRHS) const { return mU32 == inRHS.mU32; }
	inline bool operator != (ColorArg inRHS) const { return mU32 != inRHS.mU32; }

	/// Convert to uint32
	uint32 GetUInt32() const { return mU32; }

	/// Element access, 0 = red, 1 = green, 2 = blue, 3 = alpha
	inline uint8 operator () (uint inIdx) const { JPH_ASSERT(inIdx < 4); return (&r)[inIdx]; }
	inline uint8 & operator () (uint inIdx) { JPH_ASSERT(inIdx < 4); return (&r)[inIdx]; }

	/// Multiply two colors component wise (shift down by 8 bits approximates dividing by 255)
	inline Color operator * (const Color &inRHS) const { return Color(uint8((uint32(r) * inRHS.r) >> 8), uint8((uint32(g) * inRHS.g) >> 8), uint8((uint32(b) * inRHS.b) >> 8), uint8((uint32(a) * inRHS.a) >> 8)); }

	/// Multiply color with intensity in the range [0, 1] (alpha is left untouched)
	inline Color operator * (float inIntensity) const { return Color(uint8(r * inIntensity), uint8(g * inIntensity), uint8(b * inIntensity), a); }

	/// Convert to Vec4 with range [0, 1]
	inline Vec4 ToVec4() const { return Vec4(r, g, b, a) / 255.0f; }

	/// Get grayscale intensity of color (weighted sum of RGB, weights sum to 256; alpha is ignored)
	inline uint8 GetIntensity() const { return uint8((uint32(r) * 54 + g * 183 + b * 19) >> 8); }

	/// Get a visually distinct color
	static Color sGetDistinctColor(int inIndex);

	/// Predefined colors
	static const Color sBlack;
	static const Color sDarkRed;
	static const Color sRed;
	static const Color sDarkGreen;
	static const Color sGreen;
	static const Color sDarkBlue;
	static const Color sBlue;
	static const Color sYellow;
	static const Color sPurple;
	static const Color sCyan;
	static const Color sOrange;
	static const Color sDarkOrange;
	static const Color sGrey;
	static const Color sLightGrey;
	static const Color sWhite;

	// Components are aliased with the combined 32-bit value (r occupies the lowest-address byte)
	union
	{
		uint32 mU32; ///< Combined value for red, green, blue and alpha
		struct
		{
			uint8 r; ///< Red channel
			uint8 g; ///< Green channel
			uint8 b; ///< Blue channel
			uint8 a; ///< Alpha channel
		};
	};
};

// Color must stay trivial: it is memcpy'd / stored in POD buffers and left uninitialized by the default constructor
static_assert(std::is_trivial<Color>(), "Is supposed to be a trivial type!");
JPH_NAMESPACE_END

634
thirdparty/jolt_physics/Jolt/Core/Core.h vendored Normal file
View File

@@ -0,0 +1,634 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
// Jolt library version
#define JPH_VERSION_MAJOR 5
#define JPH_VERSION_MINOR 3
#define JPH_VERSION_PATCH 0
// Determine which features the library was compiled with
#ifdef JPH_DOUBLE_PRECISION
#define JPH_VERSION_FEATURE_BIT_1 1
#else
#define JPH_VERSION_FEATURE_BIT_1 0
#endif
#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
#define JPH_VERSION_FEATURE_BIT_2 1
#else
#define JPH_VERSION_FEATURE_BIT_2 0
#endif
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
#define JPH_VERSION_FEATURE_BIT_3 1
#else
#define JPH_VERSION_FEATURE_BIT_3 0
#endif
#ifdef JPH_PROFILE_ENABLED
#define JPH_VERSION_FEATURE_BIT_4 1
#else
#define JPH_VERSION_FEATURE_BIT_4 0
#endif
#ifdef JPH_EXTERNAL_PROFILE
#define JPH_VERSION_FEATURE_BIT_5 1
#else
#define JPH_VERSION_FEATURE_BIT_5 0
#endif
#ifdef JPH_DEBUG_RENDERER
#define JPH_VERSION_FEATURE_BIT_6 1
#else
#define JPH_VERSION_FEATURE_BIT_6 0
#endif
#ifdef JPH_DISABLE_TEMP_ALLOCATOR
#define JPH_VERSION_FEATURE_BIT_7 1
#else
#define JPH_VERSION_FEATURE_BIT_7 0
#endif
#ifdef JPH_DISABLE_CUSTOM_ALLOCATOR
#define JPH_VERSION_FEATURE_BIT_8 1
#else
#define JPH_VERSION_FEATURE_BIT_8 0
#endif
#if defined(JPH_OBJECT_LAYER_BITS) && JPH_OBJECT_LAYER_BITS == 32
#define JPH_VERSION_FEATURE_BIT_9 1
#else
#define JPH_VERSION_FEATURE_BIT_9 0
#endif
#ifdef JPH_ENABLE_ASSERTS
#define JPH_VERSION_FEATURE_BIT_10 1
#else
#define JPH_VERSION_FEATURE_BIT_10 0
#endif
#ifdef JPH_OBJECT_STREAM
#define JPH_VERSION_FEATURE_BIT_11 1
#else
#define JPH_VERSION_FEATURE_BIT_11 0
#endif
#define JPH_VERSION_FEATURES (uint64(JPH_VERSION_FEATURE_BIT_1) | (JPH_VERSION_FEATURE_BIT_2 << 1) | (JPH_VERSION_FEATURE_BIT_3 << 2) | (JPH_VERSION_FEATURE_BIT_4 << 3) | (JPH_VERSION_FEATURE_BIT_5 << 4) | (JPH_VERSION_FEATURE_BIT_6 << 5) | (JPH_VERSION_FEATURE_BIT_7 << 6) | (JPH_VERSION_FEATURE_BIT_8 << 7) | (JPH_VERSION_FEATURE_BIT_9 << 8) | (JPH_VERSION_FEATURE_BIT_10 << 9) | (JPH_VERSION_FEATURE_BIT_11 << 10))
// Combine the version and features in a single ID
#define JPH_VERSION_ID ((JPH_VERSION_FEATURES << 24) | (JPH_VERSION_MAJOR << 16) | (JPH_VERSION_MINOR << 8) | JPH_VERSION_PATCH)
// Determine platform
#if defined(JPH_PLATFORM_BLUE)
// Correct define already defined, this overrides everything else
#elif defined(_WIN32) || defined(_WIN64)
#include <winapifamily.h>
#if WINAPI_FAMILY == WINAPI_FAMILY_APP
#define JPH_PLATFORM_WINDOWS_UWP // Building for Universal Windows Platform
#endif
#define JPH_PLATFORM_WINDOWS
#elif defined(__ANDROID__) // Android is linux too, so that's why we check it first
#define JPH_PLATFORM_ANDROID
#elif defined(__linux__)
#define JPH_PLATFORM_LINUX
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
#define JPH_PLATFORM_BSD
#elif defined(__APPLE__)
#include <TargetConditionals.h>
#if defined(TARGET_OS_IPHONE) && !TARGET_OS_IPHONE
#define JPH_PLATFORM_MACOS
#else
#define JPH_PLATFORM_IOS
#endif
#elif defined(__EMSCRIPTEN__)
#define JPH_PLATFORM_WASM
#endif
// Platform helper macros
#ifdef JPH_PLATFORM_ANDROID
#define JPH_IF_NOT_ANDROID(x)
#else
#define JPH_IF_NOT_ANDROID(x) x
#endif
// Determine compiler
#if defined(__clang__)
#define JPH_COMPILER_CLANG
#elif defined(__GNUC__)
#define JPH_COMPILER_GCC
#elif defined(_MSC_VER)
#define JPH_COMPILER_MSVC
#endif
#if defined(__MINGW64__) || defined (__MINGW32__)
#define JPH_COMPILER_MINGW
#endif
// Detect CPU architecture
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
// X86 CPU architecture
#define JPH_CPU_X86
#if defined(__x86_64__) || defined(_M_X64)
#define JPH_CPU_ADDRESS_BITS 64
#else
#define JPH_CPU_ADDRESS_BITS 32
#endif
#define JPH_USE_SSE
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 32
// Detect enabled instruction sets
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && !defined(JPH_USE_AVX512)
#define JPH_USE_AVX512
#endif
#if (defined(__AVX2__) || defined(JPH_USE_AVX512)) && !defined(JPH_USE_AVX2)
#define JPH_USE_AVX2
#endif
#if (defined(__AVX__) || defined(JPH_USE_AVX2)) && !defined(JPH_USE_AVX)
#define JPH_USE_AVX
#endif
#if (defined(__SSE4_2__) || defined(JPH_USE_AVX)) && !defined(JPH_USE_SSE4_2)
#define JPH_USE_SSE4_2
#endif
#if (defined(__SSE4_1__) || defined(JPH_USE_SSE4_2)) && !defined(JPH_USE_SSE4_1)
#define JPH_USE_SSE4_1
#endif
#if (defined(__F16C__) || defined(JPH_USE_AVX2)) && !defined(JPH_USE_F16C)
#define JPH_USE_F16C
#endif
#if (defined(__LZCNT__) || defined(JPH_USE_AVX2)) && !defined(JPH_USE_LZCNT)
#define JPH_USE_LZCNT
#endif
#if (defined(__BMI__) || defined(JPH_USE_AVX2)) && !defined(JPH_USE_TZCNT)
#define JPH_USE_TZCNT
#endif
#ifndef JPH_CROSS_PLATFORM_DETERMINISTIC // FMA is not compatible with cross platform determinism
#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
#if defined(__FMA__) && !defined(JPH_USE_FMADD)
#define JPH_USE_FMADD
#endif
#elif defined(JPH_COMPILER_MSVC)
#if defined(__AVX2__) && !defined(JPH_USE_FMADD) // AVX2 also enables fused multiply add
#define JPH_USE_FMADD
#endif
#else
#error Undefined compiler
#endif
#endif
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
// ARM CPU architecture
#define JPH_CPU_ARM
#if defined(__aarch64__) || defined(_M_ARM64)
#define JPH_CPU_ADDRESS_BITS 64
#define JPH_USE_NEON
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 32
#else
#define JPH_CPU_ADDRESS_BITS 32
#define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries
#define JPH_DVECTOR_ALIGNMENT 8
#endif
#elif defined(__riscv)
// RISC-V CPU architecture
#define JPH_CPU_RISCV
#if __riscv_xlen == 64
#define JPH_CPU_ADDRESS_BITS 64
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 32
#else
#define JPH_CPU_ADDRESS_BITS 32
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 8
#endif
#elif defined(JPH_PLATFORM_WASM)
// WebAssembly CPU architecture
#define JPH_CPU_WASM
#if defined(__wasm64__)
#define JPH_CPU_ADDRESS_BITS 64
#else
#define JPH_CPU_ADDRESS_BITS 32
#endif
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 32
#ifdef __wasm_simd128__
#define JPH_USE_SSE
#define JPH_USE_SSE4_1
#define JPH_USE_SSE4_2
#endif
#elif defined(__powerpc__) || defined(__powerpc64__)
// PowerPC CPU architecture
#define JPH_CPU_PPC
#if defined(__powerpc64__)
#define JPH_CPU_ADDRESS_BITS 64
#else
#define JPH_CPU_ADDRESS_BITS 32
#endif
#ifdef _BIG_ENDIAN
#define JPH_CPU_BIG_ENDIAN
#endif
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 8
#elif defined(__loongarch__)
// LoongArch CPU architecture
#define JPH_CPU_LOONGARCH
#if defined(__loongarch64)
#define JPH_CPU_ADDRESS_BITS 64
#else
#define JPH_CPU_ADDRESS_BITS 32
#endif
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 8
#elif defined(__e2k__)
// E2K CPU architecture (MCST Elbrus 2000)
#define JPH_CPU_E2K
#define JPH_CPU_ADDRESS_BITS 64
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DVECTOR_ALIGNMENT 32
// Compiler flags on e2k arch determine CPU features
#if defined(__SSE__) && !defined(JPH_USE_SSE)
#define JPH_USE_SSE
#endif
#else
#error Unsupported CPU architecture
#endif
// If this define is set, Jolt is compiled as a shared library
#ifdef JPH_SHARED_LIBRARY
#ifdef JPH_BUILD_SHARED_LIBRARY
// While building the shared library, we must export these symbols
#if defined(JPH_PLATFORM_WINDOWS) && !defined(JPH_COMPILER_MINGW)
#define JPH_EXPORT __declspec(dllexport)
#else
#define JPH_EXPORT __attribute__ ((visibility ("default")))
#if defined(JPH_COMPILER_GCC)
// Prevents an issue with GCC attribute parsing (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69585)
#define JPH_EXPORT_GCC_BUG_WORKAROUND [[gnu::visibility("default")]]
#endif
#endif
#else
// When linking against Jolt, we must import these symbols
#if defined(JPH_PLATFORM_WINDOWS) && !defined(JPH_COMPILER_MINGW)
#define JPH_EXPORT __declspec(dllimport)
#else
#define JPH_EXPORT __attribute__ ((visibility ("default")))
#if defined(JPH_COMPILER_GCC)
// Prevents an issue with GCC attribute parsing (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69585)
#define JPH_EXPORT_GCC_BUG_WORKAROUND [[gnu::visibility("default")]]
#endif
#endif
#endif
#else
// If the define is not set, we use static linking and symbols don't need to be imported or exported
#define JPH_EXPORT
#endif
#ifndef JPH_EXPORT_GCC_BUG_WORKAROUND
#define JPH_EXPORT_GCC_BUG_WORKAROUND JPH_EXPORT
#endif
// Macro used by the RTTI macros to not export a function
#define JPH_NO_EXPORT
// Pragmas to store / restore the warning state and to disable individual warnings.
// JPH_PRAGMA stringizes its argument so that _Pragma can be used with unquoted text.
// Each compiler-specific suppression macro expands to nothing on other compilers.
#ifdef JPH_COMPILER_CLANG
	#define JPH_PRAGMA(x) _Pragma(#x)
	#define JPH_SUPPRESS_WARNING_PUSH JPH_PRAGMA(clang diagnostic push)
	#define JPH_SUPPRESS_WARNING_POP JPH_PRAGMA(clang diagnostic pop)
	#define JPH_CLANG_SUPPRESS_WARNING(w) JPH_PRAGMA(clang diagnostic ignored w)
	// Suppressions for warnings that only exist in newer clang versions
	#if __clang_major__ >= 13
		#define JPH_CLANG_13_PLUS_SUPPRESS_WARNING(w) JPH_CLANG_SUPPRESS_WARNING(w)
	#else
		#define JPH_CLANG_13_PLUS_SUPPRESS_WARNING(w)
	#endif
	#if __clang_major__ >= 16
		#define JPH_CLANG_16_PLUS_SUPPRESS_WARNING(w) JPH_CLANG_SUPPRESS_WARNING(w)
	#else
		#define JPH_CLANG_16_PLUS_SUPPRESS_WARNING(w)
	#endif
#else
	#define JPH_CLANG_SUPPRESS_WARNING(w)
	#define JPH_CLANG_13_PLUS_SUPPRESS_WARNING(w)
	#define JPH_CLANG_16_PLUS_SUPPRESS_WARNING(w)
#endif
#ifdef JPH_COMPILER_GCC
	#define JPH_PRAGMA(x) _Pragma(#x)
	#define JPH_SUPPRESS_WARNING_PUSH JPH_PRAGMA(GCC diagnostic push)
	#define JPH_SUPPRESS_WARNING_POP JPH_PRAGMA(GCC diagnostic pop)
	#define JPH_GCC_SUPPRESS_WARNING(w) JPH_PRAGMA(GCC diagnostic ignored w)
#else
	#define JPH_GCC_SUPPRESS_WARNING(w)
#endif
#ifdef JPH_COMPILER_MSVC
	#define JPH_PRAGMA(x) __pragma(x)
	#define JPH_SUPPRESS_WARNING_PUSH JPH_PRAGMA(warning (push))
	#define JPH_SUPPRESS_WARNING_POP JPH_PRAGMA(warning (pop))
	#define JPH_MSVC_SUPPRESS_WARNING(w) JPH_PRAGMA(warning (disable : w))
	// Suppressions for warnings that only exist in the MSVC 2019 (19.2x) compiler series
	#if _MSC_VER >= 1920 && _MSC_VER < 1930
		#define JPH_MSVC2019_SUPPRESS_WARNING(w) JPH_MSVC_SUPPRESS_WARNING(w)
	#else
		#define JPH_MSVC2019_SUPPRESS_WARNING(w)
	#endif
#else
	#define JPH_MSVC_SUPPRESS_WARNING(w)
	#define JPH_MSVC2019_SUPPRESS_WARNING(w)
#endif
// Disable common warnings triggered by Jolt when compiling with -Wall.
// This macro is expanded by JPH_NAMESPACE_BEGIN below, so these suppressions apply to
// everything inside the JPH namespace, even when Jolt headers are included from a
// project that compiles with stricter warning settings.
#define JPH_SUPPRESS_WARNINGS \
	JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat") \
	JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic") \
	JPH_CLANG_SUPPRESS_WARNING("-Wfloat-equal") \
	JPH_CLANG_SUPPRESS_WARNING("-Wsign-conversion") \
	JPH_CLANG_SUPPRESS_WARNING("-Wold-style-cast") \
	JPH_CLANG_SUPPRESS_WARNING("-Wgnu-anonymous-struct") \
	JPH_CLANG_SUPPRESS_WARNING("-Wnested-anon-types") \
	JPH_CLANG_SUPPRESS_WARNING("-Wglobal-constructors") \
	JPH_CLANG_SUPPRESS_WARNING("-Wexit-time-destructors") \
	JPH_CLANG_SUPPRESS_WARNING("-Wnonportable-system-include-path") \
	JPH_CLANG_SUPPRESS_WARNING("-Wlanguage-extension-token") \
	JPH_CLANG_SUPPRESS_WARNING("-Wunused-parameter") \
	JPH_CLANG_SUPPRESS_WARNING("-Wformat-nonliteral") \
	JPH_CLANG_SUPPRESS_WARNING("-Wcovered-switch-default") \
	JPH_CLANG_SUPPRESS_WARNING("-Wcast-align") \
	JPH_CLANG_SUPPRESS_WARNING("-Winvalid-offsetof") \
	JPH_CLANG_SUPPRESS_WARNING("-Wgnu-zero-variadic-macro-arguments") \
	JPH_CLANG_SUPPRESS_WARNING("-Wdocumentation-unknown-command") \
	JPH_CLANG_SUPPRESS_WARNING("-Wctad-maybe-unsupported") \
	JPH_CLANG_SUPPRESS_WARNING("-Wswitch-default") \
	JPH_CLANG_13_PLUS_SUPPRESS_WARNING("-Wdeprecated-copy") \
	JPH_CLANG_13_PLUS_SUPPRESS_WARNING("-Wdeprecated-copy-with-dtor") \
	JPH_CLANG_16_PLUS_SUPPRESS_WARNING("-Wunsafe-buffer-usage") \
	JPH_IF_NOT_ANDROID(JPH_CLANG_SUPPRESS_WARNING("-Wimplicit-int-float-conversion")) \
	\
	JPH_GCC_SUPPRESS_WARNING("-Wcomment") \
	JPH_GCC_SUPPRESS_WARNING("-Winvalid-offsetof") \
	JPH_GCC_SUPPRESS_WARNING("-Wclass-memaccess") \
	JPH_GCC_SUPPRESS_WARNING("-Wpedantic") \
	JPH_GCC_SUPPRESS_WARNING("-Wunused-parameter") \
	JPH_GCC_SUPPRESS_WARNING("-Wmaybe-uninitialized") \
	\
	JPH_MSVC_SUPPRESS_WARNING(4619) /* #pragma warning: there is no warning number 'XXXX' */ \
	JPH_MSVC_SUPPRESS_WARNING(4514) /* 'X' : unreferenced inline function has been removed */ \
	JPH_MSVC_SUPPRESS_WARNING(4710) /* 'X' : function not inlined */ \
	JPH_MSVC_SUPPRESS_WARNING(4711) /* function 'X' selected for automatic inline expansion */ \
	JPH_MSVC_SUPPRESS_WARNING(4714) /* function 'X' marked as __forceinline not inlined */ \
	JPH_MSVC_SUPPRESS_WARNING(4820) /* 'X': 'Y' bytes padding added after data member 'Z' */ \
	JPH_MSVC_SUPPRESS_WARNING(4100) /* 'X' : unreferenced formal parameter */ \
	JPH_MSVC_SUPPRESS_WARNING(4626) /* 'X' : assignment operator was implicitly defined as deleted because a base class assignment operator is inaccessible or deleted */ \
	JPH_MSVC_SUPPRESS_WARNING(5027) /* 'X' : move assignment operator was implicitly defined as deleted because a base class move assignment operator is inaccessible or deleted */ \
	JPH_MSVC_SUPPRESS_WARNING(4365) /* 'argument' : conversion from 'X' to 'Y', signed / unsigned mismatch */ \
	JPH_MSVC_SUPPRESS_WARNING(4324) /* 'X' : structure was padded due to alignment specifier */ \
	JPH_MSVC_SUPPRESS_WARNING(4625) /* 'X' : copy constructor was implicitly defined as deleted because a base class copy constructor is inaccessible or deleted */ \
	JPH_MSVC_SUPPRESS_WARNING(5026) /* 'X': move constructor was implicitly defined as deleted because a base class move constructor is inaccessible or deleted */ \
	JPH_MSVC_SUPPRESS_WARNING(4623) /* 'X' : default constructor was implicitly defined as deleted */ \
	JPH_MSVC_SUPPRESS_WARNING(4201) /* nonstandard extension used: nameless struct/union */ \
	JPH_MSVC_SUPPRESS_WARNING(4371) /* 'X': layout of class may have changed from a previous version of the compiler due to better packing of member 'Y' */ \
	JPH_MSVC_SUPPRESS_WARNING(5045) /* Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified */ \
	JPH_MSVC_SUPPRESS_WARNING(4583) /* 'X': destructor is not implicitly called */ \
	JPH_MSVC_SUPPRESS_WARNING(4582) /* 'X': constructor is not implicitly called */ \
	JPH_MSVC_SUPPRESS_WARNING(5219) /* implicit conversion from 'X' to 'Y', possible loss of data */ \
	JPH_MSVC_SUPPRESS_WARNING(4826) /* Conversion from 'X *' to 'JPH::uint64' is sign-extended. This may cause unexpected runtime behavior. (32-bit) */ \
	JPH_MSVC_SUPPRESS_WARNING(5264) /* 'X': 'const' variable is not used */ \
	JPH_MSVC_SUPPRESS_WARNING(4251) /* class 'X' needs to have DLL-interface to be used by clients of class 'Y' */ \
	JPH_MSVC_SUPPRESS_WARNING(4738) /* storing 32-bit float result in memory, possible loss of performance */ \
	JPH_MSVC2019_SUPPRESS_WARNING(5246) /* the initialization of a subobject should be wrapped in braces */
// OS-specific includes and definition of JPH_BREAKPOINT (trap into an attached debugger)
#if defined(JPH_PLATFORM_WINDOWS)
	#define JPH_BREAKPOINT __debugbreak()
#elif defined(JPH_PLATFORM_BLUE)
	// Configuration for a popular game console.
	// This file is not distributed because it would violate an NDA.
	// Creating one should only be a couple of minutes of work if you have the documentation for the platform
	// (you only need to define JPH_BREAKPOINT, JPH_PLATFORM_BLUE_GET_TICKS, JPH_PLATFORM_BLUE_MUTEX*, JPH_PLATFORM_BLUE_RWLOCK*, JPH_PLATFORM_BLUE_SEMAPHORE* and include the right header).
	#include <Jolt/Core/PlatformBlue.h>
#elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_BSD)
	#if defined(JPH_CPU_X86)
		// Software breakpoint interrupt (int 3)
		#define JPH_BREAKPOINT __asm volatile ("int $0x3")
	#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
		#define JPH_BREAKPOINT __builtin_trap()
	#else
		#error Unknown CPU architecture
	#endif
#elif defined(JPH_PLATFORM_WASM)
	#define JPH_BREAKPOINT do { } while (false) // Not supported
#else
	#error Unknown platform
#endif
// Begin the JPH namespace. Also pushes the warning state and applies JPH_SUPPRESS_WARNINGS
// so Jolt headers compile cleanly when included from projects with stricter warning levels.
#define JPH_NAMESPACE_BEGIN \
	JPH_SUPPRESS_WARNING_PUSH \
	JPH_SUPPRESS_WARNINGS \
	namespace JPH {

// End the JPH namespace. Restores the warning state pushed by JPH_NAMESPACE_BEGIN.
#define JPH_NAMESPACE_END \
	} \
	JPH_SUPPRESS_WARNING_POP

// Suppress warnings generated by the standard template library
// (wrap system/STL includes in JPH_SUPPRESS_WARNINGS_STD_BEGIN / _END)
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN \
	JPH_SUPPRESS_WARNING_PUSH \
	JPH_MSVC_SUPPRESS_WARNING(4365) \
	JPH_MSVC_SUPPRESS_WARNING(4619) \
	JPH_MSVC_SUPPRESS_WARNING(4710) \
	JPH_MSVC_SUPPRESS_WARNING(4711) \
	JPH_MSVC_SUPPRESS_WARNING(4820) \
	JPH_MSVC_SUPPRESS_WARNING(4514) \
	JPH_MSVC_SUPPRESS_WARNING(5262) \
	JPH_MSVC_SUPPRESS_WARNING(5264) \
	JPH_MSVC_SUPPRESS_WARNING(4738) \
	JPH_MSVC_SUPPRESS_WARNING(5045)

#define JPH_SUPPRESS_WARNINGS_STD_END \
	JPH_SUPPRESS_WARNING_POP
// Standard C++ includes
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <float.h>
#include <limits.h>
#include <string.h>
#include <utility>
#include <cmath>
#include <sstream>
#include <functional>
#include <algorithm>
#include <cstdint>
#ifdef JPH_COMPILER_MSVC
#include <malloc.h> // for alloca
#endif
#if defined(JPH_USE_SSE)
#include <immintrin.h>
#elif defined(JPH_USE_NEON)
#ifdef JPH_COMPILER_MSVC
#include <intrin.h>
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
// Commonly used STL types, pulled into the JPH namespace so the rest of the
// library can refer to them unqualified
using std::min;
using std::max;
using std::abs;
using std::sqrt;
using std::ceil;
using std::floor;
using std::trunc;
using std::round;
using std::fmod;
using std::string_view;
using std::function;
using std::numeric_limits;
using std::isfinite;
using std::isnan;
using std::ostream;
using std::istream;

// Standard types (fixed-width aliases used throughout the library)
using uint = unsigned int;
using uint8 = std::uint8_t;
using uint16 = std::uint16_t;
using uint32 = std::uint32_t;
using uint64 = std::uint64_t;

// Assert sizes of types (sanity check that the aliases above have the expected widths
// and that JPH_CPU_ADDRESS_BITS matches the actual pointer size)
static_assert(sizeof(uint) >= 4, "Invalid size of uint");
static_assert(sizeof(uint8) == 1, "Invalid size of uint8");
static_assert(sizeof(uint16) == 2, "Invalid size of uint16");
static_assert(sizeof(uint32) == 4, "Invalid size of uint32");
static_assert(sizeof(uint64) == 8, "Invalid size of uint64");
static_assert(sizeof(void *) == (JPH_CPU_ADDRESS_BITS == 64? 8 : 4), "Invalid size of pointer" );
// Determine if we want extra debugging code to be active
// (any non-NDEBUG build, unless explicitly opted out via JPH_NO_DEBUG)
#if !defined(NDEBUG) && !defined(JPH_NO_DEBUG)
	#define JPH_DEBUG
#endif

// Define inline macro (force-inlining unless JPH_NO_FORCE_INLINE requests plain 'inline')
#if defined(JPH_NO_FORCE_INLINE)
	#define JPH_INLINE inline
#elif defined(JPH_COMPILER_CLANG)
	#define JPH_INLINE __inline__ __attribute__((always_inline))
#elif defined(JPH_COMPILER_GCC)
	// On gcc 14 using always_inline in debug mode causes error: "inlining failed in call to 'always_inline' 'XXX': function not considered for inlining"
	// See: https://github.com/jrouwe/JoltPhysics/issues/1096
	#if __GNUC__ >= 14 && defined(JPH_DEBUG)
		#define JPH_INLINE inline
	#else
		#define JPH_INLINE __inline__ __attribute__((always_inline))
	#endif
#elif defined(JPH_COMPILER_MSVC)
	#define JPH_INLINE __forceinline
#else
	#error Undefined
#endif

// Cache line size (used for aligning to cache line); can be overridden by defining it before including this header
#ifndef JPH_CACHE_LINE_SIZE
	#define JPH_CACHE_LINE_SIZE 64
#endif

// Define macro to get current function name
#if defined(JPH_COMPILER_CLANG) || defined(JPH_COMPILER_GCC)
	#define JPH_FUNCTION_NAME __PRETTY_FUNCTION__
#elif defined(JPH_COMPILER_MSVC)
	#define JPH_FUNCTION_NAME __FUNCTION__
#else
	#error Undefined
#endif
// Stack allocation
#define JPH_STACK_ALLOC(n) alloca(n)

// Shorthand for #ifdef JPH_DEBUG / #endif: the argument is compiled in or out
#ifdef JPH_DEBUG
	#define JPH_IF_DEBUG(...) __VA_ARGS__
	#define JPH_IF_NOT_DEBUG(...)
#else
	#define JPH_IF_DEBUG(...)
	#define JPH_IF_NOT_DEBUG(...) __VA_ARGS__
#endif

// Shorthand for #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED / #endif
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...) __VA_ARGS__
#else
	#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)
#endif

// Helper macros to detect if we're running in single or double precision mode
#ifdef JPH_DOUBLE_PRECISION
	#define JPH_IF_SINGLE_PRECISION(...)
	#define JPH_IF_SINGLE_PRECISION_ELSE(s, d) d
	#define JPH_IF_DOUBLE_PRECISION(...) __VA_ARGS__
#else
	#define JPH_IF_SINGLE_PRECISION(...) __VA_ARGS__
	#define JPH_IF_SINGLE_PRECISION_ELSE(s, d) s
	#define JPH_IF_DOUBLE_PRECISION(...)
#endif

// Helper macro to detect if the debug renderer is active
#ifdef JPH_DEBUG_RENDERER
	#define JPH_IF_DEBUG_RENDERER(...) __VA_ARGS__
	#define JPH_IF_NOT_DEBUG_RENDERER(...)
#else
	#define JPH_IF_DEBUG_RENDERER(...)
	#define JPH_IF_NOT_DEBUG_RENDERER(...) __VA_ARGS__
#endif

// Macro to indicate that a parameter / variable is unused (suppresses unused warnings)
#define JPH_UNUSED(x) (void)x
// Macro to enable floating point precise mode and to disable fused multiply add instructions.
// Code between JPH_PRECISE_MATH_ON and JPH_PRECISE_MATH_OFF needs bit-exact, contraction-free
// floating point results (e.g. for cross-platform determinism).
#if defined(JPH_COMPILER_GCC) || defined(JPH_CROSS_PLATFORM_DETERMINISTIC)
	// We compile without -ffast-math and -ffp-contract=fast, so we don't need to disable anything
	#define JPH_PRECISE_MATH_ON
	#define JPH_PRECISE_MATH_OFF
#elif defined(JPH_COMPILER_CLANG)
	// We compile without -ffast-math because pragma float_control(precise, on) doesn't seem to actually negate all of the -ffast-math effects and causes the unit tests to fail (even if the pragma is added to all files)
	// On clang 14 and later we can turn off float contraction through a pragma (before it was buggy), so if FMA is on we can disable it through this macro
	#if (defined(JPH_CPU_ARM) && !defined(JPH_PLATFORM_ANDROID) && __clang_major__ >= 16) || (defined(JPH_CPU_X86) && __clang_major__ >= 14)
		#define JPH_PRECISE_MATH_ON \
			_Pragma("float_control(precise, on, push)") \
			_Pragma("clang fp contract(off)")
		#define JPH_PRECISE_MATH_OFF \
			_Pragma("float_control(pop)")
	#elif __clang_major__ >= 14 && (defined(JPH_USE_FMADD) || defined(FP_FAST_FMA))
		#define JPH_PRECISE_MATH_ON \
			_Pragma("clang fp contract(off)")
		#define JPH_PRECISE_MATH_OFF \
			_Pragma("clang fp contract(on)")
	#else
		#define JPH_PRECISE_MATH_ON
		#define JPH_PRECISE_MATH_OFF
	#endif
#elif defined(JPH_COMPILER_MSVC)
	// Unfortunately there is no way to push the state of fp_contract, so we have to assume it was turned on before JPH_PRECISE_MATH_ON
	#define JPH_PRECISE_MATH_ON \
		__pragma(float_control(precise, on, push)) \
		__pragma(fp_contract(off))
	#define JPH_PRECISE_MATH_OFF \
		__pragma(fp_contract(on)) \
		__pragma(float_control(pop))
#else
	#error Undefined
#endif
// Check if Thread Sanitizer is enabled
// (clang reports it through __has_feature, GCC defines __SANITIZE_THREAD__ instead)
#ifdef __has_feature
	#if __has_feature(thread_sanitizer)
		#define JPH_TSAN_ENABLED
	#endif
#else
	#ifdef __SANITIZE_THREAD__
		#define JPH_TSAN_ENABLED
	#endif
#endif

// Attribute to disable Thread Sanitizer for a particular function
#ifdef JPH_TSAN_ENABLED
	#define JPH_TSAN_NO_SANITIZE __attribute__((no_sanitize("thread")))
#else
	#define JPH_TSAN_NO_SANITIZE
#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,143 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
#if defined(JPH_CPU_WASM)
// Not supported
#elif defined(JPH_USE_SSE)
/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
template <uint Value, uint Mask>
class FPControlWord : public NonCopyable
{
public:
	/// Apply Value to the bits selected by Mask in the MXCSR register, remembering the previous state
				FPControlWord()
	{
		uint old_csr = _mm_getcsr();
		mPrevState = old_csr;
		uint new_csr = (old_csr & ~Mask) | Value;
		_mm_setcsr(new_csr);
	}

	/// Restore the masked bits to the value they had when the constructor ran
				~FPControlWord()
	{
		uint restored_csr = (_mm_getcsr() & ~Mask) | (mPrevState & Mask);
		_mm_setcsr(restored_csr);
	}

private:
	uint		mPrevState;				///< MXCSR value captured at construction
};
#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)
/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
template <unsigned int Value, unsigned int Mask>
class FPControlWord : public NonCopyable
{
public:
	/// Capture the current control word, then apply Value to the bits selected by Mask
				FPControlWord()
	{
		// Read the state before changing it
		_controlfp_s(&mPrevState, 0, 0);

		// Apply the requested bits
		unsigned int unused;
		_controlfp_s(&unused, Value, Mask);
	}

	/// Put the masked bits back to their captured values
				~FPControlWord()
	{
		unsigned int unused;
		_controlfp_s(&unused, mPrevState, Mask);
	}

private:
	unsigned int	mPrevState;			///< Control word captured at construction
};
#elif defined(JPH_CPU_ARM) && defined(JPH_USE_NEON)
/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
template <uint64 Value, uint64 Mask>
class FPControlWord : public NonCopyable
{
public:
	/// Apply Value to the bits selected by Mask in the AArch64 FPCR register, remembering the previous state
				FPControlWord()
	{
		uint64 fpcr;
		asm volatile("mrs %0, fpcr" : "=r" (fpcr));
		mPrevState = fpcr;
		uint64 updated = (fpcr & ~Mask) | Value;
		asm volatile("msr fpcr, %0" : /* no output */ : "r" (updated));
	}

	/// Restore the masked bits to the value they had when the constructor ran
				~FPControlWord()
	{
		uint64 fpcr;
		asm volatile("mrs %0, fpcr" : "=r" (fpcr));
		uint64 restored = (fpcr & ~Mask) | (mPrevState & Mask);
		asm volatile("msr fpcr, %0" : /* no output */ : "r" (restored));
	}

private:
	uint64		mPrevState;				///< FPCR value captured at construction
};
#elif defined(JPH_CPU_ARM)
/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
template <uint32 Value, uint32 Mask>
class FPControlWord : public NonCopyable
{
public:
	/// Apply Value to the bits selected by Mask in the ARM FPSCR register, remembering the previous state
				FPControlWord()
	{
		uint32 fpscr;
		asm volatile("vmrs %0, fpscr" : "=r" (fpscr));
		mPrevState = fpscr;
		uint32 updated = (fpscr & ~Mask) | Value;
		asm volatile("vmsr fpscr, %0" : /* no output */ : "r" (updated));
	}

	/// Restore the masked bits to the value they had when the constructor ran
				~FPControlWord()
	{
		uint32 fpscr;
		asm volatile("vmrs %0, fpscr" : "=r" (fpscr));
		uint32 restored = (fpscr & ~Mask) | (mPrevState & Mask);
		asm volatile("vmsr fpscr, %0" : /* no output */ : "r" (restored));
	}

private:
	uint32		mPrevState;				///< FPSCR value captured at construction
};
#elif defined(JPH_CPU_RISCV)
// RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions.
#elif defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
// Not implemented right now
#else
#error Unsupported CPU architecture
#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,96 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/FPControlWord.h>
JPH_NAMESPACE_BEGIN
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED

#if defined(JPH_CPU_WASM)

// Not supported, fall back to no-op classes
class FPExceptionsEnable { };
class FPExceptionDisableInvalid { };
class FPExceptionDisableDivByZero { };
class FPExceptionDisableOverflow { };

#elif defined(JPH_USE_SSE)

/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers
/// (in MXCSR a cleared mask bit unmasks, i.e. enables, the exception, hence Value = 0 here)
class FPExceptionsEnable : public FPControlWord<0, _MM_MASK_DIV_ZERO | _MM_MASK_INVALID | _MM_MASK_OVERFLOW> { };

/// Disable invalid floating point value exceptions
class FPExceptionDisableInvalid : public FPControlWord<_MM_MASK_INVALID, _MM_MASK_INVALID> { };

/// Disable division by zero floating point exceptions
class FPExceptionDisableDivByZero : public FPControlWord<_MM_MASK_DIV_ZERO, _MM_MASK_DIV_ZERO> { };

/// Disable floating point overflow exceptions
class FPExceptionDisableOverflow : public FPControlWord<_MM_MASK_OVERFLOW, _MM_MASK_OVERFLOW> { };

#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)

/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers
class FPExceptionsEnable : public FPControlWord<0, _EM_INVALID | _EM_ZERODIVIDE | _EM_OVERFLOW> { };

/// Disable invalid floating point value exceptions
class FPExceptionDisableInvalid : public FPControlWord<_EM_INVALID, _EM_INVALID> { };

/// Disable division by zero floating point exceptions
class FPExceptionDisableDivByZero : public FPControlWord<_EM_ZERODIVIDE, _EM_ZERODIVIDE> { };

/// Disable floating point overflow exceptions
class FPExceptionDisableOverflow : public FPControlWord<_EM_OVERFLOW, _EM_OVERFLOW> { };

#elif defined(JPH_CPU_ARM)

// Trap-enable bits in the ARM floating point control register:

/// Invalid operation exception bit
static constexpr uint64 FP_IOE = 1 << 8;

/// Enable divide by zero exception bit
static constexpr uint64 FP_DZE = 1 << 9;

/// Enable floating point overflow bit
static constexpr uint64 FP_OFE = 1 << 10;

/// Enable floating point divide by zero exception, overflow exceptions and exceptions on invalid numbers
class FPExceptionsEnable : public FPControlWord<FP_IOE | FP_DZE | FP_OFE, FP_IOE | FP_DZE | FP_OFE> { };

/// Disable invalid floating point value exceptions
class FPExceptionDisableInvalid : public FPControlWord<0, FP_IOE> { };

/// Disable division by zero floating point exceptions
class FPExceptionDisableDivByZero : public FPControlWord<0, FP_DZE> { };

/// Disable floating point overflow exceptions
class FPExceptionDisableOverflow : public FPControlWord<0, FP_OFE> { };

#elif defined(JPH_CPU_RISCV)
#error "RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled."
#elif defined(JPH_CPU_PPC)
#error PowerPC floating point exception handling to be implemented. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled.
#elif defined(JPH_CPU_LOONGARCH)
// Give LoongArch an actionable message rather than the generic 'Unsupported CPU architecture'
// (it is a recognized CPU elsewhere, e.g. in FPControlWord.h / FPFlushDenormals.h)
#error LoongArch floating point exception handling to be implemented. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled.
#else
#error Unsupported CPU architecture
#endif

#else

/// Dummy implementations used when floating point exception trapping is compiled out
class FPExceptionsEnable { };
class FPExceptionDisableInvalid { };
class FPExceptionDisableDivByZero { };
class FPExceptionDisableOverflow { };

#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,43 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/FPControlWord.h>
JPH_NAMESPACE_BEGIN
// Select the denormal-flushing helper implementation for the current CPU
#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)

// Not supported (no-op class so user code compiles unchanged)
class FPFlushDenormals { };

#elif defined(JPH_USE_SSE)

/// Helper class that needs to be put on the stack to enable flushing denormals to zero
/// This can make floating point operations much faster when working with very small numbers
/// (sets the flush-to-zero bit in MXCSR for the lifetime of the object)
class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_MASK> { };

#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)

/// Helper class that needs to be put on the stack to enable flushing denormals to zero
/// This can make floating point operations much faster when working with very small numbers
class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { };

#elif defined(JPH_CPU_ARM)

/// Flush denormals to zero bit in the ARM floating point control register
static constexpr uint64 FP_FZ = 1 << 24;

/// Helper class that needs to be put on the stack to enable flushing denormals to zero
/// This can make floating point operations much faster when working with very small numbers
class FPFlushDenormals : public FPControlWord<FP_FZ, FP_FZ> { };

#else
#error Unsupported CPU architecture
#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,92 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/Factory.h>
JPH_NAMESPACE_BEGIN
Factory *Factory::sInstance = nullptr;
void *Factory::CreateObject(const char *inName)
{
const RTTI *ci = Find(inName);
return ci != nullptr? ci->CreateObject() : nullptr;
}
const RTTI *Factory::Find(const char *inName)
{
	// Look the class up in the name table
	auto entry = mClassNameMap.find(inName);
	if (entry == mClassNameMap.end())
		return nullptr;
	return entry->second;
}
const RTTI *Factory::Find(uint32 inHash)
{
	// Look the class up in the hash table
	auto entry = mClassHashMap.find(inHash);
	if (entry == mClassHashMap.end())
		return nullptr;
	return entry->second;
}
bool Factory::Register(const RTTI *inRTTI)
{
	// Nothing to do when this type was registered before
	if (Find(inRTTI->GetName()) != nullptr)
		return true;

	// Map the class name to its type info
	mClassNameMap.try_emplace(inRTTI->GetName(), inRTTI);

	// Map the class hash to its type info; two distinct classes hashing to the same value is a fatal configuration error
	bool hash_inserted = mClassHashMap.try_emplace(inRTTI->GetHash(), inRTTI).second;
	if (!hash_inserted)
	{
		JPH_ASSERT(false, "Hash collision registering type!");
		return false;
	}

	// Recursively register all base classes
	int num_bases = inRTTI->GetBaseClassCount();
	for (int base = 0; base < num_bases; ++base)
		if (!Register(inRTTI->GetBaseClass(base)))
			return false;

#ifdef JPH_OBJECT_STREAM
	// Recursively register the types of all serializable attributes
	int num_attributes = inRTTI->GetAttributeCount();
	for (int attr = 0; attr < num_attributes; ++attr)
	{
		const RTTI *attr_type = inRTTI->GetAttribute(attr).GetMemberPrimitiveType();
		if (attr_type != nullptr && !Register(attr_type))
			return false;
	}
#endif // JPH_OBJECT_STREAM

	return true;
}
bool Factory::Register(const RTTI **inRTTIs, uint inNumber)
{
mClassHashMap.reserve(mClassHashMap.size() + inNumber);
mClassNameMap.reserve(mClassNameMap.size() + inNumber);
for (const RTTI **rtti = inRTTIs; rtti < inRTTIs + inNumber; ++rtti)
if (!Register(*rtti))
return false;
return true;
}
void Factory::Clear()
{
	// Drop both lookup tables; the factory only stores pointers to the RTTI objects and never deletes them
	mClassHashMap.clear();
	mClassNameMap.clear();
}
Array<const RTTI *> Factory::GetAllClasses() const
{
	// Collect the type info of every registered class
	Array<const RTTI *> result;
	result.reserve(mClassNameMap.size());
	for (const auto &[name, rtti] : mClassNameMap)
		result.push_back(rtti);
	return result;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,54 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/RTTI.h>
#include <Jolt/Core/UnorderedMap.h>
JPH_NAMESPACE_BEGIN
/// This class is responsible for creating instances of classes based on their name or hash and is mainly used for deserialization of saved data.
/// This class is responsible for creating instances of classes based on their name or hash and is mainly used for deserialization of saved data.
class JPH_EXPORT Factory
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Create an object by class name (returns null when the class is not registered)
	void *						CreateObject(const char *inName);

	/// Find type info for a specific class by name (null when not registered)
	const RTTI *				Find(const char *inName);

	/// Find type info for a specific class by hash (null when not registered)
	const RTTI *				Find(uint32 inHash);

	/// Register an object with the factory. Returns false on failure.
	bool						Register(const RTTI *inRTTI);

	/// Register a list of objects with the factory. Returns false on failure.
	bool						Register(const RTTI **inRTTIs, uint inNumber);

	/// Unregisters all types (the RTTI objects themselves are not owned and are not freed)
	void						Clear();

	/// Get all registered classes
	Array<const RTTI *>			GetAllClasses() const;

	/// Singleton factory instance
	static Factory *			sInstance;

private:
	using ClassNameMap = UnorderedMap<string_view, const RTTI *>;
	using ClassHashMap = UnorderedMap<uint32, const RTTI *>;

	/// Map of class names to type info
	ClassNameMap				mClassNameMap;

	/// Map of class hash to type info
	ClassHashMap				mClassHashMap;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,122 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Core/Mutex.h>
#include <Jolt/Core/Atomics.h>
JPH_NAMESPACE_BEGIN
/// Class that allows lock free creation / destruction of objects (unless a new page of objects needs to be allocated)
/// It contains a fixed pool of objects and also allows batching up a lot of objects to be destroyed
/// and doing the actual free in a single atomic operation
/// Class that allows lock free creation / destruction of objects (unless a new page of objects needs to be allocated)
/// It contains a fixed pool of objects and also allows batching up a lot of objects to be destroyed
/// and doing the actual free in a single atomic operation
template <typename Object>
class FixedSizeFreeList : public NonCopyable
{
private:
	/// Storage type for an Object
	struct ObjectStorage
	{
		/// The object we're storing
		Object			mObject;

		/// When the object is freed (or in the process of being freed as a batch) this will contain the next free object
		/// When an object is in use it will contain the object's index in the free list
		atomic<uint32>	mNextFreeObject;
	};

	static_assert(alignof(ObjectStorage) == alignof(Object), "Object not properly aligned");

	/// Access the object storage given the object index
	/// (the high bits of an index select the page, the low bits select the slot within the page)
	const ObjectStorage &	GetStorage(uint32 inObjectIndex) const	{ return mPages[inObjectIndex >> mPageShift][inObjectIndex & mObjectMask]; }
	ObjectStorage &			GetStorage(uint32 inObjectIndex)		{ return mPages[inObjectIndex >> mPageShift][inObjectIndex & mObjectMask]; }

	/// Size (in objects) of a single page
	uint32			mPageSize;

	/// Number of bits to shift an object index to the right to get the page number
	uint32			mPageShift;

	/// Mask to and an object index with to get the page number
	uint32			mObjectMask;

	/// Total number of pages that are usable
	uint32			mNumPages;

	/// Total number of objects that have been allocated
	uint32			mNumObjectsAllocated;

	/// Array of pages of objects
	ObjectStorage **mPages = nullptr;

	/// Mutex that is used to allocate a new page if the storage runs out
	/// This variable is aligned to the cache line to prevent false sharing with
	/// the constants used to index into the list via `Get()`.
	alignas(JPH_CACHE_LINE_SIZE) Mutex mPageMutex;

	/// Number of objects that we currently have in the free list / new pages (asserts-only bookkeeping)
#ifdef JPH_ENABLE_ASSERTS
	atomic<uint32>	mNumFreeObjects;
#endif // JPH_ENABLE_ASSERTS

	/// Simple counter that makes the first free object pointer update with every CAS so that we don't suffer from the ABA problem
	atomic<uint32>	mAllocationTag;

	/// Index of first free object, the first 32 bits of an object are used to point to the next free object
	/// (the upper 32 bits hold the allocation tag, see mAllocationTag)
	atomic<uint64>	mFirstFreeObjectAndTag;

	/// The first free object to use when the free list is empty (may need to allocate a new page)
	atomic<uint32>	mFirstFreeObjectInNewPage;

public:
	/// Invalid index
	static const uint32 cInvalidObjectIndex = 0xffffffff;

	/// Size of an object + bookkeeping for the freelist
	static const int ObjectStorageSize = sizeof(ObjectStorage);

	/// Destructor
	inline			~FixedSizeFreeList();

	/// Initialize the free list, up to inMaxObjects can be allocated
	inline void		Init(uint inMaxObjects, uint inPageSize);

	/// Lockless construct a new object, inParameters are passed on to the constructor
	template <typename... Parameters>
	inline uint32	ConstructObject(Parameters &&... inParameters);

	/// Lockless destruct an object and return it to the free pool
	inline void		DestructObject(uint32 inObjectIndex);

	/// Lockless destruct an object and return it to the free pool
	inline void		DestructObject(Object *inObject);

	/// A batch of objects that can be destructed
	struct Batch
	{
		uint32		mFirstObjectIndex = cInvalidObjectIndex;
		uint32		mLastObjectIndex = cInvalidObjectIndex;
		uint32		mNumObjects = 0;
	};

	/// Add a object to an existing batch to be destructed.
	/// Adding objects to a batch does not destroy or modify the objects, this will merely link them
	/// so that the entire batch can be returned to the free list in a single atomic operation
	inline void		AddObjectToBatch(Batch &ioBatch, uint32 inObjectIndex);

	/// Lockless destruct batch of objects
	inline void		DestructObjectBatch(Batch &ioBatch);

	/// Access an object by index.
	inline Object &	Get(uint32 inObjectIndex)				{ return GetStorage(inObjectIndex).mObject; }

	/// Access an object by index.
	inline const Object & Get(uint32 inObjectIndex) const	{ return GetStorage(inObjectIndex).mObject; }
};
JPH_NAMESPACE_END
#include "FixedSizeFreeList.inl"

View File

@@ -0,0 +1,215 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
JPH_NAMESPACE_BEGIN
template <typename Object>
FixedSizeFreeList<Object>::~FixedSizeFreeList()
{
	// If Init was never called there is nothing to release
	if (mPages == nullptr)
		return;

	// Every object must have been returned to the free list before the list is destroyed
	JPH_ASSERT(mNumFreeObjects.load(memory_order_relaxed) == mNumPages * mPageSize);

	// Release every page that was actually allocated, then the page table itself
	uint32 allocated_pages = mNumObjectsAllocated / mPageSize;
	for (uint32 p = 0; p < allocated_pages; ++p)
		AlignedFree(mPages[p]);
	Free(mPages);
}
template <typename Object>
void FixedSizeFreeList<Object>::Init(uint inMaxObjects, uint inPageSize)
{
	// Page size must be a non-zero power of two so indices can be split with a shift and a mask
	JPH_ASSERT(inPageSize > 0 && IsPowerOf2(inPageSize));
	JPH_ASSERT(mPages == nullptr);

	// Derive the indexing constants from the page size
	mPageSize = inPageSize;
	mPageShift = CountTrailingZeros(inPageSize);
	mObjectMask = inPageSize - 1;
	mNumPages = (inMaxObjects + inPageSize - 1) / inPageSize; // round up to whole pages
	JPH_IF_ENABLE_ASSERTS(mNumFreeObjects = mNumPages * inPageSize;)

	// Allocate the page table; individual pages are allocated on demand in ConstructObject
	mPages = reinterpret_cast<ObjectStorage **>(Allocate(mNumPages * sizeof(ObjectStorage *)));

	// No object from any page has been handed out yet
	mNumObjectsAllocated = 0;
	mFirstFreeObjectInNewPage = 0;

	// Tag 0 is used for the initial free list head below, so start counting tags at 1
	mAllocationTag = 1;

	// The free list starts out empty (with tag 0)
	mFirstFreeObjectAndTag = cInvalidObjectIndex;
}
/// Lockless construct a new object; returns its index or cInvalidObjectIndex when the pool is exhausted.
template <typename Object>
template <typename... Parameters>
uint32 FixedSizeFreeList<Object>::ConstructObject(Parameters &&... inParameters)
{
	for (;;)
	{
		// Get first object from the linked list
		uint64 first_free_object_and_tag = mFirstFreeObjectAndTag.load(memory_order_acquire);
		uint32 first_free = uint32(first_free_object_and_tag);
		if (first_free == cInvalidObjectIndex)
		{
			// The free list is empty, we take an object from the page that has never been used before
			first_free = mFirstFreeObjectInNewPage.fetch_add(1, memory_order_relaxed);
			if (first_free >= mNumObjectsAllocated)
			{
				// Allocate new page
				lock_guard lock(mPageMutex);
				// Re-check under the lock: another thread may have allocated the needed page in the meantime
				while (first_free >= mNumObjectsAllocated)
				{
					uint32 next_page = mNumObjectsAllocated / mPageSize;
					if (next_page == mNumPages)
						return cInvalidObjectIndex; // Out of space!
					mPages[next_page] = reinterpret_cast<ObjectStorage *>(AlignedAllocate(mPageSize * sizeof(ObjectStorage), max<size_t>(alignof(ObjectStorage), JPH_CACHE_LINE_SIZE)));
					mNumObjectsAllocated += mPageSize;
				}
			}

			// Allocation successful
			JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);)
			ObjectStorage &storage = GetStorage(first_free);
			new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
			// An in-use slot stores its own index in mNextFreeObject (see ObjectStorage in the header)
			storage.mNextFreeObject.store(first_free, memory_order_release);
			return first_free;
		}
		else
		{
			// Load next pointer
			uint32 new_first_free = GetStorage(first_free).mNextFreeObject.load(memory_order_acquire);

			// Construct a new first free object tag
			// The tag counter is bumped on every allocation so the 64-bit CAS below cannot succeed
			// against a stale head value (ABA protection)
			uint64 new_first_free_object_and_tag = uint64(new_first_free) + (uint64(mAllocationTag.fetch_add(1, memory_order_relaxed)) << 32);

			// Compare and swap
			if (mFirstFreeObjectAndTag.compare_exchange_weak(first_free_object_and_tag, new_first_free_object_and_tag, memory_order_release))
			{
				// Allocation successful
				JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);)
				ObjectStorage &storage = GetStorage(first_free);
				new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
				// An in-use slot stores its own index in mNextFreeObject (see ObjectStorage in the header)
				storage.mNextFreeObject.store(first_free, memory_order_release);
				return first_free;
			}
			// CAS failed: another thread popped the head first, retry from the top
		}
	}
}
template <typename Object>
void FixedSizeFreeList<Object>::AddObjectToBatch(Batch &ioBatch, uint32 inObjectIndex)
{
	// Collects objects in a singly linked list inside ioBatch so the whole chain can be
	// returned to the free list with a single atomic operation in DestructObjectBatch.
	// Note: ioBatch itself is mutated without synchronization.
	JPH_ASSERT(ioBatch.mNumObjects != uint32(-1), "Trying to reuse a batch that has already been freed");

	// Reset next index (a live object stores its own index here, see ConstructObject)
	atomic<uint32> &next_free_object = GetStorage(inObjectIndex).mNextFreeObject;
	JPH_ASSERT(next_free_object.load(memory_order_relaxed) == inObjectIndex, "Trying to add a object to the batch that is already in a free list");
	next_free_object.store(cInvalidObjectIndex, memory_order_release);

	// Link object in batch to free: append to the chain (or start it if the batch is empty)
	if (ioBatch.mFirstObjectIndex == cInvalidObjectIndex)
		ioBatch.mFirstObjectIndex = inObjectIndex;
	else
		GetStorage(ioBatch.mLastObjectIndex).mNextFreeObject.store(inObjectIndex, memory_order_release);
	ioBatch.mLastObjectIndex = inObjectIndex;
	ioBatch.mNumObjects++;
}
template <typename Object>
void FixedSizeFreeList<Object>::DestructObjectBatch(Batch &ioBatch)
{
	// Destroys all objects collected by AddObjectToBatch and pushes the pre-linked chain
	// onto the lock-free free list with a single compare and swap.
	if (ioBatch.mFirstObjectIndex != cInvalidObjectIndex)
	{
		// Call destructors
		if constexpr (!std::is_trivially_destructible<Object>())
		{
			// Walk the batch chain, the last object points at cInvalidObjectIndex
			uint32 object_idx = ioBatch.mFirstObjectIndex;
			do
			{
				ObjectStorage &storage = GetStorage(object_idx);
				storage.mObject.~Object();
				object_idx = storage.mNextFreeObject.load(memory_order_relaxed);
			}
			while (object_idx != cInvalidObjectIndex);
		}

		// Add to objects free list
		ObjectStorage &storage = GetStorage(ioBatch.mLastObjectIndex);
		for (;;)
		{
			// Get first object from the list
			uint64 first_free_object_and_tag = mFirstFreeObjectAndTag.load(memory_order_acquire);
			uint32 first_free = uint32(first_free_object_and_tag);

			// Make it the next pointer of the last object in the batch that is to be freed
			storage.mNextFreeObject.store(first_free, memory_order_release);

			// Construct a new first free object tag (tag is bumped to avoid the ABA problem)
			uint64 new_first_free_object_and_tag = uint64(ioBatch.mFirstObjectIndex) + (uint64(mAllocationTag.fetch_add(1, memory_order_relaxed)) << 32);

			// Compare and swap, retry if another thread changed the list head in the meantime
			if (mFirstFreeObjectAndTag.compare_exchange_weak(first_free_object_and_tag, new_first_free_object_and_tag, memory_order_release))
			{
				// Free successful
				JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_add(ioBatch.mNumObjects, memory_order_relaxed);)

				// Mark the batch as freed
#ifdef JPH_ENABLE_ASSERTS
				ioBatch.mNumObjects = uint32(-1);
#endif
				return;
			}
		}
	}
}
template <typename Object>
void FixedSizeFreeList<Object>::DestructObject(uint32 inObjectIndex)
{
	JPH_ASSERT(inObjectIndex != cInvalidObjectIndex);

	// Call destructor
	ObjectStorage &storage = GetStorage(inObjectIndex);
	storage.mObject.~Object();

	// Add to object free list (lock-free push, same scheme as DestructObjectBatch)
	for (;;)
	{
		// Get first object from the list
		uint64 first_free_object_and_tag = mFirstFreeObjectAndTag.load(memory_order_acquire);
		uint32 first_free = uint32(first_free_object_and_tag);

		// Make it the next pointer of the last object in the batch that is to be freed
		storage.mNextFreeObject.store(first_free, memory_order_release);

		// Construct a new first free object tag (tag is bumped to avoid the ABA problem)
		uint64 new_first_free_object_and_tag = uint64(inObjectIndex) + (uint64(mAllocationTag.fetch_add(1, memory_order_relaxed)) << 32);

		// Compare and swap, retry if another thread changed the list head in the meantime
		if (mFirstFreeObjectAndTag.compare_exchange_weak(first_free_object_and_tag, new_first_free_object_and_tag, memory_order_release))
		{
			// Free successful
			JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_add(1, memory_order_relaxed);)
			return;
		}
	}
}
template <typename Object>
inline void FixedSizeFreeList<Object>::DestructObject(Object *inObject)
{
	// While an object is live, its storage slot's mNextFreeObject holds the object's own
	// index (see ConstructObject), so we can recover the index straight from the pointer.
	ObjectStorage *storage = reinterpret_cast<ObjectStorage *>(inObject);
	uint32 object_index = storage->mNextFreeObject.load(memory_order_relaxed);
	JPH_ASSERT(object_index < mNumObjectsAllocated);

	// Forward to the index based implementation
	DestructObject(object_index);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,234 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Implements the FNV-1a hash algorithm
/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
/// @param inData Data block of bytes
/// @param inSize Number of bytes
/// @param inSeed Seed of the hash (can be used to pass in the hash of a previous operation, otherwise leave default)
/// @return Hash
inline uint64 HashBytes(const void *inData, uint inSize, uint64 inSeed = 0xcbf29ce484222325UL)
{
	const uint8 *bytes = reinterpret_cast<const uint8 *>(inData);
	uint64 hash = inSeed;
	for (uint i = 0; i < inSize; ++i)
	{
		// FNV-1a: xor in the byte first, then multiply by the 64 bit FNV prime
		hash = (hash ^ uint64(bytes[i])) * 0x100000001b3UL;
	}
	return hash;
}
/// Calculate the FNV-1a hash of inString.
/// @see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
constexpr uint64 HashString(const char *inString, uint64 inSeed = 0xcbf29ce484222325UL)
{
	uint64 hash = inSeed;
	while (*inString != 0)
	{
		// FNV-1a: xor in the character first, then multiply by the 64 bit FNV prime
		hash ^= uint64(*inString);
		hash *= 0x100000001b3UL;
		++inString;
	}
	return hash;
}
/// A 64 bit hash function by Thomas Wang, Jan 1997
/// See: http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
/// @param inValue Value to hash
/// @return Hash
inline uint64 Hash64(uint64 inValue)
{
	// Sequence of invertible mixing steps (shifts, adds and xors) that spread the input bits
	uint64 h = inValue;
	h = (~h) + (h << 21); // h = (h << 21) - h - 1;
	h ^= h >> 24;
	h = (h + (h << 3)) + (h << 8); // h * 265
	h ^= h >> 14;
	h = (h + (h << 2)) + (h << 4); // h * 21
	h ^= h >> 28;
	h += h << 31;
	return h;
}
/// Fallback hash function that calls T::GetHash()
template <class T>
struct Hash
{
	uint64 operator () (const T &inValue) const
	{
		return inValue.GetHash();
	}
};

/// A hash function for floats
template <>
struct Hash<float>
{
	uint64 operator () (float inValue) const
	{
		float value = inValue == 0.0f? 0.0f : inValue; // Convert -0.0f to 0.0f so that both zeros hash to the same value
		return HashBytes(&value, sizeof(value));
	}
};

/// A hash function for doubles
template <>
struct Hash<double>
{
	uint64 operator () (double inValue) const
	{
		double value = inValue == 0.0? 0.0 : inValue; // Convert -0.0 to 0.0 so that both zeros hash to the same value
		return HashBytes(&value, sizeof(value));
	}
};

/// A hash function for character pointers, hashes the string contents (FNV-1a), not the pointer
template <>
struct Hash<const char *>
{
	uint64 operator () (const char *inValue) const
	{
		return HashString(inValue);
	}
};

/// A hash function for std::string_view
template <>
struct Hash<std::string_view>
{
	uint64 operator () (const std::string_view &inValue) const
	{
		return HashBytes(inValue.data(), uint(inValue.size()));
	}
};

/// A hash function for String
template <>
struct Hash<String>
{
	uint64 operator () (const String &inValue) const
	{
		return HashBytes(inValue.data(), uint(inValue.size()));
	}
};

/// A fallback function for generic pointers, hashes the pointer value itself (not the pointee)
template <class T>
struct Hash<T *>
{
	uint64 operator () (T *inValue) const
	{
		return HashBytes(&inValue, sizeof(inValue));
	}
};
/// Helper macro to define a hash function for trivial types.
/// Hashes the raw object bytes with HashBytes, so only suitable for types whose value is
/// fully determined by their byte representation (no padding, no indirection).
#define JPH_DEFINE_TRIVIAL_HASH(type) \
template <> \
struct Hash<type> \
{ \
	uint64 operator () (const type &inValue) const \
	{ \
		return HashBytes(&inValue, sizeof(inValue)); \
	} \
};

/// Commonly used types
JPH_DEFINE_TRIVIAL_HASH(char)
JPH_DEFINE_TRIVIAL_HASH(int)
JPH_DEFINE_TRIVIAL_HASH(uint32)
JPH_DEFINE_TRIVIAL_HASH(uint64)
/// Helper function that hashes a single value into ioSeed
/// Based on https://github.com/jonmaiga/mx3 by Jon Maiga
template <typename T>
inline void HashCombine(uint64 &ioSeed, const T &inValue)
{
	constexpr uint64 c = 0xbea225f9eb34556dUL;

	// Hash the new value
	uint64 x = Hash<T> { } (inValue);

	// mix_stream(h, x), see: https://github.com/jonmaiga/mx3/blob/master/mx3.h
	x *= c;
	x ^= x >> 39;
	uint64 h = (ioSeed + x * c) * c;

	// mix(h): alternating xor-shift and multiply rounds to finalize
	h ^= h >> 32;
	h *= c;
	h ^= h >> 29;
	h *= c;
	h ^= h >> 32;
	h *= c;
	h ^= h >> 29;

	ioSeed = h;
}
/// Hash combiner to use a custom struct in an unordered map or set
///
/// Usage:
///
///	struct SomeHashKey
///	{
///		std::string key1;
///		std::string key2;
///		bool key3;
///	};
///
///	JPH_MAKE_HASHABLE(SomeHashKey, t.key1, t.key2, t.key3)
template <typename FirstValue, typename... Values>
inline uint64 HashCombineArgs(const FirstValue &inFirstValue, Values... inValues)
{
	// The first value primes the seed, the remaining values are mixed in one
	// at a time through a fold expression over HashCombine
	uint64 hash = Hash<FirstValue> { } (inFirstValue);
	(HashCombine(hash, inValues), ...);
	return hash;
}
/// Defines a hash struct called 'name' whose operator () combines the fields listed in the
/// variadic arguments via HashCombineArgs; 't' is the name of the object in that field list.
#define JPH_MAKE_HASH_STRUCT(type, name, ...) \
struct [[nodiscard]] name \
{ \
	::JPH::uint64 operator()(const type &t) const \
	{ \
		return ::JPH::HashCombineArgs(__VA_ARGS__); \
	} \
};

/// Specializes std::hash for 'type' in terms of the JPH::Hash specialization
#define JPH_MAKE_STD_HASH(type) \
JPH_SUPPRESS_WARNING_PUSH \
JPH_SUPPRESS_WARNINGS \
namespace std \
{ \
	template<> \
	struct [[nodiscard]] hash<type> \
	{ \
		size_t operator()(const type &t) const \
		{ \
			return size_t(::JPH::Hash<type>{ }(t)); \
		} \
	}; \
} \
JPH_SUPPRESS_WARNING_POP

/// Makes 'type' usable as a key in both JPH and std unordered containers by hashing the listed fields
#define JPH_MAKE_HASHABLE(type, ...) \
JPH_SUPPRESS_WARNING_PUSH \
JPH_SUPPRESS_WARNINGS \
namespace JPH \
{ \
	template<> \
	JPH_MAKE_HASH_STRUCT(type, Hash<type>, __VA_ARGS__) \
} \
JPH_SUPPRESS_WARNING_POP \
JPH_MAKE_STD_HASH(type)
JPH_NAMESPACE_END

View File

@@ -0,0 +1,872 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Math/BVec16.h>
JPH_NAMESPACE_BEGIN
/// Helper class for implementing an UnorderedSet or UnorderedMap
/// Based on CppCon 2017: Matt Kulukundis "Designing a Fast, Efficient, Cache-friendly Hash Table, Step by Step"
/// See: https://www.youtube.com/watch?v=ncHmEUmJZf4
template <class Key, class KeyValue, class HashTableDetail, class Hash, class KeyEqual>
class HashTable
{
public:
/// Properties
using value_type = KeyValue;
using size_type = uint32;
using difference_type = ptrdiff_t;
private:
/// Base class for iterators (forward iteration only, shared by iterator and const_iterator)
template <class Table, class Iterator>
class IteratorBase
{
public:
	/// Properties
	using difference_type = typename Table::difference_type;
	using value_type = typename Table::value_type;
	using iterator_category = std::forward_iterator_tag;

	/// Copy constructor
	IteratorBase(const IteratorBase &inRHS) = default;

	/// Assignment operator
	IteratorBase & operator = (const IteratorBase &inRHS) = default;

	/// Iterator at start of table
	explicit IteratorBase(Table *inTable) :
		mTable(inTable),
		mIndex(0)
	{
		// Skip buckets that are not in use so we land on the first element (or end if the table is empty)
		while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0)
			++mIndex;
	}

	/// Iterator at specific index
	IteratorBase(Table *inTable, size_type inIndex) :
		mTable(inTable),
		mIndex(inIndex)
	{
	}

	/// Prefix increment
	Iterator & operator ++ ()
	{
		JPH_ASSERT(IsValid());

		// Advance to the next used bucket
		do
		{
			++mIndex;
		}
		while (mIndex < mTable->mMaxSize && (mTable->mControl[mIndex] & cBucketUsed) == 0);
		return static_cast<Iterator &>(*this);
	}

	/// Postfix increment
	Iterator operator ++ (int)
	{
		Iterator result(mTable, mIndex);
		++(*this);
		return result;
	}

	/// Access to key value pair
	const KeyValue & operator * () const
	{
		JPH_ASSERT(IsValid());
		return mTable->mData[mIndex];
	}

	/// Access to key value pair
	const KeyValue * operator -> () const
	{
		JPH_ASSERT(IsValid());
		return mTable->mData + mIndex;
	}

	/// Equality operator
	bool operator == (const Iterator &inRHS) const
	{
		return mIndex == inRHS.mIndex && mTable == inRHS.mTable;
	}

	/// Inequality operator
	bool operator != (const Iterator &inRHS) const
	{
		return !(*this == inRHS);
	}

	/// Check that the iterator is valid (points at a used bucket inside the table)
	bool IsValid() const
	{
		return mIndex < mTable->mMaxSize
			&& (mTable->mControl[mIndex] & cBucketUsed) != 0;
	}

	Table * mTable;
	size_type mIndex;
};
/// Get the maximum number of elements that we can support given a number of buckets
static constexpr size_type sGetMaxLoad(size_type inBucketCount)
{
	// The table is considered full at cMaxLoadFactorNumerator / cMaxLoadFactorDenominator (7/8) occupancy
	return uint32((cMaxLoadFactorNumerator * inBucketCount) / cMaxLoadFactorDenominator);
}

/// Update the control value for a bucket
JPH_INLINE void SetControlValue(size_type inIndex, uint8 inValue)
{
	JPH_ASSERT(inIndex < mMaxSize);
	mControl[inIndex] = inValue;

	// Mirror the first 15 bytes to the 15 bytes beyond mMaxSize
	// Note that this is equivalent to:
	// if (inIndex < 15)
	//   mControl[inIndex + mMaxSize] = inValue
	// else
	//   mControl[inIndex] = inValue
	// Which performs a needless write if inIndex >= 15 but at least it is branch-less
	mControl[((inIndex - 15) & (mMaxSize - 1)) + 15] = inValue;
}

/// Get the index and control value for a particular key
JPH_INLINE void GetIndexAndControlValue(const Key &inKey, size_type &outIndex, uint8 &outControl) const
{
	// Calculate hash
	uint64 hash_value = Hash { } (inKey);

	// Split hash into index (bits 7 and up, masked to the table size which is a power of 2)
	// and control value (lowest 7 bits of the hash plus the 'used' flag)
	outIndex = size_type(hash_value >> 7) & (mMaxSize - 1);
	outControl = cBucketUsed | uint8(hash_value);
}

/// Allocate space for the hash table
void AllocateTable(size_type inMaxSize)
{
	JPH_ASSERT(mData == nullptr);

	mMaxSize = inMaxSize;
	mLoadLeft = sGetMaxLoad(inMaxSize);
	size_t required_size = size_t(mMaxSize) * (sizeof(KeyValue) + 1) + 15; // Add 15 bytes to mirror the first 15 bytes of the control values
	if constexpr (cNeedsAlignedAllocate)
		mData = reinterpret_cast<KeyValue *>(AlignedAllocate(required_size, alignof(KeyValue)));
	else
		mData = reinterpret_cast<KeyValue *>(Allocate(required_size));
	// The control bytes live in the same allocation, directly after the key/value array
	mControl = reinterpret_cast<uint8 *>(mData + mMaxSize);
}
/// Copy the contents of another hash table (this table must be empty/unallocated)
void CopyTable(const HashTable &inRHS)
{
	if (inRHS.empty())
		return;

	AllocateTable(inRHS.mMaxSize);

	// Copy control bytes (including the 15 mirrored bytes at the end)
	memcpy(mControl, inRHS.mControl, mMaxSize + 15);

	// Copy elements; only buckets marked as used contain a constructed KeyValue
	uint index = 0;
	for (const uint8 *control = mControl, *control_end = mControl + mMaxSize; control != control_end; ++control, ++index)
		if (*control & cBucketUsed)
			new (mData + index) KeyValue(inRHS.mData[index]);
	mSize = inRHS.mSize;
}

/// Grow the table to a new size, re-inserting all existing elements
void GrowTable(size_type inNewMaxSize)
{
	// Move the old table to a temporary structure
	size_type old_max_size = mMaxSize;
	KeyValue *old_data = mData;
	const uint8 *old_control = mControl;
	mData = nullptr;
	mControl = nullptr;
	mSize = 0;
	mMaxSize = 0;
	mLoadLeft = 0;

	// Allocate new table
	AllocateTable(inNewMaxSize);

	// Reset all control bytes
	memset(mControl, cBucketEmpty, mMaxSize + 15);

	if (old_data != nullptr)
	{
		// Copy all elements from the old table
		for (size_type i = 0; i < old_max_size; ++i)
			if (old_control[i] & cBucketUsed)
			{
				size_type index;
				KeyValue *element = old_data + i;
				// InsertAfterGrow skips the duplicate key check since all keys are known to be unique
				JPH_IF_ENABLE_ASSERTS(bool inserted =) InsertKey</* InsertAfterGrow= */ true>(HashTableDetail::sGetKey(*element), index);
				JPH_ASSERT(inserted);
				new (mData + index) KeyValue(std::move(*element));
				element->~KeyValue();
			}

		// Free memory
		if constexpr (cNeedsAlignedAllocate)
			AlignedFree(old_data);
		else
			Free(old_data);
	}
}
protected:
/// Get an element by index
KeyValue & GetElement(size_type inIndex) const
{
	return mData[inIndex];
}

/// Insert a key into the map, returns true if the element was inserted, false if it already existed.
/// outIndex is the index at which the element should be constructed / where it is located.
/// Note: this only claims a bucket and writes its control byte; the caller constructs the KeyValue.
template <bool InsertAfterGrow = false>
bool InsertKey(const Key &inKey, size_type &outIndex)
{
	// Ensure we have enough space
	if (mLoadLeft == 0)
	{
		// Should not be growing if we're already growing!
		if constexpr (InsertAfterGrow)
			JPH_ASSERT(false);

		// Decide if we need to clean up all tombstones or if we need to grow the map
		size_type num_deleted = sGetMaxLoad(mMaxSize) - mSize;
		if (num_deleted * cMaxDeletedElementsDenominator > mMaxSize * cMaxDeletedElementsNumerator)
			rehash(0);
		else
		{
			// Grow by a power of 2
			size_type new_max_size = max<size_type>(mMaxSize << 1, 16);
			if (new_max_size < mMaxSize)
			{
				JPH_ASSERT(false, "Overflow in hash table size, can't grow!");
				return false;
			}
			GrowTable(new_max_size);
		}
	}

	// Split hash into index and control value
	size_type index;
	uint8 control;
	GetIndexAndControlValue(inKey, index, control);

	// Keeps track of the index of the first deleted bucket we found
	constexpr size_type cNoDeleted = ~size_type(0);
	size_type first_deleted_index = cNoDeleted;

	// Linear probing, 16 buckets at a time
	KeyEqual equal;
	size_type bucket_mask = mMaxSize - 1;
	BVec16 control16 = BVec16::sReplicate(control);
	BVec16 bucket_empty = BVec16::sZero();
	BVec16 bucket_deleted = BVec16::sReplicate(cBucketDeleted);
	for (;;)
	{
		// Read 16 control values (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes)
		BVec16 control_bytes = BVec16::sLoadByte16(mControl + index);

		// Check if we must find the element before we can insert
		if constexpr (!InsertAfterGrow)
		{
			// Check for the control value we're looking for
			// Note that when deleting we can create empty buckets instead of deleted buckets.
			// This means we must unconditionally check all buckets in this batch for equality
			// (also beyond the first empty bucket).
			uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues());

			// Index within the 16 buckets
			size_type local_index = index;

			// Loop while there's still buckets to process
			while (control_equal != 0)
			{
				// Get the first equal bucket
				uint first_equal = CountTrailingZeros(control_equal);

				// Skip to the bucket
				local_index += first_equal;

				// Make sure that our index is not beyond the end of the table
				local_index &= bucket_mask;

				// We found a bucket with same control value, compare the actual keys
				if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey))
				{
					// Element already exists
					outIndex = local_index;
					return false;
				}

				// Skip past this bucket
				control_equal >>= first_equal + 1;
				local_index++;
			}

			// Check if we're still scanning for deleted buckets
			if (first_deleted_index == cNoDeleted)
			{
				// Check if any buckets have been deleted, if so store the first one
				uint32 control_deleted = uint32(BVec16::sEquals(control_bytes, bucket_deleted).GetTrues());
				if (control_deleted != 0)
					first_deleted_index = index + CountTrailingZeros(control_deleted);
			}
		}

		// Check for empty buckets
		uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues());
		if (control_empty != 0)
		{
			// If we found a deleted bucket, use it.
			// It doesn't matter if it is before or after the first empty bucket we found
			// since we will always be scanning in batches of 16 buckets.
			if (first_deleted_index == cNoDeleted || InsertAfterGrow)
			{
				index += CountTrailingZeros(control_empty);
				--mLoadLeft; // Using an empty bucket decreases the load left
			}
			else
			{
				index = first_deleted_index;
			}

			// Make sure that our index is not beyond the end of the table
			index &= bucket_mask;

			// Update control byte
			SetControlValue(index, control);
			++mSize;

			// Return index to newly allocated bucket
			outIndex = index;
			return true;
		}

		// Move to next batch of 16 buckets
		index = (index + 16) & bucket_mask;
	}
}
public:
/// Non-const iterator
class iterator : public IteratorBase<HashTable, iterator>
{
	using Base = IteratorBase<HashTable, iterator>;

public:
	/// Properties
	using reference = typename Base::value_type &;
	using pointer = typename Base::value_type *;

	/// Constructors
	explicit iterator(HashTable *inTable) : Base(inTable) { }
	iterator(HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { }
	iterator(const iterator &inIterator) : Base(inIterator) { }

	/// Assignment
	iterator & operator = (const iterator &inRHS) { Base::operator = (inRHS); return *this; }

	using Base::operator *;

	/// Non-const access to key value pair
	KeyValue & operator * ()
	{
		JPH_ASSERT(this->IsValid());
		return this->mTable->mData[this->mIndex];
	}

	using Base::operator ->;

	/// Non-const access to key value pair
	KeyValue * operator -> ()
	{
		JPH_ASSERT(this->IsValid());
		return this->mTable->mData + this->mIndex;
	}
};

/// Const iterator
class const_iterator : public IteratorBase<const HashTable, const_iterator>
{
	using Base = IteratorBase<const HashTable, const_iterator>;

public:
	/// Properties
	using reference = const typename Base::value_type &;
	using pointer = const typename Base::value_type *;

	/// Constructors
	explicit const_iterator(const HashTable *inTable) : Base(inTable) { }
	const_iterator(const HashTable *inTable, size_type inIndex) : Base(inTable, inIndex) { }
	const_iterator(const const_iterator &inRHS) : Base(inRHS) { }
	const_iterator(const iterator &inIterator) : Base(inIterator.mTable, inIterator.mIndex) { } // Conversion from non-const iterator

	/// Assignment
	const_iterator & operator = (const iterator &inRHS) { this->mTable = inRHS.mTable; this->mIndex = inRHS.mIndex; return *this; }
	const_iterator & operator = (const const_iterator &inRHS) { Base::operator = (inRHS); return *this; }
};
/// Default constructor
HashTable() = default;

/// Copy constructor
HashTable(const HashTable &inRHS)
{
	CopyTable(inRHS);
}

/// Move constructor
HashTable(HashTable &&ioRHS) noexcept :
	mData(ioRHS.mData),
	mControl(ioRHS.mControl),
	mSize(ioRHS.mSize),
	mMaxSize(ioRHS.mMaxSize),
	mLoadLeft(ioRHS.mLoadLeft)
{
	// Leave the source table empty so its destructor won't free our buffers
	ioRHS.mData = nullptr;
	ioRHS.mControl = nullptr;
	ioRHS.mSize = 0;
	ioRHS.mMaxSize = 0;
	ioRHS.mLoadLeft = 0;
}

/// Assignment operator
HashTable & operator = (const HashTable &inRHS)
{
	if (this != &inRHS)
	{
		clear();

		CopyTable(inRHS);
	}
	return *this;
}

/// Move assignment operator
HashTable & operator = (HashTable &&ioRHS) noexcept
{
	if (this != &ioRHS)
	{
		clear();

		// Steal the other table's buffers
		mData = ioRHS.mData;
		mControl = ioRHS.mControl;
		mSize = ioRHS.mSize;
		mMaxSize = ioRHS.mMaxSize;
		mLoadLeft = ioRHS.mLoadLeft;

		// Leave the source table empty so its destructor won't free our buffers
		ioRHS.mData = nullptr;
		ioRHS.mControl = nullptr;
		ioRHS.mSize = 0;
		ioRHS.mMaxSize = 0;
		ioRHS.mLoadLeft = 0;
	}
	return *this;
}

/// Destructor
~HashTable()
{
	clear();
}
/// Reserve memory for a certain number of elements
void reserve(size_type inMaxSize)
{
	// Calculate max size based on load factor (rounded up to the next power of 2, minimum 16 buckets)
	size_type max_size = GetNextPowerOf2(max<uint32>((cMaxLoadFactorDenominator * inMaxSize) / cMaxLoadFactorNumerator, 16));
	if (max_size <= mMaxSize)
		return;
	GrowTable(max_size);
}

/// Destroy the entire hash table
void clear()
{
	// Delete all elements (only buckets marked as used contain a constructed KeyValue)
	if constexpr (!std::is_trivially_destructible<KeyValue>())
		if (!empty())
			for (size_type i = 0; i < mMaxSize; ++i)
				if (mControl[i] & cBucketUsed)
					mData[i].~KeyValue();

	if (mData != nullptr)
	{
		// Free memory (mControl is part of the same allocation, see AllocateTable)
		if constexpr (cNeedsAlignedAllocate)
			AlignedFree(mData);
		else
			Free(mData);

		// Reset members
		mData = nullptr;
		mControl = nullptr;
		mSize = 0;
		mMaxSize = 0;
		mLoadLeft = 0;
	}
}

/// Destroy the entire hash table but keeps the memory allocated
void ClearAndKeepMemory()
{
	// Destruct elements
	if constexpr (!std::is_trivially_destructible<KeyValue>())
		if (!empty())
			for (size_type i = 0; i < mMaxSize; ++i)
				if (mControl[i] & cBucketUsed)
					mData[i].~KeyValue();
	mSize = 0;

	// If there are elements that are not marked cBucketEmpty, we reset them
	size_type max_load = sGetMaxLoad(mMaxSize);
	if (mLoadLeft != max_load)
	{
		// Reset all control bytes
		memset(mControl, cBucketEmpty, mMaxSize + 15);
		mLoadLeft = max_load;
	}
}
/// Iterator to first element
iterator begin()
{
	return iterator(this);
}

/// Iterator to one beyond last element
iterator end()
{
	return iterator(this, mMaxSize);
}

/// Iterator to first element
const_iterator begin() const
{
	return const_iterator(this);
}

/// Iterator to one beyond last element
const_iterator end() const
{
	return const_iterator(this, mMaxSize);
}

/// Iterator to first element
const_iterator cbegin() const
{
	return const_iterator(this);
}

/// Iterator to one beyond last element
const_iterator cend() const
{
	return const_iterator(this, mMaxSize);
}

/// Number of buckets in the table
size_type bucket_count() const
{
	return mMaxSize;
}

/// Max number of buckets that the table can have
constexpr size_type max_bucket_count() const
{
	// Highest power of 2 that fits in size_type
	return size_type(1) << (sizeof(size_type) * 8 - 1);
}

/// Check if there are no elements in the table
bool empty() const
{
	return mSize == 0;
}

/// Number of elements in the table
size_type size() const
{
	return mSize;
}

/// Max number of elements that the table can hold
constexpr size_type max_size() const
{
	return size_type((uint64(max_bucket_count()) * cMaxLoadFactorNumerator) / cMaxLoadFactorDenominator);
}

/// Get the max load factor for this table (max number of elements / number of buckets)
constexpr float max_load_factor() const
{
	return float(cMaxLoadFactorNumerator) / float(cMaxLoadFactorDenominator);
}
/// Insert a new element, returns iterator and if the element was inserted
std::pair<iterator, bool> insert(const value_type &inValue)
{
	size_type index;
	bool inserted = InsertKey(HashTableDetail::sGetKey(inValue), index);
	if (inserted)
		new (mData + index) KeyValue(inValue); // InsertKey only claims the bucket, construct the element here
	return std::make_pair(iterator(this, index), inserted);
}
/// Find an element, returns iterator to element or end() if not found
const_iterator find(const Key &inKey) const
{
	// Check if we have any data
	if (empty())
		return cend();

	// Split hash into index and control value
	size_type index;
	uint8 control;
	GetIndexAndControlValue(inKey, index, control);

	// Linear probing, 16 buckets at a time
	KeyEqual equal;
	size_type bucket_mask = mMaxSize - 1;
	BVec16 control16 = BVec16::sReplicate(control);
	BVec16 bucket_empty = BVec16::sZero();
	for (;;)
	{
		// Read 16 control values
		// (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes)
		BVec16 control_bytes = BVec16::sLoadByte16(mControl + index);

		// Check for the control value we're looking for
		// Note that when deleting we can create empty buckets instead of deleted buckets.
		// This means we must unconditionally check all buckets in this batch for equality
		// (also beyond the first empty bucket).
		uint32 control_equal = uint32(BVec16::sEquals(control_bytes, control16).GetTrues());

		// Index within the 16 buckets
		size_type local_index = index;

		// Loop while there's still buckets to process
		while (control_equal != 0)
		{
			// Get the first equal bucket
			uint first_equal = CountTrailingZeros(control_equal);

			// Skip to the bucket
			local_index += first_equal;

			// Make sure that our index is not beyond the end of the table
			local_index &= bucket_mask;

			// We found a bucket with same control value, compare the actual keys
			if (equal(HashTableDetail::sGetKey(mData[local_index]), inKey))
			{
				// Element found
				return const_iterator(this, local_index);
			}

			// Skip past this bucket
			control_equal >>= first_equal + 1;
			local_index++;
		}

		// Check for empty buckets
		uint32 control_empty = uint32(BVec16::sEquals(control_bytes, bucket_empty).GetTrues());
		if (control_empty != 0)
		{
			// An empty bucket was found, we didn't find the element
			return cend();
		}

		// Move to next batch of 16 buckets
		index = (index + 16) & bucket_mask;
	}
}
/// @brief Erase an element by iterator
void erase(const const_iterator &inIterator)
{
	JPH_ASSERT(inIterator.IsValid());

	// Read 16 control values before and after the current index
	// (note that we added 15 bytes at the end of the control values that mirror the first 15 bytes)
	BVec16 control_bytes_before = BVec16::sLoadByte16(mControl + ((inIterator.mIndex - 16) & (mMaxSize - 1)));
	BVec16 control_bytes_after = BVec16::sLoadByte16(mControl + inIterator.mIndex);
	BVec16 bucket_empty = BVec16::sZero();
	uint32 control_empty_before = uint32(BVec16::sEquals(control_bytes_before, bucket_empty).GetTrues());
	uint32 control_empty_after = uint32(BVec16::sEquals(control_bytes_after, bucket_empty).GetTrues());

	// If (this index including) there exist 16 consecutive non-empty slots (represented by a bit being 0) then
	// a probe looking for some element needs to continue probing so we cannot mark the bucket as empty
	// but must mark it as deleted instead.
	// Note that we use: CountLeadingZeros(uint16) = CountLeadingZeros(uint32) - 16.
	uint8 control_value = CountLeadingZeros(control_empty_before) - 16 + CountTrailingZeros(control_empty_after) < 16? cBucketEmpty : cBucketDeleted;

	// Mark the bucket as empty/deleted
	SetControlValue(inIterator.mIndex, control_value);

	// Destruct the element
	mData[inIterator.mIndex].~KeyValue();

	// If we marked the bucket as empty we can increase the load left
	// (a deleted bucket / tombstone still counts towards the load)
	if (control_value == cBucketEmpty)
		++mLoadLeft;

	// Decrease size
	--mSize;
}

/// @brief Erase an element by key, returns the number of elements erased (0 or 1)
size_type erase(const Key &inKey)
{
	const_iterator it = find(inKey);
	if (it == cend())
		return 0;
	erase(it);
	return 1;
}
/// Swap the contents of two hash tables
void swap(HashTable &ioRHS) noexcept
{
	// Member-wise swap of the buffers and bookkeeping, no elements are moved
	std::swap(mData, ioRHS.mData);
	std::swap(mControl, ioRHS.mControl);
	std::swap(mSize, ioRHS.mSize);
	std::swap(mMaxSize, ioRHS.mMaxSize);
	std::swap(mLoadLeft, ioRHS.mLoadLeft);
}
/// In place re-hashing of all elements in the table. Removes all cBucketDeleted elements
/// The std version takes a bucket count, but we just re-hash to the same size.
void rehash(size_type)
{
	// Update the control value for all buckets
	for (size_type i = 0; i < mMaxSize; ++i)
	{
		uint8 &control = mControl[i];
		switch (control)
		{
		case cBucketDeleted:
			// Deleted buckets become empty
			control = cBucketEmpty;
			break;
		case cBucketEmpty:
			// Remains empty
			break;
		default:
			// Mark all occupied as deleted, to indicate it needs to move to the correct place
			control = cBucketDeleted;
			break;
		}
	}

	// Replicate control values to the last 15 entries
	for (size_type i = 0; i < 15; ++i)
		mControl[mMaxSize + i] = mControl[i];

	// Loop over all elements that have been 'deleted' and move them to their new spot
	BVec16 bucket_used = BVec16::sReplicate(cBucketUsed);
	size_type bucket_mask = mMaxSize - 1;
	uint32 probe_mask = bucket_mask & ~uint32(0b1111); // Mask out lower 4 bits because we test 16 buckets at a time
	for (size_type src = 0; src < mMaxSize; ++src)
		if (mControl[src] == cBucketDeleted)
			for (;;)
			{
				// Split hash into index and control value
				size_type src_index;
				uint8 src_control;
				GetIndexAndControlValue(HashTableDetail::sGetKey(mData[src]), src_index, src_control);

				// Linear probing for the first bucket that is not in use
				size_type dst = src_index;
				for (;;)
				{
					// Check if any buckets are free
					BVec16 control_bytes = BVec16::sLoadByte16(mControl + dst);
					uint32 control_free = uint32(BVec16::sAnd(control_bytes, bucket_used).GetTrues()) ^ 0xffff;
					if (control_free != 0)
					{
						// Select this bucket as destination
						dst += CountTrailingZeros(control_free);
						dst &= bucket_mask;
						break;
					}

					// Move to next batch of 16 buckets
					dst = (dst + 16) & bucket_mask;
				}

				// Check if we stay in the same probe group
				if (((dst - src_index) & probe_mask) == ((src - src_index) & probe_mask))
				{
					// We stay in the same group, we can stay where we are
					SetControlValue(src, src_control);
					break;
				}
				else if (mControl[dst] == cBucketEmpty)
				{
					// There's an empty bucket, move us there
					SetControlValue(dst, src_control);
					SetControlValue(src, cBucketEmpty);
					new (mData + dst) KeyValue(std::move(mData[src]));
					mData[src].~KeyValue();
					break;
				}
				else
				{
					// There's an element in the bucket we want to move to, swap them
					JPH_ASSERT(mControl[dst] == cBucketDeleted);
					SetControlValue(dst, src_control);
					std::swap(mData[src], mData[dst]);

					// Iterate again with the same source bucket
				}
			}

	// Reinitialize load left (all tombstones have been removed)
	mLoadLeft = sGetMaxLoad(mMaxSize) - mSize;
}
private:
/// If this allocator needs to fall back to aligned allocations because the type requires it
static constexpr bool cNeedsAlignedAllocate = alignof(KeyValue) > (JPH_CPU_ADDRESS_BITS == 32? 8 : 16);

/// Max load factor is cMaxLoadFactorNumerator / cMaxLoadFactorDenominator (7/8)
static constexpr uint64 cMaxLoadFactorNumerator = 7;
static constexpr uint64 cMaxLoadFactorDenominator = 8;

/// If we can recover this fraction of deleted elements, we'll reshuffle the buckets in place rather than growing the table
static constexpr uint64 cMaxDeletedElementsNumerator = 1;
static constexpr uint64 cMaxDeletedElementsDenominator = 8;

/// Values that the control bytes can have
static constexpr uint8 cBucketEmpty = 0;
static constexpr uint8 cBucketDeleted = 0x7f;
static constexpr uint8 cBucketUsed = 0x80; // Lowest 7 bits are lowest 7 bits of the hash value

/// The buckets, an array of size mMaxSize
KeyValue * mData = nullptr;

/// Control bytes, an array of size mMaxSize + 15 (the 15 extra bytes mirror the first 15 so 16 byte reads never wrap)
uint8 * mControl = nullptr;

/// Number of elements in the table
size_type mSize = 0;

/// Max number of elements that can be stored in the table, always a power of 2 (index math uses mMaxSize - 1 as mask)
size_type mMaxSize = 0;

/// Number of elements we can add to the table before we need to grow
size_type mLoadLeft = 0;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,58 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2022 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Sorts [inBegin, inEnd) in place using insertion sort, ordering elements with inCompare.
/// Stable, O(n^2) worst case; efficient for small or nearly-sorted ranges.
template <typename Iterator, typename Compare>
inline void InsertionSort(Iterator inBegin, Iterator inEnd, Compare inCompare)
{
    // An empty range is already sorted
    if (inBegin == inEnd)
        return;

    // Invariant: [inBegin, cur) is sorted at the start of each iteration
    for (Iterator cur = inBegin + 1; cur != inEnd; ++cur)
    {
        // Take the current element out so the elements before it can be shifted over its slot
        auto value = std::move(*cur);

        if (inCompare(value, *inBegin))
        {
            // value goes before the first element; we cannot walk an iterator before inBegin,
            // so handle this case by shifting the entire sorted prefix one slot to the right
            Iterator dst = cur;
            while (dst != inBegin)
            {
                Iterator src = dst - 1;
                *dst = *src;
                dst = src;
            }

            // Put value at the front
            *inBegin = std::move(value);
        }
        else
        {
            // Shift elements right while they compare greater than value
            Iterator dst = cur;
            Iterator src = dst - 1;
            while (inCompare(value, *src))
            {
                *dst = std::move(*src);
                dst = src;
                --src;
            }

            // Drop value into the gap
            *dst = std::move(value);
        }
    }
}
/// Convenience overload: sorts [inBegin, inEnd) ascending using operator <.
template <typename Iterator>
inline void InsertionSort(Iterator inBegin, Iterator inEnd)
{
    // Forward to the comparator version with a default transparent less-than
    InsertionSort(inBegin, inEnd, std::less<>());
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,27 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
JPH_NAMESPACE_BEGIN
/// Default trace implementation: fires an assert to signal that the application should
/// install its own handler (by assigning to Trace) before any trace output is expected.
static void DummyTrace([[maybe_unused]] const char *inFMT, ...)
{
    JPH_ASSERT(false);
};

/// Global trace hook; points at the dummy until the application overrides it
TraceFunction Trace = DummyTrace;
#ifdef JPH_ENABLE_ASSERTS
/// Default assert handler: performs no logging, just requests a breakpoint by returning true.
/// Applications can override by assigning to AssertFailed.
static bool DummyAssertFailed(const char *inExpression, const char *inMessage, const char *inFile, uint inLine)
{
    return true; // Trigger breakpoint
};

/// Global assert handler; points at the dummy until the application overrides it
AssertFailedFunction AssertFailed = DummyAssertFailed;
#endif // JPH_ENABLE_ASSERTS
JPH_NAMESPACE_END

View File

@@ -0,0 +1,38 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Trace function, needs to be overridden by application. This should output a line of text to the log / TTY.
using TraceFunction = void (*)(const char *inFMT, ...);
JPH_EXPORT extern TraceFunction Trace;
// Always turn on asserts in Debug mode
#if defined(JPH_DEBUG) && !defined(JPH_ENABLE_ASSERTS)
#define JPH_ENABLE_ASSERTS
#endif
#ifdef JPH_ENABLE_ASSERTS
/// Function called when an assertion fails. This function should return true if a breakpoint needs to be triggered
using AssertFailedFunction = bool(*)(const char *inExpression, const char *inMessage, const char *inFile, uint inLine);
JPH_EXPORT extern AssertFailedFunction AssertFailed;
// Helper functions to pass message on to failed function

/// Sentinel type appended by the JPH_ASSERT macro to terminate its variadic argument list
struct AssertLastParam { };

/// Overload selected when JPH_ASSERT is invoked without a message: forwards a null message
inline bool AssertFailedParamHelper(const char *inExpression, const char *inFile, uint inLine, AssertLastParam) { return AssertFailed(inExpression, nullptr, inFile, inLine); }

/// Overload selected when JPH_ASSERT is invoked with a message string
inline bool AssertFailedParamHelper(const char *inExpression, const char *inFile, uint inLine, const char *inMessage, AssertLastParam) { return AssertFailed(inExpression, inMessage, inFile, inLine); }
/// Main assert macro, usage: JPH_ASSERT(condition, message) or JPH_ASSERT(condition)
#define JPH_ASSERT(inExpression, ...) do { if (!(inExpression) && AssertFailedParamHelper(#inExpression, __FILE__, JPH::uint(__LINE__), ##__VA_ARGS__, JPH::AssertLastParam())) JPH_BREAKPOINT; } while (false)
#define JPH_IF_ENABLE_ASSERTS(...) __VA_ARGS__
#else
#define JPH_ASSERT(...) ((void)0)
#define JPH_IF_ENABLE_ASSERTS(...)
#endif // JPH_ENABLE_ASSERTS
JPH_NAMESPACE_END

View File

@@ -0,0 +1,311 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Reference.h>
#include <Jolt/Core/Color.h>
#include <Jolt/Core/Profiler.h>
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Core/StaticArray.h>
#include <Jolt/Core/Atomics.h>
JPH_NAMESPACE_BEGIN
/// A class that allows units of work (Jobs) to be scheduled across multiple threads.
/// It allows dependencies between the jobs so that the jobs form a graph.
///
/// The pattern for using this class is:
///
/// // Create job system
/// JobSystem *job_system = new JobSystemThreadPool(...);
///
/// // Create some jobs
/// JobHandle second_job = job_system->CreateJob("SecondJob", Color::sRed, []() { ... }, 1); // Create a job with 1 dependency
/// JobHandle first_job = job_system->CreateJob("FirstJob", Color::sGreen, [second_job]() { ....; second_job.RemoveDependency(); }, 0); // Job can start immediately, will start second job when it's done
/// JobHandle third_job = job_system->CreateJob("ThirdJob", Color::sBlue, []() { ... }, 0); // This job can run immediately as well and can run in parallel to job 1 and 2
///
/// // Add the jobs to the barrier so that we can execute them while we're waiting
/// Barrier *barrier = job_system->CreateBarrier();
/// barrier->AddJob(first_job);
/// barrier->AddJob(second_job);
/// barrier->AddJob(third_job);
/// job_system->WaitForJobs(barrier);
///
/// // Clean up
/// job_system->DestroyBarrier(barrier);
/// delete job_system;
///
/// Jobs are guaranteed to be started in the order that their dependency counter becomes zero (in case they're scheduled on a background thread)
/// or in the order they're added to the barrier (when dependency count is zero and when executing on the thread that calls WaitForJobs).
///
/// If you want to implement your own job system, inherit from JobSystem and implement:
///
/// * JobSystem::GetMaxConcurrency - This should return the maximum number of jobs that can run in parallel.
/// * JobSystem::CreateJob - This should create a Job object and return it to the caller.
/// * JobSystem::FreeJob - This should free the memory associated with the job object. It is called by the Job destructor when it is Release()-ed for the last time.
/// * JobSystem::QueueJob/QueueJobs - These should store the job pointer in an internal queue to run immediately (dependencies are tracked internally, this function is called when the job can run).
/// The Job objects are reference counted and are guaranteed to stay alive during the QueueJob(s) call. If you store the job in your own data structure you need to call AddRef() to take a reference.
/// After the job has been executed you need to call Release() to release the reference. Make sure you no longer dereference the job pointer after calling Release().
///
/// JobSystem::Barrier is used to track the completion of a set of jobs. Jobs will be created by other jobs and added to the barrier while it is being waited on. This means that you cannot
/// create a dependency graph beforehand as the graph changes while jobs are running. Implement the following functions:
///
/// * Barrier::AddJob/AddJobs - Add a job to the barrier, any call to WaitForJobs will now also wait for this job to complete.
/// If you store the job in a data structure in the Barrier you need to call AddRef() on the job to keep it alive and Release() after you're done with it.
/// * Barrier::OnJobFinished - This function is called when a job has finished executing, you can use this to track completion and remove the job from the list of jobs to wait on.
///
/// The functions on JobSystem that need to be implemented to support barriers are:
///
/// * JobSystem::CreateBarrier - Create a new barrier.
/// * JobSystem::DestroyBarrier - Destroy a barrier.
/// * JobSystem::WaitForJobs - This is the main function that is used to wait for all jobs that have been added to a Barrier. WaitForJobs can execute jobs that have
/// been added to the barrier while waiting. It is not wise to execute other jobs that touch physics structures as this can cause race conditions and deadlocks. Please keep in mind that the barrier is
/// only intended to wait on the completion of the Jolt jobs added to it, if you scheduled any jobs in your engine's job system to execute the Jolt jobs as part of QueueJob/QueueJobs, you might still need
/// to wait for these in this function after the barrier is finished waiting.
///
/// An example implementation is JobSystemThreadPool. If you don't want to write the Barrier class you can also inherit from JobSystemWithBarrier.
class JPH_EXPORT JobSystem : public NonCopyable
{
protected:
    class Job;

public:
    JPH_OVERRIDE_NEW_DELETE

    /// A job handle contains a reference to a job. The job will be deleted as soon as there are no JobHandles.
    /// referring to the job and when it is not in the job queue / being processed.
    class JobHandle : private Ref<Job>
    {
    public:
        /// Constructor
        inline              JobHandle() = default;
        inline              JobHandle(const JobHandle &inHandle) = default;
        inline              JobHandle(JobHandle &&inHandle) noexcept : Ref<Job>(std::move(inHandle)) { }

        /// Constructor, only to be used by JobSystem
        inline explicit     JobHandle(Job *inJob) : Ref<Job>(inJob) { }

        /// Assignment
        inline JobHandle &  operator = (const JobHandle &inHandle) = default;
        inline JobHandle &  operator = (JobHandle &&inHandle) noexcept = default;

        /// Check if this handle contains a job
        inline bool         IsValid() const { return GetPtr() != nullptr; }

        /// Check if this job has finished executing
        inline bool         IsDone() const { return GetPtr() != nullptr && GetPtr()->IsDone(); }

        /// Add to the dependency counter.
        inline void         AddDependency(int inCount = 1) const { GetPtr()->AddDependency(inCount); }

        /// Remove from the dependency counter. Job will start whenever the dependency counter reaches zero
        /// and if it does it is no longer valid to call the AddDependency/RemoveDependency functions.
        inline void         RemoveDependency(int inCount = 1) const { GetPtr()->RemoveDependencyAndQueue(inCount); }

        /// Remove a dependency from a batch of jobs at once, this can be more efficient than removing them one by one as it requires less locking
        static inline void  sRemoveDependencies(const JobHandle *inHandles, uint inNumHandles, int inCount = 1);

        /// Helper function to remove dependencies on a static array of job handles
        template <uint N>
        static inline void  sRemoveDependencies(StaticArray<JobHandle, N> &inHandles, int inCount = 1)
        {
            sRemoveDependencies(inHandles.data(), inHandles.size(), inCount);
        }

        /// Inherit the GetPtr function, only to be used by the JobSystem
        using Ref<Job>::GetPtr;
    };

    /// A job barrier keeps track of a number of jobs and allows waiting until they are all completed.
    class Barrier : public NonCopyable
    {
    public:
        JPH_OVERRIDE_NEW_DELETE

        /// Add a job to this barrier
        /// Note that jobs can keep being added to the barrier while waiting for the barrier
        virtual void        AddJob(const JobHandle &inJob) = 0;

        /// Add multiple jobs to this barrier
        /// Note that jobs can keep being added to the barrier while waiting for the barrier
        virtual void        AddJobs(const JobHandle *inHandles, uint inNumHandles) = 0;

    protected:
        /// Job needs to be able to call OnJobFinished
        friend class Job;

        /// Destructor, you should call JobSystem::DestroyBarrier instead of destructing this object directly
        virtual             ~Barrier() = default;

        /// Called by a Job to mark that it is finished
        virtual void        OnJobFinished(Job *inJob) = 0;
    };

    /// Main function of the job
    using JobFunction = function<void()>;

    /// Destructor
    virtual                 ~JobSystem() = default;

    /// Get maximum number of concurrently executing jobs
    virtual int             GetMaxConcurrency() const = 0;

    /// Create a new job, the job is started immediately if inNumDependencies == 0 otherwise it starts when
    /// RemoveDependency causes the dependency counter to reach 0.
    virtual JobHandle       CreateJob(const char *inName, ColorArg inColor, const JobFunction &inJobFunction, uint32 inNumDependencies = 0) = 0;

    /// Create a new barrier, used to wait on jobs
    virtual Barrier *       CreateBarrier() = 0;

    /// Destroy a barrier when it is no longer used. The barrier should be empty at this point.
    virtual void            DestroyBarrier(Barrier *inBarrier) = 0;

    /// Wait for a set of jobs to be finished, note that only 1 thread can be waiting on a barrier at a time
    virtual void            WaitForJobs(Barrier *inBarrier) = 0;

protected:
    /// A class that contains information for a single unit of work
    class Job
    {
    public:
        JPH_OVERRIDE_NEW_DELETE

        /// Constructor
                            Job([[maybe_unused]] const char *inJobName, [[maybe_unused]] ColorArg inColor, JobSystem *inJobSystem, const JobFunction &inJobFunction, uint32 inNumDependencies) :
        #if defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)
            mJobName(inJobName),
            mColor(inColor),
        #endif // defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)
            mJobSystem(inJobSystem),
            mJobFunction(inJobFunction),
            mNumDependencies(inNumDependencies)
        {
        }

        /// Get the jobs system to which this job belongs
        inline JobSystem *  GetJobSystem() { return mJobSystem; }

        /// Add or release a reference to this object
        inline void         AddRef()
        {
            // Adding a reference can use relaxed memory ordering
            mReferenceCount.fetch_add(1, memory_order_relaxed);
        }
        inline void         Release()
        {
        #ifndef JPH_TSAN_ENABLED
            // Releasing a reference must use release semantics...
            if (mReferenceCount.fetch_sub(1, memory_order_release) == 1)
            {
                // ... so that we can use acquire to ensure that we see any updates from other threads that released a ref before freeing the job
                atomic_thread_fence(memory_order_acquire);
                mJobSystem->FreeJob(this);
            }
        #else
            // But under TSAN, we cannot use atomic_thread_fence, so we use an acq_rel operation unconditionally instead
            if (mReferenceCount.fetch_sub(1, memory_order_acq_rel) == 1)
                mJobSystem->FreeJob(this);
        #endif
        }

        /// Add to the dependency counter.
        inline void         AddDependency(int inCount);

        /// Remove from the dependency counter. Returns true whenever the dependency counter reaches zero
        /// and if it does it is no longer valid to call the AddDependency/RemoveDependency functions.
        inline bool         RemoveDependency(int inCount);

        /// Remove from the dependency counter. Job will be queued whenever the dependency counter reaches zero
        /// and if it does it is no longer valid to call the AddDependency/RemoveDependency functions.
        inline void         RemoveDependencyAndQueue(int inCount);

        /// Set the job barrier that this job belongs to and returns false if this was not possible because the job already finished
        inline bool         SetBarrier(Barrier *inBarrier)
        {
            // mBarrier stores either 0 (no barrier), a Barrier pointer, or cBarrierDoneState (job finished)
            intptr_t barrier = 0;
            if (mBarrier.compare_exchange_strong(barrier, reinterpret_cast<intptr_t>(inBarrier), memory_order_relaxed))
                return true;
            JPH_ASSERT(barrier == cBarrierDoneState, "A job can only belong to 1 barrier");
            return false;
        }

        /// Run the job function, returns the number of dependencies that this job still has or cExecutingState or cDoneState
        inline uint32       Execute()
        {
            // Transition job to executing state
            uint32 state = 0; // We can only start running with a dependency counter of 0
            if (!mNumDependencies.compare_exchange_strong(state, cExecutingState, memory_order_acquire))
                return state; // state is updated by compare_exchange_strong to the current value

            // Run the job function
            {
                JPH_PROFILE(mJobName, mColor.GetUInt32());
                mJobFunction();
            }

            // Fetch the barrier pointer and exchange it for the done state, so we're sure that no barrier gets set after we want to call the callback
            intptr_t barrier = mBarrier.load(memory_order_relaxed);
            for (;;)
            {
                if (mBarrier.compare_exchange_weak(barrier, cBarrierDoneState, memory_order_relaxed))
                    break;
            }
            JPH_ASSERT(barrier != cBarrierDoneState);

            // Mark job as done
            state = cExecutingState;
            mNumDependencies.compare_exchange_strong(state, cDoneState, memory_order_relaxed);
            JPH_ASSERT(state == cExecutingState);

            // Notify the barrier after we've changed the job to the done state so that any thread reading the state after receiving the callback will see that the job has finished
            if (barrier != 0)
                reinterpret_cast<Barrier *>(barrier)->OnJobFinished(this);

            return cDoneState;
        }

        /// Test if the job can be executed
        inline bool         CanBeExecuted() const { return mNumDependencies.load(memory_order_relaxed) == 0; }

        /// Test if the job finished executing
        inline bool         IsDone() const { return mNumDependencies.load(memory_order_relaxed) == cDoneState; }

    #if defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)
        /// Get the name of the job
        const char *        GetName() const { return mJobName; }
    #endif // defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)

        static constexpr uint32 cExecutingState = 0xe0e0e0e0; ///< Value of mNumDependencies when job is executing
        static constexpr uint32 cDoneState = 0xd0d0d0d0; ///< Value of mNumDependencies when job is done executing

        static constexpr intptr_t cBarrierDoneState = ~intptr_t(0); ///< Value to use when the barrier has been triggered

    private:
    #if defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)
        const char *        mJobName; ///< Name of the job
        Color               mColor; ///< Color of the job in the profiler
    #endif // defined(JPH_EXTERNAL_PROFILE) || defined(JPH_PROFILE_ENABLED)
        JobSystem *         mJobSystem; ///< The job system we belong to
        atomic<intptr_t>    mBarrier = 0; ///< Barrier that this job is associated with (is a Barrier pointer)
        JobFunction         mJobFunction; ///< Main job function
        atomic<uint32>      mReferenceCount = 0; ///< Amount of JobHandles pointing to this job
        atomic<uint32>      mNumDependencies; ///< Amount of jobs that need to complete before this job can run
    };

    /// Adds a job to the job queue
    virtual void            QueueJob(Job *inJob) = 0;

    /// Adds a number of jobs at once to the job queue
    virtual void            QueueJobs(Job **inJobs, uint inNumJobs) = 0;

    /// Frees a job
    virtual void            FreeJob(Job *inJob) = 0;
};
using JobHandle = JobSystem::JobHandle;
JPH_NAMESPACE_END
#include "JobSystem.inl"

View File

@@ -0,0 +1,56 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
JPH_NAMESPACE_BEGIN
void JobSystem::Job::AddDependency(int inCount)
{
    // Relaxed ordering is sufficient: raising the counter only delays execution and publishes no data
    JPH_IF_ENABLE_ASSERTS(uint32 old_value =) mNumDependencies.fetch_add(inCount, memory_order_relaxed);
    JPH_ASSERT(old_value > 0 && old_value != cExecutingState && old_value != cDoneState, "Job is queued, running or done, it is not allowed to add a dependency to a running job");
}
bool JobSystem::Job::RemoveDependency(int inCount)
{
    // Release ordering so that writes done before removing the last dependency are visible
    // to the thread that observes the counter reaching zero and executes the job
    uint32 old_value = mNumDependencies.fetch_sub(inCount, memory_order_release);
    JPH_ASSERT(old_value != cExecutingState && old_value != cDoneState, "Job is running or done, it is not allowed to add a dependency to a running job");
    uint32 new_value = old_value - inCount;
    JPH_ASSERT(old_value > new_value, "Test wrap around, this is a logic error");
    return new_value == 0;
}
void JobSystem::Job::RemoveDependencyAndQueue(int inCount)
{
    // Queue the job as soon as its last dependency is gone
    if (RemoveDependency(inCount))
        mJobSystem->QueueJob(this);
}
// Removes inCount dependencies from every handle in inHandles and queues all jobs that
// become runnable in a single batch, which is cheaper than queueing them one at a time.
void JobSystem::JobHandle::sRemoveDependencies(const JobHandle *inHandles, uint inNumHandles, int inCount)
{
    JPH_PROFILE_FUNCTION();

    JPH_ASSERT(inNumHandles > 0);

    // Get the job system, all jobs should be part of the same job system
    JobSystem *job_system = inHandles->GetPtr()->GetJobSystem();

    // Allocate a buffer to store the jobs that need to be queued (stack allocation, freed on return)
    Job **jobs_to_queue = (Job **)JPH_STACK_ALLOC(inNumHandles * sizeof(Job *));
    Job **next_job = jobs_to_queue;

    // Remove the dependencies on all jobs
    for (const JobHandle *handle = inHandles, *handle_end = inHandles + inNumHandles; handle < handle_end; ++handle)
    {
        Job *job = handle->GetPtr();
        JPH_ASSERT(job->GetJobSystem() == job_system); // All jobs should belong to the same job system
        if (job->RemoveDependency(inCount))
            *(next_job++) = job;
    }

    // If any jobs need to be scheduled, schedule them as a batch
    uint num_jobs_to_queue = uint(next_job - jobs_to_queue);
    if (num_jobs_to_queue != 0)
        job_system->QueueJobs(jobs_to_queue, num_jobs_to_queue);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,65 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/JobSystemSingleThreaded.h>
JPH_NAMESPACE_BEGIN
// Pre-allocates the fixed size free list that holds the job objects.
// @param inMaxJobs Max number of jobs that can exist at any one time
void JobSystemSingleThreaded::Init(uint inMaxJobs)
{
    mJobs.Init(inMaxJobs, inMaxJobs);
}
// Creates a job; since this job system is single threaded, a job without dependencies
// executes synchronously inside the QueueJob call below and is already done on return.
JobHandle JobSystemSingleThreaded::CreateJob(const char *inJobName, ColorArg inColor, const JobFunction &inJobFunction, uint32 inNumDependencies)
{
    // Construct an object
    uint32 index = mJobs.ConstructObject(inJobName, inColor, this, inJobFunction, inNumDependencies);
    JPH_ASSERT(index != AvailableJobs::cInvalidObjectIndex);
    Job *job = &mJobs.Get(index);

    // Construct handle to keep a reference, the job is queued below and will immediately complete
    JobHandle handle(job);

    // If there are no dependencies, queue the job now
    if (inNumDependencies == 0)
        QueueJob(job);

    // Return the handle
    return handle;
}
// Returns the job's storage to the free list (called when the last reference is released)
void JobSystemSingleThreaded::FreeJob(Job *inJob)
{
    mJobs.DestructObject(inJob);
}
// There is no queue: the job runs immediately on the calling thread
void JobSystemSingleThreaded::QueueJob(Job *inJob)
{
    inJob->Execute();
}
// Jobs execute inline on the calling thread, so a batch is just each job queued in turn
void JobSystemSingleThreaded::QueueJobs(Job **inJobs, uint inNumJobs)
{
    for (Job **job = inJobs, **job_end = inJobs + inNumJobs; job < job_end; ++job)
        QueueJob(*job);
}
// All jobs finish synchronously, so a single shared do-nothing barrier suffices
JobSystem::Barrier *JobSystemSingleThreaded::CreateBarrier()
{
    return &mDummyBarrier;
}
void JobSystemSingleThreaded::DestroyBarrier(Barrier *inBarrier)
{
    // There's nothing to do here, the barrier is just a dummy
}
void JobSystemSingleThreaded::WaitForJobs(Barrier *inBarrier)
{
    // There's nothing to do here, the barrier is just a dummy, we just execute the jobs immediately
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,62 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/JobSystem.h>
#include <Jolt/Core/FixedSizeFreeList.h>
JPH_NAMESPACE_BEGIN
/// Implementation of a JobSystem without threads, runs jobs as soon as they are added
/// Implementation of a JobSystem without threads, runs jobs as soon as they are added
class JPH_EXPORT JobSystemSingleThreaded final : public JobSystem
{
public:
    JPH_OVERRIDE_NEW_DELETE

    /// Constructor
                            JobSystemSingleThreaded() = default;
    explicit                JobSystemSingleThreaded(uint inMaxJobs) { Init(inMaxJobs); }

    /// Initialize the job system
    /// @param inMaxJobs Max number of jobs that can be allocated at any time
    void                    Init(uint inMaxJobs);

    // See JobSystem
    virtual int             GetMaxConcurrency() const override { return 1; } // Jobs run on the calling thread only
    virtual JobHandle       CreateJob(const char *inName, ColorArg inColor, const JobFunction &inJobFunction, uint32 inNumDependencies = 0) override;
    virtual Barrier *       CreateBarrier() override;
    virtual void            DestroyBarrier(Barrier *inBarrier) override;
    virtual void            WaitForJobs(Barrier *inBarrier) override;

protected:
    // Dummy implementation of Barrier, all jobs are executed immediately
    class BarrierImpl : public Barrier
    {
    public:
        JPH_OVERRIDE_NEW_DELETE

        // See Barrier
        virtual void        AddJob(const JobHandle &inJob) override { /* We don't need to track jobs */ }
        virtual void        AddJobs(const JobHandle *inHandles, uint inNumHandles) override { /* We don't need to track jobs */ }

    protected:
        /// Called by a Job to mark that it is finished
        virtual void        OnJobFinished(Job *inJob) override { /* We don't need to track jobs */ }
    };

    // See JobSystem
    virtual void            QueueJob(Job *inJob) override;
    virtual void            QueueJobs(Job **inJobs, uint inNumJobs) override;
    virtual void            FreeJob(Job *inJob) override;

    /// Shared barrier since the barrier implementation does nothing
    BarrierImpl             mDummyBarrier;

    /// Array of jobs (fixed size)
    using AvailableJobs = FixedSizeFreeList<Job>;
    AvailableJobs           mJobs;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,364 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/JobSystemThreadPool.h>
#include <Jolt/Core/Profiler.h>
#include <Jolt/Core/FPException.h>
#ifdef JPH_PLATFORM_WINDOWS
JPH_SUPPRESS_WARNING_PUSH
JPH_MSVC_SUPPRESS_WARNING(5039) // winbase.h(13179): warning C5039: 'TpSetCallbackCleanupGroup': pointer or reference to potentially throwing function passed to 'extern "C"' function under -EHc. Undefined behavior may occur if this function throws an exception.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef JPH_COMPILER_MINGW
#include <Windows.h>
#else
#include <windows.h>
#endif
JPH_SUPPRESS_WARNING_POP
#endif
#ifdef JPH_PLATFORM_LINUX
#include <sys/prctl.h>
#endif
JPH_NAMESPACE_BEGIN
// Initializes barriers, the job free list, the lock-free job queue and the worker threads.
// @param inMaxJobs Max number of jobs that can be allocated at any time
// @param inMaxBarriers Max number of barriers that can be allocated at any time
// @param inNumThreads Number of worker threads to start (-1 = autodetect)
void JobSystemThreadPool::Init(uint inMaxJobs, uint inMaxBarriers, int inNumThreads)
{
    JobSystemWithBarrier::Init(inMaxBarriers);

    // Init freelist of jobs
    mJobs.Init(inMaxJobs, inMaxJobs);

    // Init queue
    for (atomic<Job *> &j : mQueue)
        j = nullptr;

    // Start the worker threads
    StartThreads(inNumThreads);
}
// Convenience constructor, forwards to Init
JobSystemThreadPool::JobSystemThreadPool(uint inMaxJobs, uint inMaxBarriers, int inNumThreads)
{
    Init(inMaxJobs, inMaxBarriers, inNumThreads);
}
void JobSystemThreadPool::StartThreads([[maybe_unused]] int inNumThreads)
{
#if !defined(JPH_CPU_WASM) || defined(__EMSCRIPTEN_PTHREADS__) // If we're running without threads support we cannot create threads and we ignore the inNumThreads parameter
// Auto detect number of threads
if (inNumThreads < 0)
inNumThreads = thread::hardware_concurrency() - 1;
// If no threads are requested we're done
if (inNumThreads == 0)
return;
// Don't quit the threads
mQuit = false;
// Allocate heads
mHeads = reinterpret_cast<atomic<uint> *>(Allocate(sizeof(atomic<uint>) * inNumThreads));
for (int i = 0; i < inNumThreads; ++i)
mHeads[i] = 0;
// Start running threads
JPH_ASSERT(mThreads.empty());
mThreads.reserve(inNumThreads);
for (int i = 0; i < inNumThreads; ++i)
mThreads.emplace_back([this, i] { ThreadMain(i); });
#endif
}
JobSystemThreadPool::~JobSystemThreadPool()
{
    // Stop all worker threads
    StopThreads();
}
// Signals all worker threads to quit, joins them, drains any jobs still in the queue on
// the calling thread, then frees the per-thread head array and resets the queue tail.
void JobSystemThreadPool::StopThreads()
{
    if (mThreads.empty())
        return;

    // Signal threads that we want to stop and wake them up
    mQuit = true;
    mSemaphore.Release((uint)mThreads.size());

    // Wait for all threads to finish
    for (thread &t : mThreads)
        if (t.joinable())
            t.join();

    // Delete all threads
    mThreads.clear();

    // Ensure that there are no lingering jobs in the queue
    for (uint head = 0; head != mTail; ++head)
    {
        // Fetch job
        Job *job_ptr = mQueue[head & (cQueueLength - 1)].exchange(nullptr);
        if (job_ptr != nullptr)
        {
            // And execute it
            job_ptr->Execute();
            job_ptr->Release();
        }
    }

    // Destroy heads and reset tail
    Free(mHeads);
    mHeads = nullptr;
    mTail = 0;
}
// Allocates a job from the free list (busy-waiting if the list is exhausted, which indicates
// inMaxJobs was set too low) and queues it immediately when it has no dependencies.
JobHandle JobSystemThreadPool::CreateJob(const char *inJobName, ColorArg inColor, const JobFunction &inJobFunction, uint32 inNumDependencies)
{
    JPH_PROFILE_FUNCTION();

    // Loop until we can get a job from the free list
    uint32 index;
    for (;;)
    {
        index = mJobs.ConstructObject(inJobName, inColor, this, inJobFunction, inNumDependencies);
        if (index != AvailableJobs::cInvalidObjectIndex)
            break;
        JPH_ASSERT(false, "No jobs available!");
        // Back off and retry: worker threads will free jobs as they complete
        std::this_thread::sleep_for(std::chrono::microseconds(100));
    }
    Job *job = &mJobs.Get(index);

    // Construct handle to keep a reference, the job is queued below and may immediately complete
    JobHandle handle(job);

    // If there are no dependencies, queue the job now
    if (inNumDependencies == 0)
        QueueJob(job);

    // Return the handle
    return handle;
}
// Returns the job's storage to the free list (called when the last reference is released)
void JobSystemThreadPool::FreeJob(Job *inJob)
{
    mJobs.DestructObject(inJob);
}
// Computes the logical head of the ring buffer: the minimum head over all worker threads,
// i.e. the oldest slot that some thread may not yet have processed.
uint JobSystemThreadPool::GetHead() const
{
    // Find the minimal value across all threads
    uint head = mTail;
    for (size_t i = 0; i < mThreads.size(); ++i)
        head = min(head, mHeads[i].load());
    return head;
}
// Lock-free insertion into the fixed size ring buffer: claims an empty slot with a CAS on the
// slot pointer, then advances mTail. Spins (with a short sleep) while the queue is full.
void JobSystemThreadPool::QueueJobInternal(Job *inJob)
{
    // Add reference to job because we're adding the job to the queue
    inJob->AddRef();

    // Need to read head first because otherwise the tail can already have passed the head
    // We read the head outside of the loop since it involves iterating over all threads and we only need to update
    // it if there's not enough space in the queue.
    uint head = GetHead();

    for (;;)
    {
        // Check if there's space in the queue
        uint old_value = mTail;
        if (old_value - head >= cQueueLength)
        {
            // We calculated the head outside of the loop, update head (and we also need to update tail to prevent it from passing head)
            head = GetHead();
            old_value = mTail;

            // Second check if there's space in the queue
            if (old_value - head >= cQueueLength)
            {
                // Wake up all threads in order to ensure that they can clear any nullptrs they may not have processed yet
                mSemaphore.Release((uint)mThreads.size());

                // Sleep a little (we have to wait for other threads to update their head pointer in order for us to be able to continue)
                std::this_thread::sleep_for(std::chrono::microseconds(100));
                continue;
            }
        }

        // Write the job pointer if the slot is empty
        Job *expected_job = nullptr;
        bool success = mQueue[old_value & (cQueueLength - 1)].compare_exchange_strong(expected_job, inJob);

        // Regardless of who wrote the slot, we will update the tail (if the successful thread got scheduled out
        // after writing the pointer we still want to be able to continue)
        mTail.compare_exchange_strong(old_value, old_value + 1);

        // If we successfully added our job we're done
        if (success)
            break;
    }
}
// Queues a single job and wakes one worker thread to pick it up.
void JobSystemThreadPool::QueueJob(Job *inJob)
{
    JPH_PROFILE_FUNCTION();

    // If we have no worker threads, we can't queue the job either. We assume in this case that the job will be added to a barrier and that the barrier will execute the job when it's Wait() function is called.
    if (mThreads.empty())
        return;

    // Queue the job
    QueueJobInternal(inJob);

    // Wake up thread
    mSemaphore.Release();
}
// Queues a batch of jobs and wakes up to one worker per queued job (capped at the pool size).
void JobSystemThreadPool::QueueJobs(Job **inJobs, uint inNumJobs)
{
    JPH_PROFILE_FUNCTION();

    JPH_ASSERT(inNumJobs > 0);

    // If we have no worker threads, we can't queue the job either. We assume in this case that the job will be added to a barrier and that the barrier will execute the job when it's Wait() function is called.
    if (mThreads.empty())
        return;

    // Queue all jobs
    for (Job **job = inJobs, **job_end = inJobs + inNumJobs; job < job_end; ++job)
        QueueJobInternal(*job);

    // Wake up threads
    mSemaphore.Release(min(inNumJobs, (uint)mThreads.size()));
}
#if defined(JPH_PLATFORM_WINDOWS)
#if !defined(JPH_COMPILER_MINGW) // MinGW doesn't support __try/__except)
// Sets the current thread name in MSVC debugger
// Uses the legacy MSVC mechanism: raise exception 0x406D1388 carrying a THREADNAME_INFO
// payload; an attached debugger intercepts it, anything else is swallowed by __except.
static void RaiseThreadNameException(const char *inName)
{
    #pragma pack(push, 8)

    struct THREADNAME_INFO
    {
        DWORD   dwType;     // Must be 0x1000.
        LPCSTR  szName;     // Pointer to name (in user addr space).
        DWORD   dwThreadID; // Thread ID (-1=caller thread).
        DWORD   dwFlags;    // Reserved for future use, must be zero.
    };

    #pragma pack(pop)

    THREADNAME_INFO info;
    info.dwType = 0x1000;
    info.szName = inName;
    info.dwThreadID = (DWORD)-1;
    info.dwFlags = 0;

    __try
    {
        RaiseException(0x406D1388, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR *)&info);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
    }
}
#endif // !JPH_COMPILER_MINGW
// Windows: names the current thread via SetThreadDescription when available (Win10 1607+,
// resolved dynamically so older systems still run), otherwise falls back to the
// debugger-exception mechanism when a debugger is attached.
static void SetThreadName(const char* inName)
{
    JPH_SUPPRESS_WARNING_PUSH

    // Suppress casting warning, it's fine here as GetProcAddress doesn't really return a FARPROC
    JPH_CLANG_SUPPRESS_WARNING("-Wcast-function-type") // error : cast from 'FARPROC' (aka 'long long (*)()') to 'SetThreadDescriptionFunc' (aka 'long (*)(void *, const wchar_t *)') converts to incompatible function type
    JPH_CLANG_SUPPRESS_WARNING("-Wcast-function-type-strict") // error : cast from 'FARPROC' (aka 'long long (*)()') to 'SetThreadDescriptionFunc' (aka 'long (*)(void *, const wchar_t *)') converts to incompatible function type
    JPH_MSVC_SUPPRESS_WARNING(4191) // reinterpret_cast' : unsafe conversion from 'FARPROC' to 'SetThreadDescriptionFunc'. Calling this function through the result pointer may cause your program to fail

    using SetThreadDescriptionFunc = HRESULT(WINAPI*)(HANDLE hThread, PCWSTR lpThreadDescription);
    static SetThreadDescriptionFunc SetThreadDescription = reinterpret_cast<SetThreadDescriptionFunc>(GetProcAddress(GetModuleHandleW(L"Kernel32.dll"), "SetThreadDescription"));

    JPH_SUPPRESS_WARNING_POP

    if (SetThreadDescription)
    {
        // Convert UTF-8 name to the wide string the API expects; bail out on conversion failure
        wchar_t name_buffer[64] = { 0 };
        if (MultiByteToWideChar(CP_UTF8, 0, inName, -1, name_buffer, sizeof(name_buffer) / sizeof(wchar_t) - 1) == 0)
            return;

        SetThreadDescription(GetCurrentThread(), name_buffer);
    }
#if !defined(JPH_COMPILER_MINGW)
    else if (IsDebuggerPresent())
        RaiseThreadNameException(inName);
#endif // !JPH_COMPILER_MINGW
}
#elif defined(JPH_PLATFORM_LINUX)
// Sets the current thread name on Linux via prctl (visible in e.g. top, ps and gdb)
static void SetThreadName(const char *inName)
{
	JPH_ASSERT(strlen(inName) < 16); // String will be truncated if it is longer
	prctl(PR_SET_NAME, inName, 0, 0, 0);
}
#endif // JPH_PLATFORM_LINUX
// Entry point for a worker thread: names the thread, then repeatedly sleeps on the semaphore
// and drains jobs from the shared ring buffer until mQuit is set.
void JobSystemThreadPool::ThreadMain(int inThreadIndex)
{
	// Name the thread
	char name[64];
	snprintf(name, sizeof(name), "Worker %d", int(inThreadIndex + 1));

#if defined(JPH_PLATFORM_WINDOWS) || defined(JPH_PLATFORM_LINUX)
	SetThreadName(name);
#endif // JPH_PLATFORM_WINDOWS || JPH_PLATFORM_LINUX

	// Enable floating point exceptions
	FPExceptionsEnable enable_exceptions;
	JPH_UNUSED(enable_exceptions);

	JPH_PROFILE_THREAD_START(name);

	// Call the thread init function
	mThreadInitFunction(inThreadIndex);

	// Each worker keeps its own head index into the shared queue; mTail is the shared write end
	atomic<uint> &head = mHeads[inThreadIndex];

	while (!mQuit)
	{
		// Wait for jobs
		mSemaphore.Acquire();

		{
			JPH_PROFILE("Executing Jobs");

			// Loop over the queue
			while (head != mTail)
			{
				// Exchange any job pointer we find with a nullptr
				atomic<Job *> &job = mQueue[head & (cQueueLength - 1)];
				// Plain load first so we don't do a (more expensive) exchange on slots another thread already emptied
				if (job.load() != nullptr)
				{
					Job *job_ptr = job.exchange(nullptr);
					if (job_ptr != nullptr)
					{
						// And execute it
						job_ptr->Execute();
						job_ptr->Release();
					}
				}
				head++;
			}
		}
	}

	// Call the thread exit function
	mThreadExitFunction(inThreadIndex);

	JPH_PROFILE_THREAD_END();
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,101 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/JobSystemWithBarrier.h>
#include <Jolt/Core/FixedSizeFreeList.h>
#include <Jolt/Core/Semaphore.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <thread>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
// Things we're using from STL
using std::thread;
/// Implementation of a JobSystem using a thread pool
///
/// Note that this is considered an example implementation. It is expected that when you integrate
/// the physics engine into your own project that you'll provide your own implementation of the
/// JobSystem built on top of whatever job system your project uses.
class JPH_EXPORT JobSystemThreadPool final : public JobSystemWithBarrier
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Creates a thread pool.
	/// @see JobSystemThreadPool::Init
	JobSystemThreadPool(uint inMaxJobs, uint inMaxBarriers, int inNumThreads = -1);
	/// Default constructor; Init() must be called before the job system can be used
	JobSystemThreadPool() = default;
	virtual ~JobSystemThreadPool() override;

	/// Functions to call when a thread is initialized or exits, must be set before calling Init()
	using InitExitFunction = function<void(int)>;
	void SetThreadInitFunction(const InitExitFunction &inInitFunction) { mThreadInitFunction = inInitFunction; }
	void SetThreadExitFunction(const InitExitFunction &inExitFunction) { mThreadExitFunction = inExitFunction; }

	/// Initialize the thread pool
	/// @param inMaxJobs Max number of jobs that can be allocated at any time
	/// @param inMaxBarriers Max number of barriers that can be allocated at any time
	/// @param inNumThreads Number of threads to start (the number of concurrent jobs is 1 more because the main thread will also run jobs while waiting for a barrier to complete). Use -1 to auto detect the amount of CPU's.
	void Init(uint inMaxJobs, uint inMaxBarriers, int inNumThreads = -1);

	// See JobSystem
	virtual int GetMaxConcurrency() const override { return int(mThreads.size()) + 1; }
	virtual JobHandle CreateJob(const char *inName, ColorArg inColor, const JobFunction &inJobFunction, uint32 inNumDependencies = 0) override;

	/// Change the max concurrency after initialization
	void SetNumThreads(int inNumThreads) { StopThreads(); StartThreads(inNumThreads); }

protected:
	// See JobSystem
	virtual void QueueJob(Job *inJob) override;
	virtual void QueueJobs(Job **inJobs, uint inNumJobs) override;
	virtual void FreeJob(Job *inJob) override;

private:
	/// Start/stop the worker threads
	void StartThreads(int inNumThreads);
	void StopThreads();

	/// Entry point for a thread
	void ThreadMain(int inThreadIndex);

	/// Get the head of the thread that has processed the least amount of jobs
	inline uint GetHead() const;

	/// Internal helper function to queue a job
	inline void QueueJobInternal(Job *inJob);

	/// Functions to call when initializing or exiting a thread
	InitExitFunction mThreadInitFunction = [](int) { };
	InitExitFunction mThreadExitFunction = [](int) { };

	/// Array of jobs (fixed size)
	using AvailableJobs = FixedSizeFreeList<Job>;
	AvailableJobs mJobs;

	/// Threads running jobs
	Array<thread> mThreads;

	// The job queue
	static constexpr uint32 cQueueLength = 1024;
	static_assert(IsPowerOf2(cQueueLength)); // We do bit operations and require queue length to be a power of 2
	atomic<Job *> mQueue[cQueueLength];

	// Head and tail of the queue, do this value modulo cQueueLength - 1 to get the element in the mQueue array
	atomic<uint> * mHeads = nullptr; ///< Per executing thread the head of the current queue
	alignas(JPH_CACHE_LINE_SIZE) atomic<uint> mTail = 0; ///< Tail (write end) of the queue

	// Semaphore used to signal worker threads that there is new work
	Semaphore mSemaphore;

	/// Boolean to indicate that we want to stop the job system
	atomic<bool> mQuit = false;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,230 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/JobSystemWithBarrier.h>
#include <Jolt/Core/Profiler.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <thread>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
JobSystemWithBarrier::BarrierImpl::BarrierImpl()
{
	// Mark every slot in the job ring buffer as empty
	for (uint slot = 0; slot < cMaxJobs; ++slot)
		mJobs[slot] = nullptr;
}
JobSystemWithBarrier::BarrierImpl::~BarrierImpl()
{
	// All jobs must have been waited for (and released) before the barrier is destroyed
	JPH_ASSERT(IsEmpty());
}
// Add a single job to this barrier. Thread-safe with respect to other adders; the job is
// ref-counted while it sits in the barrier's ring buffer.
void JobSystemWithBarrier::BarrierImpl::AddJob(const JobHandle &inJob)
{
	JPH_PROFILE_FUNCTION();

	bool release_semaphore = false;

	// Set the barrier on the job, this returns true if the barrier was successfully set (otherwise the job is already done and we don't need to add it to our list)
	Job *job = inJob.GetPtr();
	if (job->SetBarrier(this))
	{
		// If the job can be executed we want to release the semaphore an extra time to allow the waiting thread to start executing it
		mNumToAcquire++;
		if (job->CanBeExecuted())
		{
			release_semaphore = true;
			mNumToAcquire++;
		}

		// Add the job to our job list
		job->AddRef();
		uint write_index = mJobWriteIndex++;
		while (write_index - mJobReadIndex >= cMaxJobs)
		{
			// Ring buffer is full: spin until Wait() has reaped enough finished jobs
			JPH_ASSERT(false, "Barrier full, stalling!");
			std::this_thread::sleep_for(std::chrono::microseconds(100));
		}
		mJobs[write_index & (cMaxJobs - 1)] = job;
	}

	// Notify waiting thread that a new executable job is available
	if (release_semaphore)
		mSemaphore.Release();
}
// Batch version of AddJob. Note that the semaphore is released at most once for the whole
// batch (the waiting thread will discover the remaining executable jobs itself).
void JobSystemWithBarrier::BarrierImpl::AddJobs(const JobHandle *inHandles, uint inNumHandles)
{
	JPH_PROFILE_FUNCTION();

	bool release_semaphore = false;

	for (const JobHandle *handle = inHandles, *handles_end = inHandles + inNumHandles; handle < handles_end; ++handle)
	{
		// Set the barrier on the job, this returns true if the barrier was successfully set (otherwise the job is already done and we don't need to add it to our list)
		Job *job = handle->GetPtr();
		if (job->SetBarrier(this))
		{
			// If the job can be executed we want to release the semaphore an extra time to allow the waiting thread to start executing it
			mNumToAcquire++;
			if (!release_semaphore && job->CanBeExecuted())
			{
				release_semaphore = true;
				mNumToAcquire++;
			}

			// Add the job to our job list
			job->AddRef();
			uint write_index = mJobWriteIndex++;
			while (write_index - mJobReadIndex >= cMaxJobs)
			{
				// Ring buffer is full: spin until Wait() has reaped enough finished jobs
				JPH_ASSERT(false, "Barrier full, stalling!");
				std::this_thread::sleep_for(std::chrono::microseconds(100));
			}
			mJobs[write_index & (cMaxJobs - 1)] = job;
		}
	}

	// Notify waiting thread that a new executable job is available
	if (release_semaphore)
		mSemaphore.Release();
}
void JobSystemWithBarrier::BarrierImpl::OnJobFinished(Job *inJob)
{
	JPH_PROFILE_FUNCTION();

	// Wake the thread blocked in Wait() so it can reap this finished job
	mSemaphore.Release();
}
// Block until every job added to this barrier has finished. While waiting, this thread helps
// out by executing barrier jobs that are ready to run.
void JobSystemWithBarrier::BarrierImpl::Wait()
{
	while (mNumToAcquire > 0)
	{
		{
			JPH_PROFILE("Execute Jobs");

			// Go through all jobs
			bool has_executed;
			do
			{
				has_executed = false;

				// Loop through the jobs and erase jobs from the beginning of the list that are done
				while (mJobReadIndex < mJobWriteIndex)
				{
					atomic<Job *> &job = mJobs[mJobReadIndex & (cMaxJobs - 1)];
					Job *job_ptr = job.load();
					if (job_ptr == nullptr || !job_ptr->IsDone())
						break;

					// Job is finished, release it
					job_ptr->Release();
					job = nullptr;
					++mJobReadIndex;
				}

				// Loop through the jobs and execute the first executable job
				for (uint index = mJobReadIndex; index < mJobWriteIndex; ++index)
				{
					const atomic<Job *> &job = mJobs[index & (cMaxJobs - 1)];
					Job *job_ptr = job.load();
					if (job_ptr != nullptr && job_ptr->CanBeExecuted())
					{
						// This will only execute the job if it has not already executed
						job_ptr->Execute();
						has_executed = true;
						break;
					}
				}
			} while (has_executed);
		}

		// Wait for another thread to wake us when either there is more work to do or when all jobs have completed.
		// When there have been multiple releases, we acquire them all at the same time to avoid needlessly spinning on executing jobs.
		// Note that using GetValue is inherently unsafe since we can read a stale value, but this is not an issue here as this is the only
		// place where we acquire the semaphore. Other threads only release it, so we can only read a value that is lower or equal to the actual value.
		int num_to_acquire = max(1, mSemaphore.GetValue());
		mSemaphore.Acquire(num_to_acquire);
		mNumToAcquire -= num_to_acquire;
	}

	// All jobs should be done now, release them
	while (mJobReadIndex < mJobWriteIndex)
	{
		atomic<Job *> &job = mJobs[mJobReadIndex & (cMaxJobs - 1)];
		Job *job_ptr = job.load();
		JPH_ASSERT(job_ptr != nullptr && job_ptr->IsDone());
		job_ptr->Release();
		job = nullptr;
		++mJobReadIndex;
	}
}
void JobSystemWithBarrier::Init(uint inMaxBarriers)
{
	JPH_ASSERT(mBarriers == nullptr); // Already initialized?

	// Init freelist of barriers (all constructed up-front, handed out via CreateBarrier)
	mMaxBarriers = inMaxBarriers;
	mBarriers = new BarrierImpl [inMaxBarriers];
}
// Convenience constructor that immediately initializes the barrier pool
JobSystemWithBarrier::JobSystemWithBarrier(uint inMaxBarriers)
{
	Init(inMaxBarriers);
}
JobSystemWithBarrier::~JobSystemWithBarrier()
{
	// Ensure that none of the barriers are used
#ifdef JPH_ENABLE_ASSERTS
	for (const BarrierImpl *b = mBarriers, *b_end = mBarriers + mMaxBarriers; b < b_end; ++b)
		JPH_ASSERT(!b->mInUse);
#endif // JPH_ENABLE_ASSERTS

	delete [] mBarriers;
}
JobSystem::Barrier *JobSystemWithBarrier::CreateBarrier()
{
	JPH_PROFILE_FUNCTION();

	// Scan the pool for a barrier that has not been handed out and claim it atomically
	for (BarrierImpl *barrier = mBarriers, *barrier_end = mBarriers + mMaxBarriers; barrier < barrier_end; ++barrier)
	{
		bool expected = false;
		if (barrier->mInUse.compare_exchange_strong(expected, true))
			return barrier;
	}

	// Pool exhausted
	return nullptr;
}
void JobSystemWithBarrier::DestroyBarrier(Barrier *inBarrier)
{
	JPH_PROFILE_FUNCTION();

	// Check that no jobs are in the barrier
	JPH_ASSERT(static_cast<BarrierImpl *>(inBarrier)->IsEmpty());

	// Flag the barrier as unused
	bool expected = true;
	static_cast<BarrierImpl *>(inBarrier)->mInUse.compare_exchange_strong(expected, false);
	JPH_ASSERT(expected); // On CAS failure 'expected' is overwritten with false, i.e. the barrier wasn't marked in use
}
void JobSystemWithBarrier::WaitForJobs(Barrier *inBarrier)
{
	JPH_PROFILE_FUNCTION();

	// Let our barrier implementation wait for the jobs
	static_cast<BarrierImpl *>(inBarrier)->Wait();
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,85 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/JobSystem.h>
#include <Jolt/Core/Semaphore.h>
JPH_NAMESPACE_BEGIN
/// Implementation of the Barrier class for a JobSystem
///
/// This class can be used to make it easier to create a new JobSystem implementation that integrates with your own job system.
/// It will implement all functionality relating to barriers, so the only functions that are left to be implemented are:
///
/// * JobSystem::GetMaxConcurrency
/// * JobSystem::CreateJob
/// * JobSystem::FreeJob
/// * JobSystem::QueueJob/QueueJobs
///
/// See instructions in JobSystem for more information on how to implement these.
/// Implementation of the Barrier class for a JobSystem
///
/// This class can be used to make it easier to create a new JobSystem implementation that integrates with your own job system.
/// It will implement all functionality relating to barriers, so the only functions that are left to be implemented are:
///
/// * JobSystem::GetMaxConcurrency
/// * JobSystem::CreateJob
/// * JobSystem::FreeJob
/// * JobSystem::QueueJob/QueueJobs
///
/// See instructions in JobSystem for more information on how to implement these.
class JPH_EXPORT JobSystemWithBarrier : public JobSystem
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructs barriers
	/// @see JobSystemWithBarrier::Init
	explicit JobSystemWithBarrier(uint inMaxBarriers);
	/// Default constructor; Init() must be called before use
	JobSystemWithBarrier() = default;
	virtual ~JobSystemWithBarrier() override;

	/// Initialize the barriers
	/// @param inMaxBarriers Max number of barriers that can be allocated at any time
	void Init(uint inMaxBarriers);

	// See JobSystem
	virtual Barrier * CreateBarrier() override;
	virtual void DestroyBarrier(Barrier *inBarrier) override;
	virtual void WaitForJobs(Barrier *inBarrier) override;

private:
	class BarrierImpl : public Barrier
	{
	public:
		JPH_OVERRIDE_NEW_DELETE

		/// Constructor
		BarrierImpl();
		virtual ~BarrierImpl() override;

		// See Barrier
		virtual void AddJob(const JobHandle &inJob) override;
		virtual void AddJobs(const JobHandle *inHandles, uint inNumHandles) override;

		/// Check if there are any jobs in the job barrier
		inline bool IsEmpty() const { return mJobReadIndex == mJobWriteIndex; }

		/// Wait for all jobs in this job barrier, while waiting, execute jobs that are part of this barrier on the current thread
		void Wait();

		/// Flag to indicate if a barrier has been handed out
		atomic<bool> mInUse { false };

	protected:
		/// Called by a Job to mark that it is finished
		virtual void OnJobFinished(Job *inJob) override;

		/// Jobs queue for the barrier
		static constexpr uint cMaxJobs = 2048;
		static_assert(IsPowerOf2(cMaxJobs)); // We do bit operations and require max jobs to be a power of 2
		atomic<Job *> mJobs[cMaxJobs]; ///< List of jobs that are part of this barrier, nullptrs for empty slots
		alignas(JPH_CACHE_LINE_SIZE) atomic<uint> mJobReadIndex { 0 }; ///< First job that could be valid (modulo cMaxJobs), can be nullptr if other thread is still working on adding the job
		alignas(JPH_CACHE_LINE_SIZE) atomic<uint> mJobWriteIndex { 0 }; ///< First job that can be written (modulo cMaxJobs)
		atomic<int> mNumToAcquire { 0 }; ///< Number of times the semaphore has been released, the barrier should acquire the semaphore this many times (written at the same time as mJobWriteIndex so ok to put in same cache line)
		Semaphore mSemaphore; ///< Semaphore used by finishing jobs to signal the barrier that they're done
	};

	/// Array of barriers (we keep them constructed all the time since constructing a semaphore/mutex is not cheap)
	uint mMaxBarriers = 0; ///< Max amount of barriers
	BarrierImpl * mBarriers = nullptr; ///< List of the actual barriers
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,51 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/LinearCurve.h>
#include <Jolt/Core/StreamIn.h>
#include <Jolt/Core/StreamOut.h>
#include <Jolt/ObjectStream/TypeDeclarations.h>
JPH_NAMESPACE_BEGIN
// Reflection/serialization registration for LinearCurve::Point (exposes mX and mY)
JPH_IMPLEMENT_SERIALIZABLE_NON_VIRTUAL(LinearCurve::Point)
{
	JPH_ADD_ATTRIBUTE(Point, mX)
	JPH_ADD_ATTRIBUTE(Point, mY)
}

// Reflection/serialization registration for LinearCurve (exposes its point list)
JPH_IMPLEMENT_SERIALIZABLE_NON_VIRTUAL(LinearCurve)
{
	JPH_ADD_ATTRIBUTE(LinearCurve, mPoints)
}
float LinearCurve::GetValue(float inX) const
{
if (mPoints.empty())
return 0.0f;
Points::const_iterator i2 = std::lower_bound(mPoints.begin(), mPoints.end(), inX, [](const Point &inPoint, float inValue) { return inPoint.mX < inValue; });
if (i2 == mPoints.begin())
return mPoints.front().mY;
else if (i2 == mPoints.end())
return mPoints.back().mY;
Points::const_iterator i1 = i2 - 1;
return i1->mY + (inX - i1->mX) * (i2->mY - i1->mY) / (i2->mX - i1->mX);
}
// Serialize the curve (its point list) to a binary stream
void LinearCurve::SaveBinaryState(StreamOut &inStream) const
{
	inStream.Write(mPoints);
}
// Restore the curve (its point list) from a binary stream written by SaveBinaryState
void LinearCurve::RestoreBinaryState(StreamIn &inStream)
{
	inStream.Read(mPoints);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,67 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/ObjectStream/SerializableObject.h>
#include <Jolt/Core/QuickSort.h>
JPH_NAMESPACE_BEGIN
class StreamOut;
class StreamIn;
/// A set of points (x, y) that form a linear curve
class JPH_EXPORT LinearCurve
{
	JPH_DECLARE_SERIALIZABLE_NON_VIRTUAL(JPH_EXPORT, LinearCurve)

public:
	/// A point on the curve
	class Point
	{
		JPH_DECLARE_SERIALIZABLE_NON_VIRTUAL(JPH_EXPORT, Point)

	public:
		float mX = 0.0f; ///< X value of this point
		float mY = 0.0f; ///< Y value of this point
	};

	/// Remove all points
	void Clear() { mPoints.clear(); }

	/// Reserve memory for inNumPoints points
	void Reserve(uint inNumPoints) { mPoints.reserve(inNumPoints); }

	/// Add a point to the curve. Points must be inserted in ascending X or Sort() needs to be called when all points have been added.
	/// @param inX X value
	/// @param inY Y value
	void AddPoint(float inX, float inY) { mPoints.push_back({ inX, inY }); }

	/// Sort the points on X ascending
	void Sort() { QuickSort(mPoints.begin(), mPoints.end(), [](const Point &inLHS, const Point &inRHS) { return inLHS.mX < inRHS.mX; }); }

	/// Get the lowest X value (0 when the curve is empty)
	float GetMinX() const { return mPoints.empty()? 0.0f : mPoints.front().mX; }

	/// Get the highest X value (0 when the curve is empty)
	float GetMaxX() const { return mPoints.empty()? 0.0f : mPoints.back().mX; }

	/// Sample value on the curve
	/// @param inX X value to sample at
	/// @return Interpolated Y value
	float GetValue(float inX) const;

	/// Saves the state of this object in binary form to inStream.
	void SaveBinaryState(StreamOut &inStream) const;

	/// Restore the state of this object from inStream.
	void RestoreBinaryState(StreamIn &inStream);

	/// The points on the curve, should be sorted ascending by x
	using Points = Array<Point>;
	Points mPoints;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,182 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Core/Atomics.h>
JPH_NAMESPACE_BEGIN
/// Allocator for a lock free hash map.
/// Hands out offsets into one fixed-size buffer; allocations are never freed individually,
/// only Clear() rewinds the whole store.
class LFHMAllocator : public NonCopyable
{
public:
	/// Destructor
	inline ~LFHMAllocator();

	/// Initialize the allocator
	/// @param inObjectStoreSizeBytes Number of bytes to reserve for all key value pairs
	inline void Init(uint inObjectStoreSizeBytes);

	/// Clear all allocations
	inline void Clear();

	/// Allocate a new block of data
	/// @param inBlockSize Size of block to allocate (will potentially return a smaller block if memory is full).
	/// @param ioBegin Should be the start of the first free byte in current memory block on input, will contain the start of the first free byte in allocated block on return.
	/// @param ioEnd Should be the byte beyond the current memory block on input, will contain the byte beyond the allocated block on return.
	inline void Allocate(uint32 inBlockSize, uint32 &ioBegin, uint32 &ioEnd);

	/// Convert a pointer to an offset
	template <class T>
	inline uint32 ToOffset(const T *inData) const;

	/// Convert an offset to a pointer
	template <class T>
	inline T * FromOffset(uint32 inOffset) const;

private:
	uint8 * mObjectStore = nullptr; ///< This contains a contiguous list of objects (possibly of varying size)
	uint32 mObjectStoreSizeBytes = 0; ///< The size of mObjectStore in bytes
	atomic<uint32> mWriteOffset { 0 }; ///< Next offset to write to in mObjectStore
};
/// Allocator context object for a lock free hash map that allocates a larger memory block at once and hands it out in smaller portions.
/// This avoids contention on the atomic LFHMAllocator::mWriteOffset.
class LFHMAllocatorContext : public NonCopyable
{
public:
	/// Construct a new allocator context
	inline LFHMAllocatorContext(LFHMAllocator &inAllocator, uint32 inBlockSize);

	/// @brief Allocate data block
	/// @param inSize Size of block to allocate.
	/// @param inAlignment Alignment of block to allocate.
	/// @param outWriteOffset Offset in buffer where block is located
	/// @return True if allocation succeeded
	inline bool Allocate(uint32 inSize, uint32 inAlignment, uint32 &outWriteOffset);

private:
	LFHMAllocator & mAllocator; ///< Underlying allocator that blocks are fetched from
	uint32 mBlockSize; ///< Number of bytes to fetch from the allocator at a time
	uint32 mBegin = 0; ///< Offset of the first free byte in the currently held block
	uint32 mEnd = 0; ///< Offset one beyond the currently held block
};
/// Very simple lock free hash map that only allows insertion, retrieval and provides a fixed amount of buckets and fixed storage.
/// Note: This class currently assumes key and value are simple types that need no calls to the destructor.
template <class Key, class Value>
class LockFreeHashMap : public NonCopyable
{
public:
	using MapType = LockFreeHashMap<Key, Value>;

	/// Constructor, all key/value storage is carved out of inAllocator
	explicit LockFreeHashMap(LFHMAllocator &inAllocator) : mAllocator(inAllocator) { }
	/// Destructor
	~LockFreeHashMap();

	/// Initialization
	/// @param inMaxBuckets Max amount of buckets to use in the hashmap. Must be power of 2.
	void Init(uint32 inMaxBuckets);

	/// Remove all elements.
	/// Note that this cannot happen simultaneously with adding new elements.
	void Clear();

	/// Get the current amount of buckets that the map is using
	uint32 GetNumBuckets() const { return mNumBuckets; }

	/// Get the maximum amount of buckets that this map supports
	uint32 GetMaxBuckets() const { return mMaxBuckets; }

	/// Update the number of buckets. This must be done after clearing the map and cannot be done concurrently with any other operations on the map.
	/// Note that the number of buckets can never become bigger than the specified max buckets during initialization and that it must be a power of 2.
	void SetNumBuckets(uint32 inNumBuckets);

	/// A key / value pair that is inserted in the map
	class KeyValue
	{
	public:
		const Key & GetKey() const { return mKey; }
		Value & GetValue() { return mValue; }
		const Value & GetValue() const { return mValue; }

	private:
		template <class K, class V> friend class LockFreeHashMap;

		Key mKey; ///< Key for this entry
		uint32 mNextOffset; ///< Offset in mObjectStore of next KeyValue entry with same hash
		Value mValue; ///< Value for this entry + optionally extra bytes
	};

	/// Insert a new element, returns null if map full.
	/// Multiple threads can be inserting in the map at the same time.
	template <class... Params>
	inline KeyValue * Create(LFHMAllocatorContext &ioContext, const Key &inKey, uint64 inKeyHash, int inExtraBytes, Params &&... inConstructorParams);

	/// Find an element, returns null if not found
	inline const KeyValue * Find(const Key &inKey, uint64 inKeyHash) const;

	/// Value of an invalid handle
	const static uint32 cInvalidHandle = uint32(-1);

	/// Get convert key value pair to uint32 handle
	inline uint32 ToHandle(const KeyValue *inKeyValue) const;

	/// Convert uint32 handle back to key and value
	inline const KeyValue * FromHandle(uint32 inHandle) const;

#ifdef JPH_ENABLE_ASSERTS
	/// Get the number of key value pairs that this map currently contains.
	/// Available only when asserts are enabled because adding elements creates contention on this atomic and negatively affects performance.
	inline uint32 GetNumKeyValues() const { return mNumKeyValues; }
#endif // JPH_ENABLE_ASSERTS

	/// Get all key/value pairs
	inline void GetAllKeyValues(Array<const KeyValue *> &outAll) const;

	/// Non-const iterator
	struct Iterator
	{
		/// Comparison
		bool operator == (const Iterator &inRHS) const { return mMap == inRHS.mMap && mBucket == inRHS.mBucket && mOffset == inRHS.mOffset; }
		bool operator != (const Iterator &inRHS) const { return !(*this == inRHS); }

		/// Convert to key value pair
		KeyValue & operator * ();

		/// Next item
		Iterator & operator ++ ();

		MapType * mMap; ///< Map being iterated
		uint32 mBucket; ///< Current bucket index
		uint32 mOffset; ///< Offset in the object store of the current key value pair (cInvalidHandle at end)
	};

	/// Iterate over the map, note that it is not safe to do this in parallel to Clear().
	/// It is safe to do this while adding elements to the map, but newly added elements may or may not be returned by the iterator.
	Iterator begin();
	Iterator end();

#ifdef JPH_DEBUG
	/// Output stats about this map to the log
	void TraceStats() const;
#endif

private:
	LFHMAllocator & mAllocator; ///< Allocator used to allocate key value pairs

#ifdef JPH_ENABLE_ASSERTS
	atomic<uint32> mNumKeyValues = 0; ///< Number of key value pairs in the store
#endif // JPH_ENABLE_ASSERTS

	atomic<uint32> * mBuckets = nullptr; ///< This contains the offset in mObjectStore of the first object with a particular hash
	uint32 mNumBuckets = 0; ///< Current number of buckets
	uint32 mMaxBuckets = 0; ///< Maximum number of buckets
};
JPH_NAMESPACE_END
#include "LockFreeHashMap.inl"

View File

@@ -0,0 +1,351 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
///////////////////////////////////////////////////////////////////////////////////
// LFHMAllocator
///////////////////////////////////////////////////////////////////////////////////
inline LFHMAllocator::~LFHMAllocator()
{
	// Free the object store (nullptr when Init was never called — assumes AlignedFree accepts nullptr)
	AlignedFree(mObjectStore);
}
inline void LFHMAllocator::Init(uint inObjectStoreSizeBytes)
{
	JPH_ASSERT(mObjectStore == nullptr); // Don't initialize twice

	// Reserve one 16-byte aligned buffer that all allocations are carved out of
	mObjectStoreSizeBytes = inObjectStoreSizeBytes;
	mObjectStore = reinterpret_cast<uint8 *>(JPH::AlignedAllocate(inObjectStoreSizeBytes, 16));
}
inline void LFHMAllocator::Clear()
{
	// Simply rewind the write offset; no destructors are run for previously allocated objects
	mWriteOffset = 0;
}
inline void LFHMAllocator::Allocate(uint32 inBlockSize, uint32 &ioBegin, uint32 &ioEnd)
{
	// If we're already beyond the end of our buffer then don't do an atomic add.
	// It's possible that many keys are inserted after the allocator is full, making it possible
	// for mWriteOffset (uint32) to wrap around to zero. When this happens, there will be a memory corruption.
	// This way, we will be able to progress the write offset beyond the size of the buffer
	// worst case by max <CPU count> * inBlockSize.
	if (mWriteOffset.load(memory_order_relaxed) >= mObjectStoreSizeBytes)
		return;

	// Atomically fetch a block from the pool
	uint32 begin = mWriteOffset.fetch_add(inBlockSize, memory_order_relaxed);
	uint32 end = min(begin + inBlockSize, mObjectStoreSizeBytes);

	if (ioEnd == begin)
	{
		// Block is allocated straight after our previous block: merge so the caller keeps
		// any unused bytes at the end of its old block
		begin = ioBegin;
	}
	else
	{
		// Block is a new block (clamped to the store size in case the pool just ran out)
		begin = min(begin, mObjectStoreSizeBytes);
	}

	// Store the begin and end of the resulting block
	ioBegin = begin;
	ioEnd = end;
}
template <class T>
inline uint32 LFHMAllocator::ToOffset(const T *inData) const
{
	// Offset is the byte distance from the start of the object store
	const uint8 *data = reinterpret_cast<const uint8 *>(inData);
	JPH_ASSERT(data >= mObjectStore && data < mObjectStore + mObjectStoreSizeBytes);
	return uint32(data - mObjectStore);
}
template <class T>
inline T *LFHMAllocator::FromOffset(uint32 inOffset) const
{
	// Inverse of ToOffset: translate a byte offset back into a pointer into the object store
	JPH_ASSERT(inOffset < mObjectStoreSizeBytes);
	return reinterpret_cast<T *>(mObjectStore + inOffset);
}
///////////////////////////////////////////////////////////////////////////////////
// LFHMAllocatorContext
///////////////////////////////////////////////////////////////////////////////////
// Construct a context that carves allocations of up to inBlockSize bytes out of inAllocator;
// mBegin == mEnd == 0 means no block is held yet
inline LFHMAllocatorContext::LFHMAllocatorContext(LFHMAllocator &inAllocator, uint32 inBlockSize) :
	mAllocator(inAllocator),
	mBlockSize(inBlockSize)
{
}
// Hand out inSize bytes at the requested alignment from the locally held block, fetching a
// fresh block from the shared allocator when the current one can't satisfy the request
inline bool LFHMAllocatorContext::Allocate(uint32 inSize, uint32 inAlignment, uint32 &outWriteOffset)
{
	// Calculate needed bytes for alignment
	JPH_ASSERT(IsPowerOf2(inAlignment));
	uint32 alignment_mask = inAlignment - 1;
	uint32 alignment = (inAlignment - (mBegin & alignment_mask)) & alignment_mask;

	// Check if we have space
	if (mEnd - mBegin < inSize + alignment)
	{
		// Allocate a new block
		mAllocator.Allocate(mBlockSize, mBegin, mEnd);

		// Update alignment (the new block may start at a different offset)
		alignment = (inAlignment - (mBegin & alignment_mask)) & alignment_mask;

		// Check if we have space again
		if (mEnd - mBegin < inSize + alignment)
			return false;
	}

	// Make the allocation: outWriteOffset is the aligned start of the handed-out range
	mBegin += alignment;
	outWriteOffset = mBegin;
	mBegin += inSize;
	return true;
}
///////////////////////////////////////////////////////////////////////////////////
// LockFreeHashMap
///////////////////////////////////////////////////////////////////////////////////
template <class Key, class Value>
void LockFreeHashMap<Key, Value>::Init(uint32 inMaxBuckets)
{
	JPH_ASSERT(inMaxBuckets >= 4 && IsPowerOf2(inMaxBuckets)); // Clear() writes 4 buckets at a time and hashing masks with (num buckets - 1)
	JPH_ASSERT(mBuckets == nullptr); // Don't initialize twice

	mNumBuckets = inMaxBuckets;
	mMaxBuckets = inMaxBuckets;

	// 16-byte alignment so Clear() can use aligned 4-wide stores
	mBuckets = reinterpret_cast<atomic<uint32> *>(AlignedAllocate(inMaxBuckets * sizeof(atomic<uint32>), 16));

	Clear();
}
template <class Key, class Value>
LockFreeHashMap<Key, Value>::~LockFreeHashMap()
{
	// Only the bucket array is freed; key/value pairs live in the external allocator
	// and get no destructor calls (see class note: simple types only)
	AlignedFree(mBuckets);
}
template <class Key, class Value>
void LockFreeHashMap<Key, Value>::Clear()
{
#ifdef JPH_ENABLE_ASSERTS
	// Reset number of key value pairs
	mNumKeyValues = 0;
#endif // JPH_ENABLE_ASSERTS

	// Reset buckets 4 at a time using non-atomic 16-byte stores; per the class contract
	// Clear() may not run concurrently with insertions
	static_assert(sizeof(atomic<uint32>) == sizeof(uint32));
	UVec4 invalid_handle = UVec4::sReplicate(cInvalidHandle);
	uint32 *start = reinterpret_cast<uint32 *>(mBuckets);
	const uint32 *end = start + mNumBuckets;
	JPH_ASSERT(IsAligned(start, 16));
	while (start < end)
	{
		invalid_handle.StoreInt4Aligned(start);
		start += 4;
	}
}
template <class Key, class Value>
void LockFreeHashMap<Key, Value>::SetNumBuckets(uint32 inNumBuckets)
{
	JPH_ASSERT(mNumKeyValues == 0); // Map must be empty (Clear() first)
	JPH_ASSERT(inNumBuckets <= mMaxBuckets); // Cannot grow beyond the storage reserved in Init
	JPH_ASSERT(inNumBuckets >= 4 && IsPowerOf2(inNumBuckets));

	mNumBuckets = inNumBuckets;
}
template <class Key, class Value>
template <class... Params>
inline typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Create(LFHMAllocatorContext &ioContext, const Key &inKey, uint64 inKeyHash, int inExtraBytes, Params &&... inConstructorParams)
{
	// This is not a multi map, test the key hasn't been inserted yet
	JPH_ASSERT(Find(inKey, inKeyHash) == nullptr);

	// Calculate total size (inExtraBytes allows trailing variable-length data after the Value)
	uint size = sizeof(KeyValue) + inExtraBytes;

	// Get the write offset for this key value pair
	uint32 write_offset;
	if (!ioContext.Allocate(size, alignof(KeyValue), write_offset))
		return nullptr;

#ifdef JPH_ENABLE_ASSERTS
	// Increment amount of entries in map
	mNumKeyValues.fetch_add(1, memory_order_relaxed);
#endif // JPH_ENABLE_ASSERTS

	// Construct the key/value pair
	KeyValue *kv = mAllocator.template FromOffset<KeyValue>(write_offset);
	JPH_ASSERT(intptr_t(kv) % alignof(KeyValue) == 0);
#ifdef JPH_DEBUG
	memset(kv, 0xcd, size); // Fill with marker bytes to catch reads of uninitialized memory
#endif
	kv->mKey = inKey;
	new (&kv->mValue) Value(std::forward<Params>(inConstructorParams)...);

	// Get the offset to the first object from the bucket with corresponding hash
	atomic<uint32> &offset = mBuckets[inKeyHash & (mNumBuckets - 1)];

	// Add this entry as the first element in the linked list
	uint32 old_offset = offset.load(memory_order_relaxed);
	for (;;)
	{
		kv->mNextOffset = old_offset;
		// Publish the new list head; on failure old_offset is refreshed and we retry
		if (offset.compare_exchange_weak(old_offset, write_offset, memory_order_release))
			break;
	}

	return kv;
}
template <class Key, class Value>
inline const typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::Find(const Key &inKey, uint64 inKeyHash) const
{
	// Walk the linked list that starts at the bucket for this hash
	// (acquire load pairs with the release publish in Create)
	for (uint32 offset = mBuckets[inKeyHash & (mNumBuckets - 1)].load(memory_order_acquire); offset != cInvalidHandle; )
	{
		const KeyValue *kv = mAllocator.template FromOffset<const KeyValue>(offset);
		if (kv->mKey == inKey)
			return kv;
		offset = kv->mNextOffset;
	}

	// Key is not present
	return nullptr;
}
template <class Key, class Value>
inline uint32 LockFreeHashMap<Key, Value>::ToHandle(const KeyValue *inKeyValue) const
{
	// A handle is simply the pair's byte offset in the allocator's object store
	return mAllocator.ToOffset(inKeyValue);
}
template <class Key, class Value>
inline const typename LockFreeHashMap<Key, Value>::KeyValue *LockFreeHashMap<Key, Value>::FromHandle(uint32 inHandle) const
{
	// Inverse of ToHandle: translate the stored offset back into a key/value pointer
	return mAllocator.template FromOffset<const KeyValue>(inHandle);
}
template <class Key, class Value>
inline void LockFreeHashMap<Key, Value>::GetAllKeyValues(Array<const KeyValue *> &outAll) const
{
	// Visit every bucket in turn
	for (uint32 bucket_idx = 0; bucket_idx < mNumBuckets; ++bucket_idx)
	{
		// Walk the chain of entries hanging off this bucket and collect them all
		uint32 cur = mBuckets[bucket_idx];
		while (cur != cInvalidHandle)
		{
			const KeyValue *kv = mAllocator.template FromOffset<const KeyValue>(cur);
			outAll.push_back(kv);
			cur = kv->mNextOffset;
		}
	}
}
template <class Key, class Value>
typename LockFreeHashMap<Key, Value>::Iterator LockFreeHashMap<Key, Value>::begin()
{
	// Point an iterator at the head of the first bucket
	Iterator it { this, 0, mBuckets[0] };

	// An empty first bucket means we must advance to the first real entry (or to end)
	return it.mOffset == cInvalidHandle? ++it : it;
}
template <class Key, class Value>
typename LockFreeHashMap<Key, Value>::Iterator LockFreeHashMap<Key, Value>::end()
{
	// The end iterator is one past the last bucket and carries no valid offset
	return Iterator { this, mNumBuckets, cInvalidHandle };
}
template <class Key, class Value>
typename LockFreeHashMap<Key, Value>::KeyValue &LockFreeHashMap<Key, Value>::Iterator::operator* ()
{
	// Dereferencing the end iterator (or any iterator without a valid offset) is not allowed
	JPH_ASSERT(mOffset != cInvalidHandle);

	KeyValue *kv = mMap->mAllocator.template FromOffset<KeyValue>(mOffset);
	return *kv;
}
template <class Key, class Value>
typename LockFreeHashMap<Key, Value>::Iterator &LockFreeHashMap<Key, Value>::Iterator::operator++ ()
{
	JPH_ASSERT(mBucket < mMap->mNumBuckets);

	// First try to advance within the current chain
	if (mOffset != cInvalidHandle)
	{
		const KeyValue *kv = mMap->mAllocator.template FromOffset<const KeyValue>(mOffset);
		mOffset = kv->mNextOffset;
		if (mOffset != cInvalidHandle)
			return *this;
	}

	// Chain exhausted (mOffset is now cInvalidHandle), scan the remaining buckets for a non-empty one.
	// If none is found we end with mBucket == mNumBuckets and an invalid offset, which equals end().
	while (++mBucket < mMap->mNumBuckets)
	{
		mOffset = mMap->mBuckets[mBucket];
		if (mOffset != cInvalidHandle)
			break;
	}
	return *this;
}
#ifdef JPH_DEBUG
template <class Key, class Value>
void LockFreeHashMap<Key, Value>::TraceStats() const
{
	// Print bucket occupancy statistics for this map (debug builds only)
	const int cMaxPerBucket = 256;

	int histogram[cMaxPerBucket] = { };		// Zero-initialize all chain-length counters
	int longest_chain = 0;
	int total_objects = 0;

	for (uint32 b = 0; b < mNumBuckets; ++b)
	{
		// Count the length of the chain hanging off this bucket
		int chain_length = 0;
		for (uint32 cur = mBuckets[b]; cur != cInvalidHandle; )
		{
			const KeyValue *kv = mAllocator.template FromOffset<const KeyValue>(cur);
			cur = kv->mNextOffset;
			++chain_length;
			++total_objects;
		}

		longest_chain = max(chain_length, longest_chain);
		histogram[min(chain_length, cMaxPerBucket - 1)]++;	// Chains >= cMaxPerBucket all land in the last slot
	}

	Trace("max_objects_per_bucket = %d, num_buckets = %u, num_objects = %d", longest_chain, mNumBuckets, total_objects);
	for (int i = 0; i < cMaxPerBucket; ++i)
		if (histogram[i] != 0)
			Trace("%d: %d", i, histogram[i]);
}
#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,85 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <cstdlib>
JPH_SUPPRESS_WARNINGS_STD_END
#include <stdlib.h>
JPH_NAMESPACE_BEGIN
#ifdef JPH_DISABLE_CUSTOM_ALLOCATOR
#define JPH_ALLOC_FN(x) x
#define JPH_ALLOC_SCOPE
#else
#define JPH_ALLOC_FN(x) x##Impl
#define JPH_ALLOC_SCOPE static
#endif
// Default unaligned allocator: forwards to malloc.
// When JPH_DISABLE_CUSTOM_ALLOCATOR is set this defines JPH::Allocate directly,
// otherwise it defines a static AllocateImpl installed by RegisterDefaultAllocator.
JPH_ALLOC_SCOPE void *JPH_ALLOC_FN(Allocate)(size_t inSize)
{
	// Zero-sized allocations are not allowed by the allocator contract
	JPH_ASSERT(inSize > 0);
	return malloc(inSize);
}
// Default reallocation: forwards to realloc.
// inOldSize is only useful for custom allocators; realloc tracks block sizes itself.
JPH_ALLOC_SCOPE void *JPH_ALLOC_FN(Reallocate)(void *inBlock, [[maybe_unused]] size_t inOldSize, size_t inNewSize)
{
	// Reallocating to size zero is not allowed by the allocator contract
	JPH_ASSERT(inNewSize > 0);
	return realloc(inBlock, inNewSize);
}
// Default free for blocks obtained through Allocate/Reallocate (free accepts nullptr)
JPH_ALLOC_SCOPE void JPH_ALLOC_FN(Free)(void *inBlock)
{
	free(inBlock);
}
// Default aligned allocator. NOTE(review): posix_memalign additionally requires the
// alignment to be a power of two and a multiple of sizeof(void *) — callers are assumed
// to satisfy this, it is not checked here.
JPH_ALLOC_SCOPE void *JPH_ALLOC_FN(AlignedAllocate)(size_t inSize, size_t inAlignment)
{
	JPH_ASSERT(inSize > 0 && inAlignment > 0);
#if defined(JPH_PLATFORM_WINDOWS)
	// Microsoft doesn't implement posix_memalign
	return _aligned_malloc(inSize, inAlignment);
#else
	// Initialized to nullptr so a failed posix_memalign (which reports failure through
	// its deliberately-ignored return value) makes us return nullptr like malloc would
	void *block = nullptr;
	JPH_SUPPRESS_WARNING_PUSH
	JPH_GCC_SUPPRESS_WARNING("-Wunused-result")
	JPH_CLANG_SUPPRESS_WARNING("-Wunused-result")
	posix_memalign(&block, inAlignment, inSize);
	JPH_SUPPRESS_WARNING_POP
	return block;
#endif
}
// Free for blocks obtained through AlignedAllocate; must use _aligned_free on Windows
// because the block came from _aligned_malloc there
JPH_ALLOC_SCOPE void JPH_ALLOC_FN(AlignedFree)(void *inBlock)
{
#if defined(JPH_PLATFORM_WINDOWS)
	_aligned_free(inBlock);
#else
	free(inBlock);
#endif
}
#ifndef JPH_DISABLE_CUSTOM_ALLOCATOR
AllocateFunction Allocate = nullptr;
ReallocateFunction Reallocate = nullptr;
FreeFunction Free = nullptr;
AlignedAllocateFunction AlignedAllocate = nullptr;
AlignedFreeFunction AlignedFree = nullptr;
// Installs the malloc-based implementations defined above as the active allocation hooks.
// Must be called before the library allocates anything (the pointers start out nullptr).
void RegisterDefaultAllocator()
{
	Allocate = AllocateImpl;
	Reallocate = ReallocateImpl;
	Free = FreeImpl;
	AlignedAllocate = AlignedAllocateImpl;
	AlignedFree = AlignedFreeImpl;
}
#endif // JPH_DISABLE_CUSTOM_ALLOCATOR
JPH_NAMESPACE_END

View File

@@ -0,0 +1,74 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
#ifndef JPH_DISABLE_CUSTOM_ALLOCATOR
// Normal memory allocation, must be at least 8 byte aligned on 32 bit platform and 16 byte aligned on 64 bit platform
using AllocateFunction = void *(*)(size_t inSize);
using ReallocateFunction = void *(*)(void *inBlock, size_t inOldSize, size_t inNewSize);
using FreeFunction = void (*)(void *inBlock);
// Aligned memory allocation
using AlignedAllocateFunction = void *(*)(size_t inSize, size_t inAlignment);
using AlignedFreeFunction = void (*)(void *inBlock);
// User defined allocation / free functions
JPH_EXPORT extern AllocateFunction Allocate;
JPH_EXPORT extern ReallocateFunction Reallocate;
JPH_EXPORT extern FreeFunction Free;
JPH_EXPORT extern AlignedAllocateFunction AlignedAllocate;
JPH_EXPORT extern AlignedFreeFunction AlignedFree;
/// Register platform default allocation / free functions
JPH_EXPORT void RegisterDefaultAllocator();
// 32-bit MinGW g++ doesn't call the correct overload for the new operator when a type is 16 bytes aligned.
// It uses the non-aligned version, which on 32 bit platforms usually returns an 8 byte aligned block.
// We therefore default to 16 byte aligned allocations when the regular new operator is used.
// See: https://github.com/godotengine/godot/issues/105455#issuecomment-2824311547
#if defined(JPH_COMPILER_MINGW) && JPH_CPU_ADDRESS_BITS == 32
#define JPH_INTERNAL_DEFAULT_ALLOCATE(size) JPH::AlignedAllocate(size, 16)
#define JPH_INTERNAL_DEFAULT_FREE(pointer) JPH::AlignedFree(pointer)
#else
#define JPH_INTERNAL_DEFAULT_ALLOCATE(size) JPH::Allocate(size)
#define JPH_INTERNAL_DEFAULT_FREE(pointer) JPH::Free(pointer)
#endif
/// Macro to override the new and delete functions
#define JPH_OVERRIDE_NEW_DELETE \
JPH_INLINE void *operator new (size_t inCount) { return JPH_INTERNAL_DEFAULT_ALLOCATE(inCount); } \
JPH_INLINE void operator delete (void *inPointer) noexcept { JPH_INTERNAL_DEFAULT_FREE(inPointer); } \
JPH_INLINE void *operator new[] (size_t inCount) { return JPH_INTERNAL_DEFAULT_ALLOCATE(inCount); } \
JPH_INLINE void operator delete[] (void *inPointer) noexcept { JPH_INTERNAL_DEFAULT_FREE(inPointer); } \
JPH_INLINE void *operator new (size_t inCount, std::align_val_t inAlignment) { return JPH::AlignedAllocate(inCount, static_cast<size_t>(inAlignment)); } \
JPH_INLINE void operator delete (void *inPointer, [[maybe_unused]] std::align_val_t inAlignment) noexcept { JPH::AlignedFree(inPointer); } \
JPH_INLINE void *operator new[] (size_t inCount, std::align_val_t inAlignment) { return JPH::AlignedAllocate(inCount, static_cast<size_t>(inAlignment)); } \
JPH_INLINE void operator delete[] (void *inPointer, [[maybe_unused]] std::align_val_t inAlignment) noexcept { JPH::AlignedFree(inPointer); } \
JPH_INLINE void *operator new ([[maybe_unused]] size_t inCount, void *inPointer) noexcept { return inPointer; } \
JPH_INLINE void operator delete ([[maybe_unused]] void *inPointer, [[maybe_unused]] void *inPlace) noexcept { /* Do nothing */ } \
JPH_INLINE void *operator new[] ([[maybe_unused]] size_t inCount, void *inPointer) noexcept { return inPointer; } \
JPH_INLINE void operator delete[] ([[maybe_unused]] void *inPointer, [[maybe_unused]] void *inPlace) noexcept { /* Do nothing */ }
#else
// Directly define the allocation functions
JPH_EXPORT void *Allocate(size_t inSize);
JPH_EXPORT void *Reallocate(void *inBlock, size_t inOldSize, size_t inNewSize);
JPH_EXPORT void Free(void *inBlock);
JPH_EXPORT void *AlignedAllocate(size_t inSize, size_t inAlignment);
JPH_EXPORT void AlignedFree(void *inBlock);
// Don't implement allocator registering
inline void RegisterDefaultAllocator() { /* No-op: in this configuration the allocation functions are statically bound */ }
// Don't override new/delete
#define JPH_OVERRIDE_NEW_DELETE
#endif // !JPH_DISABLE_CUSTOM_ALLOCATOR
JPH_NAMESPACE_END

View File

@@ -0,0 +1,223 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Profiler.h>
#include <Jolt/Core/NonCopyable.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <mutex>
#include <shared_mutex>
#include <thread>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
// Things we're using from STL
using std::mutex;
using std::shared_mutex;
using std::thread;
using std::lock_guard;
using std::shared_lock;
using std::unique_lock;
#ifdef JPH_PLATFORM_BLUE
// On Platform Blue the mutex class is not very fast so we implement it using the official APIs
class MutexBase : public NonCopyable
{
public:
	MutexBase()
	{
		JPH_PLATFORM_BLUE_MUTEX_INIT(mMutex);
	}

	~MutexBase()
	{
		JPH_PLATFORM_BLUE_MUTEX_DESTROY(mMutex);
	}

	/// Try to acquire the mutex without blocking, returns true on success
	inline bool try_lock()
	{
		return JPH_PLATFORM_BLUE_MUTEX_TRYLOCK(mMutex);
	}

	/// Block until the mutex is acquired
	inline void lock()
	{
		JPH_PLATFORM_BLUE_MUTEX_LOCK(mMutex);
	}

	/// Release the mutex
	inline void unlock()
	{
		JPH_PLATFORM_BLUE_MUTEX_UNLOCK(mMutex);
	}

private:
	JPH_PLATFORM_BLUE_MUTEX	mMutex;		///< Native Platform Blue mutex object
};
// On Platform Blue the shared_mutex class is not very fast so we implement it using the official APIs
class SharedMutexBase : public NonCopyable
{
public:
	SharedMutexBase()
	{
		JPH_PLATFORM_BLUE_RWLOCK_INIT(mRWLock);
	}

	~SharedMutexBase()
	{
		JPH_PLATFORM_BLUE_RWLOCK_DESTROY(mRWLock);
	}

	/// Try to acquire the lock exclusively (for writing) without blocking, returns true on success
	inline bool try_lock()
	{
		return JPH_PLATFORM_BLUE_RWLOCK_TRYWLOCK(mRWLock);
	}

	/// Try to acquire the lock shared (for reading) without blocking, returns true on success
	inline bool try_lock_shared()
	{
		return JPH_PLATFORM_BLUE_RWLOCK_TRYRLOCK(mRWLock);
	}

	/// Block until the lock is held exclusively (for writing)
	inline void lock()
	{
		JPH_PLATFORM_BLUE_RWLOCK_WLOCK(mRWLock);
	}

	/// Release exclusive (write) ownership
	inline void unlock()
	{
		JPH_PLATFORM_BLUE_RWLOCK_WUNLOCK(mRWLock);
	}

	/// Block until the lock is held shared (for reading)
	inline void lock_shared()
	{
		JPH_PLATFORM_BLUE_RWLOCK_RLOCK(mRWLock);
	}

	/// Release shared (read) ownership
	inline void unlock_shared()
	{
		JPH_PLATFORM_BLUE_RWLOCK_RUNLOCK(mRWLock);
	}

private:
	JPH_PLATFORM_BLUE_RWLOCK	mRWLock;	///< Native Platform Blue read/write lock
};
#else
// On other platforms just use the STL implementation
using MutexBase = mutex;
using SharedMutexBase = shared_mutex;
#endif // JPH_PLATFORM_BLUE
#if defined(JPH_ENABLE_ASSERTS) || defined(JPH_PROFILE_ENABLED) || defined(JPH_EXTERNAL_PROFILE)
/// Very simple wrapper around MutexBase which tracks lock contention in the profiler
/// and asserts that locks/unlocks take place on the same thread
class Mutex : public MutexBase
{
public:
	inline bool try_lock()
	{
		// Recursive locking is not supported: the calling thread may not already hold this mutex
		JPH_ASSERT(mLockedThreadID != std::this_thread::get_id());
		if (MutexBase::try_lock())
		{
			JPH_IF_ENABLE_ASSERTS(mLockedThreadID = std::this_thread::get_id();)
			return true;
		}
		return false;
	}

	inline void lock()
	{
		// Try the uncontended path first; only emit a profile event when we actually have to wait
		if (!try_lock())
		{
			JPH_PROFILE("Lock", 0xff00ffff);
			MutexBase::lock();
			JPH_IF_ENABLE_ASSERTS(mLockedThreadID = std::this_thread::get_id();)
		}
	}

	inline void unlock()
	{
		// Only the thread that locked the mutex may unlock it
		JPH_ASSERT(mLockedThreadID == std::this_thread::get_id());
		// Clear the owner before releasing: right after MutexBase::unlock() another
		// thread may acquire the mutex and write mLockedThreadID concurrently
		JPH_IF_ENABLE_ASSERTS(mLockedThreadID = thread::id();)
		MutexBase::unlock();
	}

#ifdef JPH_ENABLE_ASSERTS
	/// Debug helper: true when some thread currently owns the mutex (result may be stale)
	inline bool is_locked()
	{
		return mLockedThreadID != thread::id();
	}
#endif // JPH_ENABLE_ASSERTS

private:
	JPH_IF_ENABLE_ASSERTS(thread::id mLockedThreadID;)		// Owning thread, only tracked when asserts are enabled
};
/// Very simple wrapper around SharedMutexBase which tracks lock contention in the profiler
/// and asserts that locks/unlocks take place on the same thread
class SharedMutex : public SharedMutexBase
{
public:
	inline bool try_lock()
	{
		// Recursive exclusive locking is not supported: the caller may not already hold the write lock
		JPH_ASSERT(mLockedThreadID != std::this_thread::get_id());
		if (SharedMutexBase::try_lock())
		{
			JPH_IF_ENABLE_ASSERTS(mLockedThreadID = std::this_thread::get_id();)
			return true;
		}
		return false;
	}

	inline void lock()
	{
		// Try the uncontended path first; only emit a profile event when we actually have to wait
		if (!try_lock())
		{
			JPH_PROFILE("WLock", 0xff00ffff);
			SharedMutexBase::lock();
			JPH_IF_ENABLE_ASSERTS(mLockedThreadID = std::this_thread::get_id();)
		}
	}

	inline void unlock()
	{
		// Only the thread that write-locked the mutex may unlock it
		JPH_ASSERT(mLockedThreadID == std::this_thread::get_id());
		// Clear the owner before releasing: another thread may acquire and write mLockedThreadID right after
		JPH_IF_ENABLE_ASSERTS(mLockedThreadID = thread::id();)
		SharedMutexBase::unlock();
	}

#ifdef JPH_ENABLE_ASSERTS
	/// Debug helper: true when some thread holds the write lock (shared/read locks are not tracked)
	inline bool is_locked()
	{
		return mLockedThreadID != thread::id();
	}
#endif // JPH_ENABLE_ASSERTS

	inline void lock_shared()
	{
		// Shared (read) locks are not recorded in mLockedThreadID; unlock_shared is inherited from the base class
		if (!try_lock_shared())
		{
			JPH_PROFILE("RLock", 0xff00ffff);
			SharedMutexBase::lock_shared();
		}
	}

private:
	JPH_IF_ENABLE_ASSERTS(thread::id mLockedThreadID;)		// Exclusive owner, only tracked when asserts are enabled
};
#else
using Mutex = MutexBase;
using SharedMutex = SharedMutexBase;
#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,98 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// A mutex array protects a number of resources with a limited amount of mutexes.
/// It uses hashing to find the mutex of a particular object.
/// The idea is that if the amount of threads is much smaller than the amount of mutexes
/// that there is a relatively small chance that two different objects map to the same mutex.
template <class MutexType>
class MutexArray : public NonCopyable
{
public:
/// Constructor, constructs an empty mutex array that you need to initialize with Init()
MutexArray() = default;
/// Constructor, constructs an array with inNumMutexes entries
explicit MutexArray(uint inNumMutexes) { Init(inNumMutexes); }
/// Destructor
~MutexArray() { delete [] mMutexStorage; }
/// Initialization
/// @param inNumMutexes The amount of mutexes to allocate
void Init(uint inNumMutexes)
{
JPH_ASSERT(mMutexStorage == nullptr);
JPH_ASSERT(inNumMutexes > 0 && IsPowerOf2(inNumMutexes));
mMutexStorage = new MutexStorage[inNumMutexes];
mNumMutexes = inNumMutexes;
}
/// Get the number of mutexes that were allocated
inline uint GetNumMutexes() const
{
return mNumMutexes;
}
/// Convert an object index to a mutex index
inline uint32 GetMutexIndex(uint32 inObjectIndex) const
{
Hash<uint32> hasher;
return hasher(inObjectIndex) & (mNumMutexes - 1);
}
/// Get the mutex belonging to a certain object by index
inline MutexType & GetMutexByObjectIndex(uint32 inObjectIndex)
{
return mMutexStorage[GetMutexIndex(inObjectIndex)].mMutex;
}
/// Get a mutex by index in the array
inline MutexType & GetMutexByIndex(uint32 inMutexIndex)
{
return mMutexStorage[inMutexIndex].mMutex;
}
/// Lock all mutexes
void LockAll()
{
JPH_PROFILE_FUNCTION();
MutexStorage *end = mMutexStorage + mNumMutexes;
for (MutexStorage *m = mMutexStorage; m < end; ++m)
m->mMutex.lock();
}
/// Unlock all mutexes
void UnlockAll()
{
JPH_PROFILE_FUNCTION();
MutexStorage *end = mMutexStorage + mNumMutexes;
for (MutexStorage *m = mMutexStorage; m < end; ++m)
m->mMutex.unlock();
}
private:
/// Align the mutex to a cache line to ensure there is no false sharing (this is platform dependent, we do this to be safe)
struct alignas(JPH_CACHE_LINE_SIZE) MutexStorage
{
JPH_OVERRIDE_NEW_DELETE
MutexType mMutex;
};
MutexStorage * mMutexStorage = nullptr;
uint mNumMutexes = 0;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,18 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Class that makes another class non-copyable. Usage: Inherit from NonCopyable.
class JPH_EXPORT NonCopyable
{
public:
	// Default construction stays available to derived classes
	NonCopyable() = default;
	// Deleting the copy operations also suppresses compiler-generated moves for derived classes
	NonCopyable(const NonCopyable &) = delete;
	void operator = (const NonCopyable &) = delete;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,677 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/Profiler.h>
#include <Jolt/Core/Color.h>
#include <Jolt/Core/StringTools.h>
#include <Jolt/Core/QuickSort.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <fstream>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
#if defined(JPH_EXTERNAL_PROFILE) && defined(JPH_SHARED_LIBRARY)
ProfileStartMeasurementFunction ProfileStartMeasurement = [](const char *, uint32, uint8 *) { };
ProfileEndMeasurementFunction ProfileEndMeasurement = [](uint8 *) { };
#elif defined(JPH_PROFILE_ENABLED)
//////////////////////////////////////////////////////////////////////////////////////////
// Profiler
//////////////////////////////////////////////////////////////////////////////////////////
Profiler *Profiler::sInstance = nullptr;
#ifdef JPH_SHARED_LIBRARY
static thread_local ProfileThread *sInstance = nullptr;
ProfileThread *ProfileThread::sGetInstance()
{
return sInstance;
}
void ProfileThread::sSetInstance(ProfileThread *inInstance)
{
sInstance = inInstance;
}
#else
thread_local ProfileThread *ProfileThread::sInstance = nullptr;
#endif
bool ProfileMeasurement::sOutOfSamplesReported = false;
// Capture a paired (processor tick, wall clock) reference sample;
// GetProcessorTicksPerSecond measures the tick rate relative to this sample
void Profiler::UpdateReferenceTime()
{
	mReferenceTick = GetProcessorTickCount();
	mReferenceTime = std::chrono::high_resolution_clock::now();
}
// Estimate the processor tick rate by measuring ticks and wall clock time elapsed
// since the last UpdateReferenceTime() call. Returns 0 when no measurable time has passed.
uint64 Profiler::GetProcessorTicksPerSecond() const
{
	// Sample current tick count and wall clock
	uint64 ticks = GetProcessorTickCount();
	std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now();

	uint64 delta_ticks = ticks - mReferenceTick;
	uint64 delta_ns = uint64(std::chrono::duration_cast<std::chrono::nanoseconds>(time - mReferenceTime).count());
	if (delta_ns == 0)
		return 0; // Called within the clock's resolution of the reference sample, avoid division by zero

	// Scale to ticks per second using double math: the previous integer form
	// (delta_ticks * 1000000000ULL / delta_ns) overflows 64 bits once more than a few
	// seconds have elapsed on a GHz-rate tick counter
	return uint64(double(delta_ticks) * 1.0e9 / double(delta_ns));
}
// This function assumes that none of the threads are active while we're dumping the profile,
// otherwise there will be a race condition on mCurrentSample and the profile data.
JPH_TSAN_NO_SANITIZE
void Profiler::NextFrame()
{
	std::lock_guard lock(mLock);

	// A dump was requested through Dump(); write it out now that the frame is complete
	if (mDump)
	{
		DumpInternal();
		mDump = false;
	}

	// Rewind the sample buffers of all registered threads for the next frame
	for (ProfileThread *t : mThreads)
		t->mCurrentSample = 0;

	UpdateReferenceTime();
}
// Request a dump of the current profile data; this only raises a flag,
// the actual dump is written by the next NextFrame() call
void Profiler::Dump(const string_view &inTag)
{
	mDump = true;
	mDumpTag = inTag;
}
// Register a thread's sample storage so NextFrame()/DumpInternal() will include it
void Profiler::AddThread(ProfileThread *inThread)
{
	std::lock_guard lock(mLock);

	mThreads.push_back(inThread);
}
void Profiler::RemoveThread(ProfileThread *inThread)
{
	std::lock_guard lock(mLock);

	// The thread must have registered itself through AddThread before
	auto pos = std::find(mThreads.begin(), mThreads.end(), inThread);
	JPH_ASSERT(pos != mThreads.end());
	mThreads.erase(pos);
}
// Recursively aggregates one sample (and its nested children) from a flat, start-time ordered
// sample array into per-name totals. On return ioSample points at the last descendant that was
// consumed, so the caller's loop increment moves past the whole subtree.
void Profiler::sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator)
{
	// Store the nesting depth on the sample (clamped to what fits in the 8 bit field)
	ioSample->mDepth = uint8(min(255, inDepth));

	// Update color: samples without an explicit color inherit their parent's color
	if (ioSample->mColor == 0)
		ioSample->mColor = inColor;
	else
		inColor = ioSample->mColor;

	// Total cycles for this sample including everything nested inside it
	uint64 cycles_this_with_children = ioSample->mEndCycle - ioSample->mStartCycle;

	// Samples are stored in start-time order, so every following sample that starts before our
	// end is a descendant; loop until we find one that starts on or after our end
	ProfileSample *sample;
	for (sample = ioSample + 1; sample < inEnd && sample->mStartCycle < ioSample->mEndCycle; ++sample)
	{
		JPH_ASSERT(sample[-1].mStartCycle <= sample->mStartCycle);
		JPH_ASSERT(sample->mStartCycle >= ioSample->mStartCycle);
		JPH_ASSERT(sample->mEndCycle <= ioSample->mEndCycle);

		// Recurse; sAggregate advances 'sample' past this child's own children
		sAggregate(inDepth + 1, inColor, sample, inEnd, ioAggregators, ioKeyToAggregator);
	}

	// Find the aggregator for this sample name, creating it on first sight
	Aggregator *aggregator;
	KeyToAggregator::iterator aggregator_idx = ioKeyToAggregator.find(ioSample->mName);
	if (aggregator_idx == ioKeyToAggregator.end())
	{
		// Not found, add to map and insert in array
		ioKeyToAggregator.try_emplace(ioSample->mName, ioAggregators.size());
		ioAggregators.emplace_back(ioSample->mName);
		aggregator = &ioAggregators.back();
	}
	else
	{
		// Found
		aggregator = &ioAggregators[aggregator_idx->second];
	}

	// Add the measurement to the aggregator
	aggregator->AccumulateMeasurement(cycles_this_with_children);

	// Leave ioSample pointing at our last consumed descendant so the caller skips it
	JPH_ASSERT(sample[-1].mStartCycle <= ioSample->mEndCycle);
	JPH_ASSERT(sample >= inEnd || sample->mStartCycle >= ioSample->mEndCycle);
	ioSample = sample - 1;
}
void Profiler::DumpInternal()
{
	// Freeze data from threads
	// Note that this is not completely thread safe: As a profile sample is added mCurrentSample is incremented
	// but the data is not written until the sample finishes. So if we dump the profile information while
	// some other thread is running, we may get some garbage information from the previous frame
	Threads threads;
	for (ProfileThread *t : mThreads)
		threads.push_back({ t->mThreadName, t->mSamples, t->mSamples + t->mCurrentSample });

	// Shift all samples so that the first sample (across all threads) starts at cycle zero
	uint64 min_cycle = 0xffffffffffffffffUL;
	for (const ThreadSamples &t : threads)
		if (t.mSamplesBegin < t.mSamplesEnd)
			min_cycle = min(min_cycle, t.mSamplesBegin[0].mStartCycle);
	for (const ThreadSamples &t : threads)
		for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			s->mStartCycle -= min_cycle;
			s->mEndCycle -= min_cycle;
		}

	// Determine tag of this profile
	String tag;
	if (mDumpTag.empty())
	{
		// No tag provided through Dump(), use an incrementing sequence number
		static int number = 0;
		++number;
		tag = ConvertToString(number);
	}
	else
	{
		// Take provided tag (and clear it so the next dump starts fresh)
		tag = mDumpTag;
		mDumpTag.clear();
	}

	// Aggregate data across threads; sAggregate advances s past each sample's children
	Aggregators aggregators;
	KeyToAggregator key_to_aggregators;
	for (const ThreadSamples &t : threads)
		for (ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
			sAggregate(0, Color::sGetDistinctColor(0).GetUInt32(), s, end, aggregators, key_to_aggregators);

	// Dump as chart
	DumpChart(tag.c_str(), threads, key_to_aggregators, aggregators);
}
// Escape characters that are meaningful in HTML so sample names render literally.
// '&' must be replaced first, otherwise the '&' produced by "&lt;"/"&gt;" would be escaped again.
static String sHTMLEncode(const char *inString)
{
	String str(inString);
	StringReplace(str, "&", "&amp;");
	StringReplace(str, "<", "&lt;");
	StringReplace(str, ">", "&gt;");
	return str;
}
// Writes an interactive HTML/JavaScript chart of the captured samples to profile_chart_<tag>.html.
// The static page layout and viewer script are emitted first, then the sample data is emitted as
// JavaScript variables consumed by that script.
void Profiler::DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators)
{
	// Open file (silently abort the dump if it cannot be created)
	std::ofstream f;
	f.open(StringFormat("profile_chart_%s.html", inTag).c_str(), std::ofstream::out | std::ofstream::trunc);
	if (!f.is_open())
		return;

	// Write header
	f << R"(<!DOCTYPE html>
<html>
<head>
<title>Profile Chart</title>
<style>
html, body {
padding: 0px;
border: 0px;
margin: 0px;
width: 100%;
height: 100%;
overflow: hidden;
}
canvas {
position: absolute;
top: 10px;
left: 10px;
padding: 0px;
border: 0px;
margin: 0px;
}
#tooltip {
font: Courier New;
position: absolute;
background-color: white;
border: 1px;
border-style: solid;
border-color: black;
pointer-events: none;
padding: 5px;
font: 14px Arial;
visibility: hidden;
height: auto;
}
.stat {
color: blue;
text-align: right;
}
</style>
<script type="text/javascript">
var canvas;
var ctx;
var tooltip;
var min_scale;
var scale;
var offset_x = 0;
var offset_y = 0;
var size_y;
var dragging = false;
var previous_x = 0;
var previous_y = 0;
var bar_height = 15;
var line_height = bar_height + 2;
var thread_separation = 6;
var thread_font_size = 12;
var thread_font = thread_font_size + "px Arial";
var bar_font_size = 10;
var bar_font = bar_font_size + "px Arial";
var end_cycle = 0;
function drawChart()
{
ctx.clearRect(0, 0, canvas.width, canvas.height);
ctx.lineWidth = 1;
var y = offset_y;
for (var t = 0; t < threads.length; t++)
{
// Check if thread has samples
var thread = threads[t];
if (thread.start.length == 0)
continue;
// Draw thread name
y += thread_font_size;
ctx.font = thread_font;
ctx.fillStyle = "#000000";
ctx.fillText(thread.thread_name, 0, y);
y += thread_separation;
// Draw outlines for each bar of samples
ctx.fillStyle = "#c0c0c0";
for (var d = 0; d <= thread.max_depth; d++)
ctx.fillRect(0, y + d * line_height, canvas.width, bar_height);
// Draw samples
ctx.font = bar_font;
for (var s = 0; s < thread.start.length; s++)
{
// Cull bar
var rx = scale * (offset_x + thread.start[s]);
if (rx > canvas.width) // right of canvas
break;
var rw = scale * thread.cycles[s];
if (rw < 0.5) // less than half pixel, skip
continue;
if (rx + rw < 0) // left of canvas
continue;
// Draw bar
var ry = y + line_height * thread.depth[s];
ctx.fillStyle = thread.color[s];
ctx.fillRect(rx, ry, rw, bar_height);
ctx.strokeStyle = thread.darkened_color[s];
ctx.strokeRect(rx, ry, rw, bar_height);
// Get index in aggregated list
var a = thread.aggregator[s];
// Draw text
if (rw > aggregated.name_width[a])
{
ctx.fillStyle = "#000000";
ctx.fillText(aggregated.name[a], rx + (rw - aggregated.name_width[a]) / 2, ry + bar_height - 4);
}
}
// Next line
y += line_height * (1 + thread.max_depth) + thread_separation;
}
// Update size
size_y = y - offset_y;
}
function drawTooltip(mouse_x, mouse_y)
{
var y = offset_y;
for (var t = 0; t < threads.length; t++)
{
// Check if thread has samples
var thread = threads[t];
if (thread.start.length == 0)
continue;
// Thead name
y += thread_font_size + thread_separation;
// Draw samples
for (var s = 0; s < thread.start.length; s++)
{
// Cull bar
var rx = scale * (offset_x + thread.start[s]);
if (rx > mouse_x)
break;
var rw = scale * thread.cycles[s];
if (rx + rw < mouse_x)
continue;
var ry = y + line_height * thread.depth[s];
if (mouse_y >= ry && mouse_y < ry + bar_height)
{
// Get index into aggregated list
var a = thread.aggregator[s];
// Found bar, fill in tooltip
tooltip.style.left = (canvas.offsetLeft + mouse_x) + "px";
tooltip.style.top = (canvas.offsetTop + mouse_y) + "px";
tooltip.style.visibility = "visible";
tooltip.innerHTML = aggregated.name[a] + "<br>"
+ "<table>"
+ "<tr><td>Time:</td><td class=\"stat\">" + (1000000 * thread.cycles[s] / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Start:</td><td class=\"stat\">" + (1000000 * thread.start[s] / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>End:</td><td class=\"stat\">" + (1000000 * (thread.start[s] + thread.cycles[s]) / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Avg. Time:</td><td class=\"stat\">" + (1000000 * aggregated.cycles_per_frame[a] / cycles_per_second / aggregated.calls[a]).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Min Time:</td><td class=\"stat\">" + (1000000 * aggregated.min_cycles[a] / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Max Time:</td><td class=\"stat\">" + (1000000 * aggregated.max_cycles[a] / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Time / Frame:</td><td class=\"stat\">" + (1000000 * aggregated.cycles_per_frame[a] / cycles_per_second).toFixed(2) + " &micro;s</td></tr>"
+ "<tr><td>Calls:</td><td class=\"stat\">" + aggregated.calls[a] + "</td></tr>"
+ "</table>";
return;
}
}
// Next line
y += line_height * (1 + thread.max_depth) + thread_separation;
}
// No bar found, hide tooltip
tooltip.style.visibility = "hidden";
}
function onMouseDown(evt)
{
dragging = true;
previous_x = evt.clientX, previous_y = evt.clientY;
tooltip.style.visibility = "hidden";
}
function onMouseUp(evt)
{
dragging = false;
}
function clampMotion()
{
// Clamp horizontally
var min_offset_x = canvas.width / scale - end_cycle;
if (offset_x < min_offset_x)
offset_x = min_offset_x;
if (offset_x > 0)
offset_x = 0;
// Clamp vertically
var min_offset_y = canvas.height - size_y;
if (offset_y < min_offset_y)
offset_y = min_offset_y;
if (offset_y > 0)
offset_y = 0;
// Clamp scale
if (scale < min_scale)
scale = min_scale;
var max_scale = 1000 * min_scale;
if (scale > max_scale)
scale = max_scale;
}
function onMouseMove(evt)
{
if (dragging)
{
// Calculate new offset
offset_x += (evt.clientX - previous_x) / scale;
offset_y += evt.clientY - previous_y;
clampMotion();
drawChart();
}
else
drawTooltip(evt.clientX - canvas.offsetLeft, evt.clientY - canvas.offsetTop);
previous_x = evt.clientX, previous_y = evt.clientY;
}
function onScroll(evt)
{
tooltip.style.visibility = "hidden";
var old_scale = scale;
if (evt.deltaY > 0)
scale /= 1.1;
else
scale *= 1.1;
clampMotion();
// Ensure that event under mouse stays under mouse
var x = previous_x - canvas.offsetLeft;
offset_x += x / scale - x / old_scale;
clampMotion();
drawChart();
}
function darkenColor(color)
{
var i = parseInt(color.slice(1), 16);
var r = i >> 16;
var g = (i >> 8) & 0xff;
var b = i & 0xff;
r = Math.round(0.8 * r);
g = Math.round(0.8 * g);
b = Math.round(0.8 * b);
i = (r << 16) + (g << 8) + b;
return "#" + i.toString(16);
}
function startChart()
{
// Fetch elements
canvas = document.getElementById('canvas');
ctx = canvas.getContext("2d");
tooltip = document.getElementById('tooltip');
// Resize canvas to fill screen
canvas.width = document.body.offsetWidth - 20;
canvas.height = document.body.offsetHeight - 20;
// Register mouse handlers
canvas.onmousedown = onMouseDown;
canvas.onmouseup = onMouseUp;
canvas.onmouseout = onMouseUp;
canvas.onmousemove = onMouseMove;
canvas.onwheel = onScroll;
for (var t = 0; t < threads.length; t++)
{
var thread = threads[t];
// Calculate darkened colors
thread.darkened_color = new Array(thread.color.length);
for (var s = 0; s < thread.color.length; s++)
thread.darkened_color[s] = darkenColor(thread.color[s]);
// Calculate max depth and end cycle
thread.max_depth = 0;
for (var s = 0; s < thread.start.length; s++)
{
thread.max_depth = Math.max(thread.max_depth, thread.depth[s]);
end_cycle = Math.max(end_cycle, thread.start[s] + thread.cycles[s]);
}
}
// Calculate width of name strings
ctx.font = bar_font;
aggregated.name_width = new Array(aggregated.name.length);
for (var a = 0; a < aggregated.name.length; a++)
aggregated.name_width[a] = ctx.measureText(aggregated.name[a]).width;
// Store scale properties
min_scale = canvas.width / end_cycle;
scale = min_scale;
drawChart();
}
</script>
</head>
<body onload="startChart();">
<script type="text/javascript">
)";

	// Get cycles per second so the viewer can convert cycle counts to microseconds
	uint64 cycles_per_second = GetProcessorTicksPerSecond();
	f << "var cycles_per_second = " << cycles_per_second << ";\n";

	// Dump samples: every thread becomes an object of parallel arrays (one entry per sample)
	f << "var threads = [\n";
	bool first_thread = true;
	for (const ThreadSamples &t : inThreads)
	{
		if (!first_thread)
			f << ",\n";
		first_thread = false;

		f << "{\nthread_name: \"" << t.mThreadName << "\",\naggregator: [";
		bool first = true;
		for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			if (!first)
				f << ",";
			first = false;
			f << inKeyToAggregators.find(s->mName)->second;
		}
		f << "],\ncolor: [";
		first = true;
		for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			if (!first)
				f << ",";
			first = false;
			Color c(s->mColor);
			f << StringFormat("\"#%02x%02x%02x\"", c.r, c.g, c.b);
		}
		f << "],\nstart: [";
		first = true;
		for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			if (!first)
				f << ",";
			first = false;
			f << s->mStartCycle;
		}
		f << "],\ncycles: [";
		first = true;
		for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			if (!first)
				f << ",";
			first = false;
			f << s->mEndCycle - s->mStartCycle;
		}
		f << "],\ndepth: [";
		first = true;
		for (const ProfileSample *s = t.mSamplesBegin, *end = t.mSamplesEnd; s < end; ++s)
		{
			if (!first)
				f << ",";
			first = false;
			f << int(s->mDepth);
		}
		f << "]\n}";
	}

	// Dump aggregated data: parallel arrays indexed by the aggregator indices written above
	f << "];\nvar aggregated = {\nname: [";
	bool first = true;
	for (const Aggregator &a : inAggregators)
	{
		if (!first)
			f << ",";
		first = false;
		String name = "\"" + sHTMLEncode(a.mName) + "\"";
		f << name;
	}
	f << "],\ncalls: [";
	first = true;
	for (const Aggregator &a : inAggregators)
	{
		if (!first)
			f << ",";
		first = false;
		f << a.mCallCounter;
	}
	f << "],\nmin_cycles: [";
	first = true;
	for (const Aggregator &a : inAggregators)
	{
		if (!first)
			f << ",";
		first = false;
		f << a.mMinCyclesInCallWithChildren;
	}
	f << "],\nmax_cycles: [";
	first = true;
	for (const Aggregator &a : inAggregators)
	{
		if (!first)
			f << ",";
		first = false;
		f << a.mMaxCyclesInCallWithChildren;
	}
	f << "],\ncycles_per_frame: [";
	first = true;
	for (const Aggregator &a : inAggregators)
	{
		if (!first)
			f << ",";
		first = false;
		f << a.mTotalCyclesInCallWithChildren;
	}

	// Write footer (closes the last array and the aggregated object)
	f << R"(]};
</script>
<canvas id="canvas"></canvas>
<div id="tooltip"></div>
</tbody></table></body></html>)";
}
#endif // JPH_PROFILE_ENABLED
JPH_NAMESPACE_END

View File

@@ -0,0 +1,301 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <mutex>
#include <chrono>
JPH_SUPPRESS_WARNINGS_STD_END
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Core/TickCounter.h>
#include <Jolt/Core/UnorderedMap.h>
#if defined(JPH_EXTERNAL_PROFILE)
JPH_NAMESPACE_BEGIN
#ifdef JPH_SHARED_LIBRARY
/// Functions called when a profiler measurement starts or stops, need to be overridden by the user.
using ProfileStartMeasurementFunction = void (*)(const char *inName, uint32 inColor, uint8 *ioUserData);
using ProfileEndMeasurementFunction = void (*)(uint8 *ioUserData);
JPH_EXPORT extern ProfileStartMeasurementFunction ProfileStartMeasurement;
JPH_EXPORT extern ProfileEndMeasurementFunction ProfileEndMeasurement;
#endif // JPH_SHARED_LIBRARY
/// Create this class on the stack to start sampling timing information of a particular scope.
///
/// For statically linked builds, this is left unimplemented intentionally. Needs to be implemented by the user of the library.
/// On construction a measurement should start, on destruction it should be stopped.
/// For dynamically linked builds, the user should override the ProfileStartMeasurement and ProfileEndMeasurement functions.
class alignas(16) ExternalProfileMeasurement : public NonCopyable
{
public:
	/// Constructor
#ifdef JPH_SHARED_LIBRARY
	// Shared builds route through the user-installed function pointers declared above
	JPH_INLINE			ExternalProfileMeasurement(const char *inName, uint32 inColor = 0)	{ ProfileStartMeasurement(inName, inColor, mUserData); }
	JPH_INLINE			~ExternalProfileMeasurement()										{ ProfileEndMeasurement(mUserData); }
#else
	// Static builds: the user of the library provides these definitions
						ExternalProfileMeasurement(const char *inName, uint32 inColor = 0);
						~ExternalProfileMeasurement();
#endif

private:
	uint8				mUserData[64];		///< Opaque scratch space handed to the user's start/end callbacks
};
JPH_NAMESPACE_END
//////////////////////////////////////////////////////////////////////////////////////////
// Macros to do the actual profiling
//////////////////////////////////////////////////////////////////////////////////////////
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic")
// Dummy implementations
#define JPH_PROFILE_START(name)
#define JPH_PROFILE_END()
#define JPH_PROFILE_THREAD_START(name)
#define JPH_PROFILE_THREAD_END()
#define JPH_PROFILE_NEXTFRAME()
#define JPH_PROFILE_DUMP(...)
// Scope profiling measurement
#define JPH_PROFILE_TAG2(line) profile##line
#define JPH_PROFILE_TAG(line) JPH_PROFILE_TAG2(line)
/// Macro to collect profiling information.
///
/// Usage:
///
/// {
/// JPH_PROFILE("Operation");
/// do operation;
/// }
///
#define JPH_PROFILE(...) ExternalProfileMeasurement JPH_PROFILE_TAG(__LINE__)(__VA_ARGS__)
// Scope profiling for function
#define JPH_PROFILE_FUNCTION() JPH_PROFILE(JPH_FUNCTION_NAME)
JPH_SUPPRESS_WARNING_POP
#elif defined(JPH_PROFILE_ENABLED)
JPH_NAMESPACE_BEGIN
class ProfileSample;
class ProfileThread;
/// Singleton class for managing profiling information
class JPH_EXPORT Profiler : public NonCopyable
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructor, records the initial reference time point (used to estimate ticks per second)
	Profiler()																{ UpdateReferenceTime(); }

	/// Increments the frame counter to provide statistics per frame
	void								NextFrame();

	/// Dump profiling statistics at the start of the next frame
	/// @param inTag If not empty, this overrides the auto incrementing number in the filename of the dump file
	void								Dump(const string_view &inTag = string_view());

	/// Add a thread to be instrumented
	void								AddThread(ProfileThread *inThread);

	/// Remove a thread from being instrumented
	void								RemoveThread(ProfileThread *inThread);

	/// Singleton instance
	static Profiler *					sInstance;

private:
	/// Helper class to freeze ProfileSamples per thread while processing them
	struct ThreadSamples
	{
		String							mThreadName;
		ProfileSample *					mSamplesBegin;
		ProfileSample *					mSamplesEnd;
	};

	/// Helper class to aggregate ProfileSamples
	class Aggregator
	{
	public:
		/// Constructor
										Aggregator(const char *inName) : mName(inName) { }

		/// Accumulate results for a measurement
		void							AccumulateMeasurement(uint64 inCyclesInCallWithChildren)
		{
			mCallCounter++;
			mTotalCyclesInCallWithChildren += inCyclesInCallWithChildren;
			mMinCyclesInCallWithChildren = min(inCyclesInCallWithChildren, mMinCyclesInCallWithChildren);
			mMaxCyclesInCallWithChildren = max(inCyclesInCallWithChildren, mMaxCyclesInCallWithChildren);
		}

		/// Sort descending by total cycles
		bool							operator < (const Aggregator &inRHS) const
		{
			return mTotalCyclesInCallWithChildren > inRHS.mTotalCyclesInCallWithChildren;
		}

		/// Identification
		const char *					mName;													///< User defined name of this item

		/// Statistics
		uint32							mCallCounter = 0;										///< Number of times AccumulateMeasurement was called
		uint64							mTotalCyclesInCallWithChildren = 0;						///< Total amount of cycles spent in this scope
		uint64							mMinCyclesInCallWithChildren = 0xffffffffffffffffUL;	///< Minimum amount of cycles spent per call
		uint64							mMaxCyclesInCallWithChildren = 0;						///< Maximum amount of cycles spent per call
	};

	using Threads = Array<ThreadSamples>;
	using Aggregators = Array<Aggregator>;
	using KeyToAggregator = UnorderedMap<const char *, size_t>;		// Keyed on the sample name pointer (assumes name pointers are stable, typically string literals)

	/// Helper function to aggregate profile sample data
	static void							sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator);

	/// We measure the amount of ticks per second, this function resets the reference time point
	void								UpdateReferenceTime();

	/// Get the amount of ticks per second, note that this number will never be fully accurate as the amount of ticks per second may vary with CPU load, so this number is only to be used to give an indication of time for profiling purposes
	uint64								GetProcessorTicksPerSecond() const;

	/// Dump profiling statistics
	void								DumpInternal();
	void								DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators);

	std::mutex							mLock;							///< Lock that protects mThreads
	uint64								mReferenceTick;					///< Tick count at the start of the frame
	std::chrono::high_resolution_clock::time_point mReferenceTime;		///< Time at the start of the frame
	Array<ProfileThread *>				mThreads;						///< List of all active threads
	bool								mDump = false;					///< When true, the samples are dumped next frame
	String								mDumpTag;						///< When not empty, this overrides the auto incrementing number of the dump filename
};
// Class that contains the information of a single scoped measurement.
// The layout is fixed to 32 bytes with 16 byte alignment (enforced by static_asserts in
// ProfileMeasurement::~ProfileMeasurement) so that a sample can be committed with two
// aligned 16 byte stores.
class alignas(16) JPH_EXPORT_GCC_BUG_WORKAROUND ProfileSample : public NonCopyable
{
public:
	JPH_OVERRIDE_NEW_DELETE

	const char *		mName;				///< User defined name of this item
	uint32				mColor;				///< Color to use for this sample
	uint8				mDepth;				///< Calculated depth
	uint8				mUnused[3];			///< Explicit padding to keep the layout fixed
	uint64				mStartCycle;		///< Cycle counter at start of measurement
	uint64				mEndCycle;			///< Cycle counter at end of measurement
};
/// Collects all samples of a single thread
class ProfileThread : public NonCopyable
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructor, registers this thread's buffer with Profiler::sInstance
	inline					ProfileThread(const string_view &inThreadName);
	inline					~ProfileThread();

	static const uint		cMaxSamples = 65536;		///< Fixed capacity of mSamples; when exhausted, further samples are dropped (see ProfileMeasurement)

	String					mThreadName;				///< Name of the thread that we're collecting information for
	ProfileSample			mSamples[cMaxSamples];		///< Buffer of samples
	uint					mCurrentSample = 0;			///< Next position to write a sample to

#ifdef JPH_SHARED_LIBRARY
	// Shared builds hide the thread local behind exported accessors so all modules share one instance per thread
	JPH_EXPORT static void	sSetInstance(ProfileThread *inInstance);
	JPH_EXPORT static ProfileThread *sGetInstance();
#else
	static inline void		sSetInstance(ProfileThread *inInstance)		{ sInstance = inInstance; }
	static inline ProfileThread *sGetInstance()							{ return sInstance; }

private:
	static thread_local ProfileThread *sInstance;		// One ProfileThread per instrumented thread
#endif
};
/// Create this class on the stack to start sampling timing information of a particular scope
class JPH_EXPORT ProfileMeasurement : public NonCopyable
{
public:
	/// Constructor, reserves a slot in the current thread's sample buffer and records the start cycle
	inline				ProfileMeasurement(const char *inName, uint32 inColor = 0);
	/// Destructor, records the end cycle and commits the sample to the reserved slot
	inline				~ProfileMeasurement();

private:
	ProfileSample *		mSample;					///< Slot reserved in ProfileThread::mSamples, or nullptr when the thread is not instrumented / the buffer is full
	ProfileSample		mTemp;						///< Sample is staged here on the stack and copied into mSample by the destructor
	static bool			sOutOfSamplesReported;		///< Ensures the 'too many samples' warning is traced only once
};
JPH_NAMESPACE_END
#include "Profiler.inl"
//////////////////////////////////////////////////////////////////////////////////////////
// Macros to do the actual profiling
//////////////////////////////////////////////////////////////////////////////////////////
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic")
/// Start instrumenting program
#define JPH_PROFILE_START(name) do { Profiler::sInstance = new Profiler; JPH_PROFILE_THREAD_START(name); } while (false)
/// End instrumenting program
#define JPH_PROFILE_END() do { JPH_PROFILE_THREAD_END(); delete Profiler::sInstance; Profiler::sInstance = nullptr; } while (false)
/// Start instrumenting a thread
#define JPH_PROFILE_THREAD_START(name) do { if (Profiler::sInstance) ProfileThread::sSetInstance(new ProfileThread(name)); } while (false)
/// End instrumenting a thread
#define JPH_PROFILE_THREAD_END() do { delete ProfileThread::sGetInstance(); ProfileThread::sSetInstance(nullptr); } while (false)
/// Scope profiling measurement
#define JPH_PROFILE_TAG2(line) profile##line
#define JPH_PROFILE_TAG(line) JPH_PROFILE_TAG2(line)
#define JPH_PROFILE(...) ProfileMeasurement JPH_PROFILE_TAG(__LINE__)(__VA_ARGS__)
/// Scope profiling for function
#define JPH_PROFILE_FUNCTION() JPH_PROFILE(JPH_FUNCTION_NAME)
/// Update frame counter
#define JPH_PROFILE_NEXTFRAME() Profiler::sInstance->NextFrame()
/// Dump profiling info
#define JPH_PROFILE_DUMP(...) Profiler::sInstance->Dump(__VA_ARGS__)
JPH_SUPPRESS_WARNING_POP
#else
//////////////////////////////////////////////////////////////////////////////////////////
// Dummy profiling instructions
//////////////////////////////////////////////////////////////////////////////////////////
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat-pedantic")
#define JPH_PROFILE_START(name)
#define JPH_PROFILE_END()
#define JPH_PROFILE_THREAD_START(name)
#define JPH_PROFILE_THREAD_END()
#define JPH_PROFILE(...)
#define JPH_PROFILE_FUNCTION()
#define JPH_PROFILE_NEXTFRAME()
#define JPH_PROFILE_DUMP(...)
JPH_SUPPRESS_WARNING_POP
#endif

View File

@@ -0,0 +1,90 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
JPH_NAMESPACE_BEGIN
//////////////////////////////////////////////////////////////////////////////////////////
// ProfileThread
//////////////////////////////////////////////////////////////////////////////////////////
ProfileThread::ProfileThread(const string_view &inThreadName) :
	mThreadName(inThreadName)
{
	// Register this thread's sample buffer with the profiler singleton.
	// Assumes Profiler::sInstance is non-null (JPH_PROFILE_THREAD_START checks this before constructing).
	Profiler::sInstance->AddThread(this);
}
ProfileThread::~ProfileThread()
{
	// Unregister from the profiler singleton; assumes it is still alive
	// (JPH_PROFILE_END tears down the thread before deleting the profiler)
	Profiler::sInstance->RemoveThread(this);
}
//////////////////////////////////////////////////////////////////////////////////////////
// ProfileMeasurement
//////////////////////////////////////////////////////////////////////////////////////////
JPH_TSAN_NO_SANITIZE // TSAN reports a race on sOutOfSamplesReported, however the worst case is that we report the out of samples message multiple times
ProfileMeasurement::ProfileMeasurement(const char *inName, uint32 inColor)
{
	ProfileThread *current_thread = ProfileThread::sGetInstance();
	if (current_thread == nullptr)
	{
		// Thread not instrumented
		mSample = nullptr;
	}
	else if (current_thread->mCurrentSample < ProfileThread::cMaxSamples)
	{
		// Get pointer to write data to; the slot is filled in the destructor
		mSample = &current_thread->mSamples[current_thread->mCurrentSample++];

		// Start constructing sample (will end up on stack)
		mTemp.mName = inName;
		mTemp.mColor = inColor;

		// Collect start sample last, presumably so the bookkeeping stores above
		// are not included in the measured interval
		mTemp.mStartCycle = GetProcessorTickCount();
	}
	else
	{
		// Out of samples for this frame; drop the measurement and warn once
		if (!sOutOfSamplesReported)
		{
			sOutOfSamplesReported = true;
			Trace("ProfileMeasurement: Too many samples, some data will be lost!");
		}
		mSample = nullptr;
	}
}
// Destructor, records the end cycle and commits the staged sample to the slot reserved by the constructor
ProfileMeasurement::~ProfileMeasurement()
{
	// mSample is null when the thread was not instrumented or the sample buffer was full
	if (mSample != nullptr)
	{
		// Finalize sample
		mTemp.mEndCycle = GetProcessorTickCount();

		// Write it to the memory buffer bypassing the cache
		static_assert(sizeof(ProfileSample) == 32, "Assume 32 bytes");
		static_assert(alignof(ProfileSample) == 16, "Assume 16 byte alignment");
#if defined(JPH_USE_SSE)
		// Two 16 byte non-temporal (streaming) stores avoid polluting the cache with profile data
		const __m128i *src = reinterpret_cast<const __m128i *>(&mTemp);
		__m128i *dst = reinterpret_cast<__m128i *>(mSample);
		__m128i val = _mm_loadu_si128(src);
		_mm_stream_si128(dst, val);
		val = _mm_loadu_si128(src + 1);
		_mm_stream_si128(dst + 1, val);
#elif defined(JPH_USE_NEON)
		// Two 16 byte vector stores (regular stores; the cache-bypass note above applies to the SSE path)
		const int *src = reinterpret_cast<const int *>(&mTemp);
		int *dst = reinterpret_cast<int *>(mSample);
		int32x4_t val = vld1q_s32(src);
		vst1q_s32(dst, val);
		val = vld1q_s32(src + 4);
		vst1q_s32(dst + 4, val);
#else
		memcpy(mSample, &mTemp, sizeof(ProfileSample));
#endif
		mSample = nullptr;
	}
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,137 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2022 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/InsertionSort.h>
JPH_NAMESPACE_BEGIN
/// Helper function for QuickSort, will move the pivot element to inMiddle.
template <typename Iterator, typename Compare>
inline void QuickSortMedianOfThree(Iterator inFirst, Iterator inMiddle, Iterator inLast, Compare inCompare)
{
	// This should be guaranteed because we switch over to insertion sort when there's 32 or less elements
	JPH_ASSERT(inFirst != inMiddle && inMiddle != inLast);

	// 3 element sorting network: after these swaps *inFirst <= *inMiddle <= *inLast
	// (in terms of inCompare), which leaves the median of the three at inMiddle
	if (inCompare(*inMiddle, *inFirst))
		std::swap(*inFirst, *inMiddle);

	if (inCompare(*inLast, *inFirst))
		std::swap(*inFirst, *inLast);

	if (inCompare(*inLast, *inMiddle))
		std::swap(*inMiddle, *inLast);
}
/// Helper function for QuickSort using the Ninther method, will move the pivot element to inMiddle.
template <typename Iterator, typename Compare>
inline void QuickSortNinther(Iterator inFirst, Iterator inMiddle, Iterator inLast, Compare inCompare)
{
	// Split [inFirst, inLast] into 8 equal slices, giving 9 sample points
	auto step = (inLast - inFirst) >> 3;
	auto double_step = step << 1;

	// Median of the 3 leading sample points, left at lo_median
	Iterator lo_median = inFirst + step;
	QuickSortMedianOfThree(inFirst, lo_median, inFirst + double_step, inCompare);

	// Median of the 3 central sample points, left at inMiddle
	QuickSortMedianOfThree(inMiddle - step, inMiddle, inMiddle + step, inCompare);

	// Median of the 3 trailing sample points, left at hi_median
	Iterator hi_median = inLast - step;
	QuickSortMedianOfThree(inLast - double_step, hi_median, inLast, inCompare);

	// Median of the 3 medians ends up at inMiddle, which is where the caller expects the pivot
	QuickSortMedianOfThree(lo_median, inMiddle, hi_median, inCompare);
}
/// Implementation of the quick sort algorithm. The STL version implementation is not consistent across platforms.
template <typename Iterator, typename Compare>
inline void QuickSort(Iterator inBegin, Iterator inEnd, Compare inCompare)
{
	// Implementation based on https://en.wikipedia.org/wiki/Quicksort using Hoare's partition scheme

	// Loop so that we only need to do 1 recursive call instead of 2.
	for (;;)
	{
		// If there's less than 2 elements we're done
		auto num_elements = inEnd - inBegin;
		if (num_elements < 2)
			return;

		// Fall back to insertion sort if there are too few elements
		if (num_elements <= 32)
		{
			InsertionSort(inBegin, inEnd, inCompare);
			return;
		}

		// Determine pivot using the median of 9 samples (see QuickSortNinther). The pivot is
		// copied by value because the element it came from can move during partitioning.
		Iterator pivot_iterator = inBegin + ((num_elements - 1) >> 1);
		QuickSortNinther(inBegin, pivot_iterator, inEnd - 1, inCompare);
		auto pivot = *pivot_iterator;

		// Left and right iterators
		Iterator i = inBegin;
		Iterator j = inEnd;

		for (;;)
		{
			// Find the first element that is bigger than the pivot
			while (inCompare(*i, pivot))
				i++;

			// Find the last element that is smaller than the pivot
			do
				--j;
			while (inCompare(pivot, *j));

			// If the two iterators crossed, we're done
			if (i >= j)
				break;

			// Swap the elements
			std::swap(*i, *j);

			// Note that the first while loop in this function should
			// have been do i++ while (...) but since we cannot decrement
			// the iterator from inBegin we left that out, so we need to do
			// it here.
			++i;
		}

		// Include the middle element on the left side
		j++;

		// Check which partition is smaller; recursing into the smaller side first and looping on
		// the bigger one bounds the recursion depth
		if (j - inBegin < inEnd - j)
		{
			// Left side is smaller, recurse to left first
			QuickSort(inBegin, j, inCompare);

			// Loop again with the right side to avoid a call
			inBegin = j;
		}
		else
		{
			// Right side is smaller, recurse to right first
			QuickSort(j, inEnd, inCompare);

			// Loop again with the left side to avoid a call
			inEnd = j;
		}
	}
}
/// Implementation of quick sort algorithm without comparator.
template <typename Iterator>
inline void QuickSort(Iterator inBegin, Iterator inEnd)
{
	// Forward to the comparator version using the default less-than ordering
	QuickSort(inBegin, inEnd, std::less<> { });
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,149 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/RTTI.h>
#include <Jolt/Core/StringTools.h>
JPH_NAMESPACE_BEGIN
//////////////////////////////////////////////////////////////////////////////////////////
// RTTI
//////////////////////////////////////////////////////////////////////////////////////////
// Constructor for classes without base classes or attributes (no create-RTTI callback needed)
RTTI::RTTI(const char *inName, int inSize, pCreateObjectFunction inCreateObject, pDestructObjectFunction inDestructObject) :
	mName(inName),
	mSize(inSize),
	mCreate(inCreateObject),
	mDestruct(inDestructObject)
{
	// DestructObject calls mDestruct unconditionally, so a null destruct function is not allowed
	JPH_ASSERT(inDestructObject != nullptr, "Object cannot be destructed");
}
// Constructor that additionally runs a user callback to register base classes / attributes
RTTI::RTTI(const char *inName, int inSize, pCreateObjectFunction inCreateObject, pDestructObjectFunction inDestructObject, pCreateRTTIFunction inCreateRTTI) :
	mName(inName),
	mSize(inSize),
	mCreate(inCreateObject),
	mDestruct(inDestructObject)
{
	// DestructObject calls mDestruct unconditionally, so a null destruct function is not allowed
	JPH_ASSERT(inDestructObject != nullptr, "Object cannot be destructed");

	// Let the class fill in its base classes / attributes; this is the body written after the
	// JPH_IMPLEMENT_RTTI_* macro (e.g. the JPH_ADD_BASE_CLASS calls)
	inCreateRTTI(*this);
}
// Number of directly registered base classes
int RTTI::GetBaseClassCount() const
{
	return static_cast<int>(mBaseClasses.size());
}
// Get the RTTI of a direct base class; inIdx must be in [0, GetBaseClassCount())
const RTTI *RTTI::GetBaseClass(int inIdx) const
{
	return mBaseClasses[inIdx].mRTTI;
}
uint32 RTTI::GetHash() const
{
	// Hash the class name to 64 bits, then fold the top half into the bottom half.
	// Perform diffusion step to get from 64 to 32 bits (see https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function)
	uint64 name_hash = HashString(mName);
	return static_cast<uint32>(name_hash ^ (name_hash >> 32));
}
void *RTTI::CreateObject() const
{
	// Abstract classes have no create function registered, so return null for those
	if (IsAbstract())
		return nullptr;
	return mCreate();
}
// Destruct an object of this type; mDestruct is guaranteed non-null by the constructor assert
void RTTI::DestructObject(void *inObject) const
{
	mDestruct(inObject);
}
void RTTI::AddBaseClass(const RTTI *inRTTI, int inOffset)
{
	JPH_ASSERT(inOffset >= 0 && inOffset < mSize, "Base class not contained in derived class");

	// Register the base class together with its byte offset within the derived class
	BaseClass base { inRTTI, inOffset };
	mBaseClasses.push_back(base);

#ifdef JPH_OBJECT_STREAM
	// Inherit all serializable attributes of the base class, rebased by the base class offset
	for (const SerializableAttribute &attr : inRTTI->mAttributes)
		mAttributes.push_back(SerializableAttribute(attr, inOffset));
#endif // JPH_OBJECT_STREAM
}
bool RTTI::operator == (const RTTI &inRHS) const
{
	// Only identical addresses count as equal
	if (this != &inRHS)
	{
		// Two distinct RTTI instances must have distinct names; if they don't, we probably have
		// two copies of the same type info in the program (e.g. the second one living in a DLL)
		JPH_ASSERT(strcmp(mName, inRHS.mName) != 0);
		return false;
	}
	return true;
}
bool RTTI::IsKindOf(const RTTI *inRTTI) const
{
	// Exact type match?
	if (this == inRTTI)
		return true;

	// Otherwise check whether any base class (recursively) matches
	for (const BaseClass &base : mBaseClasses)
		if (base.mRTTI->IsKindOf(inRTTI))
			return true;

	return false;
}
const void *RTTI::CastTo(const void *inObject, const RTTI *inRTTI) const
{
	JPH_ASSERT(inObject != nullptr);

	// Casting to our own type is the identity
	if (this == inRTTI)
		return inObject;

	// Try to reach the requested type through each base class
	for (const BaseClass &base : mBaseClasses)
	{
		// Adjust the pointer by the base class offset within this class
		const uint8 *base_ptr = static_cast<const uint8 *>(inObject) + base.mOffset;

		// Recurse into the base class
		if (const void *result = base.mRTTI->CastTo(base_ptr, inRTTI))
			return result;
	}

	// Not possible to cast
	return nullptr;
}
#ifdef JPH_OBJECT_STREAM
// Register a serializable attribute (member description) for this class
void RTTI::AddAttribute(const SerializableAttribute &inAttribute)
{
	mAttributes.push_back(inAttribute);
}

// Number of attributes, including those inherited through AddBaseClass
int RTTI::GetAttributeCount() const
{
	return (int)mAttributes.size();
}

// Get an attribute; inIdx must be in [0, GetAttributeCount())
const SerializableAttribute &RTTI::GetAttribute(int inIdx) const
{
	return mAttributes[inIdx];
}
#endif // JPH_OBJECT_STREAM
JPH_NAMESPACE_END

436
thirdparty/jolt_physics/Jolt/Core/RTTI.h vendored Normal file
View File

@@ -0,0 +1,436 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Reference.h>
#include <Jolt/Core/StaticArray.h>
#include <Jolt/ObjectStream/SerializableAttribute.h>
JPH_NAMESPACE_BEGIN
//////////////////////////////////////////////////////////////////////////////////////////
// RTTI
//////////////////////////////////////////////////////////////////////////////////////////
/// Light weight runtime type information system. This way we don't need to turn
/// on the default RTTI system of the compiler (introducing a possible overhead for every
/// class)
///
/// Notes:
/// - An extra virtual member function is added. This adds 8 bytes to the size of
/// an instance of the class (unless you are already using virtual functions).
///
/// To use RTTI on a specific class use:
///
/// Header file:
///
/// class Foo
/// {
/// JPH_DECLARE_RTTI_VIRTUAL_BASE(Foo)
/// }
///
/// class Bar : public Foo
/// {
/// JPH_DECLARE_RTTI_VIRTUAL(Bar)
/// };
///
/// Implementation file:
///
/// JPH_IMPLEMENT_RTTI_VIRTUAL_BASE(Foo)
/// {
/// }
///
/// JPH_IMPLEMENT_RTTI_VIRTUAL(Bar)
/// {
/// JPH_ADD_BASE_CLASS(Bar, Foo) // Multiple inheritance is allowed, just do JPH_ADD_BASE_CLASS for every base class
/// }
///
/// For abstract classes use:
///
/// Header file:
///
/// class Foo
/// {
/// JPH_DECLARE_RTTI_ABSTRACT_BASE(Foo)
///
/// public:
/// virtual void AbstractFunction() = 0;
/// }
///
/// class Bar : public Foo
/// {
/// JPH_DECLARE_RTTI_VIRTUAL(Bar)
///
/// public:
/// virtual void AbstractFunction() { } // Function is now implemented so this class is no longer abstract
/// };
///
/// Implementation file:
///
/// JPH_IMPLEMENT_RTTI_ABSTRACT_BASE(Foo)
/// {
/// }
///
/// JPH_IMPLEMENT_RTTI_VIRTUAL(Bar)
/// {
/// JPH_ADD_BASE_CLASS(Bar, Foo)
/// }
///
/// Example of usage in a program:
///
/// Foo *foo_ptr = new Foo;
/// Foo *bar_ptr = new Bar;
///
/// IsType(foo_ptr, RTTI(Bar)) returns false
/// IsType(bar_ptr, RTTI(Bar)) returns true
///
/// IsKindOf(foo_ptr, RTTI(Bar)) returns false
/// IsKindOf(bar_ptr, RTTI(Foo)) returns true
/// IsKindOf(bar_ptr, RTTI(Bar)) returns true
///
/// StaticCast<Bar>(foo_ptr) asserts and returns foo_ptr casted to Bar *
/// StaticCast<Bar>(bar_ptr) returns bar_ptr casted to Bar *
///
/// DynamicCast<Bar>(foo_ptr) returns nullptr
/// DynamicCast<Bar>(bar_ptr) returns bar_ptr casted to Bar *
///
/// Another feature of DynamicCast:
///
/// class A { int data[5]; };
/// class B { int data[7]; };
/// class C : public A, public B { int data[9]; };
///
/// C *c = new C;
/// A *a = c;
///
/// Note that:
///
/// B *b = (B *)a;
///
/// generates an invalid pointer,
///
/// B *b = StaticCast<B>(a);
///
/// doesn't compile, and
///
/// B *b = DynamicCast<B>(a);
///
/// does the correct cast
class JPH_EXPORT RTTI
{
public:
	/// Function to create an object
	using pCreateObjectFunction = void *(*)();

	/// Function to destroy an object
	using pDestructObjectFunction = void (*)(void *inObject);

	/// Function to initialize the runtime type info structure
	using pCreateRTTIFunction = void (*)(RTTI &inRTTI);

	/// Constructor
								RTTI(const char *inName, int inSize, pCreateObjectFunction inCreateObject, pDestructObjectFunction inDestructObject);
								RTTI(const char *inName, int inSize, pCreateObjectFunction inCreateObject, pDestructObjectFunction inDestructObject, pCreateRTTIFunction inCreateRTTI);

	// Properties
	inline const char *			GetName() const												{ return mName; }
	void						SetName(const char *inName)									{ mName = inName; }
	inline int					GetSize() const												{ return mSize; }
	bool						IsAbstract() const											{ return mCreate == nullptr || mDestruct == nullptr; }
	int							GetBaseClassCount() const;
	const RTTI *				GetBaseClass(int inIdx) const;
	uint32						GetHash() const;											///< 32 bit hash of the class name

	/// Create an object of this type (returns nullptr if the object is abstract)
	void *						CreateObject() const;

	/// Destruct object of this type (does nothing if the object is abstract)
	void						DestructObject(void *inObject) const;

	/// Add base class
	void						AddBaseClass(const RTTI *inRTTI, int inOffset);

	/// Equality operators
	bool						operator == (const RTTI &inRHS) const;
	bool						operator != (const RTTI &inRHS) const						{ return !(*this == inRHS); }

	/// Test if this class is derived from class of type inRTTI
	bool						IsKindOf(const RTTI *inRTTI) const;

	/// Cast inObject of this type to object of type inRTTI, returns nullptr if the cast is unsuccessful
	const void *				CastTo(const void *inObject, const RTTI *inRTTI) const;

#ifdef JPH_OBJECT_STREAM
	/// Attribute access
	void						AddAttribute(const SerializableAttribute &inAttribute);
	int							GetAttributeCount() const;
	const SerializableAttribute & GetAttribute(int inIdx) const;
#endif // JPH_OBJECT_STREAM

protected:
	/// Base class information
	struct BaseClass
	{
		const RTTI *			mRTTI;				///< Type info of the base class
		int						mOffset;			///< Byte offset of the base class within the derived class
	};

	const char *				mName;											///< Class name
	int							mSize;											///< Class size
	StaticArray<BaseClass, 4>	mBaseClasses;									///< Names of base classes
	pCreateObjectFunction		mCreate;										///< Pointer to a function that will create a new instance of this class
	pDestructObjectFunction		mDestruct;										///< Pointer to a function that will destruct an object of this class
#ifdef JPH_OBJECT_STREAM
	StaticArray<SerializableAttribute, 32> mAttributes;							///< All attributes of this class
#endif // JPH_OBJECT_STREAM
};
//////////////////////////////////////////////////////////////////////////////////////////
// Add run time type info to types that don't have virtual functions
//////////////////////////////////////////////////////////////////////////////////////////
// JPH_DECLARE_RTTI_NON_VIRTUAL
#define JPH_DECLARE_RTTI_NON_VIRTUAL(linkage, class_name) \
public: \
JPH_OVERRIDE_NEW_DELETE \
friend linkage RTTI * GetRTTIOfType(class_name *); \
friend inline const RTTI * GetRTTI([[maybe_unused]] const class_name *inObject) { return GetRTTIOfType(static_cast<class_name *>(nullptr)); }\
static void sCreateRTTI(RTTI &inRTTI); \
// JPH_IMPLEMENT_RTTI_NON_VIRTUAL
#define JPH_IMPLEMENT_RTTI_NON_VIRTUAL(class_name) \
RTTI * GetRTTIOfType(class_name *) \
{ \
static RTTI rtti(#class_name, sizeof(class_name), []() -> void * { return new class_name; }, [](void *inObject) { delete (class_name *)inObject; }, &class_name::sCreateRTTI); \
return &rtti; \
} \
void class_name::sCreateRTTI(RTTI &inRTTI) \
//////////////////////////////////////////////////////////////////////////////////////////
// Same as above, but when you cannot insert the declaration in the class
// itself, for example for templates and third party classes
//////////////////////////////////////////////////////////////////////////////////////////
// JPH_DECLARE_RTTI_OUTSIDE_CLASS
#define JPH_DECLARE_RTTI_OUTSIDE_CLASS(linkage, class_name) \
linkage RTTI * GetRTTIOfType(class_name *); \
inline const RTTI * GetRTTI(const class_name *inObject) { return GetRTTIOfType((class_name *)nullptr); }\
void CreateRTTI##class_name(RTTI &inRTTI); \
// JPH_IMPLEMENT_RTTI_OUTSIDE_CLASS
#define JPH_IMPLEMENT_RTTI_OUTSIDE_CLASS(class_name) \
RTTI * GetRTTIOfType(class_name *) \
{ \
static RTTI rtti((const char *)#class_name, sizeof(class_name), []() -> void * { return new class_name; }, [](void *inObject) { delete (class_name *)inObject; }, &CreateRTTI##class_name); \
return &rtti; \
} \
void CreateRTTI##class_name(RTTI &inRTTI)
//////////////////////////////////////////////////////////////////////////////////////////
// Same as above, but for classes that have virtual functions
//////////////////////////////////////////////////////////////////////////////////////////
#define JPH_DECLARE_RTTI_HELPER(linkage, class_name, modifier) \
public: \
JPH_OVERRIDE_NEW_DELETE \
friend linkage RTTI * GetRTTIOfType(class_name *); \
friend inline const RTTI * GetRTTI(const class_name *inObject) { return inObject->GetRTTI(); } \
virtual const RTTI * GetRTTI() const modifier; \
virtual const void * CastTo(const RTTI *inRTTI) const modifier; \
static void sCreateRTTI(RTTI &inRTTI); \
// JPH_DECLARE_RTTI_VIRTUAL - for derived classes with RTTI
#define JPH_DECLARE_RTTI_VIRTUAL(linkage, class_name) \
JPH_DECLARE_RTTI_HELPER(linkage, class_name, override)
// JPH_IMPLEMENT_RTTI_VIRTUAL
#define JPH_IMPLEMENT_RTTI_VIRTUAL(class_name) \
RTTI * GetRTTIOfType(class_name *) \
{ \
static RTTI rtti(#class_name, sizeof(class_name), []() -> void * { return new class_name; }, [](void *inObject) { delete (class_name *)inObject; }, &class_name::sCreateRTTI); \
return &rtti; \
} \
const RTTI * class_name::GetRTTI() const \
{ \
return JPH_RTTI(class_name); \
} \
const void * class_name::CastTo(const RTTI *inRTTI) const \
{ \
return JPH_RTTI(class_name)->CastTo((const void *)this, inRTTI); \
} \
void class_name::sCreateRTTI(RTTI &inRTTI) \
// JPH_DECLARE_RTTI_VIRTUAL_BASE - for concrete base class that has RTTI
#define JPH_DECLARE_RTTI_VIRTUAL_BASE(linkage, class_name) \
JPH_DECLARE_RTTI_HELPER(linkage, class_name, )
// JPH_IMPLEMENT_RTTI_VIRTUAL_BASE
#define JPH_IMPLEMENT_RTTI_VIRTUAL_BASE(class_name) \
JPH_IMPLEMENT_RTTI_VIRTUAL(class_name)
// JPH_DECLARE_RTTI_ABSTRACT - for derived abstract class that have RTTI
#define JPH_DECLARE_RTTI_ABSTRACT(linkage, class_name) \
JPH_DECLARE_RTTI_HELPER(linkage, class_name, override)
// JPH_IMPLEMENT_RTTI_ABSTRACT
#define JPH_IMPLEMENT_RTTI_ABSTRACT(class_name) \
RTTI * GetRTTIOfType(class_name *) \
{ \
static RTTI rtti(#class_name, sizeof(class_name), nullptr, [](void *inObject) { delete (class_name *)inObject; }, &class_name::sCreateRTTI); \
return &rtti; \
} \
const RTTI * class_name::GetRTTI() const \
{ \
return JPH_RTTI(class_name); \
} \
const void * class_name::CastTo(const RTTI *inRTTI) const \
{ \
return JPH_RTTI(class_name)->CastTo((const void *)this, inRTTI); \
} \
void class_name::sCreateRTTI(RTTI &inRTTI) \
// JPH_DECLARE_RTTI_ABSTRACT_BASE - for abstract base class that has RTTI
#define JPH_DECLARE_RTTI_ABSTRACT_BASE(linkage, class_name) \
JPH_DECLARE_RTTI_HELPER(linkage, class_name, )
// JPH_IMPLEMENT_RTTI_ABSTRACT_BASE
#define JPH_IMPLEMENT_RTTI_ABSTRACT_BASE(class_name) \
JPH_IMPLEMENT_RTTI_ABSTRACT(class_name)
//////////////////////////////////////////////////////////////////////////////////////////
// Declare an RTTI class for registering with the factory
//////////////////////////////////////////////////////////////////////////////////////////
#define JPH_DECLARE_RTTI_FOR_FACTORY(linkage, class_name) \
linkage RTTI * GetRTTIOfType(class class_name *);
#define JPH_DECLARE_RTTI_WITH_NAMESPACE_FOR_FACTORY(linkage, name_space, class_name) \
namespace name_space { \
class class_name; \
linkage RTTI * GetRTTIOfType(class class_name *); \
}
//////////////////////////////////////////////////////////////////////////////////////////
// Find the RTTI of a class
//////////////////////////////////////////////////////////////////////////////////////////
#define JPH_RTTI(class_name) GetRTTIOfType(static_cast<class_name *>(nullptr))
//////////////////////////////////////////////////////////////////////////////////////////
// Macro to rename a class, useful for embedded classes:
//
// class A { class B { }; }
//
// Now use JPH_RENAME_CLASS(B, A::B) to avoid conflicts with other classes named B
//////////////////////////////////////////////////////////////////////////////////////////
// JPH_RENAME_CLASS
#define JPH_RENAME_CLASS(class_name, new_name) \
inRTTI.SetName(#new_name);
//////////////////////////////////////////////////////////////////////////////////////////
// Macro to add base classes
//////////////////////////////////////////////////////////////////////////////////////////
/// Define very dirty macro to get the offset of a baseclass into a class
#define JPH_BASE_CLASS_OFFSET(inClass, inBaseClass) ((int(uint64((inBaseClass *)((inClass *)0x10000))))-0x10000)
// JPH_ADD_BASE_CLASS
#define JPH_ADD_BASE_CLASS(class_name, base_class_name) \
inRTTI.AddBaseClass(JPH_RTTI(base_class_name), JPH_BASE_CLASS_OFFSET(class_name, base_class_name));
//////////////////////////////////////////////////////////////////////////////////////////
// Macros and templates to identify a class
//////////////////////////////////////////////////////////////////////////////////////////
/// Check if inObject is exactly of the type described by inRTTI (a null object passes the test)
template <class Type>
inline bool IsType(const Type *inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return *inObject->GetRTTI() == *inRTTI;
}

/// Overload of IsType for RefConst
template <class Type>
inline bool IsType(const RefConst<Type> &inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return *inObject->GetRTTI() == *inRTTI;
}

/// Overload of IsType for Ref
template <class Type>
inline bool IsType(const Ref<Type> &inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return *inObject->GetRTTI() == *inRTTI;
}
/// Check if inObject is of, or derived from, the type described by inRTTI (a null object passes the test)
template <class Type>
inline bool IsKindOf(const Type *inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return inObject->GetRTTI()->IsKindOf(inRTTI);
}

/// Overload of IsKindOf for RefConst
template <class Type>
inline bool IsKindOf(const RefConst<Type> &inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return inObject->GetRTTI()->IsKindOf(inRTTI);
}

/// Overload of IsKindOf for Ref
template <class Type>
inline bool IsKindOf(const Ref<Type> &inObject, const RTTI *inRTTI)
{
	if (inObject == nullptr)
		return true;

	return inObject->GetRTTI()->IsKindOf(inRTTI);
}
/// Cast inObject to DstType. Overload resolution is constrained (via enable_if) to types in the
/// same class hierarchy (DstType a base of SrcType or vice versa). No runtime type check is
/// performed: downcasting to the wrong dynamic type is undefined behavior.
template <class DstType, class SrcType, std::enable_if_t<std::is_base_of_v<DstType, SrcType> || std::is_base_of_v<SrcType, DstType>, bool> = true>
inline const DstType *StaticCast(const SrcType *inObject)
{
	return static_cast<const DstType *>(inObject);
}

/// Non-const pointer variant of StaticCast
template <class DstType, class SrcType, std::enable_if_t<std::is_base_of_v<DstType, SrcType> || std::is_base_of_v<SrcType, DstType>, bool> = true>
inline DstType *StaticCast(SrcType *inObject)
{
	return static_cast<DstType *>(inObject);
}

/// RefConst variant of StaticCast, returns a raw non-owning pointer (no reference is added)
template <class DstType, class SrcType, std::enable_if_t<std::is_base_of_v<DstType, SrcType> || std::is_base_of_v<SrcType, DstType>, bool> = true>
inline const DstType *StaticCast(const RefConst<SrcType> &inObject)
{
	return static_cast<const DstType *>(inObject.GetPtr());
}

/// Ref variant of StaticCast, returns a raw non-owning pointer (no reference is added)
template <class DstType, class SrcType, std::enable_if_t<std::is_base_of_v<DstType, SrcType> || std::is_base_of_v<SrcType, DstType>, bool> = true>
inline DstType *StaticCast(const Ref<SrcType> &inObject)
{
	return static_cast<DstType *>(inObject.GetPtr());
}
/// Cast inObject to DstType using the RTTI system, returns nullptr on failure
template <class DstType, class SrcType>
inline const DstType *DynamicCast(const SrcType *inObject)
{
	if (inObject == nullptr)
		return nullptr;

	return reinterpret_cast<const DstType *>(inObject->CastTo(JPH_RTTI(DstType)));
}

/// Non-const pointer variant of DynamicCast
template <class DstType, class SrcType>
inline DstType *DynamicCast(SrcType *inObject)
{
	if (inObject == nullptr)
		return nullptr;

	return const_cast<DstType *>(reinterpret_cast<const DstType *>(inObject->CastTo(JPH_RTTI(DstType))));
}

/// RefConst variant of DynamicCast, returns a raw non-owning pointer (no reference is added)
template <class DstType, class SrcType>
inline const DstType *DynamicCast(const RefConst<SrcType> &inObject)
{
	if (inObject == nullptr)
		return nullptr;

	return reinterpret_cast<const DstType *>(inObject->CastTo(JPH_RTTI(DstType)));
}

/// Ref variant of DynamicCast, returns a raw non-owning pointer (no reference is added)
template <class DstType, class SrcType>
inline DstType *DynamicCast(const Ref<SrcType> &inObject)
{
	if (inObject == nullptr)
		return nullptr;

	return const_cast<DstType *>(reinterpret_cast<const DstType *>(inObject->CastTo(JPH_RTTI(DstType))));
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,244 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Atomics.h>
JPH_NAMESPACE_BEGIN
// Forward declares
template <class T> class Ref;
template <class T> class RefConst;

/// Simple class to facilitate reference counting / releasing
/// Derive your class from RefTarget and you can reference it by using Ref<classname> or RefConst<classname>
///
/// Reference counting classes keep an integer which indicates how many references
/// to the object are active. Reference counting objects are derived from RefTarget
/// and start their life with a reference count of zero. They can then be assigned
/// to equivalents of pointers (Ref) which will increase the reference count immediately.
/// If the destructor of Ref is called or another object is assigned to the reference
/// counting pointer it will decrease the reference count of the object again. If this
/// reference count becomes zero, the object is destroyed.
///
/// This provides a very powerful mechanism to prevent memory leaks, but also gives
/// some responsibility to the programmer. The most notable point is that you cannot
/// have one object reference another and have the other reference the first one
/// back, because this way the reference count of both objects will never become
/// lower than 1, resulting in a memory leak. By carefully designing your classes
/// (and particularly identifying who owns who in the class hierarchy) you can avoid
/// these problems.
template <class T>
class RefTarget
{
public:
	/// Constructor
	inline RefTarget() = default;
	inline RefTarget(const RefTarget &) { /* Do not copy refcount */ }
	inline ~RefTarget() { JPH_IF_ENABLE_ASSERTS(uint32 value = mRefCount.load(memory_order_relaxed);) JPH_ASSERT(value == 0 || value == cEmbedded); } ///< assert no one is referencing us

	/// Mark this class as embedded, this means the type can be used in a compound or constructed on the stack.
	/// The Release function will never destruct the object, it is assumed the destructor will be called by whoever allocated
	/// the object and at that point in time it is checked that no references are left to the structure.
	/// (The assert verifies the object was not already marked as embedded.)
	inline void SetEmbedded() const { JPH_IF_ENABLE_ASSERTS(uint32 old = ) mRefCount.fetch_add(cEmbedded, memory_order_relaxed); JPH_ASSERT(old < cEmbedded); }

	/// Assignment operator
	inline RefTarget & operator = (const RefTarget &) { /* Don't copy refcount */ return *this; }

	/// Get current refcount of this object (for embedded objects this includes the cEmbedded offset)
	uint32 GetRefCount() const { return mRefCount.load(memory_order_relaxed); }

	/// Add or release a reference to this object
	inline void AddRef() const
	{
		// Adding a reference can use relaxed memory ordering
		mRefCount.fetch_add(1, memory_order_relaxed);
	}

	inline void Release() const
	{
#ifndef JPH_TSAN_ENABLED
		// Releasing a reference must use release semantics...
		if (mRefCount.fetch_sub(1, memory_order_release) == 1)
		{
			// ... so that we can use acquire to ensure that we see any updates from other threads that released a ref before deleting the object
			atomic_thread_fence(memory_order_acquire);
			delete static_cast<const T *>(this);
		}
#else
		// But under TSAN, we cannot use atomic_thread_fence, so we use an acq_rel operation unconditionally instead
		if (mRefCount.fetch_sub(1, memory_order_acq_rel) == 1)
			delete static_cast<const T *>(this);
#endif
	}

	/// INTERNAL HELPER FUNCTION USED BY SERIALIZATION
	/// Returns the byte offset of the refcount within the derived class T
	static int sInternalGetRefCountOffset() { return offsetof(T, mRefCount); }

protected:
	static constexpr uint32 cEmbedded = 0x0ebedded; ///< A large value that gets added to the refcount to mark the object as embedded (hex spelling of 'ebedded')

	mutable atomic<uint32> mRefCount = 0; ///< Current reference count
};
/// Pure virtual version of RefTarget.
/// Allows reference counting through an abstract interface; how the count is stored and
/// when the object is destroyed is up to the implementing class.
class JPH_EXPORT RefTargetVirtual
{
public:
	/// Virtual destructor
	virtual ~RefTargetVirtual() = default;

	/// Virtual add reference
	virtual void AddRef() = 0;

	/// Virtual release reference
	virtual void Release() = 0;
};
/// Class for automatic referencing, this is the equivalent of a pointer to type T
/// if you assign a value to this class it will increment the reference count by one
/// of this object, and if you assign something else it will decrease the reference
/// count of the first object again. If it reaches a reference count of zero it will
/// be deleted
template <class T>
class Ref
{
public:
	/// Constructor
	inline Ref() : mPtr(nullptr) { }
	inline Ref(T *inRHS) : mPtr(inRHS) { AddRef(); }
	inline Ref(const Ref<T> &inRHS) : mPtr(inRHS.mPtr) { AddRef(); }
	inline Ref(Ref<T> &&inRHS) noexcept : mPtr(inRHS.mPtr) { inRHS.mPtr = nullptr; }
	inline ~Ref() { Release(); }

	/// Assignment operators.
	/// Note: the new object is AddRef-ed *before* the old object is released. If the old object
	/// (indirectly) held the last reference to the new object, releasing first could destroy the
	/// object we are about to point at.
	inline Ref<T> & operator = (T *inRHS)
	{
		if (mPtr != inRHS)
		{
			T *old_ptr = mPtr;
			mPtr = inRHS;
			AddRef();
			if (old_ptr != nullptr)
				old_ptr->Release();
		}
		return *this;
	}
	inline Ref<T> & operator = (const Ref<T> &inRHS)
	{
		if (mPtr != inRHS.mPtr)
		{
			T *old_ptr = mPtr;
			mPtr = inRHS.mPtr;
			AddRef();
			if (old_ptr != nullptr)
				old_ptr->Release();
		}
		return *this;
	}
	inline Ref<T> & operator = (Ref<T> &&inRHS) noexcept
	{
		// Move: the refcount is transferred, no AddRef needed
		if (mPtr != inRHS.mPtr)
		{
			Release();
			mPtr = inRHS.mPtr;
			inRHS.mPtr = nullptr;
		}
		return *this;
	}

	/// Casting operators
	inline operator T *() const { return mPtr; }

	/// Access like a normal pointer
	inline T * operator -> () const { return mPtr; }
	inline T & operator * () const { return *mPtr; }

	/// Comparison
	inline bool operator == (const T * inRHS) const { return mPtr == inRHS; }
	inline bool operator == (const Ref<T> &inRHS) const { return mPtr == inRHS.mPtr; }
	inline bool operator != (const T * inRHS) const { return mPtr != inRHS; }
	inline bool operator != (const Ref<T> &inRHS) const { return mPtr != inRHS.mPtr; }

	/// Get pointer (non-owning, no reference is added)
	inline T * GetPtr() const { return mPtr; }

	/// Get hash for this object (hashes the stored pointer)
	uint64 GetHash() const
	{
		return Hash<T *> { } (mPtr);
	}

	/// INTERNAL HELPER FUNCTION USED BY SERIALIZATION
	void ** InternalGetPointer() { return reinterpret_cast<void **>(&mPtr); }

private:
	template <class T2> friend class RefConst;

	/// Use "variable = nullptr;" to release an object, do not call these functions
	inline void AddRef() { if (mPtr != nullptr) mPtr->AddRef(); }
	inline void Release() { if (mPtr != nullptr) mPtr->Release(); }

	T * mPtr; ///< Pointer to object that we are reference counting
};
/// Class for automatic referencing, this is the equivalent of a CONST pointer to type T
/// if you assign a value to this class it will increment the reference count by one
/// of this object, and if you assign something else it will decrease the reference
/// count of the first object again. If it reaches a reference count of zero it will
/// be deleted
template <class T>
class RefConst
{
public:
	/// Constructor
	inline RefConst() : mPtr(nullptr) { }
	inline RefConst(const T * inRHS) : mPtr(inRHS) { AddRef(); }
	inline RefConst(const RefConst<T> &inRHS) : mPtr(inRHS.mPtr) { AddRef(); }
	inline RefConst(RefConst<T> &&inRHS) noexcept : mPtr(inRHS.mPtr) { inRHS.mPtr = nullptr; }
	inline RefConst(const Ref<T> &inRHS) : mPtr(inRHS.mPtr) { AddRef(); }
	inline RefConst(Ref<T> &&inRHS) noexcept : mPtr(inRHS.mPtr) { inRHS.mPtr = nullptr; }
	inline ~RefConst() { Release(); }

	/// Assignment operators.
	/// Note: the new object is AddRef-ed *before* the old object is released. If the old object
	/// (indirectly) held the last reference to the new object, releasing first could destroy the
	/// object we are about to point at.
	inline RefConst<T> & operator = (const T * inRHS)
	{
		if (mPtr != inRHS)
		{
			const T *old_ptr = mPtr;
			mPtr = inRHS;
			AddRef();
			if (old_ptr != nullptr)
				old_ptr->Release();
		}
		return *this;
	}
	inline RefConst<T> & operator = (const RefConst<T> &inRHS)
	{
		if (mPtr != inRHS.mPtr)
		{
			const T *old_ptr = mPtr;
			mPtr = inRHS.mPtr;
			AddRef();
			if (old_ptr != nullptr)
				old_ptr->Release();
		}
		return *this;
	}
	inline RefConst<T> & operator = (RefConst<T> &&inRHS) noexcept
	{
		// Move: the refcount is transferred, no AddRef needed
		if (mPtr != inRHS.mPtr)
		{
			Release();
			mPtr = inRHS.mPtr;
			inRHS.mPtr = nullptr;
		}
		return *this;
	}
	inline RefConst<T> & operator = (const Ref<T> &inRHS)
	{
		if (mPtr != inRHS.mPtr)
		{
			const T *old_ptr = mPtr;
			mPtr = inRHS.mPtr;
			AddRef();
			if (old_ptr != nullptr)
				old_ptr->Release();
		}
		return *this;
	}
	inline RefConst<T> & operator = (Ref<T> &&inRHS) noexcept
	{
		// Move: the refcount is transferred, no AddRef needed
		if (mPtr != inRHS.mPtr)
		{
			Release();
			mPtr = inRHS.mPtr;
			inRHS.mPtr = nullptr;
		}
		return *this;
	}

	/// Casting operators
	inline operator const T * () const { return mPtr; }

	/// Access like a normal pointer
	inline const T * operator -> () const { return mPtr; }
	inline const T & operator * () const { return *mPtr; }

	/// Comparison
	inline bool operator == (const T * inRHS) const { return mPtr == inRHS; }
	inline bool operator == (const RefConst<T> &inRHS) const { return mPtr == inRHS.mPtr; }
	inline bool operator == (const Ref<T> &inRHS) const { return mPtr == inRHS.mPtr; }
	inline bool operator != (const T * inRHS) const { return mPtr != inRHS; }
	inline bool operator != (const RefConst<T> &inRHS) const { return mPtr != inRHS.mPtr; }
	inline bool operator != (const Ref<T> &inRHS) const { return mPtr != inRHS.mPtr; }

	/// Get pointer (non-owning, no reference is added)
	inline const T * GetPtr() const { return mPtr; }

	/// Get hash for this object (hashes the stored pointer)
	uint64 GetHash() const
	{
		return Hash<const T *> { } (mPtr);
	}

	/// INTERNAL HELPER FUNCTION USED BY SERIALIZATION
	void ** InternalGetPointer() { return const_cast<void **>(reinterpret_cast<const void **>(&mPtr)); }

private:
	/// Use "variable = nullptr;" to release an object, do not call these functions
	inline void AddRef() { if (mPtr != nullptr) mPtr->AddRef(); }
	inline void Release() { if (mPtr != nullptr) mPtr->Release(); }

	const T * mPtr; ///< Pointer to object that we are reference counting
};
JPH_NAMESPACE_END
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat")

namespace std
{
	/// Declare std::hash for Ref so it can be used as a key in std::unordered_map / std::unordered_set.
	/// Forwards to Ref::GetHash, which hashes the stored pointer.
	template <class T>
	struct hash<JPH::Ref<T>>
	{
		size_t operator () (const JPH::Ref<T> &inRHS) const
		{
			return size_t(inRHS.GetHash());
		}
	};

	/// Declare std::hash for RefConst, forwards to RefConst::GetHash
	template <class T>
	struct hash<JPH::RefConst<T>>
	{
		size_t operator () (const JPH::RefConst<T> &inRHS) const
		{
			return size_t(inRHS.GetHash());
		}
	};
}

JPH_SUPPRESS_WARNING_POP

View File

@@ -0,0 +1,174 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Helper class that either contains a valid result or an error.
/// Internally a tagged union: exactly one of mResult / mError is alive, selected by mState.
template <class Type>
class Result
{
public:
	/// Default constructor, the result starts empty (neither valid nor in error)
	Result() { }

	/// Copy constructor
	Result(const Result<Type> &inRHS) :
		mState(inRHS.mState)
	{
		switch (inRHS.mState)
		{
		case EState::Valid:
			new (&mResult) Type (inRHS.mResult);
			break;

		case EState::Error:
			new (&mError) String(inRHS.mError);
			break;

		case EState::Invalid:
			break;
		}
	}

	/// Move constructor
	Result(Result<Type> &&inRHS) noexcept :
		mState(inRHS.mState)
	{
		switch (inRHS.mState)
		{
		case EState::Valid:
			new (&mResult) Type (std::move(inRHS.mResult));
			break;

		case EState::Error:
			new (&mError) String(std::move(inRHS.mError));
			break;

		case EState::Invalid:
			break;
		}

		// Don't reset the state of inRHS, the destructors still need to be called after a move operation
	}

	/// Destructor
	~Result() { Clear(); }

	/// Copy assignment
	Result<Type> & operator = (const Result<Type> &inRHS)
	{
		// Guard against self assignment: Clear() would destroy the value we are about to copy from
		if (this == &inRHS)
			return *this;

		Clear();

		switch (inRHS.mState)
		{
		case EState::Valid:
			new (&mResult) Type (inRHS.mResult);
			break;

		case EState::Error:
			new (&mError) String(inRHS.mError);
			break;

		case EState::Invalid:
			break;
		}

		// Adopt the new state only after construction succeeded, so a throwing copy constructor
		// cannot leave us claiming to contain an object that was never created
		mState = inRHS.mState;
		return *this;
	}

	/// Move assignment
	Result<Type> & operator = (Result<Type> &&inRHS) noexcept
	{
		// Guard against self assignment: Clear() would destroy the value we are about to move from
		if (this == &inRHS)
			return *this;

		Clear();

		switch (inRHS.mState)
		{
		case EState::Valid:
			new (&mResult) Type (std::move(inRHS.mResult));
			break;

		case EState::Error:
			new (&mError) String(std::move(inRHS.mError));
			break;

		case EState::Invalid:
			break;
		}

		mState = inRHS.mState;

		// Don't reset the state of inRHS, the destructors still need to be called after a move operation
		return *this;
	}

	/// Clear result or error, destroying whichever union member is currently alive
	void Clear()
	{
		switch (mState)
		{
		case EState::Valid:
			mResult.~Type();
			break;

		case EState::Error:
			mError.~String();
			break;

		case EState::Invalid:
			break;
		}

		mState = EState::Invalid;
	}

	/// Checks if the result is still uninitialized
	bool IsEmpty() const { return mState == EState::Invalid; }

	/// Checks if the result is valid
	bool IsValid() const { return mState == EState::Valid; }

	/// Get the result value, only call when IsValid()
	const Type & Get() const { JPH_ASSERT(IsValid()); return mResult; }

	/// Set the result value
	void Set(const Type &inResult) { Clear(); new (&mResult) Type(inResult); mState = EState::Valid; }

	/// Set the result value (move value)
	void Set(Type &&inResult) { Clear(); new (&mResult) Type(std::move(inResult)); mState = EState::Valid; }

	/// Check if we had an error
	bool HasError() const { return mState == EState::Error; }

	/// Get the error value, only call when HasError()
	const String & GetError() const { JPH_ASSERT(HasError()); return mError; }

	/// Set an error value
	void SetError(const char *inError) { Clear(); new (&mError) String(inError); mState = EState::Error; }
	void SetError(const string_view &inError) { Clear(); new (&mError) String(inError); mState = EState::Error; }
	void SetError(String &&inError) { Clear(); new (&mError) String(std::move(inError)); mState = EState::Error; }

private:
	union
	{
		Type mResult; ///< The actual result object
		String mError; ///< The error description if the result failed
	};

	/// State of the result
	enum class EState : uint8
	{
		Invalid,
		Valid,
		Error
	};

	EState mState = EState::Invalid;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,72 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// STL allocator that guarantees that all allocations are aligned to N bytes
template <typename T, size_t N>
class STLAlignedAllocator
{
public:
	using value_type = T;

	/// Pointer to type
	using pointer = T *;
	using const_pointer = const T *;

	/// Reference to type.
	/// Can be removed in C++20.
	using reference = T &;
	using const_reference = const T &;

	using size_type = size_t;
	using difference_type = ptrdiff_t;

	/// The allocator is stateless
	using is_always_equal = std::true_type;

	/// Allocator supports moving
	using propagate_on_container_move_assignment = std::true_type;

	/// Constructor
	inline STLAlignedAllocator() = default;

	/// Constructor from an allocator for another type (same alignment)
	template <typename T2>
	inline explicit STLAlignedAllocator(const STLAlignedAllocator<T2, N> &) { }

	/// Allocate N-byte aligned memory for inCount elements
	inline pointer allocate(size_type inCount)
	{
		return static_cast<pointer>(AlignedAllocate(inCount * sizeof(value_type), N));
	}

	/// Free previously allocated memory
	inline void deallocate(pointer inMemory, size_type)
	{
		AlignedFree(inMemory);
	}

	/// Allocators are stateless so all instances compare equal
	inline bool operator == (const STLAlignedAllocator<T, N> &) const
	{
		return true;
	}

	inline bool operator != (const STLAlignedAllocator<T, N> &) const
	{
		return false;
	}

	/// Converting to allocator for other type
	template <typename T2>
	struct rebind
	{
		using other = STLAlignedAllocator<T2, N>;
	};
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,127 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Default implementation of AllocatorHasReallocate which tells if an allocator has a reallocate function
template <class T> struct AllocatorHasReallocate { static constexpr bool sValue = false; };

#ifndef JPH_DISABLE_CUSTOM_ALLOCATOR

/// STL allocator that forwards to our allocation functions
template <typename T>
class STLAllocator
{
public:
	using value_type = T;

	/// Pointer to type
	using pointer = T *;
	using const_pointer = const T *;

	/// Reference to type.
	/// Can be removed in C++20.
	using reference = T &;
	using const_reference = const T &;

	using size_type = size_t;
	using difference_type = ptrdiff_t;

	/// The allocator is stateless
	using is_always_equal = std::true_type;

	/// Allocator supports moving
	using propagate_on_container_move_assignment = std::true_type;

	/// Constructor
	inline STLAllocator() = default;

	/// Constructor from other allocator
	template <typename T2>
	inline STLAllocator(const STLAllocator<T2> &) { }

	/// If this allocator needs to fall back to aligned allocations because the type requires it
	/// (the threshold encodes the alignment the regular allocation path provides: 8 bytes on 32 bit, 16 bytes on 64 bit)
	static constexpr bool needs_aligned_allocate = alignof(T) > (JPH_CPU_ADDRESS_BITS == 32? 8 : 16);

	/// Allocate memory
	inline pointer allocate(size_type inN)
	{
		if constexpr (needs_aligned_allocate)
			return pointer(AlignedAllocate(inN * sizeof(value_type), alignof(T)));
		else
			return pointer(Allocate(inN * sizeof(value_type)));
	}

	/// Should we expose a reallocate function?
	/// Only for trivially copyable types (reallocation moves raw bytes without invoking constructors)
	/// and only when the regular, non-aligned allocation path is used.
	static constexpr bool has_reallocate = std::is_trivially_copyable<T>() && !needs_aligned_allocate;

	/// Reallocate memory
	template <bool has_reallocate_v = has_reallocate, typename = std::enable_if_t<has_reallocate_v>>
	inline pointer reallocate(pointer inOldPointer, size_type inOldSize, size_type inNewSize)
	{
		JPH_ASSERT(inNewSize > 0); // Reallocating to zero size is implementation dependent, so we don't allow it
		return pointer(Reallocate(inOldPointer, inOldSize * sizeof(value_type), inNewSize * sizeof(value_type)));
	}

	/// Free memory
	inline void deallocate(pointer inPointer, size_type)
	{
		// Must mirror the path taken in allocate()
		if constexpr (needs_aligned_allocate)
			AlignedFree(inPointer);
		else
			Free(inPointer);
	}

	/// Allocators are stateless so assumed to be equal
	inline bool operator == (const STLAllocator<T> &) const
	{
		return true;
	}

	inline bool operator != (const STLAllocator<T> &) const
	{
		return false;
	}

	/// Converting to allocator for other type
	template <typename T2>
	struct rebind
	{
		using other = STLAllocator<T2>;
	};
};

/// The STLAllocator implements the reallocate function if the alignment of the class is smaller or equal to the default alignment for the platform
template <class T> struct AllocatorHasReallocate<STLAllocator<T>> { static constexpr bool sValue = STLAllocator<T>::has_reallocate; };

#else

// With the custom allocator disabled, fall back to the standard allocator
template <typename T> using STLAllocator = std::allocator<T>;

#endif // !JPH_DISABLE_CUSTOM_ALLOCATOR
// Declare STL containers that use our allocator
using String = std::basic_string<char, std::char_traits<char>, STLAllocator<char>>;
using IStringStream = std::basic_istringstream<char, std::char_traits<char>, STLAllocator<char>>;
JPH_NAMESPACE_END
#if (!defined(JPH_PLATFORM_WINDOWS) || defined(JPH_COMPILER_MINGW)) && !defined(JPH_DISABLE_CUSTOM_ALLOCATOR)

namespace std
{
	/// Declare std::hash for String, for some reason on Linux based platforms template deduction takes the wrong variant
	template <>
	struct hash<JPH::String>
	{
		inline size_t operator () (const JPH::String &inRHS) const
		{
			// JPH::String implicitly converts to string_view, so reuse the standard string_view hash
			return hash<string_view> { } (inRHS);
		}
	};
}

#endif // (!JPH_PLATFORM_WINDOWS || JPH_COMPILER_MINGW) && !JPH_DISABLE_CUSTOM_ALLOCATOR

View File

@@ -0,0 +1,170 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2025 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/STLAllocator.h>
JPH_NAMESPACE_BEGIN
#ifndef JPH_DISABLE_CUSTOM_ALLOCATOR

/// STL allocator that keeps N elements in a local buffer before falling back to regular allocations.
/// The buffer lives inside the allocator object itself, which is why the allocator cannot be copied, moved or swapped.
template <typename T, size_t N>
class STLLocalAllocator : private STLAllocator<T>
{
	using Base = STLAllocator<T>;

public:
	/// General properties
	using value_type = T;
	using pointer = T *;
	using const_pointer = const T *;
	using reference = T &;
	using const_reference = const T &;
	using size_type = size_t;
	using difference_type = ptrdiff_t;

	/// The allocator is not stateless (has local buffer)
	using is_always_equal = std::false_type;

	/// We cannot copy, move or swap allocators
	using propagate_on_container_copy_assignment = std::false_type;
	using propagate_on_container_move_assignment = std::false_type;
	using propagate_on_container_swap = std::false_type;

	/// Constructor
	STLLocalAllocator() = default;
	STLLocalAllocator(const STLLocalAllocator &) = delete; // Can't copy an allocator as the buffer is local to the original
	STLLocalAllocator(STLLocalAllocator &&) = delete; // Can't move an allocator as the buffer is local to the original
	STLLocalAllocator & operator = (const STLLocalAllocator &) = delete; // Can't copy an allocator as the buffer is local to the original

	/// Constructor used when rebinding to another type. This expects the allocator to use the original memory pool from the first allocator,
	/// but in our case we cannot use the local buffer of the original allocator as it has different size and alignment rules.
	/// To solve this we make this allocator fall back to the heap immediately.
	template <class T2>
	explicit STLLocalAllocator(const STLLocalAllocator<T2, N> &) : mNumElementsUsed(N) { }

	/// Check if inPointer is in the local buffer
	inline bool is_local(const_pointer inPointer) const
	{
		ptrdiff_t diff = inPointer - reinterpret_cast<const_pointer>(mElements);
		return diff >= 0 && diff < ptrdiff_t(N);
	}

	/// Allocate memory for inN elements
	inline pointer allocate(size_type inN)
	{
		// If we allocate more than we have, fall back to the heap
		if (mNumElementsUsed + inN > N)
			return Base::allocate(inN);

		// Allocate from our local buffer (simple bump allocation)
		pointer result = reinterpret_cast<pointer>(mElements) + mNumElementsUsed;
		mNumElementsUsed += inN;
		return result;
	}

	/// Always implements a reallocate function as we can often reallocate in place
	static constexpr bool has_reallocate = true;

	/// Reallocate memory
	inline pointer reallocate(pointer inOldPointer, size_type inOldSize, size_type inNewSize)
	{
		JPH_ASSERT(inNewSize > 0); // Reallocating to zero size is implementation dependent, so we don't allow it

		// If there was no previous allocation, we can go through the regular allocate function
		if (inOldPointer == nullptr)
			return allocate(inNewSize);

		// If the pointer is outside our local buffer, fall back to the heap
		if (!is_local(inOldPointer))
		{
			if constexpr (AllocatorHasReallocate<Base>::sValue)
				return Base::reallocate(inOldPointer, inOldSize, inNewSize);
			else
				return ReallocateImpl(inOldPointer, inOldSize, inNewSize);
		}

		// If we happen to have space left, we only need to update our bookkeeping.
		// Only the most recent local allocation (the one ending at mNumElementsUsed) can be resized in place.
		pointer base_ptr = reinterpret_cast<pointer>(mElements) + mNumElementsUsed - inOldSize;
		if (inOldPointer == base_ptr
			&& mNumElementsUsed - inOldSize + inNewSize <= N)
		{
			mNumElementsUsed += inNewSize - inOldSize;
			return base_ptr;
		}

		// We can't reallocate in place, fall back to the heap
		return ReallocateImpl(inOldPointer, inOldSize, inNewSize);
	}

	/// Free memory
	inline void deallocate(pointer inPointer, size_type inN)
	{
		// If the pointer is not in our local buffer, fall back to the heap
		if (!is_local(inPointer))
			return Base::deallocate(inPointer, inN);

		// Else we can only reclaim memory if it was the last allocation
		// (older local allocations remain occupied until the allocator itself is destroyed)
		if (inPointer == reinterpret_cast<pointer>(mElements) + mNumElementsUsed - inN)
			mNumElementsUsed -= inN;
	}

	/// Allocators are not-stateless, assume if allocator address matches that the allocators are the same
	inline bool operator == (const STLLocalAllocator<T, N> &inRHS) const
	{
		return this == &inRHS;
	}

	inline bool operator != (const STLLocalAllocator<T, N> &inRHS) const
	{
		return this != &inRHS;
	}

	/// Converting to allocator for other type
	template <typename T2>
	struct rebind
	{
		using other = STLLocalAllocator<T2, N>;
	};

private:
	/// Implements reallocate when the base class doesn't or when we go from local buffer to heap
	inline pointer ReallocateImpl(pointer inOldPointer, size_type inOldSize, size_type inNewSize)
	{
		pointer new_pointer = Base::allocate(inNewSize);
		size_type n = min(inOldSize, inNewSize);
		if constexpr (std::is_trivially_copyable<T>())
		{
			// Can use mem copy
			memcpy(new_pointer, inOldPointer, n * sizeof(T));
		}
		else
		{
			// Need to actually move the elements
			for (size_t i = 0; i < n; ++i)
			{
				new (new_pointer + i) T(std::move(inOldPointer[i]));
				inOldPointer[i].~T();
			}
		}
		deallocate(inOldPointer, inOldSize);
		return new_pointer;
	}

	alignas(T) uint8 mElements[N * sizeof(T)];	///< Local buffer, the first mNumElementsUsed elements are handed out
	size_type mNumElementsUsed = 0;				///< Elements used in the local buffer (set to N by the rebind constructor to force heap allocation)
};

/// The STLLocalAllocator always implements a reallocate function as it can often reallocate in place
template <class T, size_t N> struct AllocatorHasReallocate<STLLocalAllocator<T, N>> { static constexpr bool sValue = STLLocalAllocator<T, N>::has_reallocate; };

#else

// With the custom allocator disabled, fall back to the standard allocator (the local buffer optimization is lost)
template <typename T, size_t N> using STLLocalAllocator = std::allocator<T>;

#endif // !JPH_DISABLE_CUSTOM_ALLOCATOR
JPH_NAMESPACE_END

View File

@@ -0,0 +1,80 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/TempAllocator.h>
JPH_NAMESPACE_BEGIN
/// STL allocator that forwards all allocations to a TempAllocator
template <typename T>
class STLTempAllocator
{
public:
	using value_type = T;

	/// Pointer to type
	using pointer = T *;
	using const_pointer = const T *;

	/// Reference to type.
	/// Can be removed in C++20.
	using reference = T &;
	using const_reference = const T &;

	using size_type = size_t;
	using difference_type = ptrdiff_t;

	/// The allocator is not stateless (it holds a reference to the temp allocator)
	using is_always_equal = std::false_type;

	/// Constructor, all allocations will go through inAllocator
	inline STLTempAllocator(TempAllocator &inAllocator) : mAllocator(inAllocator) { }

	/// Constructor from an allocator for another type (shares the same temp allocator)
	template <typename T2>
	inline explicit STLTempAllocator(const STLTempAllocator<T2> &inRHS) : mAllocator(inRHS.GetAllocator()) { }

	/// Allocate memory for inCount elements
	inline pointer allocate(size_type inCount)
	{
		return pointer(mAllocator.Allocate(uint(inCount * sizeof(value_type))));
	}

	/// Free memory for inCount elements
	inline void deallocate(pointer inMemory, size_type inCount)
	{
		mAllocator.Free(inMemory, uint(inCount * sizeof(value_type)));
	}

	/// Allocators are not stateless: they compare equal only when they wrap the same temp allocator
	inline bool operator == (const STLTempAllocator<T> &inRHS) const
	{
		return &mAllocator == &inRHS.mAllocator;
	}

	inline bool operator != (const STLTempAllocator<T> &inRHS) const
	{
		return !(*this == inRHS);
	}

	/// Converting to allocator for other type
	template <typename T2>
	struct rebind
	{
		using other = STLTempAllocator<T2>;
	};

	/// Get our temp allocator
	TempAllocator & GetAllocator() const
	{
		return mAllocator;
	}

private:
	TempAllocator & mAllocator;	///< The temp allocator that all requests are forwarded to
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,49 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// Class that calls a function when it goes out of scope
template <class F>
class ScopeExit : public NonCopyable
{
public:
	/// Constructor specifies the exit function
	JPH_INLINE explicit ScopeExit(F &&inFunction) : mExitFunction(std::move(inFunction)) { }

	/// Destructor calls the exit function unless it was already invoked or released
	JPH_INLINE ~ScopeExit()
	{
		if (!mDone)
			mExitFunction();
	}

	/// Call the exit function now instead of when going out of scope
	JPH_INLINE void Invoke()
	{
		if (!mDone)
		{
			mExitFunction();
			mDone = true;
		}
	}

	/// No longer call the exit function when going out of scope
	JPH_INLINE void Release()
	{
		mDone = true;
	}

private:
	F mExitFunction;	///< Function to run on scope exit
	bool mDone = false;	///< Set when the function has been invoked or released
};

#define JPH_SCOPE_EXIT_TAG2(line) scope_exit##line
#define JPH_SCOPE_EXIT_TAG(line) JPH_SCOPE_EXIT_TAG2(line)

/// Usage: JPH_SCOPE_EXIT([]{ code to call on scope exit });
#define JPH_SCOPE_EXIT(...) ScopeExit JPH_SCOPE_EXIT_TAG(__LINE__)(__VA_ARGS__)
JPH_NAMESPACE_END

View File

@@ -0,0 +1,134 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/Semaphore.h>
#ifdef JPH_PLATFORM_WINDOWS
JPH_SUPPRESS_WARNING_PUSH
JPH_MSVC_SUPPRESS_WARNING(5039) // winbase.h(13179): warning C5039: 'TpSetCallbackCleanupGroup': pointer or reference to potentially throwing function passed to 'extern "C"' function under -EHc. Undefined behavior may occur if this function throws an exception.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef JPH_COMPILER_MINGW
#include <Windows.h>
#else
#include <windows.h>
#endif
JPH_SUPPRESS_WARNING_POP
#endif
JPH_NAMESPACE_BEGIN
Semaphore::Semaphore()
{
#ifdef JPH_PLATFORM_WINDOWS
	// Windows: native semaphore object, initial count 0, practically unbounded maximum
	mSemaphore = CreateSemaphore(nullptr, 0, INT_MAX, nullptr);
	if (mSemaphore == nullptr)
	{
		Trace("Failed to create semaphore");
		std::abort();
	}
#elif defined(JPH_USE_PTHREADS)
	// POSIX: unnamed semaphore, private to this process (2nd argument 0), initial count 0
	int ret = sem_init(&mSemaphore, 0, 0);
	if (ret == -1)
	{
		Trace("Failed to create semaphore");
		std::abort();
	}
#elif defined(JPH_USE_GRAND_CENTRAL_DISPATCH)
	// Apple: Grand Central Dispatch semaphore, initial count 0
	mSemaphore = dispatch_semaphore_create(0);
	if (mSemaphore == nullptr)
	{
		Trace("Failed to create semaphore");
		std::abort();
	}
#elif defined(JPH_PLATFORM_BLUE)
	// Console platforms: platform specific semaphore implementation
	if (!JPH_PLATFORM_BLUE_SEMAPHORE_INIT(mSemaphore))
	{
		Trace("Failed to create semaphore");
		std::abort();
	}
#endif
	// When none of the above platforms apply, the mutex + condition variable fallback
	// used by Acquire / Release needs no initialization here
}
Semaphore::~Semaphore()
{
	// Destroy the platform specific semaphore that was created in the constructor
#ifdef JPH_PLATFORM_WINDOWS
	CloseHandle(mSemaphore);
#elif defined(JPH_USE_PTHREADS)
	sem_destroy(&mSemaphore);
#elif defined(JPH_USE_GRAND_CENTRAL_DISPATCH)
	dispatch_release(mSemaphore);
#elif defined(JPH_PLATFORM_BLUE)
	JPH_PLATFORM_BLUE_SEMAPHORE_DESTROY(mSemaphore);
#endif
}
void Semaphore::Release(uint inNumber)
{
	JPH_ASSERT(inNumber > 0);

#if defined(JPH_PLATFORM_WINDOWS) || defined(JPH_USE_PTHREADS) || defined(JPH_USE_GRAND_CENTRAL_DISPATCH) || defined(JPH_PLATFORM_BLUE)
	// mCount can go negative; a negative value means threads are blocked in Acquire on the OS
	// semaphore, so we only need to signal the (comparatively expensive) OS semaphore for those waiters
	int old_value = mCount.fetch_add(inNumber, std::memory_order_release);
	if (old_value < 0)
	{
		// Number of waiters this release unblocks: we raise the count from old_value towards 0,
		// waking at most inNumber threads
		int new_value = old_value + (int)inNumber;
		int num_to_release = min(new_value, 0) - old_value;
#ifdef JPH_PLATFORM_WINDOWS
		::ReleaseSemaphore(mSemaphore, num_to_release, nullptr);
#elif defined(JPH_USE_PTHREADS)
		for (int i = 0; i < num_to_release; ++i)
			sem_post(&mSemaphore);
#elif defined(JPH_USE_GRAND_CENTRAL_DISPATCH)
		for (int i = 0; i < num_to_release; ++i)
			dispatch_semaphore_signal(mSemaphore);
#elif defined(JPH_PLATFORM_BLUE)
		JPH_PLATFORM_BLUE_SEMAPHORE_SIGNAL(mSemaphore, num_to_release);
#endif
	}
#else
	// Fallback implementation based on a mutex and condition variable
	std::lock_guard lock(mLock);
	mCount.fetch_add(inNumber, std::memory_order_relaxed);
	if (inNumber > 1)
		mWaitVariable.notify_all();
	else
		mWaitVariable.notify_one();
#endif
}
// Acquire the semaphore inNumber times.
// Fast path: only an atomic subtract. The thread blocks on the OS semaphore only
// when the count goes negative, i.e. when not enough releases have happened yet.
void Semaphore::Acquire(uint inNumber)
{
	JPH_ASSERT(inNumber > 0);

#if defined(JPH_PLATFORM_WINDOWS) || defined(JPH_USE_PTHREADS) || defined(JPH_USE_GRAND_CENTRAL_DISPATCH) || defined(JPH_PLATFORM_BLUE)
	// memory_order_acquire pairs with the release in Release (see above)
	int old_value = mCount.fetch_sub(inNumber, std::memory_order_acquire);
	int new_value = old_value - (int)inNumber;
	if (new_value < 0)
	{
		// Wait only for the part of the request that was not already covered by
		// previous releases (the positive portion of old_value)
		int num_to_acquire = min(old_value, 0) - new_value;
#ifdef JPH_PLATFORM_WINDOWS
		for (int i = 0; i < num_to_acquire; ++i)
			WaitForSingleObject(mSemaphore, INFINITE);
#elif defined(JPH_USE_PTHREADS)
		for (int i = 0; i < num_to_acquire; ++i)
			sem_wait(&mSemaphore);
#elif defined(JPH_USE_GRAND_CENTRAL_DISPATCH)
		for (int i = 0; i < num_to_acquire; ++i)
			dispatch_semaphore_wait(mSemaphore, DISPATCH_TIME_FOREVER);
#elif defined(JPH_PLATFORM_BLUE)
		JPH_PLATFORM_BLUE_SEMAPHORE_WAIT(mSemaphore, num_to_acquire);
#endif
	}
#else
	// Fallback: block on the condition variable until enough count is available
	std::unique_lock lock(mLock);
	mWaitVariable.wait(lock, [this, inNumber]() {
		return mCount.load(std::memory_order_relaxed) >= int(inNumber);
	});
	mCount.fetch_sub(inNumber, std::memory_order_relaxed);
#endif
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,68 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2023 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Atomics.h>
// Determine which platform specific construct we'll use
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#ifdef JPH_PLATFORM_WINDOWS
// We include windows.h in the cpp file, the semaphore itself is a void pointer
#elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_BSD) || defined(JPH_PLATFORM_WASM)
#include <semaphore.h>
#define JPH_USE_PTHREADS
#elif defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS)
#include <dispatch/dispatch.h>
#define JPH_USE_GRAND_CENTRAL_DISPATCH
#elif defined(JPH_PLATFORM_BLUE)
// Jolt/Core/PlatformBlue.h should have defined everything that is needed below
#else
#include <mutex>
#include <condition_variable>
#endif
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
/// Implements a semaphore
/// When we switch to C++20 we can use counting_semaphore to unify this
class JPH_EXPORT Semaphore
{
public:
	/// Constructor
	Semaphore();
	~Semaphore();

	/// Release the semaphore, signaling the thread waiting on the barrier that there may be work
	void Release(uint inNumber = 1);

	/// Acquire the semaphore inNumber times
	void Acquire(uint inNumber = 1);

	/// Get the current value of the semaphore
	inline int GetValue() const { return mCount.load(std::memory_order_relaxed); }

private:
#if defined(JPH_PLATFORM_WINDOWS) || defined(JPH_USE_PTHREADS) || defined(JPH_USE_GRAND_CENTRAL_DISPATCH) || defined(JPH_PLATFORM_BLUE)
	// The underlying OS handle differs per platform; the selection here must match
	// the #if chain in Semaphore.cpp
#ifdef JPH_PLATFORM_WINDOWS
	using SemaphoreType = void *;
#elif defined(JPH_USE_PTHREADS)
	using SemaphoreType = sem_t;
#elif defined(JPH_USE_GRAND_CENTRAL_DISPATCH)
	using SemaphoreType = dispatch_semaphore_t;
#elif defined(JPH_PLATFORM_BLUE)
	using SemaphoreType = JPH_PLATFORM_BLUE_SEMAPHORE;
#endif
	alignas(JPH_CACHE_LINE_SIZE) atomic<int> mCount { 0 };			///< We increment mCount for every release, to acquire we decrement the count. If the count is negative we know that we are waiting on the actual semaphore.
	SemaphoreType		mSemaphore { };								///< The semaphore is an expensive construct so we only acquire/release it if we know that we need to wait/have waiting threads
#else
	// Other platforms: Emulate a semaphore using a mutex, condition variable and count
	std::mutex			mLock;
	std::condition_variable	mWaitVariable;
	atomic<int>			mCount { 0 };
#endif
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,329 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/HashCombine.h>
JPH_NAMESPACE_BEGIN
/// Simple variable length array backed by a fixed size buffer
template <class T, uint N>
class [[nodiscard]] StaticArray
{
public:
	using value_type = T;
	using size_type = uint;
	static constexpr uint Capacity = N;

	// NOTE(review): Elements live in raw aligned storage (mElements) and are created /
	// destroyed explicitly with placement new and manual destructor calls. erase() moves
	// elements with memmove, so T is assumed to be trivially relocatable.

	/// Default constructor
	StaticArray() = default;

	/// Constructor from initializer list
	explicit StaticArray(std::initializer_list<T> inList)
	{
		JPH_ASSERT(inList.size() <= N);
		for (const T &v : inList)
			new (reinterpret_cast<T *>(&mElements[mSize++])) T(v);
	}

	/// Copy constructor
	StaticArray(const StaticArray<T, N> &inRHS)
	{
		while (mSize < inRHS.mSize)
		{
			new (&mElements[mSize]) T(inRHS[mSize]);
			++mSize;
		}
	}

	/// Destruct all elements
	~StaticArray()
	{
		if constexpr (!std::is_trivially_destructible<T>())
			for (T *e = reinterpret_cast<T *>(mElements), *end = e + mSize; e < end; ++e)
				e->~T();
	}

	/// Destruct all elements and set length to zero
	void clear()
	{
		if constexpr (!std::is_trivially_destructible<T>())
			for (T *e = reinterpret_cast<T *>(mElements), *end = e + mSize; e < end; ++e)
				e->~T();
		mSize = 0;
	}

	/// Add element to the back of the array
	void push_back(const T &inElement)
	{
		JPH_ASSERT(mSize < N);
		new (&mElements[mSize++]) T(inElement);
	}

	/// Construct element at the back of the array
	template <class... A>
	void emplace_back(A &&... inElement)
	{
		JPH_ASSERT(mSize < N);
		new (&mElements[mSize++]) T(std::forward<A>(inElement)...);
	}

	/// Remove element from the back of the array
	void pop_back()
	{
		JPH_ASSERT(mSize > 0);
		reinterpret_cast<T &>(mElements[--mSize]).~T();
	}

	/// Returns true if there are no elements in the array
	bool empty() const
	{
		return mSize == 0;
	}

	/// Returns amount of elements in the array
	size_type size() const
	{
		return mSize;
	}

	/// Returns maximum amount of elements the array can hold
	size_type capacity() const
	{
		return N;
	}

	/// Resize array to new length.
	/// Note: when growing, trivially constructible elements are left uninitialized.
	void resize(size_type inNewSize)
	{
		JPH_ASSERT(inNewSize <= N);
		// Construct any new elements (only when T needs it)
		if constexpr (!std::is_trivially_constructible<T>())
			for (T *element = reinterpret_cast<T *>(mElements) + mSize, *element_end = reinterpret_cast<T *>(mElements) + inNewSize; element < element_end; ++element)
				new (element) T;
		// Destroy any elements that fall off the end when shrinking
		if constexpr (!std::is_trivially_destructible<T>())
			for (T *element = reinterpret_cast<T *>(mElements) + inNewSize, *element_end = reinterpret_cast<T *>(mElements) + mSize; element < element_end; ++element)
				element->~T();
		mSize = inNewSize;
	}

	using const_iterator = const T *;

	/// Iterators
	const_iterator begin() const
	{
		return reinterpret_cast<const T *>(mElements);
	}

	const_iterator end() const
	{
		return reinterpret_cast<const T *>(mElements + mSize);
	}

	using iterator = T *;

	iterator begin()
	{
		return reinterpret_cast<T *>(mElements);
	}

	iterator end()
	{
		return reinterpret_cast<T *>(mElements + mSize);
	}

	const T * data() const
	{
		return reinterpret_cast<const T *>(mElements);
	}

	T * data()
	{
		return reinterpret_cast<T *>(mElements);
	}

	/// Access element
	T & operator [] (size_type inIdx)
	{
		JPH_ASSERT(inIdx < mSize);
		return reinterpret_cast<T &>(mElements[inIdx]);
	}

	const T & operator [] (size_type inIdx) const
	{
		JPH_ASSERT(inIdx < mSize);
		return reinterpret_cast<const T &>(mElements[inIdx]);
	}

	/// Access element (asserts instead of throwing, unlike std::array::at)
	T & at(size_type inIdx)
	{
		JPH_ASSERT(inIdx < mSize);
		return reinterpret_cast<T &>(mElements[inIdx]);
	}

	const T & at(size_type inIdx) const
	{
		JPH_ASSERT(inIdx < mSize);
		return reinterpret_cast<const T &>(mElements[inIdx]);
	}

	/// First element in the array
	const T & front() const
	{
		JPH_ASSERT(mSize > 0);
		return reinterpret_cast<const T &>(mElements[0]);
	}

	T & front()
	{
		JPH_ASSERT(mSize > 0);
		return reinterpret_cast<T &>(mElements[0]);
	}

	/// Last element in the array
	const T & back() const
	{
		JPH_ASSERT(mSize > 0);
		return reinterpret_cast<const T &>(mElements[mSize - 1]);
	}

	T & back()
	{
		JPH_ASSERT(mSize > 0);
		return reinterpret_cast<T &>(mElements[mSize - 1]);
	}

	/// Remove one element from the array.
	/// Subsequent elements are shifted down with memmove (T must be trivially relocatable).
	void erase(const_iterator inIter)
	{
		size_type p = size_type(inIter - begin());
		JPH_ASSERT(p < mSize);
		reinterpret_cast<T &>(mElements[p]).~T();
		if (p + 1 < mSize)
			memmove(mElements + p, mElements + p + 1, (mSize - p - 1) * sizeof(T));
		--mSize;
	}

	/// Remove multiple element from the array (same relocation caveat as the single element erase)
	void erase(const_iterator inBegin, const_iterator inEnd)
	{
		size_type p = size_type(inBegin - begin());
		size_type n = size_type(inEnd - inBegin);
		JPH_ASSERT(inEnd <= end());
		for (size_type i = 0; i < n; ++i)
			reinterpret_cast<T &>(mElements[p + i]).~T();
		if (p + n < mSize)
			memmove(mElements + p, mElements + p + n, (mSize - p - n) * sizeof(T));
		mSize -= n;
	}

	/// Assignment operator
	StaticArray<T, N> & operator = (const StaticArray<T, N> &inRHS)
	{
		size_type rhs_size = inRHS.size();

		// Self assignment check (compared as void * because of the template overload below)
		if (static_cast<const void *>(this) != static_cast<const void *>(&inRHS))
		{
			clear();
			while (mSize < rhs_size)
			{
				new (&mElements[mSize]) T(inRHS[mSize]);
				++mSize;
			}
		}
		return *this;
	}

	/// Assignment operator with static array of different max length
	template <uint M>
	StaticArray<T, N> & operator = (const StaticArray<T, M> &inRHS)
	{
		size_type rhs_size = inRHS.size();
		JPH_ASSERT(rhs_size <= N);

		if (static_cast<const void *>(this) != static_cast<const void *>(&inRHS))
		{
			clear();
			while (mSize < rhs_size)
			{
				new (&mElements[mSize]) T(inRHS[mSize]);
				++mSize;
			}
		}
		return *this;
	}

	/// Comparing arrays
	bool operator == (const StaticArray<T, N> &inRHS) const
	{
		if (mSize != inRHS.mSize)
			return false;
		for (size_type i = 0; i < mSize; ++i)
			if (!(reinterpret_cast<const T &>(mElements[i]) == reinterpret_cast<const T &>(inRHS.mElements[i])))
				return false;
		return true;
	}

	bool operator != (const StaticArray<T, N> &inRHS) const
	{
		if (mSize != inRHS.mSize)
			return true;
		for (size_type i = 0; i < mSize; ++i)
			if (reinterpret_cast<const T &>(mElements[i]) != reinterpret_cast<const T &>(inRHS.mElements[i]))
				return true;
		return false;
	}

	/// Get hash for this array
	uint64 GetHash() const
	{
		// Hash length first
		uint64 ret = Hash<uint32> { } (uint32(size()));

		// Then hash elements
		for (const T *element = reinterpret_cast<const T *>(mElements), *element_end = reinterpret_cast<const T *>(mElements) + mSize; element < element_end; ++element)
			HashCombine(ret, *element);

		return ret;
	}

protected:
	// Raw storage with the same size and alignment as T; elements are constructed in place
	struct alignas(T) Storage
	{
		uint8			mData[sizeof(T)];
	};

	static_assert(sizeof(T) == sizeof(Storage), "Mismatch in size");
	static_assert(alignof(T) == alignof(Storage), "Mismatch in alignment");

	size_type			mSize = 0;			// Current number of live elements
	Storage				mElements[N];		// Uninitialized storage for up to N elements
};
JPH_NAMESPACE_END
JPH_SUPPRESS_WARNING_PUSH
JPH_CLANG_SUPPRESS_WARNING("-Wc++98-compat")

namespace std
{
	/// Specialization of std::hash so a StaticArray can be used as key in unordered containers;
	/// simply forwards to StaticArray::GetHash
	template <class T, JPH::uint N>
	struct hash<JPH::StaticArray<T, N>>
	{
		size_t operator () (const JPH::StaticArray<T, N> &inRHS) const
		{
			return static_cast<std::size_t>(inRHS.GetHash());
		}
	};
}

JPH_SUPPRESS_WARNING_POP

View File

@@ -0,0 +1,120 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// Simple binary input stream
class JPH_EXPORT StreamIn : public NonCopyable
{
public:
	/// Virtual destructor
	virtual				~StreamIn() = default;

	/// Read a string of bytes from the binary stream
	virtual void		ReadBytes(void *outData, size_t inNumBytes) = 0;

	/// Returns true when an attempt has been made to read past the end of the file.
	/// Note that this follows the convention of std::basic_ios::eof which only returns true when an attempt is made to read past the end, not when the read pointer is at the end.
	virtual bool		IsEOF() const = 0;

	/// Returns true if there was an IO failure
	virtual bool		IsFailed() const = 0;

	/// Read a primitive (e.g. float, int, etc.) from the binary stream
	template <class T, std::enable_if_t<std::is_trivially_copyable_v<T>, bool> = true>
	void				Read(T &outT)
	{
		ReadBytes(&outT, sizeof(outT));
	}

	/// Read a vector of primitives from the binary stream.
	/// On EOF / failure the output array is cleared.
	template <class T, class A, std::enable_if_t<std::is_trivially_copyable_v<T>, bool> = true>
	void				Read(Array<T, A> &outT)
	{
		uint32 len = uint32(outT.size()); // Initialize to previous array size, this is used for validation in the StateRecorder class
		Read(len);
		if (!IsEOF() && !IsFailed())
		{
			outT.resize(len);
			if constexpr (std::is_same_v<T, Vec3> || std::is_same_v<T, DVec3> || std::is_same_v<T, DMat44>)
			{
				// These types have unused components that we don't want to read
				for (typename Array<T, A>::size_type i = 0; i < len; ++i)
					Read(outT[i]);
			}
			else
			{
				// Read all elements at once
				ReadBytes(outT.data(), len * sizeof(T));
			}
		}
		else
			outT.clear();
	}

	/// Read a string from the binary stream (reads the number of characters and then the characters).
	/// On EOF / failure the output string is cleared.
	template <class Type, class Traits, class Allocator>
	void				Read(std::basic_string<Type, Traits, Allocator> &outString)
	{
		uint32 len = 0;
		Read(len);
		if (!IsEOF() && !IsFailed())
		{
			outString.resize(len);
			ReadBytes(outString.data(), len * sizeof(Type));
		}
		else
			outString.clear();
	}

	/// Read a vector of primitives from the binary stream using a custom function to read the elements.
	/// inReadElement is called as inReadElement(StreamIn &, T &) for each element in turn.
	template <class T, class A, typename F>
	void				Read(Array<T, A> &outT, const F &inReadElement)
	{
		uint32 len = uint32(outT.size()); // Initialize to previous array size, this is used for validation in the StateRecorder class
		Read(len);
		if (!IsEOF() && !IsFailed())
		{
			outT.resize(len);
			for (typename Array<T, A>::size_type i = 0; i < len; ++i)
				inReadElement(*this, outT[i]);
		}
		else
			outT.clear();
	}

	/// Read a Vec3 (don't read W; it is reconstructed with sFixW so the padding lane stays well defined)
	void				Read(Vec3 &outVec)
	{
		ReadBytes(&outVec, 3 * sizeof(float));
		outVec = Vec3::sFixW(outVec.mValue);
	}

	/// Read a DVec3 (don't read W)
	void				Read(DVec3 &outVec)
	{
		ReadBytes(&outVec, 3 * sizeof(double));
		outVec = DVec3::sFixW(outVec.mValue);
	}

	/// Read a DMat44 (don't read W component of translation); mirrors StreamOut::Write(DMat44)
	void				Read(DMat44 &outVec)
	{
		Vec4 x, y, z;
		Read(x);
		Read(y);
		Read(z);

		DVec3 t;
		Read(t);

		outVec = DMat44(x, y, z, t);
	}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,97 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// Simple binary output stream
class JPH_EXPORT StreamOut : public NonCopyable
{
public:
	/// Virtual destructor
	virtual				~StreamOut() = default;

	/// Write a string of bytes to the binary stream
	virtual void		WriteBytes(const void *inData, size_t inNumBytes) = 0;

	/// Returns true if there was an IO failure
	virtual bool		IsFailed() const = 0;

	/// Write a primitive (e.g. float, int, etc.) to the binary stream
	template <class T, std::enable_if_t<std::is_trivially_copyable_v<T>, bool> = true>
	void				Write(const T &inT)
	{
		WriteBytes(&inT, sizeof(inT));
	}

	/// Write a vector of primitives to the binary stream (length prefix followed by the elements)
	template <class T, class A, std::enable_if_t<std::is_trivially_copyable_v<T>, bool> = true>
	void				Write(const Array<T, A> &inT)
	{
		uint32 len = uint32(inT.size());
		Write(len);
		if (!IsFailed())
		{
			if constexpr (std::is_same_v<T, Vec3> || std::is_same_v<T, DVec3> || std::is_same_v<T, DMat44>)
			{
				// These types have unused components that we don't want to write
				for (typename Array<T, A>::size_type i = 0; i < len; ++i)
					Write(inT[i]);
			}
			else
			{
				// Write all elements at once
				WriteBytes(inT.data(), len * sizeof(T));
			}
		}
	}

	/// Write a string to the binary stream (writes the number of characters and then the characters)
	template <class Type, class Traits, class Allocator>
	void				Write(const std::basic_string<Type, Traits, Allocator> &inString)
	{
		uint32 len = uint32(inString.size());
		Write(len);
		if (!IsFailed())
			WriteBytes(inString.data(), len * sizeof(Type));
	}

	/// Write a vector of primitives to the binary stream using a custom write function.
	/// inWriteElement is called as inWriteElement(const T &, StreamOut &) for each element in turn.
	template <class T, class A, typename F>
	void				Write(const Array<T, A> &inT, const F &inWriteElement)
	{
		uint32 len = uint32(inT.size());
		Write(len);
		if (!IsFailed())
			for (typename Array<T, A>::size_type i = 0; i < len; ++i)
				inWriteElement(inT[i], *this);
	}

	/// Write a Vec3 (don't write W)
	void				Write(const Vec3 &inVec)
	{
		WriteBytes(&inVec, 3 * sizeof(float));
	}

	/// Write a DVec3 (don't write W)
	void				Write(const DVec3 &inVec)
	{
		WriteBytes(&inVec, 3 * sizeof(double));
	}

	/// Write a DMat44 (don't write W component of translation); mirrors StreamIn::Read(DMat44)
	void				Write(const DMat44 &inVec)
	{
		Write(inVec.GetColumn4(0));
		Write(inVec.GetColumn4(1));
		Write(inVec.GetColumn4(2));

		Write(inVec.GetTranslation());
	}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,168 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/Result.h>
#include <Jolt/Core/StreamIn.h>
#include <Jolt/Core/StreamOut.h>
#include <Jolt/Core/UnorderedMap.h>
#include <Jolt/Core/Factory.h>
JPH_NAMESPACE_BEGIN
namespace StreamUtils {

/// Maps an object pointer to the ID it was assigned when it was first written (used to deduplicate shared objects)
template <class Type>
using ObjectToIDMap = UnorderedMap<const Type *, uint32>;

/// Maps an ID back to the restored object; the index into the array is the ID
template <class Type>
using IDToObjectMap = Array<Ref<Type>>;

// Restore a single object by reading the hash of the type, constructing it and then calling the restore function
template <class Type>
Result<Ref<Type>> RestoreObject(StreamIn &inStream, void (Type::*inRestoreBinaryStateFunction)(StreamIn &))
{
	Result<Ref<Type>> result;

	// Read the hash of the type
	uint32 hash;
	inStream.Read(hash);
	if (inStream.IsEOF() || inStream.IsFailed())
	{
		result.SetError("Failed to read type hash");
		return result;
	}

	// Get the RTTI for the type (must have been registered with the Factory)
	const RTTI *rtti = Factory::sInstance->Find(hash);
	if (rtti == nullptr)
	{
		result.SetError("Failed to create instance of type");
		return result;
	}

	// Construct and read the data of the type
	Ref<Type> object = reinterpret_cast<Type *>(rtti->CreateObject());
	(object->*inRestoreBinaryStateFunction)(inStream);
	if (inStream.IsEOF() || inStream.IsFailed())
	{
		result.SetError("Failed to restore object");
		return result;
	}

	result.Set(object);
	return result;
}

/// Save an object reference to a stream. Uses a map to map objects to IDs which is also used to prevent writing duplicates.
template <class Type>
void SaveObjectReference(StreamOut &inStream, const Type *inObject, ObjectToIDMap<Type> *ioObjectToIDMap)
{
	if (ioObjectToIDMap == nullptr || inObject == nullptr)
	{
		// Write null ID (~0 is reserved as the null marker)
		inStream.Write(~uint32(0));
	}
	else
	{
		typename ObjectToIDMap<Type>::const_iterator id = ioObjectToIDMap->find(inObject);
		if (id != ioObjectToIDMap->end())
		{
			// Existing object, write ID
			inStream.Write(id->second);
		}
		else
		{
			// New object, write the ID
			uint32 new_id = uint32(ioObjectToIDMap->size());
			(*ioObjectToIDMap)[inObject] = new_id;
			inStream.Write(new_id);

			// Write the object
			inObject->SaveBinaryState(inStream);
		}
	}
}

/// Restore an object reference from stream.
template <class Type>
Result<Ref<Type>> RestoreObjectReference(StreamIn &inStream, IDToObjectMap<Type> &ioIDToObjectMap)
{
	Result<Ref<Type>> result;

	// Read id
	uint32 id = ~uint32(0);
	inStream.Read(id);

	// Check null
	if (id == ~uint32(0))
	{
		result.Set(nullptr);
		return result;
	}

	// Check if it already exists
	if (id >= ioIDToObjectMap.size())
	{
		// New object, restore it.
		// IDs are assigned sequentially by SaveObjectReference, so a new ID must be
		// exactly the next index in the map.
		result = Type::sRestoreFromBinaryState(inStream);
		if (result.HasError())
			return result;
		JPH_ASSERT(id == ioIDToObjectMap.size());
		ioIDToObjectMap.push_back(result.Get());
	}
	else
	{
		// Existing object, return it
		result.Set(ioIDToObjectMap[id].GetPtr());
	}
	return result;
}

// Save an array of objects to a stream (length prefix followed by one object reference per element).
template <class ArrayType, class ValueType>
void SaveObjectArray(StreamOut &inStream, const ArrayType &inArray, ObjectToIDMap<ValueType> *ioObjectToIDMap)
{
	uint32 len = uint32(inArray.size());
	inStream.Write(len);
	for (const ValueType *value: inArray)
		SaveObjectReference(inStream, value, ioObjectToIDMap);
}

// Restore an array of objects from a stream (inverse of SaveObjectArray).
template <class ArrayType, class ValueType>
Result<ArrayType> RestoreObjectArray(StreamIn &inStream, IDToObjectMap<ValueType> &ioIDToObjectMap)
{
	Result<ArrayType> result;

	uint32 len;
	inStream.Read(len);
	if (inStream.IsEOF() || inStream.IsFailed())
	{
		result.SetError("Failed to read stream");
		return result;
	}

	ArrayType values;
	values.reserve(len);
	for (size_t i = 0; i < len; ++i)
	{
		Result value = RestoreObjectReference(inStream, ioIDToObjectMap);
		if (value.HasError())
		{
			result.SetError(value.GetError());
			return result;
		}
		values.push_back(std::move(value.Get()));
	}

	result.Set(values);
	return result;
}

} // StreamUtils
JPH_NAMESPACE_END

View File

@@ -0,0 +1,53 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/StreamIn.h>
#include <Jolt/Core/StreamOut.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <ostream>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
/// Wrapper around std::ostream
class StreamOutWrapper : public StreamOut
{
public:
/// Constructor
StreamOutWrapper(ostream &ioWrapped) : mWrapped(ioWrapped) { }
/// Write a string of bytes to the binary stream
virtual void WriteBytes(const void *inData, size_t inNumBytes) override { mWrapped.write((const char *)inData, inNumBytes); }
/// Returns true if there was an IO failure
virtual bool IsFailed() const override { return mWrapped.fail(); }
private:
ostream & mWrapped;
};
/// Wrapper around std::istream
class StreamInWrapper : public StreamIn
{
public:
/// Constructor
StreamInWrapper(istream &ioWrapped) : mWrapped(ioWrapped) { }
/// Write a string of bytes to the binary stream
virtual void ReadBytes(void *outData, size_t inNumBytes) override { mWrapped.read((char *)outData, inNumBytes); }
/// Returns true when an attempt has been made to read past the end of the file
virtual bool IsEOF() const override { return mWrapped.eof(); }
/// Returns true if there was an IO failure
virtual bool IsFailed() const override { return mWrapped.fail(); }
private:
istream & mWrapped;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,63 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// A strided pointer behaves exactly like a normal pointer except that the
/// elements that the pointer points to can be part of a larger structure.
/// The stride gives the number of bytes from one element to the next.
template <class T>
class JPH_EXPORT StridedPtr
{
public:
	using value_type = T;

	/// Constructors
	StridedPtr() = default;
	StridedPtr(const StridedPtr &inRHS) = default;
	StridedPtr(T *inPtr, int inStride = sizeof(T)) : mPtr(const_cast<uint8 *>(reinterpret_cast<const uint8 *>(inPtr))), mStride(inStride) { }

	/// Assignment
	inline StridedPtr &	operator = (const StridedPtr &inRHS) = default;

	/// Incrementing / decrementing (all offsets are in elements, internally scaled by the stride)
	inline StridedPtr &	operator ++ ()					{ mPtr += mStride; return *this; }
	inline StridedPtr &	operator -- ()					{ mPtr -= mStride; return *this; }
	inline StridedPtr	operator ++ (int)				{ StridedPtr old_ptr(*this); mPtr += mStride; return old_ptr; }
	inline StridedPtr	operator -- (int)				{ StridedPtr old_ptr(*this); mPtr -= mStride; return old_ptr; }
	inline StridedPtr	operator + (int inOffset) const	{ StridedPtr new_ptr(*this); new_ptr.mPtr += inOffset * mStride; return new_ptr; }
	inline StridedPtr	operator - (int inOffset) const	{ StridedPtr new_ptr(*this); new_ptr.mPtr -= inOffset * mStride; return new_ptr; }
	inline void			operator += (int inOffset)		{ mPtr += inOffset * mStride; }
	inline void			operator -= (int inOffset)		{ mPtr -= inOffset * mStride; }

	/// Distance between two pointers in elements (both pointers must use the same stride)
	inline int			operator - (const StridedPtr &inRHS) const { JPH_ASSERT(inRHS.mStride == mStride); return (mPtr - inRHS.mPtr) / mStride; }

	/// Comparison operators (compare addresses only, the stride is not considered)
	inline bool			operator == (const StridedPtr &inRHS) const { return mPtr == inRHS.mPtr; }
	inline bool			operator != (const StridedPtr &inRHS) const { return mPtr != inRHS.mPtr; }
	inline bool			operator <= (const StridedPtr &inRHS) const { return mPtr <= inRHS.mPtr; }
	inline bool			operator >= (const StridedPtr &inRHS) const { return mPtr >= inRHS.mPtr; }
	inline bool			operator < (const StridedPtr &inRHS) const	{ return mPtr < inRHS.mPtr; }
	inline bool			operator > (const StridedPtr &inRHS) const	{ return mPtr > inRHS.mPtr; }

	/// Access value
	inline T &			operator * () const				{ return *reinterpret_cast<T *>(mPtr); }
	inline T *			operator -> () const			{ return reinterpret_cast<T *>(mPtr); }
	inline T &			operator [] (int inOffset) const { uint8 *ptr = mPtr + inOffset * mStride; return *reinterpret_cast<T *>(ptr); }

	/// Explicit conversion
	inline T *			GetPtr() const					{ return reinterpret_cast<T *>(mPtr); }

	/// Get stride in bytes
	inline int			GetStride() const				{ return mStride; }

private:
	uint8 *				mPtr = nullptr;					///< Pointer to element
	int					mStride = 0;					///< Stride (number of bytes) between elements
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,101 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/StringTools.h>
JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <cstdarg>
JPH_SUPPRESS_WARNINGS_STD_END
JPH_NAMESPACE_BEGIN
String StringFormat(const char *inFMT, ...)
{
	// Fixed size buffer: output longer than 1023 characters is silently truncated
	// (vsnprintf always null terminates), as documented in StringTools.h
	char buffer[1024];

	// Format the string
	va_list list;
	va_start(list, inFMT);
	vsnprintf(buffer, sizeof(buffer), inFMT, list);
	va_end(list);

	return String(buffer);
}
// Replace every occurrence of inSearch in ioString with inReplace, scanning left to right
void StringReplace(String &ioString, const string_view &inSearch, const string_view &inReplace)
{
	size_t pos = ioString.find(inSearch);
	while (pos != String::npos)
	{
		ioString.replace(pos, inSearch.size(), inReplace);

		// Resume searching after the inserted text so the replacement itself is never rescanned
		pos = ioString.find(inSearch, pos + inReplace.size());
	}
}
void StringToVector(const string_view &inString, Array<String> &outVector, const string_view &inDelimiter, bool inClearVector)
{
JPH_ASSERT(inDelimiter.size() > 0);
// Ensure vector empty
if (inClearVector)
outVector.clear();
// No string? no elements
if (inString.empty())
return;
// Start with initial string
String s(inString);
// Add to vector while we have a delimiter
size_t i;
while (!s.empty() && (i = s.find(inDelimiter)) != String::npos)
{
outVector.push_back(s.substr(0, i));
s.erase(0, i + inDelimiter.length());
}
// Add final element
outVector.push_back(s);
}
// Join the elements of inVector into outString, separated by inDelimiter
void VectorToString(const Array<String> &inVector, String &outString, const string_view &inDelimiter)
{
	// Start from an empty result
	outString.clear();

	for (const String &element : inVector)
	{
		// Separator before every element except while the result is still empty.
		// Note: deliberately keyed on the accumulated string rather than the element
		// index, so leading empty elements do not pick up a delimiter (legacy behavior).
		if (!outString.empty())
			outString.append(inDelimiter);

		outString.append(element);
	}
}
// Return a lower case copy of inString (per character tolower, current C locale)
String ToLower(const string_view &inString)
{
	String result;
	result.reserve(inString.length());
	for (size_t i = 0; i < inString.size(); ++i)
		result.push_back(char(tolower(inString[i])));
	return result;
}
// Map the lower 4 bits of inNibble to its 4 character binary representation.
// The returned pointer references a static string and stays valid forever.
const char *NibbleToBinary(uint32 inNibble)
{
	static const char *sNibbles[] = { "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111" };
	return sNibbles[inNibble & 0b1111];
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,38 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Create a formatted text string for debugging purposes.
/// Note that this function has an internal buffer of 1024 characters, so long strings will be trimmed.
JPH_EXPORT String StringFormat(const char *inFMT, ...);
/// Convert type to string
template<typename T>
String ConvertToString(const T &inValue)
{
	// Stream into a string stream that uses the engine allocator so that .str()
	// yields a Jolt String rather than a std::string
	using OStringStream = std::basic_ostringstream<char, std::char_traits<char>, STLAllocator<char>>;
	OStringStream oss;
	oss << inValue;
	return oss.str();
}
/// Replace substring with other string
JPH_EXPORT void StringReplace(String &ioString, const string_view &inSearch, const string_view &inReplace);
/// Convert a delimited string to an array of strings
JPH_EXPORT void StringToVector(const string_view &inString, Array<String> &outVector, const string_view &inDelimiter = ",", bool inClearVector = true);
/// Convert an array strings to a delimited string
JPH_EXPORT void VectorToString(const Array<String> &inVector, String &outString, const string_view &inDelimiter = ",");
/// Convert a string to lower case
JPH_EXPORT String ToLower(const string_view &inString);
/// Converts the lower 4 bits of inNibble to a string that represents the number in binary format
JPH_EXPORT const char *NibbleToBinary(uint32 inNibble);
JPH_NAMESPACE_END

View File

@@ -0,0 +1,188 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// Allocator for temporary allocations.
/// This allocator works as a stack: The blocks must always be freed in the reverse order as they are allocated.
/// Note that allocations and frees can take place from different threads, but the order is guaranteed though
/// job dependencies, so it is not needed to use any form of locking.
class JPH_EXPORT TempAllocator : public NonCopyable
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Destructor
	virtual					~TempAllocator() = default;

	/// Allocates inSize bytes of memory, returned memory address must be JPH_RVECTOR_ALIGNMENT byte aligned
	virtual void *			Allocate(uint inSize) = 0;

	/// Frees inSize bytes of memory located at inAddress.
	/// Note: implementations may require frees to happen in reverse order of allocation (stack discipline), see the class comment.
	virtual void			Free(void *inAddress, uint inSize) = 0;
};
/// Default implementation of the temp allocator that allocates a large block through malloc upfront
class JPH_EXPORT TempAllocatorImpl final : public TempAllocator
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructs the allocator with a maximum allocatable size of inSize
	explicit				TempAllocatorImpl(size_t inSize) :
		mBase(static_cast<uint8 *>(AlignedAllocate(inSize, JPH_RVECTOR_ALIGNMENT))),
		mSize(inSize)
	{
	}

	/// Destructor, frees the block
	virtual					~TempAllocatorImpl() override
	{
		// All allocations must have been freed before the allocator is destroyed
		JPH_ASSERT(mTop == 0);
		AlignedFree(mBase);
	}

	// See: TempAllocator
	virtual void *			Allocate(uint inSize) override
	{
		if (inSize == 0)
		{
			return nullptr;
		}
		else
		{
			// Round the size up so the next allocation stays JPH_RVECTOR_ALIGNMENT aligned
			size_t new_top = mTop + AlignUp(inSize, JPH_RVECTOR_ALIGNMENT);
			if (new_top > mSize)
			{
				// Running out of the fixed buffer is fatal by design
				Trace("TempAllocator: Out of memory trying to allocate %u bytes", inSize);
				std::abort();
			}
			void *address = mBase + mTop;
			mTop = new_top;
			return address;
		}
	}

	// See: TempAllocator
	virtual void			Free(void *inAddress, uint inSize) override
	{
		if (inAddress == nullptr)
		{
			JPH_ASSERT(inSize == 0);
		}
		else
		{
			// This is a stack allocator: frees must occur in reverse order of allocation
			mTop -= AlignUp(inSize, JPH_RVECTOR_ALIGNMENT);
			if (mBase + mTop != inAddress)
			{
				Trace("TempAllocator: Freeing in the wrong order");
				std::abort();
			}
		}
	}

	/// Check if no allocations have been made
	bool					IsEmpty() const
	{
		return mTop == 0;
	}

	/// Get the total size of the fixed buffer
	size_t					GetSize() const
	{
		return mSize;
	}

	/// Get current usage in bytes of the buffer
	size_t					GetUsage() const
	{
		return mTop;
	}

	/// Check if an allocation of inSize can be made in this fixed buffer allocator
	bool					CanAllocate(uint inSize) const
	{
		return mTop + AlignUp(inSize, JPH_RVECTOR_ALIGNMENT) <= mSize;
	}

	/// Check if memory block at inAddress is owned by this allocator
	bool					OwnsMemory(const void *inAddress) const
	{
		return inAddress >= mBase && inAddress < mBase + mSize;
	}

private:
	uint8 *					mBase;							///< Base address of the memory block
	size_t					mSize;							///< Size of the memory block
	size_t					mTop = 0;						///< End of currently allocated area
};
/// Implementation of the TempAllocator that just falls back to malloc/free
/// Note: This can be quite slow when running in the debugger as large memory blocks need to be initialized with 0xcd
class JPH_EXPORT TempAllocatorMalloc final : public TempAllocator
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Allocate through the aligned system allocator; a zero sized request yields nullptr
	virtual void *			Allocate(uint inSize) override
	{
		if (inSize == 0)
			return nullptr;
		return AlignedAllocate(inSize, JPH_RVECTOR_ALIGNMENT);
	}

	/// Return a block to the system allocator; nullptr is ignored
	virtual void			Free(void *inAddress, [[maybe_unused]] uint inSize) override
	{
		if (inAddress == nullptr)
			return;
		AlignedFree(inAddress);
	}
};
/// Implementation of the TempAllocator that tries to allocate from a large preallocated block, but falls back to malloc when it is exhausted
class JPH_EXPORT TempAllocatorImplWithMallocFallback final : public TempAllocator
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructs the allocator with an initial fixed block of inSize bytes
	explicit TempAllocatorImplWithMallocFallback(uint inSize) :
		mAllocator(inSize)
	{
	}

	// See: TempAllocator
	virtual void *	Allocate(uint inSize) override
	{
		// Prefer the fast fixed-size buffer, use the heap only when it cannot hold the request
		return mAllocator.CanAllocate(inSize)? mAllocator.Allocate(inSize) : mFallbackAllocator.Allocate(inSize);
	}

	// See: TempAllocator
	virtual void	Free(void *inAddress, uint inSize) override
	{
		if (inAddress == nullptr)
		{
			// Only zero sized allocations return nullptr
			JPH_ASSERT(inSize == 0);
			return;
		}

		// Route the block back to whichever allocator handed it out
		if (mAllocator.OwnsMemory(inAddress))
			mAllocator.Free(inAddress, inSize);
		else
			mFallbackAllocator.Free(inAddress, inSize);
	}

private:
	TempAllocatorImpl		mAllocator;				///< Fixed size stack allocator tried first
	TempAllocatorMalloc		mFallbackAllocator;		///< Heap based fallback for overflow
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,36 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Core/TickCounter.h>
#if defined(JPH_PLATFORM_WINDOWS)
JPH_SUPPRESS_WARNING_PUSH
JPH_MSVC_SUPPRESS_WARNING(5039) // winbase.h(13179): warning C5039: 'TpSetCallbackCleanupGroup': pointer or reference to potentially throwing function passed to 'extern "C"' function under -EHc. Undefined behavior may occur if this function throws an exception.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef JPH_COMPILER_MINGW
#include <Windows.h>
#else
#include <windows.h>
#endif
JPH_SUPPRESS_WARNING_POP
#endif
JPH_NAMESPACE_BEGIN
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))
uint64 GetProcessorTickCount()
{
// On UWP / Windows-on-ARM the raw cycle counter is not used; fall back to the OS high resolution performance counter
LARGE_INTEGER count;
QueryPerformanceCounter(&count);
return uint64(count.QuadPart);
}
#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)
JPH_NAMESPACE_END

View File

@@ -0,0 +1,47 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
// Include for __rdtsc
#if defined(JPH_PLATFORM_WINDOWS)
#include <intrin.h>
#elif defined(JPH_CPU_X86) && defined(JPH_COMPILER_GCC)
#include <x86intrin.h>
#elif defined(JPH_CPU_E2K)
#include <x86intrin.h>
#endif
JPH_NAMESPACE_BEGIN
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))
/// Functionality to get the processor's cycle counter
uint64 GetProcessorTickCount(); // Not inline to avoid having to include Windows.h
#else
/// Functionality to get the processor's cycle counter
JPH_INLINE uint64 GetProcessorTickCount()
{
#if defined(JPH_PLATFORM_BLUE)
return JPH_PLATFORM_BLUE_GET_TICKS();
#elif defined(JPH_CPU_X86)
// Read the x86 time stamp counter
return __rdtsc();
#elif defined(JPH_CPU_E2K)
// Elbrus exposes an x86-compatible __rdtsc intrinsic
return __rdtsc();
#elif defined(JPH_CPU_ARM) && defined(JPH_USE_NEON)
// Read the ARMv8 virtual counter register
uint64 val;
asm volatile("mrs %0, cntvct_el0" : "=r" (val));
return val;
#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_WASM) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
return 0; // Not supported
#else
#error Undefined
#endif
}
#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)
JPH_NAMESPACE_END

View File

@@ -0,0 +1,80 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/HashTable.h>
JPH_NAMESPACE_BEGIN
/// Internal helper class to provide context for UnorderedMap
template <class Key, class Value>
class UnorderedMapDetail
{
public:
/// Get key from key value pair
static const Key & sGetKey(const std::pair<Key, Value> &inKeyValue)
{
return inKeyValue.first;
}
};
/// Hash Map class
/// @tparam Key Key type
/// @tparam Value Value type
/// @tparam Hash Hash function (note should be 64-bits)
/// @tparam KeyEqual Equality comparison function
template <class Key, class Value, class Hash = JPH::Hash<Key>, class KeyEqual = std::equal_to<Key>>
class UnorderedMap : public HashTable<Key, std::pair<Key, Value>, UnorderedMapDetail<Key, Value>, Hash, KeyEqual>
{
using Base = HashTable<Key, std::pair<Key, Value>, UnorderedMapDetail<Key, Value>, Hash, KeyEqual>;
public:
using size_type = typename Base::size_type;
using iterator = typename Base::iterator;
using const_iterator = typename Base::const_iterator;
using value_type = typename Base::value_type;
/// Access the value for inKey, default constructing it if the key was not present yet
Value & operator [] (const Key &inKey)
{
size_type index;
bool inserted = this->InsertKey(inKey, index);
value_type &key_value = this->GetElement(index);
// A newly inserted slot contains uninitialized storage, so construct the pair in place
if (inserted)
new (&key_value) value_type(inKey, Value());
return key_value.second;
}
/// Construct a value from inArgs for inKey if the key is not present yet.
/// Returns an iterator to the element and a bool that indicates if an insert took place.
template<class... Args>
std::pair<iterator, bool> try_emplace(const Key &inKey, Args &&...inArgs)
{
size_type index;
bool inserted = this->InsertKey(inKey, index);
// Only construct the value when a new slot was claimed; an existing element is left untouched
if (inserted)
new (&this->GetElement(index)) value_type(std::piecewise_construct, std::forward_as_tuple(inKey), std::forward_as_tuple(std::forward<Args>(inArgs)...));
return std::make_pair(iterator(this, index), inserted);
}
/// Same as above, but moves inKey into the map when an insert takes place
template<class... Args>
std::pair<iterator, bool> try_emplace(Key &&inKey, Args &&...inArgs)
{
size_type index;
// Note: InsertKey only reads inKey, so it is still valid to move from it below
bool inserted = this->InsertKey(inKey, index);
if (inserted)
new (&this->GetElement(index)) value_type(std::piecewise_construct, std::forward_as_tuple(std::move(inKey)), std::forward_as_tuple(std::forward<Args>(inArgs)...));
return std::make_pair(iterator(this, index), inserted);
}
/// Const version of find
using Base::find;
/// Non-const version of find
iterator find(const Key &inKey)
{
const_iterator it = Base::find(inKey);
return iterator(this, it.mIndex);
}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,32 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2024 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/HashTable.h>
JPH_NAMESPACE_BEGIN
/// Internal helper class to provide context for UnorderedSet
template <class Key>
class UnorderedSetDetail
{
public:
	/// For a set the stored element is its own key
	static const Key &	sGetKey(const Key &inElement)
	{
		return inElement;
	}
};
/// Hash Set class
/// @tparam Key Key type
/// @tparam Hash Hash function (note should be 64-bits)
/// @tparam KeyEqual Equality comparison function
template <class Key, class Hash = JPH::Hash<Key>, class KeyEqual = std::equal_to<Key>>
class UnorderedSet : public HashTable<Key, Key, UnorderedSetDetail<Key>, Hash, KeyEqual>
{
// All functionality is inherited from HashTable; the element itself acts as the key
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,313 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/Triangle.h>
#include <Jolt/Geometry/IndexedTriangle.h>
#include <Jolt/Geometry/Plane.h>
#include <Jolt/Math/Mat44.h>
JPH_NAMESPACE_BEGIN
/// Axis aligned box
class [[nodiscard]] AABox
{
public:
JPH_OVERRIDE_NEW_DELETE
/// Constructor
// The default box is 'inside out' (mMin > mMax) and therefore invalid; the first Encapsulate() call gives it correct bounds
AABox() : mMin(Vec3::sReplicate(FLT_MAX)), mMax(Vec3::sReplicate(-FLT_MAX)) { }
AABox(Vec3Arg inMin, Vec3Arg inMax) : mMin(inMin), mMax(inMax) { }
// Double precision bounds are rounded outward so the single precision box is conservative
AABox(DVec3Arg inMin, DVec3Arg inMax) : mMin(inMin.ToVec3RoundDown()), mMax(inMax.ToVec3RoundUp()) { }
AABox(Vec3Arg inCenter, float inRadius) : mMin(inCenter - Vec3::sReplicate(inRadius)), mMax(inCenter + Vec3::sReplicate(inRadius)) { }
/// Create box from 2 points
static AABox sFromTwoPoints(Vec3Arg inP1, Vec3Arg inP2) { return AABox(Vec3::sMin(inP1, inP2), Vec3::sMax(inP1, inP2)); }
/// Create box from indexed triangle
static AABox sFromTriangle(const VertexList &inVertices, const IndexedTriangle &inTriangle)
{
AABox box = sFromTwoPoints(Vec3(inVertices[inTriangle.mIdx[0]]), Vec3(inVertices[inTriangle.mIdx[1]]));
box.Encapsulate(Vec3(inVertices[inTriangle.mIdx[2]]));
return box;
}
/// Get bounding box of size FLT_MAX
static AABox sBiggest()
{
/// Max half extent of AABox is 0.5 * FLT_MAX so that GetSize() remains finite
return AABox(Vec3::sReplicate(-0.5f * FLT_MAX), Vec3::sReplicate(0.5f * FLT_MAX));
}
/// Comparison operators
bool operator == (const AABox &inRHS) const { return mMin == inRHS.mMin && mMax == inRHS.mMax; }
bool operator != (const AABox &inRHS) const { return mMin != inRHS.mMin || mMax != inRHS.mMax; }
/// Reset the bounding box to an empty bounding box
void SetEmpty()
{
mMin = Vec3::sReplicate(FLT_MAX);
mMax = Vec3::sReplicate(-FLT_MAX);
}
/// Check if the bounding box is valid (max >= min)
bool IsValid() const
{
return mMin.GetX() <= mMax.GetX() && mMin.GetY() <= mMax.GetY() && mMin.GetZ() <= mMax.GetZ();
}
/// Encapsulate point in bounding box
void Encapsulate(Vec3Arg inPos)
{
mMin = Vec3::sMin(mMin, inPos);
mMax = Vec3::sMax(mMax, inPos);
}
/// Encapsulate bounding box in bounding box
void Encapsulate(const AABox &inRHS)
{
mMin = Vec3::sMin(mMin, inRHS.mMin);
mMax = Vec3::sMax(mMax, inRHS.mMax);
}
/// Encapsulate triangle in bounding box
void Encapsulate(const Triangle &inRHS)
{
Vec3 v = Vec3::sLoadFloat3Unsafe(inRHS.mV[0]);
Encapsulate(v);
v = Vec3::sLoadFloat3Unsafe(inRHS.mV[1]);
Encapsulate(v);
v = Vec3::sLoadFloat3Unsafe(inRHS.mV[2]);
Encapsulate(v);
}
/// Encapsulate triangle in bounding box
void Encapsulate(const VertexList &inVertices, const IndexedTriangle &inTriangle)
{
for (uint32 idx : inTriangle.mIdx)
Encapsulate(Vec3(inVertices[idx]));
}
/// Intersect this bounding box with inOther, returns the intersection
// Note: when the boxes do not overlap the result is an invalid (inside out) box, test with IsValid()
AABox Intersect(const AABox &inOther) const
{
return AABox(Vec3::sMax(mMin, inOther.mMin), Vec3::sMin(mMax, inOther.mMax));
}
/// Make sure that each edge of the bounding box has a minimal length
void EnsureMinimalEdgeLength(float inMinEdgeLength)
{
Vec3 min_length = Vec3::sReplicate(inMinEdgeLength);
mMax = Vec3::sSelect(mMax, mMin + min_length, Vec3::sLess(mMax - mMin, min_length));
}
/// Widen the box on both sides by inVector
void ExpandBy(Vec3Arg inVector)
{
mMin -= inVector;
mMax += inVector;
}
/// Get center of bounding box
Vec3 GetCenter() const
{
return 0.5f * (mMin + mMax);
}
/// Get extent of bounding box (half of the size)
Vec3 GetExtent() const
{
return 0.5f * (mMax - mMin);
}
/// Get size of bounding box
Vec3 GetSize() const
{
return mMax - mMin;
}
/// Get surface area of bounding box
float GetSurfaceArea() const
{
Vec3 extent = mMax - mMin;
return 2.0f * (extent.GetX() * extent.GetY() + extent.GetX() * extent.GetZ() + extent.GetY() * extent.GetZ());
}
/// Get volume of bounding box
float GetVolume() const
{
Vec3 extent = mMax - mMin;
return extent.GetX() * extent.GetY() * extent.GetZ();
}
/// Check if this box contains another box
bool Contains(const AABox &inOther) const
{
return UVec4::sAnd(Vec3::sLessOrEqual(mMin, inOther.mMin), Vec3::sGreaterOrEqual(mMax, inOther.mMax)).TestAllXYZTrue();
}
/// Check if this box contains a point
bool Contains(Vec3Arg inOther) const
{
return UVec4::sAnd(Vec3::sLessOrEqual(mMin, inOther), Vec3::sGreaterOrEqual(mMax, inOther)).TestAllXYZTrue();
}
/// Check if this box contains a point
// Note: the double precision point is converted to single precision before the test
bool Contains(DVec3Arg inOther) const
{
return Contains(Vec3(inOther));
}
/// Check if this box overlaps with another box
bool Overlaps(const AABox &inOther) const
{
// The boxes overlap when there is no separating axis
return !UVec4::sOr(Vec3::sGreater(mMin, inOther.mMax), Vec3::sLess(mMax, inOther.mMin)).TestAnyXYZTrue();
}
/// Check if this box overlaps with a plane
bool Overlaps(const Plane &inPlane) const
{
Vec3 normal = inPlane.GetNormal();
float dist_normal = inPlane.SignedDistance(GetSupport(normal));
float dist_min_normal = inPlane.SignedDistance(GetSupport(-normal));
return dist_normal * dist_min_normal <= 0.0f; // If both support points are on the same side of the plane we don't overlap
}
/// Translate bounding box
void Translate(Vec3Arg inTranslation)
{
mMin += inTranslation;
mMax += inTranslation;
}
/// Translate bounding box
// Double precision version, rounds outward so the translated box remains conservative
void Translate(DVec3Arg inTranslation)
{
mMin = (DVec3(mMin) + inTranslation).ToVec3RoundDown();
mMax = (DVec3(mMax) + inTranslation).ToVec3RoundUp();
}
/// Transform bounding box
// Returns the axis aligned bounds of this box after applying inMatrix
AABox Transformed(Mat44Arg inMatrix) const
{
// Start with the translation of the matrix
Vec3 new_min, new_max;
new_min = new_max = inMatrix.GetTranslation();
// Now find the extreme points by considering the product of the min and max with each column of inMatrix
for (int c = 0; c < 3; ++c)
{
Vec3 col = inMatrix.GetColumn3(c);
Vec3 a = col * mMin[c];
Vec3 b = col * mMax[c];
new_min += Vec3::sMin(a, b);
new_max += Vec3::sMax(a, b);
}
// Return the new bounding box
return AABox(new_min, new_max);
}
/// Transform bounding box
AABox Transformed(DMat44Arg inMatrix) const
{
// Rotate in single precision, then apply the double precision translation
AABox transformed = Transformed(inMatrix.GetRotation());
transformed.Translate(inMatrix.GetTranslation());
return transformed;
}
/// Scale this bounding box, can handle non-uniform and negative scaling
AABox Scaled(Vec3Arg inScale) const
{
// sFromTwoPoints re-sorts min/max, which handles negative scale components flipping the box
return AABox::sFromTwoPoints(mMin * inScale, mMax * inScale);
}
/// Calculate the support vector for this convex shape.
Vec3 GetSupport(Vec3Arg inDirection) const
{
// Per component: pick mMin when the direction is negative, mMax otherwise
return Vec3::sSelect(mMax, mMin, Vec3::sLess(inDirection, Vec3::sZero()));
}
/// Get the vertices of the face that faces inDirection the most
template <class VERTEX_ARRAY>
void GetSupportingFace(Vec3Arg inDirection, VERTEX_ARRAY &outVertices) const
{
outVertices.resize(4);
// The dominant axis of the direction selects which slab's face to return,
// the sign selects between the min and max face of that slab
int axis = inDirection.Abs().GetHighestComponentIndex();
if (inDirection[axis] < 0.0f)
{
switch (axis)
{
case 0:
outVertices[0] = Vec3(mMax.GetX(), mMin.GetY(), mMin.GetZ());
outVertices[1] = Vec3(mMax.GetX(), mMax.GetY(), mMin.GetZ());
outVertices[2] = Vec3(mMax.GetX(), mMax.GetY(), mMax.GetZ());
outVertices[3] = Vec3(mMax.GetX(), mMin.GetY(), mMax.GetZ());
break;
case 1:
outVertices[0] = Vec3(mMin.GetX(), mMax.GetY(), mMin.GetZ());
outVertices[1] = Vec3(mMin.GetX(), mMax.GetY(), mMax.GetZ());
outVertices[2] = Vec3(mMax.GetX(), mMax.GetY(), mMax.GetZ());
outVertices[3] = Vec3(mMax.GetX(), mMax.GetY(), mMin.GetZ());
break;
case 2:
outVertices[0] = Vec3(mMin.GetX(), mMin.GetY(), mMax.GetZ());
outVertices[1] = Vec3(mMax.GetX(), mMin.GetY(), mMax.GetZ());
outVertices[2] = Vec3(mMax.GetX(), mMax.GetY(), mMax.GetZ());
outVertices[3] = Vec3(mMin.GetX(), mMax.GetY(), mMax.GetZ());
break;
}
}
else
{
switch (axis)
{
case 0:
outVertices[0] = Vec3(mMin.GetX(), mMin.GetY(), mMin.GetZ());
outVertices[1] = Vec3(mMin.GetX(), mMin.GetY(), mMax.GetZ());
outVertices[2] = Vec3(mMin.GetX(), mMax.GetY(), mMax.GetZ());
outVertices[3] = Vec3(mMin.GetX(), mMax.GetY(), mMin.GetZ());
break;
case 1:
outVertices[0] = Vec3(mMin.GetX(), mMin.GetY(), mMin.GetZ());
outVertices[1] = Vec3(mMax.GetX(), mMin.GetY(), mMin.GetZ());
outVertices[2] = Vec3(mMax.GetX(), mMin.GetY(), mMax.GetZ());
outVertices[3] = Vec3(mMin.GetX(), mMin.GetY(), mMax.GetZ());
break;
case 2:
outVertices[0] = Vec3(mMin.GetX(), mMin.GetY(), mMin.GetZ());
outVertices[1] = Vec3(mMin.GetX(), mMax.GetY(), mMin.GetZ());
outVertices[2] = Vec3(mMax.GetX(), mMax.GetY(), mMin.GetZ());
outVertices[3] = Vec3(mMax.GetX(), mMin.GetY(), mMin.GetZ());
break;
}
}
}
/// Get the closest point on or in this box to inPoint
Vec3 GetClosestPoint(Vec3Arg inPoint) const
{
// Clamp the point to the box bounds
return Vec3::sMin(Vec3::sMax(inPoint, mMin), mMax);
}
/// Get the squared distance between inPoint and this box (will be 0 if inPoint is inside the box)
inline float GetSqDistanceTo(Vec3Arg inPoint) const
{
return (GetClosestPoint(inPoint) - inPoint).LengthSq();
}
/// Bounding box min and max
Vec3 mMin;
Vec3 mMax;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,224 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/OrientedBox.h>
JPH_NAMESPACE_BEGIN
/// Helper functions that process 4 axis aligned boxes at the same time using SIMD
/// Test if 4 bounding boxes overlap with 1 bounding box, splat 1 box
JPH_INLINE UVec4 AABox4VsBox(const AABox &inBox1, Vec4Arg inBox2MinX, Vec4Arg inBox2MinY, Vec4Arg inBox2MinZ, Vec4Arg inBox2MaxX, Vec4Arg inBox2MaxY, Vec4Arg inBox2MaxZ)
{
	// Broadcast each component of box 1 to all 4 lanes
	Vec4 min1_x = inBox1.mMin.SplatX();
	Vec4 min1_y = inBox1.mMin.SplatY();
	Vec4 min1_z = inBox1.mMin.SplatZ();
	Vec4 max1_x = inBox1.mMax.SplatX();
	Vec4 max1_y = inBox1.mMax.SplatY();
	Vec4 max1_z = inBox1.mMax.SplatZ();

	// Two boxes are disjoint when they are separated along at least one axis
	UVec4 separated = UVec4::sOr(Vec4::sGreater(min1_x, inBox2MaxX), Vec4::sGreater(inBox2MinX, max1_x));
	separated = UVec4::sOr(separated, UVec4::sOr(Vec4::sGreater(min1_y, inBox2MaxY), Vec4::sGreater(inBox2MinY, max1_y)));
	separated = UVec4::sOr(separated, UVec4::sOr(Vec4::sGreater(min1_z, inBox2MaxZ), Vec4::sGreater(inBox2MinZ, max1_z)));

	// Boxes overlap when no separating axis exists
	return UVec4::sNot(separated);
}
/// Scale 4 axis aligned boxes
JPH_INLINE void AABox4Scale(Vec3Arg inScale, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ, Vec4 &outBoundsMinX, Vec4 &outBoundsMinY, Vec4 &outBoundsMinZ, Vec4 &outBoundsMaxX, Vec4 &outBoundsMaxY, Vec4 &outBoundsMaxZ)
{
	// Note: a negative scale component swaps the roles of min and max, hence the sMin/sMax below

	Vec4 sx = inScale.SplatX();
	Vec4 x1 = sx * inBoxMinX;
	Vec4 x2 = sx * inBoxMaxX;
	outBoundsMinX = Vec4::sMin(x1, x2);
	outBoundsMaxX = Vec4::sMax(x1, x2);

	Vec4 sy = inScale.SplatY();
	Vec4 y1 = sy * inBoxMinY;
	Vec4 y2 = sy * inBoxMaxY;
	outBoundsMinY = Vec4::sMin(y1, y2);
	outBoundsMaxY = Vec4::sMax(y1, y2);

	Vec4 sz = inScale.SplatZ();
	Vec4 z1 = sz * inBoxMinZ;
	Vec4 z2 = sz * inBoxMaxZ;
	outBoundsMinZ = Vec4::sMin(z1, z2);
	outBoundsMaxZ = Vec4::sMax(z1, z2);
}
/// Enlarge 4 bounding boxes with extent (add to both sides)
JPH_INLINE void AABox4EnlargeWithExtent(Vec3Arg inExtent, Vec4 &ioBoundsMinX, Vec4 &ioBoundsMinY, Vec4 &ioBoundsMinZ, Vec4 &ioBoundsMaxX, Vec4 &ioBoundsMaxY, Vec4 &ioBoundsMaxZ)
{
	// Move both faces of every slab outward by the extent on that axis
	Vec4 ex = inExtent.SplatX();
	ioBoundsMinX -= ex;
	ioBoundsMaxX += ex;

	Vec4 ey = inExtent.SplatY();
	ioBoundsMinY -= ey;
	ioBoundsMaxY += ey;

	Vec4 ez = inExtent.SplatZ();
	ioBoundsMinZ -= ez;
	ioBoundsMaxZ += ez;
}
/// Test if 4 bounding boxes overlap with a point
/// @return Per lane: all bits set when box i contains inPoint, zero otherwise
JPH_INLINE UVec4 AABox4VsPoint(Vec3Arg inPoint, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ)
{
	// Splat point to 4 component vectors
	// (splat directly from the Vec3 instead of constructing a temporary Vec4 per component, consistent with AABox4Scale above)
	Vec4 point_x = inPoint.SplatX();
	Vec4 point_y = inPoint.SplatY();
	Vec4 point_z = inPoint.SplatZ();
	// Test if point overlaps with box
	UVec4 overlapx = UVec4::sAnd(Vec4::sGreaterOrEqual(point_x, inBoxMinX), Vec4::sLessOrEqual(point_x, inBoxMaxX));
	UVec4 overlapy = UVec4::sAnd(Vec4::sGreaterOrEqual(point_y, inBoxMinY), Vec4::sLessOrEqual(point_y, inBoxMaxY));
	UVec4 overlapz = UVec4::sAnd(Vec4::sGreaterOrEqual(point_z, inBoxMinZ), Vec4::sLessOrEqual(point_z, inBoxMaxZ));
	// Test if all are overlapping
	return UVec4::sAnd(UVec4::sAnd(overlapx, overlapy), overlapz);
}
/// Test if 4 bounding boxes overlap with an oriented box
// Each lane of the result has all bits set when the corresponding AABB overlaps the oriented box, all bits zero otherwise
JPH_INLINE UVec4 AABox4VsBox(Mat44Arg inOrientation, Vec3Arg inHalfExtents, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ, float inEpsilon = 1.0e-6f)
{
// Taken from: Real Time Collision Detection - Christer Ericson
// Chapter 4.4.1, page 103-105.
// Note that the code is swapped around: A is the aabox and B is the oriented box (this saves us from having to invert the orientation of the oriented box)
// Compute translation vector t (the translation of B in the space of A)
Vec4 t[3] {
inOrientation.GetTranslation().SplatX() - 0.5f * (inBoxMinX + inBoxMaxX),
inOrientation.GetTranslation().SplatY() - 0.5f * (inBoxMinY + inBoxMaxY),
inOrientation.GetTranslation().SplatZ() - 0.5f * (inBoxMinZ + inBoxMaxZ) };
// Compute common subexpressions. Add in an epsilon term to
// counteract arithmetic errors when two edges are parallel and
// their cross product is (near) null (see text for details)
Vec3 epsilon = Vec3::sReplicate(inEpsilon);
Vec3 abs_r[3] { inOrientation.GetAxisX().Abs() + epsilon, inOrientation.GetAxisY().Abs() + epsilon, inOrientation.GetAxisZ().Abs() + epsilon };
// Half extents for a
Vec4 a_half_extents[3] {
0.5f * (inBoxMaxX - inBoxMinX),
0.5f * (inBoxMaxY - inBoxMinY),
0.5f * (inBoxMaxZ - inBoxMinZ) };
// Half extents of b
Vec4 b_half_extents_x = inHalfExtents.SplatX();
Vec4 b_half_extents_y = inHalfExtents.SplatY();
Vec4 b_half_extents_z = inHalfExtents.SplatZ();
// Each component corresponds to 1 overlapping OBB vs ABB
// Start with all lanes overlapping and AND in the result of each separating axis test
UVec4 overlaps = UVec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
// Test axes L = A0, L = A1, L = A2
Vec4 ra, rb;
for (int i = 0; i < 3; i++)
{
ra = a_half_extents[i];
rb = b_half_extents_x * abs_r[0][i] + b_half_extents_y * abs_r[1][i] + b_half_extents_z * abs_r[2][i];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual(t[i].Abs(), ra + rb));
}
// Test axes L = B0, L = B1, L = B2
for (int i = 0; i < 3; i++)
{
ra = a_half_extents[0] * abs_r[i][0] + a_half_extents[1] * abs_r[i][1] + a_half_extents[2] * abs_r[i][2];
rb = Vec4::sReplicate(inHalfExtents[i]);
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[0] * inOrientation(0, i) + t[1] * inOrientation(1, i) + t[2] * inOrientation(2, i)).Abs(), ra + rb));
}
// The remaining 9 axes are the pairwise cross products of the edge directions of A and B
// Test axis L = A0 x B0
ra = a_half_extents[1] * abs_r[0][2] + a_half_extents[2] * abs_r[0][1];
rb = b_half_extents_y * abs_r[2][0] + b_half_extents_z * abs_r[1][0];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[2] * inOrientation(1, 0) - t[1] * inOrientation(2, 0)).Abs(), ra + rb));
// Test axis L = A0 x B1
ra = a_half_extents[1] * abs_r[1][2] + a_half_extents[2] * abs_r[1][1];
rb = b_half_extents_x * abs_r[2][0] + b_half_extents_z * abs_r[0][0];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[2] * inOrientation(1, 1) - t[1] * inOrientation(2, 1)).Abs(), ra + rb));
// Test axis L = A0 x B2
ra = a_half_extents[1] * abs_r[2][2] + a_half_extents[2] * abs_r[2][1];
rb = b_half_extents_x * abs_r[1][0] + b_half_extents_y * abs_r[0][0];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[2] * inOrientation(1, 2) - t[1] * inOrientation(2, 2)).Abs(), ra + rb));
// Test axis L = A1 x B0
ra = a_half_extents[0] * abs_r[0][2] + a_half_extents[2] * abs_r[0][0];
rb = b_half_extents_y * abs_r[2][1] + b_half_extents_z * abs_r[1][1];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[0] * inOrientation(2, 0) - t[2] * inOrientation(0, 0)).Abs(), ra + rb));
// Test axis L = A1 x B1
ra = a_half_extents[0] * abs_r[1][2] + a_half_extents[2] * abs_r[1][0];
rb = b_half_extents_x * abs_r[2][1] + b_half_extents_z * abs_r[0][1];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[0] * inOrientation(2, 1) - t[2] * inOrientation(0, 1)).Abs(), ra + rb));
// Test axis L = A1 x B2
ra = a_half_extents[0] * abs_r[2][2] + a_half_extents[2] * abs_r[2][0];
rb = b_half_extents_x * abs_r[1][1] + b_half_extents_y * abs_r[0][1];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[0] * inOrientation(2, 2) - t[2] * inOrientation(0, 2)).Abs(), ra + rb));
// Test axis L = A2 x B0
ra = a_half_extents[0] * abs_r[0][1] + a_half_extents[1] * abs_r[0][0];
rb = b_half_extents_y * abs_r[2][2] + b_half_extents_z * abs_r[1][2];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[1] * inOrientation(0, 0) - t[0] * inOrientation(1, 0)).Abs(), ra + rb));
// Test axis L = A2 x B1
ra = a_half_extents[0] * abs_r[1][1] + a_half_extents[1] * abs_r[1][0];
rb = b_half_extents_x * abs_r[2][2] + b_half_extents_z * abs_r[0][2];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[1] * inOrientation(0, 1) - t[0] * inOrientation(1, 1)).Abs(), ra + rb));
// Test axis L = A2 x B2
ra = a_half_extents[0] * abs_r[2][1] + a_half_extents[1] * abs_r[2][0];
rb = b_half_extents_x * abs_r[1][2] + b_half_extents_y * abs_r[0][2];
overlaps = UVec4::sAnd(overlaps, Vec4::sLessOrEqual((t[1] * inOrientation(0, 2) - t[0] * inOrientation(1, 2)).Abs(), ra + rb));
// Return if the OBB vs AABBs are intersecting
return overlaps;
}
/// Convenience function that tests 4 AABoxes vs OrientedBox
JPH_INLINE UVec4 AABox4VsBox(const OrientedBox &inBox, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ, float inEpsilon = 1.0e-6f)
{
// Forward to the Mat44 / half extents version above
return AABox4VsBox(inBox.mOrientation, inBox.mHalfExtents, inBoxMinX, inBoxMinY, inBoxMinZ, inBoxMaxX, inBoxMaxY, inBoxMaxZ, inEpsilon);
}
/// Get the squared distance between 4 AABoxes and a point
JPH_INLINE Vec4 AABox4DistanceSqToPoint(Vec4Arg inPointX, Vec4Arg inPointY, Vec4Arg inPointZ, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ)
{
	// Clamp the point against each box to find the nearest point on/in that box
	Vec4 nearest_x = Vec4::sMin(Vec4::sMax(inPointX, inBoxMinX), inBoxMaxX);
	Vec4 nearest_y = Vec4::sMin(Vec4::sMax(inPointY, inBoxMinY), inBoxMaxY);
	Vec4 nearest_z = Vec4::sMin(Vec4::sMax(inPointZ, inBoxMinZ), inBoxMaxZ);

	// Squared euclidean distance from the point to that nearest point (zero when the point is inside the box)
	Vec4 diff_x = nearest_x - inPointX;
	Vec4 diff_y = nearest_y - inPointY;
	Vec4 diff_z = nearest_z - inPointZ;
	return Square(diff_x) + Square(diff_y) + Square(diff_z);
}
/// Get the squared distance between 4 AABoxes and a point
// Note: takes Vec3Arg (instead of Vec3 by value) for consistency with the other helpers in this file (e.g. AABox4VsPoint, AABox4VsSphere)
JPH_INLINE Vec4 AABox4DistanceSqToPoint(Vec3Arg inPoint, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ)
{
// Splat the point and forward to the Vec4 version above
return AABox4DistanceSqToPoint(inPoint.SplatX(), inPoint.SplatY(), inPoint.SplatZ(), inBoxMinX, inBoxMinY, inBoxMinZ, inBoxMaxX, inBoxMaxY, inBoxMaxZ);
}
/// Test 4 AABoxes vs a sphere
JPH_INLINE UVec4 AABox4VsSphere(Vec4Arg inCenterX, Vec4Arg inCenterY, Vec4Arg inCenterZ, Vec4Arg inRadiusSq, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ)
{
	// The sphere intersects a box when the squared distance from its center to that box does not exceed the squared radius
	Vec4 center_to_box_sq = AABox4DistanceSqToPoint(inCenterX, inCenterY, inCenterZ, inBoxMinX, inBoxMinY, inBoxMinZ, inBoxMaxX, inBoxMaxY, inBoxMaxZ);
	return Vec4::sLessOrEqual(center_to_box_sq, inRadiusSq);
}
/// Test 4 AABoxes vs a sphere
JPH_INLINE UVec4 AABox4VsSphere(Vec3Arg inCenter, float inRadiusSq, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ)
{
// Splat the center and squared radius and forward to the Vec4 version above
return AABox4VsSphere(inCenter.SplatX(), inCenter.SplatY(), inCenter.SplatZ(), Vec4::sReplicate(inRadiusSq), inBoxMinX, inBoxMinY, inBoxMinZ, inBoxMaxX, inBoxMaxY, inBoxMaxZ);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,200 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/AABox.h>
JPH_NAMESPACE_BEGIN
/// Clip inPolygonToClip against the positive halfspace of plane defined by inPlaneOrigin and inPlaneNormal.
/// inPlaneNormal does not need to be normalized.
// This is a single Sutherland-Hodgman clipping step: walk the polygon edges and keep
// interior vertices plus the intersection points of edges that cross the plane.
template <class VERTEX_ARRAY>
void ClipPolyVsPlane(const VERTEX_ARRAY &inPolygonToClip, Vec3Arg inPlaneOrigin, Vec3Arg inPlaneNormal, VERTEX_ARRAY &outClippedPolygon)
{
JPH_ASSERT(inPolygonToClip.size() >= 2);
JPH_ASSERT(outClippedPolygon.empty());
// Determine state of last point
Vec3 e1 = inPolygonToClip[inPolygonToClip.size() - 1];
float prev_num = (inPlaneOrigin - e1).Dot(inPlaneNormal);
bool prev_inside = prev_num < 0.0f;
// Loop through all vertices
for (typename VERTEX_ARRAY::size_type j = 0; j < inPolygonToClip.size(); ++j)
{
// Check if second point is inside
Vec3Arg e2 = inPolygonToClip[j];
float num = (inPlaneOrigin - e2).Dot(inPlaneNormal);
bool cur_inside = num < 0.0f;
// In -> Out or Out -> In: Add point on clipping plane
if (cur_inside != prev_inside)
{
// Solve: (X - inPlaneOrigin) . inPlaneNormal = 0 and X = e1 + t * (e2 - e1) for X
Vec3 e12 = e2 - e1;
float denom = e12.Dot(inPlaneNormal);
if (denom != 0.0f)
outClippedPolygon.push_back(e1 + (prev_num / denom) * e12);
else
cur_inside = prev_inside; // Edge is parallel to plane, treat point as if it were on the same side as the last point
}
// Point inside, add it
if (cur_inside)
outClippedPolygon.push_back(e2);
// Update previous state
prev_num = num;
prev_inside = cur_inside;
e1 = e2;
}
}
/// Clip polygon versus polygon.
/// Both polygons are assumed to be in counter clockwise order.
/// @param inClippingPolygonNormal is used to create planes of all edges in inClippingPolygon against which inPolygonToClip is clipped, inClippingPolygonNormal does not need to be normalized
/// @param inClippingPolygon is the polygon which inClippedPolygon is clipped against
/// @param inPolygonToClip is the polygon that is clipped
/// @param outClippedPolygon will contain clipped polygon when function returns
template <class VERTEX_ARRAY>
void ClipPolyVsPoly(const VERTEX_ARRAY &inPolygonToClip, const VERTEX_ARRAY &inClippingPolygon, Vec3Arg inClippingPolygonNormal, VERTEX_ARRAY &outClippedPolygon)
{
JPH_ASSERT(inPolygonToClip.size() >= 2);
JPH_ASSERT(inClippingPolygon.size() >= 3);
// Two temporary buffers that intermediate results ping-pong between
VERTEX_ARRAY tmp_vertices[2];
int tmp_vertices_idx = 0;
for (typename VERTEX_ARRAY::size_type i = 0; i < inClippingPolygon.size(); ++i)
{
// Get edge to clip against
Vec3 clip_e1 = inClippingPolygon[i];
Vec3 clip_e2 = inClippingPolygon[(i + 1) % inClippingPolygon.size()];
Vec3 clip_normal = inClippingPolygonNormal.Cross(clip_e2 - clip_e1); // Pointing inward to the clipping polygon
// Get source and target polygon: the first iteration reads the input, the last writes the output
const VERTEX_ARRAY &src_polygon = (i == 0)? inPolygonToClip : tmp_vertices[tmp_vertices_idx];
tmp_vertices_idx ^= 1;
VERTEX_ARRAY &tgt_polygon = (i == inClippingPolygon.size() - 1)? outClippedPolygon : tmp_vertices[tmp_vertices_idx];
tgt_polygon.clear();
// Clip against the edge
ClipPolyVsPlane(src_polygon, clip_e1, clip_normal, tgt_polygon);
// Break out if no polygon left
if (tgt_polygon.size() < 3)
{
outClippedPolygon.clear();
break;
}
}
}
/// Clip inPolygonToClip against an edge, the edge is projected on inPolygonToClip using inClippingEdgeNormal.
/// The positive half space (the side on the edge in the direction of inClippingEdgeNormal) is cut away.
// Note: unlike ClipPolyVsPlane only the (clamped) crossing points are emitted; interior vertices are not added.
template <class VERTEX_ARRAY>
void ClipPolyVsEdge(const VERTEX_ARRAY &inPolygonToClip, Vec3Arg inEdgeVertex1, Vec3Arg inEdgeVertex2, Vec3Arg inClippingEdgeNormal, VERTEX_ARRAY &outClippedPolygon)
{
JPH_ASSERT(inPolygonToClip.size() >= 3);
JPH_ASSERT(outClippedPolygon.empty());
// Get normal that is perpendicular to the edge and the clipping edge normal
Vec3 edge = inEdgeVertex2 - inEdgeVertex1;
Vec3 edge_normal = inClippingEdgeNormal.Cross(edge);
// Project vertices of edge on inPolygonToClip
Vec3 polygon_normal = (inPolygonToClip[2] - inPolygonToClip[0]).Cross(inPolygonToClip[1] - inPolygonToClip[0]);
float polygon_normal_len_sq = polygon_normal.LengthSq();
Vec3 v1 = inEdgeVertex1 + polygon_normal.Dot(inPolygonToClip[0] - inEdgeVertex1) * polygon_normal / polygon_normal_len_sq;
Vec3 v2 = inEdgeVertex2 + polygon_normal.Dot(inPolygonToClip[0] - inEdgeVertex2) * polygon_normal / polygon_normal_len_sq;
Vec3 v12 = v2 - v1;
float v12_len_sq = v12.LengthSq();
// Determine state of last point
Vec3 e1 = inPolygonToClip[inPolygonToClip.size() - 1];
float prev_num = (inEdgeVertex1 - e1).Dot(edge_normal);
bool prev_inside = prev_num < 0.0f;
// Loop through all vertices
for (typename VERTEX_ARRAY::size_type j = 0; j < inPolygonToClip.size(); ++j)
{
// Check if second point is inside
Vec3 e2 = inPolygonToClip[j];
float num = (inEdgeVertex1 - e2).Dot(edge_normal);
bool cur_inside = num < 0.0f;
// In -> Out or Out -> In: Add point on clipping plane
if (cur_inside != prev_inside)
{
// Solve: (inEdgeVertex1 - X) . edge_normal = 0 and X = e1 + t * (e2 - e1) for X
Vec3 e12 = e2 - e1;
float denom = e12.Dot(edge_normal);
Vec3 clipped_point = denom != 0.0f? e1 + (prev_num / denom) * e12 : e1;
// Project point on line segment v1, v2 to see if it falls outside of the edge
float projection = (clipped_point - v1).Dot(v12);
if (projection < 0.0f)
outClippedPolygon.push_back(v1);
else if (projection > v12_len_sq)
outClippedPolygon.push_back(v2);
else
outClippedPolygon.push_back(clipped_point);
}
// Update previous state
prev_num = num;
prev_inside = cur_inside;
e1 = e2;
}
}
/// Clip polygon vs axis aligned box, inPolygonToClip is assumed to be in counter clockwise order.
/// Output will be stored in outClippedPolygon. Everything inside inAABox will be kept.
/// If the polygon is fully clipped away, outClippedPolygon will be empty.
template <class VERTEX_ARRAY>
void ClipPolyVsAABox(const VERTEX_ARRAY &inPolygonToClip, const AABox &inAABox, VERTEX_ARRAY &outClippedPolygon)
{
	JPH_ASSERT(inPolygonToClip.size() >= 2);

	// We clip against the 6 box planes in sequence, ping-ponging between two temporary buffers;
	// the 6th (final) clip writes directly into outClippedPolygon
	VERTEX_ARRAY tmp_vertices[2];
	int tmp_vertices_idx = 0;

	for (int coord = 0; coord < 3; ++coord)
		for (int side = 0; side < 2; ++side)
		{
			// Get plane to clip against, with the normal pointing into the box
			Vec3 origin = Vec3::sZero(), normal = Vec3::sZero();
			if (side == 0)
			{
				normal.SetComponent(coord, 1.0f);
				origin.SetComponent(coord, inAABox.mMin[coord]);
			}
			else
			{
				normal.SetComponent(coord, -1.0f);
				origin.SetComponent(coord, inAABox.mMax[coord]);
			}

			// Get source and target polygon: the first iteration reads the input polygon,
			// the last iteration (tmp_vertices_idx becomes 6) writes the output polygon
			const VERTEX_ARRAY &src_polygon = tmp_vertices_idx == 0? inPolygonToClip : tmp_vertices[tmp_vertices_idx & 1];
			tmp_vertices_idx++;
			VERTEX_ARRAY &tgt_polygon = tmp_vertices_idx == 6? outClippedPolygon : tmp_vertices[tmp_vertices_idx & 1];
			tgt_polygon.clear();

			// Clip against the plane
			ClipPolyVsPlane(src_polygon, origin, normal, tgt_polygon);

			// Break out if no polygon left
			if (tgt_polygon.size() < 3)
			{
				outClippedPolygon.clear();
				return;
			}

			// Note: the original code negated 'normal' here, but 'normal' is re-initialized
			// from Vec3::sZero() at the top of every iteration, so that was a dead store and
			// has been removed.
		}
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,498 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
// Turn off fused multiply add instruction because it makes the equations of the form a * b - c * d inaccurate below
JPH_PRECISE_MATH_ON
/// Helper utils to find the closest point to a line segment, triangle or tetrahedron
namespace ClosestPoint
{
/// Compute barycentric coordinates of closest point to origin for infinite line defined by (inA, inB)
/// Point can then be computed as inA * outU + inB * outV
/// Returns false if the points inA, inB do not form a line (are at the same point)
inline bool	GetBaryCentricCoordinates(Vec3Arg inA, Vec3Arg inB, float &outU, float &outV)
{
	Vec3 a_to_b = inB - inA;
	float a_to_b_len_sq = a_to_b.LengthSq();
	if (a_to_b_len_sq < Square(FLT_EPSILON))
	{
		// The segment is degenerate: pick whichever endpoint is nearest to the origin
		bool a_closest = inA.LengthSq() < inB.LengthSq();
		outU = a_closest? 1.0f : 0.0f;
		outV = a_closest? 0.0f : 1.0f;
		return false;
	}

	// Project the origin onto the infinite line: outV = -inA . (inB - inA) / |inB - inA|^2,
	// the weights always sum to one
	outV = -inA.Dot(a_to_b) / a_to_b_len_sq;
	outU = 1.0f - outV;
	return true;
}
/// Compute barycentric coordinates of closest point to origin for plane defined by (inA, inB, inC)
/// Point can then be computed as inA * outU + inB * outV + inC * outW
/// Returns false if the points inA, inB, inC do not form a plane (are on the same line or at the same point)
inline bool	GetBaryCentricCoordinates(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, float &outU, float &outV, float &outW)
{
	// Taken from: Real-Time Collision Detection - Christer Ericson (Section: Barycentric Coordinates)
	// With p = 0
	// Adjusted to always include the shortest edge of the triangle in the calculation to improve numerical accuracy

	// First calculate the three edges
	Vec3 v0 = inB - inA;
	Vec3 v1 = inC - inA;
	Vec3 v2 = inC - inB;

	// Make sure that the shortest edge is included in the calculation to keep the products a * b - c * d as small as possible to preserve accuracy
	float d00 = v0.LengthSq();
	float d11 = v1.LengthSq();
	float d22 = v2.LengthSq();
	if (d00 <= d22)
	{
		// Use v0 and v1 to calculate barycentric coordinates
		float d01 = v0.Dot(v1);

		// Denominator must be positive:
		// |v0|^2 * |v1|^2 - (v0 . v1)^2 = |v0|^2 * |v1|^2 * (1 - cos(angle)^2) >= 0
		float denominator = d00 * d11 - d01 * d01;
		if (denominator < 1.0e-12f)
		{
			// Degenerate triangle, return coordinates along longest edge
			// (the weight of the vertex not on that edge is set to zero)
			if (d00 > d11)
			{
				GetBaryCentricCoordinates(inA, inB, outU, outV);
				outW = 0.0f;
			}
			else
			{
				GetBaryCentricCoordinates(inA, inC, outU, outW);
				outV = 0.0f;
			}
			return false;
		}
		else
		{
			// Solve the 2x2 system for the weights of B and C; the weights always sum to one
			float a0 = inA.Dot(v0);
			float a1 = inA.Dot(v1);
			outV = (d01 * a1 - d11 * a0) / denominator;
			outW = (d01 * a0 - d00 * a1) / denominator;
			outU = 1.0f - outV - outW;
		}
	}
	else
	{
		// Use v1 and v2 to calculate barycentric coordinates
		float d12 = v1.Dot(v2);

		float denominator = d11 * d22 - d12 * d12;
		if (denominator < 1.0e-12f)
		{
			// Degenerate triangle, return coordinates along longest edge
			if (d11 > d22)
			{
				GetBaryCentricCoordinates(inA, inC, outU, outW);
				outV = 0.0f;
			}
			else
			{
				GetBaryCentricCoordinates(inB, inC, outV, outW);
				outU = 0.0f;
			}
			return false;
		}
		else
		{
			// Solve the 2x2 system for the weights of A and B; the weights always sum to one
			float c1 = inC.Dot(v1);
			float c2 = inC.Dot(v2);
			outU = (d22 * c1 - d12 * c2) / denominator;
			outV = (d11 * c2 - d12 * c1) / denominator;
			outW = 1.0f - outU - outV;
		}
	}
	return true;
}
/// Get the closest point to the origin of line (inA, inB)
/// outSet describes which features are closest: 1 = a, 2 = b, 3 = line segment ab
inline Vec3	GetClosestPointOnLine(Vec3Arg inA, Vec3Arg inB, uint32 &outSet)
{
	// Project the origin onto the infinite line through inA and inB.
	// Note that weight_a + weight_b == 1, so at most one of them can be <= 0.
	float weight_a, weight_b;
	GetBaryCentricCoordinates(inA, inB, weight_a, weight_b);

	if (weight_a <= 0.0f)
	{
		// Projection falls beyond B: vertex B is the closest feature
		outSet = 0b0010;
		return inB;
	}

	if (weight_b <= 0.0f)
	{
		// Projection falls beyond A: vertex A is the closest feature
		outSet = 0b0001;
		return inA;
	}

	// Projection falls between the endpoints: the segment interior is the closest feature
	outSet = 0b0011;
	return weight_a * inA + weight_b * inB;
}
/// Get the closest point to the origin of triangle (inA, inB, inC)
/// outSet describes which features are closest: 1 = a, 2 = b, 4 = c, 5 = line segment ac, 7 = triangle interior etc.
/// If MustIncludeC is true, the function assumes that C is part of the closest feature (vertex, edge, face) and does less work, if the assumption is not true then a closest point to the other features is returned.
template <bool MustIncludeC = false>
inline Vec3	GetClosestPointOnTriangle(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, uint32 &outSet)
{
	// Taken from: Real-Time Collision Detection - Christer Ericson (Section: Closest Point on Triangle to Point)
	// With p = 0

	// The most accurate normal is calculated by using the two shortest edges
	// See: https://box2d.org/posts/2014/01/troublesome-triangle/
	// The difference in normals is most pronounced when one edge is much smaller than the others (in which case the other 2 must have roughly the same length).
	// Therefore we can suffice by just picking the shortest from 2 edges and use that with the 3rd edge to calculate the normal.
	// We first check which of the edges is shorter and if bc is shorter than ac then we swap a with c so that a is always on the shortest edge
	UVec4 swap_ac;
	{
		Vec3 ac = inC - inA;
		Vec3 bc = inC - inB;
		swap_ac = Vec4::sLess(bc.DotV4(bc), ac.DotV4(ac));
	}
	Vec3 a = Vec3::sSelect(inA, inC, swap_ac);
	Vec3 c = Vec3::sSelect(inC, inA, swap_ac);

	// Calculate normal (of the possibly swapped triangle a, inB, c)
	Vec3 ab = inB - a;
	Vec3 ac = c - a;
	Vec3 n = ab.Cross(ac);
	float n_len_sq = n.LengthSq();

	// Check degenerate
	if (n_len_sq < 1.0e-10f) // Square(FLT_EPSILON) was too small and caused numerical problems, see test case TestCollideParallelTriangleVsCapsule
	{
		// Degenerate, fallback to vertices and edges

		// Start with vertex C being the closest
		uint32 closest_set = 0b0100;
		Vec3 closest_point = inC;
		float best_dist_sq = inC.LengthSq();

		// If the closest point must include C then A or B cannot be closest
		// Note that we test vertices first because we want to prefer a closest vertex over a closest edge (this results in an outSet with fewer bits set)
		if constexpr (!MustIncludeC)
		{
			// Try vertex A
			float a_len_sq = inA.LengthSq();
			if (a_len_sq < best_dist_sq)
			{
				closest_set = 0b0001;
				closest_point = inA;
				best_dist_sq = a_len_sq;
			}

			// Try vertex B
			float b_len_sq = inB.LengthSq();
			if (b_len_sq < best_dist_sq)
			{
				closest_set = 0b0010;
				closest_point = inB;
				best_dist_sq = b_len_sq;
			}
		}

		// Edge AC (uses the possibly swapped a and c, but the feature bits 0b0101 are
		// symmetric in a and c so no remapping through swap_ac is needed)
		float ac_len_sq = ac.LengthSq();
		if (ac_len_sq > Square(FLT_EPSILON))
		{
			float v = Clamp(-a.Dot(ac) / ac_len_sq, 0.0f, 1.0f);
			Vec3 q = a + v * ac;
			float dist_sq = q.LengthSq();
			if (dist_sq < best_dist_sq)
			{
				closest_set = 0b0101;
				closest_point = q;
				best_dist_sq = dist_sq;
			}
		}

		// Edge BC (uses the original, unswapped vertices)
		Vec3 bc = inC - inB;
		float bc_len_sq = bc.LengthSq();
		if (bc_len_sq > Square(FLT_EPSILON))
		{
			float v = Clamp(-inB.Dot(bc) / bc_len_sq, 0.0f, 1.0f);
			Vec3 q = inB + v * bc;
			float dist_sq = q.LengthSq();
			if (dist_sq < best_dist_sq)
			{
				closest_set = 0b0110;
				closest_point = q;
				best_dist_sq = dist_sq;
			}
		}

		// If the closest point must include C then AB cannot be closest
		if constexpr (!MustIncludeC)
		{
			// Edge AB (uses the original, unswapped vertices)
			ab = inB - inA;
			float ab_len_sq = ab.LengthSq();
			if (ab_len_sq > Square(FLT_EPSILON))
			{
				float v = Clamp(-inA.Dot(ab) / ab_len_sq, 0.0f, 1.0f);
				Vec3 q = inA + v * ab;
				float dist_sq = q.LengthSq();
				if (dist_sq < best_dist_sq)
				{
					closest_set = 0b0011;
					closest_point = q;
					best_dist_sq = dist_sq;
				}
			}
		}

		outSet = closest_set;
		return closest_point;
	}

	// Check if P in vertex region outside A
	Vec3 ap = -a;
	float d1 = ab.Dot(ap);
	float d2 = ac.Dot(ap);
	if (d1 <= 0.0f && d2 <= 0.0f)
	{
		// If a and c were swapped, report the feature bit in the caller's vertex numbering
		outSet = swap_ac.GetX()? 0b0100 : 0b0001;
		return a; // barycentric coordinates (1,0,0)
	}

	// Check if P in vertex region outside B
	Vec3 bp = -inB;
	float d3 = ab.Dot(bp);
	float d4 = ac.Dot(bp);
	if (d3 >= 0.0f && d4 <= d3)
	{
		// Vertex B's bit is unaffected by the a/c swap
		outSet = 0b0010;
		return inB; // barycentric coordinates (0,1,0)
	}

	// Check if P in edge region of AB, if so return projection of P onto AB
	if (d1 * d4 <= d3 * d2 && d1 >= 0.0f && d3 <= 0.0f)
	{
		float v = d1 / (d1 - d3);
		// Local edge ab corresponds to caller's edge CB when a and c were swapped
		outSet = swap_ac.GetX()? 0b0110 : 0b0011;
		return a + v * ab; // barycentric coordinates (1-v,v,0)
	}

	// Check if P in vertex region outside C
	Vec3 cp = -c;
	float d5 = ab.Dot(cp);
	float d6 = ac.Dot(cp);
	if (d6 >= 0.0f && d5 <= d6)
	{
		outSet = swap_ac.GetX()? 0b0001 : 0b0100;
		return c; // barycentric coordinates (0,0,1)
	}

	// Check if P in edge region of AC, if so return projection of P onto AC
	if (d5 * d2 <= d1 * d6 && d2 >= 0.0f && d6 <= 0.0f)
	{
		float w = d2 / (d2 - d6);
		// 0b0101 is symmetric in a and c, so no remapping through swap_ac is needed
		outSet = 0b0101;
		return a + w * ac; // barycentric coordinates (1-w,0,w)
	}

	// Check if P in edge region of BC, if so return projection of P onto BC
	float d4_d3 = d4 - d3;
	float d5_d6 = d5 - d6;
	if (d3 * d6 <= d5 * d4 && d4_d3 >= 0.0f && d5_d6 >= 0.0f)
	{
		float w = d4_d3 / (d4_d3 + d5_d6);
		// Local edge bc corresponds to caller's edge BA when a and c were swapped
		outSet = swap_ac.GetX()? 0b0011 : 0b0110;
		return inB + w * (c - inB); // barycentric coordinates (0,1-w,w)
	}

	// P inside face region.
	// Here we deviate from Christer Ericson's article to improve accuracy.
	// Determine distance between triangle and origin: distance = (centroid - origin) . normal / |normal|
	// Closest point to origin is then: distance . normal / |normal|
	// Note that this way of calculating the closest point is much more accurate than first calculating barycentric coordinates
	// and then calculating the closest point based on those coordinates.
	outSet = 0b0111;
	return n * (a + inB + c).Dot(n) / (3.0f * n_len_sq);
}
/// Check if the origin is outside the plane of triangle (inA, inB, inC). inD specifies the front side of the plane.
inline bool	OriginOutsideOfPlane(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, Vec3Arg inD)
{
	// Taken from: Real-Time Collision Detection - Christer Ericson (Section: Closest Point on Tetrahedron to Point)
	// With p = 0

	// Plane normal of triangle abc
	Vec3 plane_normal = (inB - inA).Cross(inC - inA);

	// Signed distances (scaled by the normal length) of the origin and of d to the plane through abc
	float origin_side = inA.Dot(plane_normal); // [AP AB AC]
	float d_side = (inD - inA).Dot(plane_normal); // [AD AB AC]

	// The origin and d lie on opposite sides when both expressions have the same sign.
	// Note that the minus sign in origin_side was left out relative to Christer's book, so we
	// check > 0 instead of < 0, and we compare against a small negative value to allow for
	// a little bit of slop in the calculations.
	return origin_side * d_side > -FLT_EPSILON;
}
/// Returns for each of the planes of the tetrahedron if the origin is inside it
/// Roughly equivalent to:
///	[OriginOutsideOfPlane(inA, inB, inC, inD),
///	 OriginOutsideOfPlane(inA, inC, inD, inB),
///	 OriginOutsideOfPlane(inA, inD, inB, inC),
///	 OriginOutsideOfPlane(inB, inD, inC, inA)]
inline UVec4 OriginOutsideOfTetrahedronPlanes(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, Vec3Arg inD)
{
	Vec3 ab = inB - inA;
	Vec3 ac = inC - inA;
	Vec3 ad = inD - inA;
	Vec3 bd = inD - inB;
	Vec3 bc = inC - inB;

	// Normals of the four faces (not normalized)
	Vec3 ab_cross_ac = ab.Cross(ac);
	Vec3 ac_cross_ad = ac.Cross(ad);
	Vec3 ad_cross_ab = ad.Cross(ab);
	Vec3 bd_cross_bc = bd.Cross(bc);

	// For each plane get the side on which the origin is
	float signp0 = inA.Dot(ab_cross_ac); // ABC
	float signp1 = inA.Dot(ac_cross_ad); // ACD
	float signp2 = inA.Dot(ad_cross_ab); // ADB
	float signp3 = inB.Dot(bd_cross_bc); // BDC
	Vec4 signp(signp0, signp1, signp2, signp3);

	// For each plane get the side that is outside (determined by the 4th point)
	float signd0 = ad.Dot(ab_cross_ac); // D
	float signd1 = ab.Dot(ac_cross_ad); // B
	float signd2 = ac.Dot(ad_cross_ab); // C
	float signd3 = -ab.Dot(bd_cross_bc); // A
	Vec4 signd(signd0, signd1, signd2, signd3);

	// The winding of all triangles has been chosen so that signd should have the
	// same sign for all components. If this is not the case the tetrahedron
	// is degenerate and we return that the origin is in front of all sides
	int sign_bits = signd.GetSignBits();
	switch (sign_bits)
	{
	case 0:
		// All positive, the origin is outside a plane when signp is also positive
		// (with a small epsilon of slop, matching OriginOutsideOfPlane)
		return Vec4::sGreaterOrEqual(signp, Vec4::sReplicate(-FLT_EPSILON));

	case 0xf:
		// All negative, the origin is outside a plane when signp is also negative
		return Vec4::sLessOrEqual(signp, Vec4::sReplicate(FLT_EPSILON));

	default:
		// Mixed signs, degenerate tetrahedron, treat the origin as outside all planes
		return UVec4::sReplicate(0xffffffff);
	}
}
/// Get the closest point between tetrahedron (inA, inB, inC, inD) to the origin
/// outSet specifies which feature was closest, 1 = a, 2 = b, 4 = c, 8 = d. Edges have 2 bits set, triangles 3 and if the point is in the interior 4 bits are set.
/// If MustIncludeD is true, the function assumes that D is part of the closest feature (vertex, edge, face, tetrahedron) and does less work, if the assumption is not true then a closest point to the other features is returned.
template <bool MustIncludeD = false>
inline Vec3	GetClosestPointOnTetrahedron(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, Vec3Arg inD, uint32 &outSet)
{
	// Taken from: Real-Time Collision Detection - Christer Ericson (Section: Closest Point on Tetrahedron to Point)
	// With p = 0

	// Start out assuming point inside all halfspaces, so closest to itself
	uint32 closest_set = 0b1111;
	Vec3 closest_point = Vec3::sZero();
	float best_dist_sq = FLT_MAX;

	// Determine for each of the faces of the tetrahedron if the origin is in front of the plane
	UVec4 origin_out_of_planes = OriginOutsideOfTetrahedronPlanes(inA, inB, inC, inD);

	// If point outside face abc then compute closest point on abc
	if (origin_out_of_planes.GetX()) // OriginOutsideOfPlane(inA, inB, inC, inD)
	{
		if constexpr (MustIncludeD)
		{
			// If the closest point must include D then ABC cannot be closest but the closest point
			// cannot be an interior point either so we return A as closest point
			closest_set = 0b0001;
			closest_point = inA;
		}
		else
		{
			// Test the face normally; feature bits a, b, c already match the tetrahedron's bits
			closest_point = GetClosestPointOnTriangle<false>(inA, inB, inC, closest_set);
		}
		best_dist_sq = closest_point.LengthSq();
	}

	// Repeat test for face acd
	if (origin_out_of_planes.GetY()) // OriginOutsideOfPlane(inA, inC, inD, inB)
	{
		uint32 set;
		Vec3 q = GetClosestPointOnTriangle<MustIncludeD>(inA, inC, inD, set);
		float dist_sq = q.LengthSq();
		if (dist_sq < best_dist_sq)
		{
			best_dist_sq = dist_sq;
			closest_point = q;
			// Remap the triangle's local feature bits (a, c, d) = (1, 2, 4) to tetrahedron bits (1, 4, 8)
			closest_set = (set & 0b0001) + ((set & 0b0110) << 1);
		}
	}

	// Repeat test for face adb
	if (origin_out_of_planes.GetZ()) // OriginOutsideOfPlane(inA, inD, inB, inC)
	{
		// Keep original vertex order, it doesn't matter if the triangle is facing inward or outward
		// and it improves consistency for GJK which will always add a new vertex D and keep the closest
		// feature from the previous iteration in ABC
		uint32 set;
		Vec3 q = GetClosestPointOnTriangle<MustIncludeD>(inA, inB, inD, set);
		float dist_sq = q.LengthSq();
		if (dist_sq < best_dist_sq)
		{
			best_dist_sq = dist_sq;
			closest_point = q;
			// Remap the triangle's local feature bits (a, b, d) = (1, 2, 4) to tetrahedron bits (1, 2, 8)
			closest_set = (set & 0b0011) + ((set & 0b0100) << 1);
		}
	}

	// Repeat test for face bdc
	if (origin_out_of_planes.GetW()) // OriginOutsideOfPlane(inB, inD, inC, inA)
	{
		// Keep original vertex order, it doesn't matter if the triangle is facing inward or outward
		// and it improves consistency for GJK which will always add a new vertex D and keep the closest
		// feature from the previous iteration in ABC
		uint32 set;
		Vec3 q = GetClosestPointOnTriangle<MustIncludeD>(inB, inC, inD, set);
		float dist_sq = q.LengthSq();
		if (dist_sq < best_dist_sq)
		{
			// No need to update best_dist_sq here: this is the last face tested and the value is not read again.
			closest_point = q;
			// Remap the triangle's local feature bits (b, c, d) = (1, 2, 4) to tetrahedron bits (2, 4, 8)
			closest_set = set << 1;
		}
	}

	outSet = closest_set;
	return closest_point;
}
};
JPH_PRECISE_MATH_OFF
JPH_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,276 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
//#define JPH_CONVEX_BUILDER_DEBUG
//#define JPH_CONVEX_BUILDER_DUMP_SHAPE
#ifdef JPH_CONVEX_BUILDER_DEBUG
#include <Jolt/Core/Color.h>
#endif
#include <Jolt/Core/StaticArray.h>
#include <Jolt/Core/NonCopyable.h>
JPH_NAMESPACE_BEGIN
/// A convex hull builder that tries to create hulls as accurately as possible. Used for offline processing.
class JPH_EXPORT ConvexHullBuilder : public NonCopyable
{
public:
	// Forward declare
	class Face;

	/// Class that holds the information of an edge
	class Edge : public NonCopyable
	{
	public:
		JPH_OVERRIDE_NEW_DELETE

		/// Constructor
								Edge(Face *inFace, int inStartIdx) : mFace(inFace), mStartIdx(inStartIdx) { }

		/// Get the previous edge.
		/// Note: O(n) in the number of edges of the face, it walks the circular mNextEdge list until it wraps around.
		inline Edge *			GetPreviousEdge()
		{
			Edge *prev_edge = this;
			while (prev_edge->mNextEdge != this)
				prev_edge = prev_edge->mNextEdge;
			return prev_edge;
		}

		Face *					mFace;						///< Face that this edge belongs to
		Edge *					mNextEdge = nullptr;		///< Next edge of this face (circular list)
		Edge *					mNeighbourEdge = nullptr;	///< Edge that this edge is connected to (the matching edge on the adjacent face)
		int						mStartIdx;					///< Vertex index in mPositions that indicates the start vertex of this edge
	};

	using ConflictList = Array<int>;

	/// Class that holds the information of one face
	class Face : public NonCopyable
	{
	public:
		JPH_OVERRIDE_NEW_DELETE

		/// Destructor
								~Face();

		/// Initialize a face with three indices
		void					Initialize(int inIdx0, int inIdx1, int inIdx2, const Vec3 *inPositions);

		/// Calculates the centroid and normal for this face
		void					CalculateNormalAndCentroid(const Vec3 *inPositions);

		/// Check if face inFace is facing inPosition
		inline bool				IsFacing(Vec3Arg inPosition) const
		{
			JPH_ASSERT(!mRemoved);
			return mNormal.Dot(inPosition - mCentroid) > 0.0f;
		}

		Vec3					mNormal;					///< Normal of this face, length is 2 times area of face
		Vec3					mCentroid;					///< Center of the face
		ConflictList			mConflictList;				///< Positions associated with this face (that are closest to this face). The last position in the list is the point that is furthest away from the face.
		Edge *					mFirstEdge = nullptr;		///< First edge of this face
		float					mFurthestPointDistanceSq = 0.0f; ///< Squared distance of furthest point from the conflict list to the face
		bool					mRemoved = false;			///< Flag that indicates that face has been removed (face will be freed later)
#ifdef JPH_CONVEX_BUILDER_DEBUG
		int						mIteration;					///< Iteration that this face was created
#endif
	};

	// Typedefs
	using Positions = Array<Vec3>;
	using Faces = Array<Face *>;

	/// Constructor
	explicit					ConvexHullBuilder(const Positions &inPositions);

	/// Destructor
								~ConvexHullBuilder() { FreeFaces(); }

	/// Result enum that indicates how the hull got created
	enum class EResult
	{
		Success,				///< Hull building finished successfully
		MaxVerticesReached,		///< Hull building finished successfully, but the desired accuracy was not reached because the max vertices limit was reached
		TooFewPoints,			///< Too few points to create a hull
		TooFewFaces,			///< Too few faces in the created hull (signifies precision errors during building)
		Degenerate,				///< Degenerate hull detected
	};

	/// Takes all positions as provided by the constructor and use them to build a hull
	/// Any points that are closer to the hull than inTolerance will be discarded
	/// @param inMaxVertices Max vertices to allow in the hull. Specify INT_MAX if there is no limit.
	/// @param inTolerance Max distance that a point is allowed to be outside of the hull
	/// @param outError Error message when building fails
	/// @return Status code that reports if the hull was created or not
	EResult						Initialize(int inMaxVertices, float inTolerance, const char *&outError);

	/// Returns the number of vertices that are currently used by the hull
	int							GetNumVerticesUsed() const;

	/// Returns true if the hull contains a polygon with inIndices (counter clockwise indices in mPositions)
	bool						ContainsFace(const Array<int> &inIndices) const;

	/// Calculate the center of mass and the volume of the current convex hull
	void						GetCenterOfMassAndVolume(Vec3 &outCenterOfMass, float &outVolume) const;

	/// Determines the point that is furthest outside of the hull and reports how far it is outside of the hull (which indicates a failure during hull building)
	/// @param outFaceWithMaxError The face that caused the error
	/// @param outMaxError The maximum distance of a point to the hull
	/// @param outMaxErrorPositionIdx The index of the point that had this distance
	/// @param outCoplanarDistance Points that are less than this distance from the hull are considered on the hull. This should be used as a lowerbound for the allowed error.
	void						DetermineMaxError(Face *&outFaceWithMaxError, float &outMaxError, int &outMaxErrorPositionIdx, float &outCoplanarDistance) const;

	/// Access to the created faces. Memory is owned by the convex hull builder.
	const Faces &				GetFaces() const { return mFaces; }

private:
	/// Minimal square area of a triangle (used for merging and checking if a triangle is degenerate)
	static constexpr float		cMinTriangleAreaSq = 1.0e-12f;

#ifdef JPH_CONVEX_BUILDER_DEBUG
	/// Factor to scale convex hull when debug drawing the construction process
	static constexpr Real		cDrawScale = 10;
#endif

	/// Class that holds an edge including start and end index
	class FullEdge
	{
	public:
		Edge *					mNeighbourEdge;				///< Edge that this edge is connected to
		int						mStartIdx;					///< Vertex index in mPositions that indicates the start vertex of this edge
		int						mEndIdx;					///< Vertex index in mPositions that indicates the end vertex of this edge
	};

	// Private typedefs
	using FullEdges = Array<FullEdge>;

	// Determine a suitable tolerance for detecting that points are coplanar
	float						DetermineCoplanarDistance() const;

	/// Find the face for which inPoint is furthest to the front
	/// @param inPoint Point to test
	/// @param inFaces List of faces to test
	/// @param outFace Returns the best face
	/// @param outDistSq Returns the squared distance how much inPoint is in front of the plane of the face
	void						GetFaceForPoint(Vec3Arg inPoint, const Faces &inFaces, Face *&outFace, float &outDistSq) const;

	/// @brief Calculates the distance between inPoint and inFace
	/// @param inFace Face to test
	/// @param inPoint Point to test
	/// @return If the projection of the point on the plane is interior to the face 0, otherwise the squared distance to the closest edge
	float						GetDistanceToEdgeSq(Vec3Arg inPoint, const Face *inFace) const;

	/// Assigns a position to one of the supplied faces based on which face is closest.
	/// @param inPositionIdx Index of the position to add
	/// @param inFaces List of faces to consider
	/// @param inToleranceSq Tolerance of the hull, if the point is closer to the face than this, we ignore it
	/// @return True if point was assigned, false if it was discarded or added to the coplanar list
	bool						AssignPointToFace(int inPositionIdx, const Faces &inFaces, float inToleranceSq);

	/// Add a new point to the convex hull
	void						AddPoint(Face *inFacingFace, int inIdx, float inToleranceSq, Faces &outNewFaces);

	/// Remove all faces that have been marked 'removed' from mFaces list
	void						GarbageCollectFaces();

	/// Create a new face
	Face *						CreateFace();

	/// Create a new triangle
	Face *						CreateTriangle(int inIdx1, int inIdx2, int inIdx3);

	/// Delete a face (checking that it is not connected to any other faces)
	void						FreeFace(Face *inFace);

	/// Release all faces and edges
	void						FreeFaces();

	/// Link face edge to other face edge
	static void					sLinkFace(Edge *inEdge1, Edge *inEdge2);

	/// Unlink this face from all of its neighbours
	static void					sUnlinkFace(Face *inFace);

	/// Given one face that faces inVertex, find the edges of the faces that are not facing inVertex.
	/// Will flag all those faces for removal.
	void						FindEdge(Face *inFacingFace, Vec3Arg inVertex, FullEdges &outEdges) const;

	/// Merges the two faces that share inEdge into the face inEdge->mFace
	void						MergeFaces(Edge *inEdge);

	/// Merges inFace with a neighbour if it is degenerate (a sliver)
	void						MergeDegenerateFace(Face *inFace, Faces &ioAffectedFaces);

	/// Merges any coplanar as well as neighbours that form a non-convex edge into inFace.
	/// Faces are considered coplanar if the distance^2 of the other face's centroid is smaller than inToleranceSq.
	void						MergeCoplanarOrConcaveFaces(Face *inFace, float inToleranceSq, Faces &ioAffectedFaces);

	/// Mark face as affected if it is not already in the list
	static void					sMarkAffected(Face *inFace, Faces &ioAffectedFaces);

	/// Removes all invalid edges.
	/// 1. Merges inFace with faces that share two edges with it since this means inFace or the other face cannot be convex or the edge is colinear.
	/// 2. Removes edges that are interior to inFace (that have inFace on both sides)
	/// Any faces that need to be checked for validity will be added to ioAffectedFaces.
	void						RemoveInvalidEdges(Face *inFace, Faces &ioAffectedFaces);

	/// Removes inFace if it consists of only 2 edges, linking its neighbouring faces together
	/// Any faces that need to be checked for validity will be added to ioAffectedFaces.
	/// @return True if face was removed.
	bool						RemoveTwoEdgeFace(Face *inFace, Faces &ioAffectedFaces) const;

#ifdef JPH_ENABLE_ASSERTS
	/// Dumps the text representation of a face to the TTY
	void						DumpFace(const Face *inFace) const;

	/// Dumps the text representation of all faces to the TTY
	void						DumpFaces() const;

	/// Check consistency of 1 face
	void						ValidateFace(const Face *inFace) const;

	/// Check consistency of all faces
	void						ValidateFaces() const;
#endif

#ifdef JPH_CONVEX_BUILDER_DEBUG
	/// Draw state of algorithm
	void						DrawState(bool inDrawConflictList = false) const;

	/// Draw a face for debugging purposes
	void						DrawWireFace(const Face *inFace, ColorArg inColor) const;

	/// Draw an edge for debugging purposes
	void						DrawEdge(const Edge *inEdge, ColorArg inColor) const;
#endif

#ifdef JPH_CONVEX_BUILDER_DUMP_SHAPE
	void						DumpShape() const;
#endif

	const Positions &			mPositions;					///< List of positions (some of them are part of the hull)
	Faces						mFaces;						///< List of faces that are part of the hull (if !mRemoved)

	struct Coplanar
	{
		int						mPositionIdx;				///< Index in mPositions
		float					mDistanceSq;				///< Distance to the edge of closest face (should be > 0)
	};
	using CoplanarList = Array<Coplanar>;

	CoplanarList				mCoplanarList;				///< List of positions that are coplanar to a face but outside of the face, these are added to the hull at the end

#ifdef JPH_CONVEX_BUILDER_DEBUG
	int							mIteration;					///< Number of iterations we've had so far (for debug purposes)
	mutable RVec3				mOffset;					///< Offset to use for state drawing
	Vec3						mDelta;						///< Delta offset between next states
#endif
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,335 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Geometry/ConvexHullBuilder2D.h>
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
#include <Jolt/Renderer/DebugRenderer.h>
#endif
JPH_NAMESPACE_BEGIN
void ConvexHullBuilder2D::Edge::CalculateNormalAndCenter(const Vec3 *inPositions)
{
Vec3 p1 = inPositions[mStartIdx];
Vec3 p2 = inPositions[mNextEdge->mStartIdx];
// Center of edge
mCenter = 0.5f * (p1 + p2);
// Create outward pointing normal.
// We have two choices for the normal (which satisfies normal . edge = 0):
// normal1 = (-edge.y, edge.x, 0)
// normal2 = (edge.y, -edge.x, 0)
// We want (normal x edge).z > 0 so that the normal points out of the polygon. Only normal2 satisfies this condition.
Vec3 edge = p2 - p1;
mNormal = Vec3(edge.GetY(), -edge.GetX(), 0);
}
// NOTE(review): mPositions is initialized from a reference; it appears the caller must keep
// inPositions alive for the lifetime of the builder — confirm against the class declaration.
ConvexHullBuilder2D::ConvexHullBuilder2D(const Positions &inPositions) :
	mPositions(inPositions)
{
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
	// Center the drawing of the first hull around the origin and calculate the delta offset between states
	mOffset = RVec3::sZero();
	if (mPositions.empty())
	{
		// No hull will be generated
		mDelta = Vec3::sZero();
	}
	else
	{
		// Compute the bounding box and the negated average of all positions
		Vec3 maxv = Vec3::sReplicate(-FLT_MAX), minv = Vec3::sReplicate(FLT_MAX);
		for (Vec3 v : mPositions)
		{
			minv = Vec3::sMin(minv, v);
			maxv = Vec3::sMax(maxv, v);
			mOffset -= v;
		}
		mOffset /= Real(mPositions.size());
		// Shift each successive debug state one bounding-box width (plus a margin) along X
		mDelta = Vec3((maxv - minv).GetX() + 0.5f, 0, 0);
		mOffset += mDelta; // Don't start at origin, we're already drawing the final hull there
	}
#endif
}
ConvexHullBuilder2D::~ConvexHullBuilder2D()
{
	// Release the circular edge list that makes up the hull
	FreeEdges();
}
void ConvexHullBuilder2D::FreeEdges()
{
if (mFirstEdge == nullptr)
return;
Edge *edge = mFirstEdge;
do
{
Edge *next = edge->mNextEdge;
delete edge;
edge = next;
} while (edge != mFirstEdge);
mFirstEdge = nullptr;
mNumEdges = 0;
}
#ifdef JPH_ENABLE_ASSERTS
// Debug check: asserts that the circular doubly linked edge list is consistent
// and that the number of edges matches mNumEdges.
void ConvexHullBuilder2D::ValidateEdges() const
{
	// An empty hull must have an edge count of zero
	if (mFirstEdge == nullptr)
	{
		JPH_ASSERT(mNumEdges == 0);
		return;
	}

	int count = 0;
	Edge *edge = mFirstEdge;
	do
	{
		// Validate connectivity (next/prev pointers must be each other's inverse)
		JPH_ASSERT(edge->mNextEdge->mPrevEdge == edge);
		JPH_ASSERT(edge->mPrevEdge->mNextEdge == edge);

		++count;
		edge = edge->mNextEdge;
	} while (edge != mFirstEdge);

	// Validate that count matches
	JPH_ASSERT(count == mNumEdges);
}
#endif // JPH_ENABLE_ASSERTS
// Assign position inPositionIdx to the conflict list of the edge in inEdges that it is
// furthest in front of. Points that are behind (or on) all supplied edges are discarded.
// Invariant maintained: the last entry of an edge's conflict list is its furthest point.
void ConvexHullBuilder2D::AssignPointToEdge(int inPositionIdx, const Array<Edge *> &inEdges) const
{
	Vec3 point = mPositions[inPositionIdx];

	Edge *best_edge = nullptr;
	float best_dist_sq = 0.0f;

	// Test against all edges
	for (Edge *edge : inEdges)
	{
		// Determine distance to edge (only points in front of the edge, dot > 0, are candidates)
		float dot = edge->mNormal.Dot(point - edge->mCenter);
		if (dot > 0.0f)
		{
			// mNormal is not normalized, so divide by its squared length to get the true squared distance
			float dist_sq = dot * dot / edge->mNormal.LengthSq();
			if (dist_sq > best_dist_sq)
			{
				best_edge = edge;
				best_dist_sq = dist_sq;
			}
		}
	}

	// If this point is in front of the edge, add it to the conflict list
	if (best_edge != nullptr)
	{
		if (best_dist_sq > best_edge->mFurthestPointDistanceSq)
		{
			// This point is further away than any others, update the distance and add point as last point
			best_edge->mFurthestPointDistanceSq = best_dist_sq;
			best_edge->mConflictList.push_back(inPositionIdx);
		}
		else
		{
			// Not the furthest point, add it as the before last point
			// NOTE(review): this insert assumes the conflict list is non-empty, which holds when
			// mFurthestPointDistanceSq > 0 implies a prior push_back — confirm for all call sites.
			best_edge->mConflictList.insert(best_edge->mConflictList.begin() + best_edge->mConflictList.size() - 1, inPositionIdx);
		}
	}
}
ConvexHullBuilder2D::EResult ConvexHullBuilder2D::Initialize(int inIdx1, int inIdx2, int inIdx3, int inMaxVertices, float inTolerance, Edges &outEdges)
{
	// Clear any leftovers
	FreeEdges();
	outEdges.clear();

	// Reset flag
	EResult result = EResult::Success;

	// Determine a suitable tolerance for detecting that points are colinear
	// Formula as per: Implementing Quickhull - Dirk Gregorius.
	Vec3 vmax = Vec3::sZero();
	for (Vec3 v : mPositions)
		vmax = Vec3::sMax(vmax, v.Abs());
	float colinear_tolerance_sq = Square(2.0f * FLT_EPSILON * (vmax.GetX() + vmax.GetY()));

	// Increase desired tolerance if accuracy doesn't allow it
	float tolerance_sq = max(colinear_tolerance_sq, Square(inTolerance));

	// Start with the initial indices in counter clockwise order
	// (the Z component of the 2D cross product is negative when the winding is clockwise, swapping two indices flips it)
	float z = (mPositions[inIdx2] - mPositions[inIdx1]).Cross(mPositions[inIdx3] - mPositions[inIdx1]).GetZ();
	if (z < 0.0f)
		std::swap(inIdx1, inIdx2);

	// Create and link edges into a circular doubly linked list of 3 edges
	Edge *e1 = new Edge(inIdx1);
	Edge *e2 = new Edge(inIdx2);
	Edge *e3 = new Edge(inIdx3);
	e1->mNextEdge = e2;
	e1->mPrevEdge = e3;
	e2->mNextEdge = e3;
	e2->mPrevEdge = e1;
	e3->mNextEdge = e1;
	e3->mPrevEdge = e2;
	mFirstEdge = e1;
	mNumEdges = 3;

	// Build the initial conflict lists: every point that is not part of the initial hull
	// is assigned to the edge it is furthest in front of (points behind all edges are discarded)
	Array<Edge *> edges { e1, e2, e3 };
	for (Edge *edge : edges)
		edge->CalculateNormalAndCenter(mPositions.data());
	for (int idx = 0; idx < (int)mPositions.size(); ++idx)
		if (idx != inIdx1 && idx != inIdx2 && idx != inIdx3)
			AssignPointToEdge(idx, edges);

	JPH_IF_ENABLE_ASSERTS(ValidateEdges();)
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
	DrawState();
#endif

	// Add the remaining points to the hull
	for (;;)
	{
		// Check if we've reached the max amount of vertices that are allowed
		// (in a closed 2D hull the number of edges equals the number of vertices)
		if (mNumEdges >= inMaxVertices)
		{
			result = EResult::MaxVerticesReached;
			break;
		}

		// Find the edge with the furthest point on it
		Edge *edge_with_furthest_point = nullptr;
		float furthest_dist_sq = 0.0f;
		Edge *edge = mFirstEdge;
		do
		{
			if (edge->mFurthestPointDistanceSq > furthest_dist_sq)
			{
				furthest_dist_sq = edge->mFurthestPointDistanceSq;
				edge_with_furthest_point = edge;
			}
			edge = edge->mNextEdge;
		} while (edge != mFirstEdge);

		// If there is none closer than our tolerance value, we're done
		if (edge_with_furthest_point == nullptr || furthest_dist_sq < tolerance_sq)
			break;

		// Take the furthest point (the conflict list keeps its furthest point as the last entry)
		int furthest_point_idx = edge_with_furthest_point->mConflictList.back();
		edge_with_furthest_point->mConflictList.pop_back();
		Vec3 furthest_point = mPositions[furthest_point_idx];

		// Find the horizon of edges that need to be removed: walk backwards from the starting edge
		// while the previous edge still faces the new point...
		Edge *first_edge = edge_with_furthest_point;
		do
		{
			Edge *prev = first_edge->mPrevEdge;
			if (!prev->IsFacing(furthest_point))
				break;
			first_edge = prev;
		} while (first_edge != edge_with_furthest_point);

		// ...and forwards while the next edge still faces it; [first_edge, last_edge] is the facing range
		Edge *last_edge = edge_with_furthest_point;
		do
		{
			Edge *next = last_edge->mNextEdge;
			if (!next->IsFacing(furthest_point))
				break;
			last_edge = next;
		} while (last_edge != edge_with_furthest_point);

		// Create new edges: the facing range is replaced by 2 edges that meet at the new point
		e1 = new Edge(first_edge->mStartIdx);
		e2 = new Edge(furthest_point_idx);
		e1->mNextEdge = e2;
		e1->mPrevEdge = first_edge->mPrevEdge;
		e2->mPrevEdge = e1;
		e2->mNextEdge = last_edge->mNextEdge;
		e1->mPrevEdge->mNextEdge = e1;
		e2->mNextEdge->mPrevEdge = e2;
		mFirstEdge = e1; // We could delete mFirstEdge so just update it to the newly created edge
		mNumEdges += 2;

		// Calculate normals
		Array<Edge *> new_edges { e1, e2 };
		for (Edge *new_edge : new_edges)
			new_edge->CalculateNormalAndCenter(mPositions.data());

		// Delete the old edges
		for (;;)
		{
			Edge *next = first_edge->mNextEdge;

			// Redistribute points in conflict list (points that end up behind both new edges are discarded)
			for (int idx : first_edge->mConflictList)
				AssignPointToEdge(idx, new_edges);

			// Delete the old edge
			delete first_edge;
			--mNumEdges;

			if (first_edge == last_edge)
				break;
			first_edge = next;
		}

		JPH_IF_ENABLE_ASSERTS(ValidateEdges();)
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
		DrawState();
#endif
	}

	// Convert the edge list to a list of indices
	outEdges.reserve(mNumEdges);
	Edge *edge = mFirstEdge;
	do
	{
		outEdges.push_back(edge->mStartIdx);
		edge = edge->mNextEdge;
	} while (edge != mFirstEdge);

	return result;
}
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG

void ConvexHullBuilder2D::DrawState()
{
	// Walk the circular edge list exactly once, giving each edge its own distinct color
	int next_color = 0;
	const Edge *current = mFirstEdge;
	do
	{
		const Edge *successor = current->mNextEdge;

		// Pick a unique color for this edge
		Color edge_color = Color::sGetDistinctColor(next_color);
		++next_color;

		// Visualize the edge itself and its (unnormalized) outward normal
		DebugRenderer::sInstance->DrawArrow(cDrawScale * (mOffset + mPositions[current->mStartIdx]), cDrawScale * (mOffset + mPositions[successor->mStartIdx]), edge_color, 0.1f);
		DebugRenderer::sInstance->DrawArrow(cDrawScale * (mOffset + current->mCenter), cDrawScale * (mOffset + current->mCenter) + current->mNormal.NormalizedOr(Vec3::sZero()), Color::sGreen, 0.1f);

		// Show the conflict points that belong to this edge using the edge's color
		for (int pos_idx : current->mConflictList)
			DebugRenderer::sInstance->DrawMarker(cDrawScale * (mOffset + mPositions[pos_idx]), edge_color, 0.05f);

		current = successor;
	} while (current != mFirstEdge);

	// Shift the drawing position so the next state doesn't overlap this one
	mOffset += mDelta;
}

#endif
JPH_NAMESPACE_END

View File

@@ -0,0 +1,105 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
//#define JPH_CONVEX_BUILDER_2D_DEBUG
JPH_NAMESPACE_BEGIN
/// A convex hull builder that tries to create 2D hulls as accurately as possible. Used for offline processing.
class JPH_EXPORT ConvexHullBuilder2D : public NonCopyable
{
public:
	using Positions = Array<Vec3>;
	using Edges = Array<int>;

	/// Constructor
	/// @param inPositions Positions used to make the hull. Uses X and Y component of Vec3 only!
	explicit				ConvexHullBuilder2D(const Positions &inPositions);

	/// Destructor
							~ConvexHullBuilder2D();

	/// Result enum that indicates how the hull got created
	enum class EResult
	{
		Success,			///< Hull building finished successfully
		MaxVerticesReached,	///< Hull building finished successfully, but the desired accuracy was not reached because the max vertices limit was reached
	};

	/// Takes all positions as provided by the constructor and use them to build a hull
	/// Any points that are closer to the hull than inTolerance will be discarded
	/// @param inIdx1 , inIdx2 , inIdx3 The indices to use as initial hull (in any order)
	/// @param inMaxVertices Max vertices to allow in the hull. Specify INT_MAX if there is no limit.
	/// @param inTolerance Max distance that a point is allowed to be outside of the hull
	/// @param outEdges On success this will contain the list of indices that form the hull (counter clockwise)
	/// @return Status code that reports if the hull was created or not
	EResult					Initialize(int inIdx1, int inIdx2, int inIdx3, int inMaxVertices, float inTolerance, Edges &outEdges);

private:
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
	/// Factor to scale convex hull when debug drawing the construction process
	static constexpr Real	cDrawScale = 10;
#endif

	// Forward declare
	class Edge;

	/// Frees all edges
	void					FreeEdges();

	/// Assigns a position to one of the supplied edges based on which edge is closest.
	/// Positions that are not in front of any of the supplied edges are discarded.
	/// @param inPositionIdx Index of the position to add
	/// @param inEdges List of edges to consider
	void					AssignPointToEdge(int inPositionIdx, const Array<Edge *> &inEdges) const;

#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
	/// Draw state of algorithm
	void					DrawState();
#endif

#ifdef JPH_ENABLE_ASSERTS
	/// Validate that the edge structure is intact
	void					ValidateEdges() const;
#endif

	using ConflictList = Array<int>;

	/// Linked list of edges (the edges form a circular doubly linked list that describes the hull boundary)
	class Edge
	{
	public:
		JPH_OVERRIDE_NEW_DELETE

		/// Constructor
		explicit			Edge(int inStartIdx) : mStartIdx(inStartIdx) { }

		/// Calculate the center of the edge and the edge normal
		void				CalculateNormalAndCenter(const Vec3 *inPositions);

		/// Check if this edge is facing inPosition
		inline bool			IsFacing(Vec3Arg inPosition) const		{ return mNormal.Dot(inPosition - mCenter) > 0.0f; }

		Vec3				mNormal;								///< Normal of the edge (not normalized)
		Vec3				mCenter;								///< Center of the edge
		ConflictList		mConflictList;							///< Positions associated with this edge (that are closest to this edge). Last entry is the one furthest away from the edge, remainder is unsorted.
		Edge *				mPrevEdge = nullptr;					///< Previous edge in circular list
		Edge *				mNextEdge = nullptr;					///< Next edge in circular list
		int					mStartIdx;								///< Position index of start of this edge
		float				mFurthestPointDistanceSq = 0.0f;		///< Squared distance of furthest point from the conflict list to the edge (kept in sync by AssignPointToEdge)
	};

	const Positions &		mPositions;								///< List of positions (some of them are part of the hull)
	Edge *					mFirstEdge = nullptr;					///< First edge of the hull
	int						mNumEdges = 0;							///< Number of edges in hull
#ifdef JPH_CONVEX_BUILDER_2D_DEBUG
	RVec3					mOffset;								///< Offset to use for state drawing
	Vec3					mDelta;									///< Delta offset between next states
#endif
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,188 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Math/Mat44.h>
JPH_NAMESPACE_BEGIN
/// Helper functions to get the support point for a convex object
/// Structure that transforms a convex object (supports only uniform scaling)
template <typename ConvexObject>
struct TransformedConvexObject
{
	/// Create transformed convex object.
	/// Note: only a reference to inObject is stored, the caller must keep it alive while this wrapper is in use.
	TransformedConvexObject(Mat44Arg inTransform, const ConvexObject &inObject) :
		mTransform(inTransform),
		mObject(inObject)
	{
	}

	/// Calculate the support vector for this convex shape.
	/// The direction is mapped into the local space of the wrapped object using the transposed 3x3 part of the transform
	/// (valid as the inverse direction transform because only rotation/translation and uniform scale are supported, see note above),
	/// then the local support point is transformed back with the full transform.
	Vec3 GetSupport(Vec3Arg inDirection) const
	{
		return mTransform * mObject.GetSupport(mTransform.Multiply3x3Transposed(inDirection));
	}

	/// Get the vertices of the face that faces inDirection the most
	template <class VERTEX_ARRAY>
	void GetSupportingFace(Vec3Arg inDirection, VERTEX_ARRAY &outVertices) const
	{
		// Get the face in local space, then map every vertex to world space
		mObject.GetSupportingFace(mTransform.Multiply3x3Transposed(inDirection), outVertices);

		for (Vec3 &v : outVertices)
			v = mTransform * v;
	}

	Mat44					mTransform;		///< Transform to apply to the wrapped object
	const ConvexObject &	mObject;		///< Wrapped convex object (not owned)
};
/// Structure that adds a convex radius
template <typename ConvexObject>
struct AddConvexRadius
{
	/// Constructor
	AddConvexRadius(const ConvexObject &inObject, float inRadius) :
		mObject(inObject),
		mRadius(inRadius)
	{
	}

	/// Calculate the support vector for this convex shape.
	Vec3 GetSupport(Vec3Arg inDirection) const
	{
		// Push the support point of the wrapped object outwards by mRadius along inDirection.
		// When the direction has no usable length we cannot scale it, so we return the unpadded support point.
		float dir_len = inDirection.Length();
		if (dir_len > 0.0f)
			return mObject.GetSupport(inDirection) + (mRadius / dir_len) * inDirection;
		return mObject.GetSupport(inDirection);
	}

	const ConvexObject &	mObject;	///< Wrapped convex object (not owned)
	float					mRadius;	///< Radius to add around the object
};
/// Structure that performs a Minkowski difference A - B
template <typename ConvexObjectA, typename ConvexObjectB>
struct MinkowskiDifference
{
	/// Constructor
	MinkowskiDifference(const ConvexObjectA &inObjectA, const ConvexObjectB &inObjectB) :
		mObjectA(inObjectA),
		mObjectB(inObjectB)
	{
	}

	/// Calculate the support vector for this convex shape.
	Vec3 GetSupport(Vec3Arg inDirection) const
	{
		// The support of A - B along a direction is the support of A along that direction
		// minus the support of B along the opposite direction
		Vec3 support_a = mObjectA.GetSupport(inDirection);
		Vec3 support_b = mObjectB.GetSupport(-inDirection);
		return support_a - support_b;
	}

	const ConvexObjectA &	mObjectA;	///< First object (not owned)
	const ConvexObjectB &	mObjectB;	///< Second object, subtracted from the first (not owned)
};
/// Class that wraps a point so that it can be used with convex collision detection
struct PointConvexSupport
{
	/// Calculate the support vector for this convex shape.
	/// For a single point the support point is the point itself, regardless of direction.
	Vec3 GetSupport([[maybe_unused]] Vec3Arg inDirection) const
	{
		return mPoint;
	}

	/// The point
	Vec3 mPoint;
};
/// Class that wraps a triangle so that it can used with convex collision detection
struct TriangleConvexSupport
{
	/// Constructor
	TriangleConvexSupport(Vec3Arg inV1, Vec3Arg inV2, Vec3Arg inV3) :
		mV1(inV1),
		mV2(inV2),
		mV3(inV3)
	{
	}

	/// Calculate the support vector for this convex shape.
	Vec3 GetSupport(Vec3Arg inDirection) const
	{
		// Keep the vertex that projects furthest along inDirection.
		// Scanning from the last vertex to the first with a strict compare resolves
		// ties to the later vertex, matching a nested comparison tree.
		Vec3 best_vertex = mV3;
		float best_proj = mV3.Dot(inDirection);

		float proj_v2 = mV2.Dot(inDirection);
		if (proj_v2 > best_proj)
		{
			best_proj = proj_v2;
			best_vertex = mV2;
		}

		float proj_v1 = mV1.Dot(inDirection);
		if (proj_v1 > best_proj)
			best_vertex = mV1;

		return best_vertex;
	}

	/// Get the vertices of the face that faces inDirection the most
	template <class VERTEX_ARRAY>
	void GetSupportingFace([[maybe_unused]] Vec3Arg inDirection, VERTEX_ARRAY &outVertices) const
	{
		// A triangle has only one face, so it is the supporting face for every direction
		outVertices.push_back(mV1);
		outVertices.push_back(mV2);
		outVertices.push_back(mV3);
	}

	/// The three vertices of the triangle
	Vec3 mV1;
	Vec3 mV2;
	Vec3 mV3;
};
/// Class that wraps a polygon so that it can used with convex collision detection
template <class VERTEX_ARRAY>
struct PolygonConvexSupport
{
	/// Constructor
	explicit PolygonConvexSupport(const VERTEX_ARRAY &inVertices) :
		mVertices(inVertices)
	{
	}

	/// Calculate the support vector for this convex shape.
	Vec3 GetSupport(Vec3Arg inDirection) const
	{
		// Linear scan for the vertex with the largest projection on inDirection.
		// The first vertex seeds the search; revisiting it with a strict compare is a no-op.
		Vec3 best_vertex = mVertices[0];
		float best_proj = best_vertex.Dot(inDirection);

		for (const auto &vertex : mVertices)
		{
			float proj = vertex.Dot(inDirection);
			if (proj > best_proj)
			{
				best_proj = proj;
				best_vertex = vertex;
			}
		}

		return best_vertex;
	}

	/// Get the vertices of the face that faces inDirection the most
	template <class VERTEX_ARRAY_ARG>
	void GetSupportingFace([[maybe_unused]] Vec3Arg inDirection, VERTEX_ARRAY_ARG &outVertices) const
	{
		// A polygon is flat, so the whole polygon is the supporting face for any direction
		for (const auto &vertex : mVertices)
			outVertices.push_back(vertex);
	}

	/// The vertices of the polygon (not owned)
	const VERTEX_ARRAY & mVertices;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,845 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
// Define to validate the integrity of the hull structure
//#define JPH_EPA_CONVEX_BUILDER_VALIDATE
// Define to draw the building of the hull for debugging purposes
//#define JPH_EPA_CONVEX_BUILDER_DRAW
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Core/BinaryHeap.h>
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
#include <Jolt/Renderer/DebugRenderer.h>
#include <Jolt/Core/StringTools.h>
#endif
JPH_NAMESPACE_BEGIN
/// A convex hull builder specifically made for the EPA penetration depth calculation. It trades accuracy for speed and will simply abort of the hull forms defects due to numerical precision problems.
class EPAConvexHullBuilder : public NonCopyable
{
private:
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
/// Factor to scale convex hull when debug drawing the construction process
static constexpr Real cDrawScale = 10;
#endif
public:
// Due to the Euler characteristic (https://en.wikipedia.org/wiki/Euler_characteristic) we know that Vertices - Edges + Faces = 2
// In our case we only have triangles and they are always fully connected, so each edge is shared exactly between 2 faces: Edges = Faces * 3 / 2
// Substituting: Vertices = Faces / 2 + 2 which is approximately Faces / 2.
static constexpr int cMaxTriangles = 256; ///< Max triangles in hull
static constexpr int cMaxPoints = cMaxTriangles / 2; ///< Max number of points in hull
// Constants
static constexpr int cMaxEdgeLength = 128; ///< Max number of edges in FindEdge
static constexpr float cMinTriangleArea = 1.0e-10f; ///< Minimum area of a triangle before, if smaller than this it will not be added to the priority queue
static constexpr float cBarycentricEpsilon = 1.0e-3f; ///< Epsilon value used to determine if a point is in the interior of a triangle
// Forward declare
class Triangle;
/// Class that holds the information of an edge
class Edge
{
public:
/// Information about neighbouring triangle
Triangle * mNeighbourTriangle; ///< Triangle that neighbours this triangle
int mNeighbourEdge; ///< Index in mEdge that specifies edge that this Edge is connected to
int mStartIdx; ///< Vertex index in mPositions that indicates the start vertex of this edge
};
using Edges = StaticArray<Edge, cMaxEdgeLength>;
using NewTriangles = StaticArray<Triangle *, cMaxEdgeLength>;
/// Class that holds the information of one triangle
class Triangle : public NonCopyable
{
public:
/// Constructor
inline Triangle(int inIdx0, int inIdx1, int inIdx2, const Vec3 *inPositions);
/// Check if triangle is facing inPosition
inline bool IsFacing(Vec3Arg inPosition) const
{
JPH_ASSERT(!mRemoved);
return mNormal.Dot(inPosition - mCentroid) > 0.0f;
}
/// Check if triangle is facing the origin
inline bool IsFacingOrigin() const
{
JPH_ASSERT(!mRemoved);
return mNormal.Dot(mCentroid) < 0.0f;
}
/// Get the next edge of edge inIndex
inline const Edge & GetNextEdge(int inIndex) const
{
return mEdge[(inIndex + 1) % 3];
}
Edge mEdge[3]; ///< 3 edges of this triangle
Vec3 mNormal; ///< Normal of this triangle, length is 2 times area of triangle
Vec3 mCentroid; ///< Center of the triangle
float mClosestLenSq = FLT_MAX; ///< Closest distance^2 from origin to triangle
float mLambda[2]; ///< Barycentric coordinates of closest point to origin on triangle
bool mLambdaRelativeTo0; ///< How to calculate the closest point, true: y0 + l0 * (y1 - y0) + l1 * (y2 - y0), false: y1 + l0 * (y0 - y1) + l1 * (y2 - y1)
bool mClosestPointInterior = false; ///< Flag that indicates that the closest point from this triangle to the origin is an interior point
bool mRemoved = false; ///< Flag that indicates that triangle has been removed
bool mInQueue = false; ///< Flag that indicates that this triangle was placed in the sorted heap (stays true after it is popped because the triangle is freed by the main EPA algorithm loop)
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
int mIteration; ///< Iteration that this triangle was created
#endif
};
/// Factory that creates triangles in a fixed size buffer
class TriangleFactory : public NonCopyable
{
private:
/// Struct that stores both a triangle or a next pointer in case the triangle is unused
union alignas(Triangle) Block
{
uint8 mTriangle[sizeof(Triangle)];
Block * mNextFree;
};
/// Storage for triangle data
Block mTriangles[cMaxTriangles]; ///< Storage for triangles
Block * mNextFree = nullptr; ///< List of free triangles
int mHighWatermark = 0; ///< High water mark for used triangles (if mNextFree == nullptr we can take one from here)
public:
/// Return all triangles to the free pool
void Clear()
{
mNextFree = nullptr;
mHighWatermark = 0;
}
/// Allocate a new triangle with 3 indexes
Triangle * CreateTriangle(int inIdx0, int inIdx1, int inIdx2, const Vec3 *inPositions)
{
Triangle *t;
if (mNextFree != nullptr)
{
// Entry available from the free list
t = reinterpret_cast<Triangle *>(&mNextFree->mTriangle);
mNextFree = mNextFree->mNextFree;
}
else
{
// Allocate from never used before triangle store
if (mHighWatermark >= cMaxTriangles)
return nullptr; // Buffer full
t = reinterpret_cast<Triangle *>(&mTriangles[mHighWatermark].mTriangle);
++mHighWatermark;
}
// Call constructor
new (t) Triangle(inIdx0, inIdx1, inIdx2, inPositions);
return t;
}
/// Free a triangle
void FreeTriangle(Triangle *inT)
{
// Destruct triangle
inT->~Triangle();
#ifdef JPH_DEBUG
memset(inT, 0xcd, sizeof(Triangle));
#endif
// Add triangle to the free list
Block *tu = reinterpret_cast<Block *>(inT);
tu->mNextFree = mNextFree;
mNextFree = tu;
}
};
// Typedefs
using PointsBase = StaticArray<Vec3, cMaxPoints>;
using Triangles = StaticArray<Triangle *, cMaxTriangles>;
/// Specialized points list that allows direct access to the size
class Points : public PointsBase
{
public:
size_type & GetSizeRef()
{
return mSize;
}
};
/// Specialized triangles list that keeps them sorted on closest distance to origin
class TriangleQueue : public Triangles
{
public:
/// Function to sort triangles on closest distance to origin
static bool sTriangleSorter(const Triangle *inT1, const Triangle *inT2)
{
return inT1->mClosestLenSq > inT2->mClosestLenSq;
}
/// Add triangle to the list
void push_back(Triangle *inT)
{
// Add to base
Triangles::push_back(inT);
// Mark in queue
inT->mInQueue = true;
// Resort heap
BinaryHeapPush(begin(), end(), sTriangleSorter);
}
/// Peek the next closest triangle without removing it
Triangle * PeekClosest()
{
return front();
}
/// Get next closest triangle
Triangle * PopClosest()
{
// Move closest to end
BinaryHeapPop(begin(), end(), sTriangleSorter);
// Remove last triangle
Triangle *t = back();
pop_back();
return t;
}
};
/// Constructor
explicit EPAConvexHullBuilder(const Points &inPositions) :
mPositions(inPositions)
{
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
mIteration = 0;
mOffset = RVec3::sZero();
#endif
}
/// Initialize the hull with 3 points
void Initialize(int inIdx1, int inIdx2, int inIdx3)
{
// Release triangles
mFactory.Clear();
// Create triangles (back to back)
Triangle *t1 = CreateTriangle(inIdx1, inIdx2, inIdx3);
Triangle *t2 = CreateTriangle(inIdx1, inIdx3, inIdx2);
// Link triangles edges
sLinkTriangle(t1, 0, t2, 2);
sLinkTriangle(t1, 1, t2, 1);
sLinkTriangle(t1, 2, t2, 0);
// Always add both triangles to the priority queue
mTriangleQueue.push_back(t1);
mTriangleQueue.push_back(t2);
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
// Draw current state
DrawState();
// Increment iteration counter
++mIteration;
#endif
}
/// Check if there's another triangle to process from the queue
bool HasNextTriangle() const
{
return !mTriangleQueue.empty();
}
/// Access to the next closest triangle to the origin (won't remove it from the queue).
Triangle * PeekClosestTriangleInQueue()
{
return mTriangleQueue.PeekClosest();
}
/// Access to the next closest triangle to the origin and remove it from the queue.
Triangle * PopClosestTriangleFromQueue()
{
return mTriangleQueue.PopClosest();
}
/// Find the triangle on which inPosition is the furthest to the front
/// Note this function works as long as all points added have been added with AddPoint(..., FLT_MAX).
Triangle * FindFacingTriangle(Vec3Arg inPosition, float &outBestDistSq)
{
Triangle *best = nullptr;
float best_dist_sq = 0.0f;
for (Triangle *t : mTriangleQueue)
if (!t->mRemoved)
{
float dot = t->mNormal.Dot(inPosition - t->mCentroid);
if (dot > 0.0f)
{
float dist_sq = dot * dot / t->mNormal.LengthSq();
if (dist_sq > best_dist_sq)
{
best = t;
best_dist_sq = dist_sq;
}
}
}
outBestDistSq = best_dist_sq;
return best;
}
/// Add a new point to the convex hull
bool AddPoint(Triangle *inFacingTriangle, int inIdx, float inClosestDistSq, NewTriangles &outTriangles)
{
// Get position
Vec3 pos = mPositions[inIdx];
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
// Draw new support point
DrawMarker(pos, Color::sYellow, 1.0f);
#endif
#ifdef JPH_EPA_CONVEX_BUILDER_VALIDATE
// Check if structure is intact
ValidateTriangles();
#endif
// Find edge of convex hull of triangles that are not facing the new vertex w
Edges edges;
if (!FindEdge(inFacingTriangle, pos, edges))
return false;
// Create new triangles
int num_edges = edges.size();
for (int i = 0; i < num_edges; ++i)
{
// Create new triangle
Triangle *nt = CreateTriangle(edges[i].mStartIdx, edges[(i + 1) % num_edges].mStartIdx, inIdx);
if (nt == nullptr)
return false;
outTriangles.push_back(nt);
// Check if we need to put this triangle in the priority queue
if ((nt->mClosestPointInterior && nt->mClosestLenSq < inClosestDistSq) // For the main algorithm
|| nt->mClosestLenSq < 0.0f) // For when the origin is not inside the hull yet
mTriangleQueue.push_back(nt);
}
// Link edges
for (int i = 0; i < num_edges; ++i)
{
sLinkTriangle(outTriangles[i], 0, edges[i].mNeighbourTriangle, edges[i].mNeighbourEdge);
sLinkTriangle(outTriangles[i], 1, outTriangles[(i + 1) % num_edges], 2);
}
#ifdef JPH_EPA_CONVEX_BUILDER_VALIDATE
// Check if structure is intact
ValidateTriangles();
#endif
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
// Draw state of the hull
DrawState();
// Increment iteration counter
++mIteration;
#endif
return true;
}
/// Free a triangle
void FreeTriangle(Triangle *inT)
{
#ifdef JPH_ENABLE_ASSERTS
// Make sure that this triangle is not connected
JPH_ASSERT(inT->mRemoved);
for (const Edge &e : inT->mEdge)
JPH_ASSERT(e.mNeighbourTriangle == nullptr);
#endif
#if defined(JPH_EPA_CONVEX_BUILDER_VALIDATE) || defined(JPH_EPA_CONVEX_BUILDER_DRAW)
// Remove from list of all triangles
Triangles::iterator i = std::find(mTriangles.begin(), mTriangles.end(), inT);
JPH_ASSERT(i != mTriangles.end());
mTriangles.erase(i);
#endif
mFactory.FreeTriangle(inT);
}
private:
/// Create a new triangle
Triangle * CreateTriangle(int inIdx1, int inIdx2, int inIdx3)
{
// Call provider to create triangle
Triangle *t = mFactory.CreateTriangle(inIdx1, inIdx2, inIdx3, mPositions.data());
if (t == nullptr)
return nullptr;
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
// Remember iteration counter
t->mIteration = mIteration;
#endif
#if defined(JPH_EPA_CONVEX_BUILDER_VALIDATE) || defined(JPH_EPA_CONVEX_BUILDER_DRAW)
// Add to list of triangles for debugging purposes
mTriangles.push_back(t);
#endif
return t;
}
/// Link triangle edge to other triangle edge
static void sLinkTriangle(Triangle *inT1, int inEdge1, Triangle *inT2, int inEdge2)
{
JPH_ASSERT(inEdge1 >= 0 && inEdge1 < 3);
JPH_ASSERT(inEdge2 >= 0 && inEdge2 < 3);
Edge &e1 = inT1->mEdge[inEdge1];
Edge &e2 = inT2->mEdge[inEdge2];
// Check not connected yet
JPH_ASSERT(e1.mNeighbourTriangle == nullptr);
JPH_ASSERT(e2.mNeighbourTriangle == nullptr);
// Check vertices match
JPH_ASSERT(e1.mStartIdx == inT2->GetNextEdge(inEdge2).mStartIdx);
JPH_ASSERT(e2.mStartIdx == inT1->GetNextEdge(inEdge1).mStartIdx);
// Link up
e1.mNeighbourTriangle = inT2;
e1.mNeighbourEdge = inEdge2;
e2.mNeighbourTriangle = inT1;
e2.mNeighbourEdge = inEdge1;
}
/// Unlink this triangle
void UnlinkTriangle(Triangle *inT)
{
// Unlink from neighbours
for (int i = 0; i < 3; ++i)
{
Edge &edge = inT->mEdge[i];
if (edge.mNeighbourTriangle != nullptr)
{
Edge &neighbour_edge = edge.mNeighbourTriangle->mEdge[edge.mNeighbourEdge];
// Validate that neighbour points to us
JPH_ASSERT(neighbour_edge.mNeighbourTriangle == inT);
JPH_ASSERT(neighbour_edge.mNeighbourEdge == i);
// Unlink
neighbour_edge.mNeighbourTriangle = nullptr;
edge.mNeighbourTriangle = nullptr;
}
}
// If this triangle is not in the priority queue, we can delete it now
if (!inT->mInQueue)
FreeTriangle(inT);
}
/// Given one triangle that faces inVertex, find the edges of the triangles that are not facing inVertex.
/// Will flag all those triangles for removal.
bool FindEdge(Triangle *inFacingTriangle, Vec3Arg inVertex, Edges &outEdges)
{
// Assert that we were given an empty array
JPH_ASSERT(outEdges.empty());
// Should start with a facing triangle
JPH_ASSERT(inFacingTriangle->IsFacing(inVertex));
// Flag as removed
inFacingTriangle->mRemoved = true;
// Instead of recursing, we build our own stack with the information we need
struct StackEntry
{
Triangle * mTriangle;
int mEdge;
int mIter;
};
StackEntry stack[cMaxEdgeLength];
int cur_stack_pos = 0;
// Start with the triangle / edge provided
stack[0].mTriangle = inFacingTriangle;
stack[0].mEdge = 0;
stack[0].mIter = -1; // Start with edge 0 (is incremented below before use)
// Next index that we expect to find, if we don't then there are 'islands'
int next_expected_start_idx = -1;
for (;;)
{
StackEntry &cur_entry = stack[cur_stack_pos];
// Next iteration
if (++cur_entry.mIter >= 3)
{
// This triangle needs to be removed, unlink it now
UnlinkTriangle(cur_entry.mTriangle);
// Pop from stack
if (--cur_stack_pos < 0)
break;
}
else
{
// Visit neighbour
Edge &e = cur_entry.mTriangle->mEdge[(cur_entry.mEdge + cur_entry.mIter) % 3];
Triangle *n = e.mNeighbourTriangle;
if (n != nullptr && !n->mRemoved)
{
// Check if vertex is on the front side of this triangle
if (n->IsFacing(inVertex))
{
// Vertex on front, this triangle needs to be removed
n->mRemoved = true;
// Add element to the stack of elements to visit
cur_stack_pos++;
JPH_ASSERT(cur_stack_pos < cMaxEdgeLength);
StackEntry &new_entry = stack[cur_stack_pos];
new_entry.mTriangle = n;
new_entry.mEdge = e.mNeighbourEdge;
new_entry.mIter = 0; // Is incremented before use, we don't need to test this edge again since we came from it
}
else
{
// Detect if edge doesn't connect to previous edge, if this happens we have found and 'island' which means
// the newly added point is so close to the triangles of the hull that we classified some (nearly) coplanar
// triangles as before and some behind the point. At this point we just abort adding the point because
// we've reached numerical precision.
// Note that we do not need to test if the first and last edge connect, since when there are islands
// there should be at least 2 disconnects.
if (e.mStartIdx != next_expected_start_idx && next_expected_start_idx != -1)
return false;
// Next expected index is the start index of our neighbour's edge
next_expected_start_idx = n->mEdge[e.mNeighbourEdge].mStartIdx;
// Vertex behind, keep edge
outEdges.push_back(e);
}
}
}
}
// Assert that we have a fully connected loop
JPH_ASSERT(outEdges.empty() || outEdges[0].mStartIdx == next_expected_start_idx);
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
// Draw edge of facing triangles
for (int i = 0; i < (int)outEdges.size(); ++i)
{
RVec3 edge_start = cDrawScale * (mOffset + mPositions[outEdges[i].mStartIdx]);
DebugRenderer::sInstance->DrawArrow(edge_start, cDrawScale * (mOffset + mPositions[outEdges[(i + 1) % outEdges.size()].mStartIdx]), Color::sYellow, 0.01f);
DebugRenderer::sInstance->DrawText3D(edge_start, ConvertToString(outEdges[i].mStartIdx), Color::sWhite);
}
// Draw the state with the facing triangles removed
DrawState();
#endif
// When we start with two triangles facing away from each other and adding a point that is on the plane,
// sometimes we consider the point in front of both causing both triangles to be removed resulting in an empty edge list.
// In this case we fail to add the point which will result in no collision reported (the shapes are contacting in 1 point so there's 0 penetration)
return outEdges.size() >= 3;
}
#ifdef JPH_EPA_CONVEX_BUILDER_VALIDATE
/// Check consistency of 1 triangle
void ValidateTriangle(const Triangle *inT) const
{
if (inT->mRemoved)
{
// Validate that removed triangles are not connected to anything
for (const Edge &my_edge : inT->mEdge)
JPH_ASSERT(my_edge.mNeighbourTriangle == nullptr);
}
else
{
for (int i = 0; i < 3; ++i)
{
const Edge &my_edge = inT->mEdge[i];
// Assert that we have a neighbour
const Triangle *nb = my_edge.mNeighbourTriangle;
JPH_ASSERT(nb != nullptr);
if (nb != nullptr)
{
// Assert that our neighbours edge points to us
const Edge &nb_edge = nb->mEdge[my_edge.mNeighbourEdge];
JPH_ASSERT(nb_edge.mNeighbourTriangle == inT);
JPH_ASSERT(nb_edge.mNeighbourEdge == i);
// Assert that the next edge of the neighbour points to the same vertex as this edge's vertex
const Edge &nb_next_edge = nb->GetNextEdge(my_edge.mNeighbourEdge);
JPH_ASSERT(nb_next_edge.mStartIdx == my_edge.mStartIdx);
// Assert that my next edge points to the same vertex as my neighbours vertex
const Edge &my_next_edge = inT->GetNextEdge(i);
JPH_ASSERT(my_next_edge.mStartIdx == nb_edge.mStartIdx);
}
}
}
}
/// Check consistency of all triangles
void ValidateTriangles() const
{
for (const Triangle *t : mTriangles)
ValidateTriangle(t);
}
#endif
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
public:
/// Draw state of algorithm
void DrawState()
{
// Draw origin
DebugRenderer::sInstance->DrawCoordinateSystem(RMat44::sTranslation(cDrawScale * mOffset), 1.0f);
// Draw triangles
for (const Triangle *t : mTriangles)
if (!t->mRemoved)
{
// Calculate the triangle vertices
RVec3 p1 = cDrawScale * (mOffset + mPositions[t->mEdge[0].mStartIdx]);
RVec3 p2 = cDrawScale * (mOffset + mPositions[t->mEdge[1].mStartIdx]);
RVec3 p3 = cDrawScale * (mOffset + mPositions[t->mEdge[2].mStartIdx]);
// Draw triangle
DebugRenderer::sInstance->DrawTriangle(p1, p2, p3, Color::sGetDistinctColor(t->mIteration));
DebugRenderer::sInstance->DrawWireTriangle(p1, p2, p3, Color::sGrey);
// Draw normal
RVec3 centroid = cDrawScale * (mOffset + t->mCentroid);
float len = t->mNormal.Length();
if (len > 0.0f)
DebugRenderer::sInstance->DrawArrow(centroid, centroid + t->mNormal / len, Color::sDarkGreen, 0.01f);
}
// Determine max position
float min_x = FLT_MAX;
float max_x = -FLT_MAX;
for (Vec3 p : mPositions)
{
min_x = min(min_x, p.GetX());
max_x = max(max_x, p.GetX());
}
// Offset to the right
mOffset += Vec3(max_x - min_x + 0.5f, 0.0f, 0.0f);
}
/// Draw a label to indicate the next stage in the algorithm
void DrawLabel(const string_view &inText)
{
DebugRenderer::sInstance->DrawText3D(cDrawScale * mOffset, inText, Color::sWhite, 0.1f * cDrawScale);
mOffset += Vec3(5.0f, 0.0f, 0.0f);
}
/// Draw geometry for debugging purposes
void DrawGeometry(const DebugRenderer::GeometryRef &inGeometry, ColorArg inColor)
{
RMat44 origin = RMat44::sScale(Vec3::sReplicate(cDrawScale)) * RMat44::sTranslation(mOffset);
DebugRenderer::sInstance->DrawGeometry(origin, inGeometry->mBounds.Transformed(origin), inGeometry->mBounds.GetExtent().LengthSq(), inColor, inGeometry);
mOffset += Vec3(inGeometry->mBounds.GetSize().GetX(), 0, 0);
}
/// Draw a triangle for debugging purposes
void DrawWireTriangle(const Triangle &inTriangle, ColorArg inColor)
{
RVec3 prev = cDrawScale * (mOffset + mPositions[inTriangle.mEdge[2].mStartIdx]);
for (const Edge &edge : inTriangle.mEdge)
{
RVec3 cur = cDrawScale * (mOffset + mPositions[edge.mStartIdx]);
DebugRenderer::sInstance->DrawArrow(prev, cur, inColor, 0.01f);
prev = cur;
}
}
/// Draw a marker for debugging purposes
void DrawMarker(Vec3Arg inPosition, ColorArg inColor, float inSize)
{
DebugRenderer::sInstance->DrawMarker(cDrawScale * (mOffset + inPosition), inColor, inSize);
}
/// Draw an arrow for debugging purposes
void DrawArrow(Vec3Arg inFrom, Vec3Arg inTo, ColorArg inColor, float inArrowSize)
{
DebugRenderer::sInstance->DrawArrow(cDrawScale * (mOffset + inFrom), cDrawScale * (mOffset + inTo), inColor, inArrowSize);
}
#endif

private:
	TriangleFactory	mFactory;				///< Factory to create new triangles and remove old ones
	const Points &	mPositions;				///< List of positions (some of them are part of the hull)
	TriangleQueue	mTriangleQueue;			///< List of triangles that are part of the hull that still need to be checked (if !mRemoved)

#if defined(JPH_EPA_CONVEX_BUILDER_VALIDATE) || defined(JPH_EPA_CONVEX_BUILDER_DRAW)
	Triangles		mTriangles;				///< The list of all triangles in this hull (for debug purposes)
#endif

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
	int				mIteration;				///< Number of iterations we've had so far (for debug purposes)
	RVec3			mOffset;				///< Offset to use for state drawing (shifted right after each state draw)
#endif
};
// The determinant that is calculated in the Triangle constructor is really sensitive
// to numerical round off, disable the fmadd instructions to maintain precision.
JPH_PRECISE_MATH_ON
/// Construct a triangle from 3 vertex indices into inPositions.
/// Computes the centroid, the (non-normalized) plane normal, the signed squared distance of the
/// plane to the origin and (when the triangle is not degenerate) the barycentric coordinates of
/// the point on the triangle closest to the origin.
/// Note: this function runs with fmadd disabled (JPH_PRECISE_MATH_ON around this block) because the
/// determinant below is very sensitive to numerical round off.
EPAConvexHullBuilder::Triangle::Triangle(int inIdx0, int inIdx1, int inIdx2, const Vec3 *inPositions)
{
	// Fill in indexes
	JPH_ASSERT(inIdx0 != inIdx1 && inIdx0 != inIdx2 && inIdx1 != inIdx2);
	mEdge[0].mStartIdx = inIdx0;
	mEdge[1].mStartIdx = inIdx1;
	mEdge[2].mStartIdx = inIdx2;

	// Clear links
	mEdge[0].mNeighbourTriangle = nullptr;
	mEdge[1].mNeighbourTriangle = nullptr;
	mEdge[2].mNeighbourTriangle = nullptr;

	// Get vertex positions
	Vec3 y0 = inPositions[inIdx0];
	Vec3 y1 = inPositions[inIdx1];
	Vec3 y2 = inPositions[inIdx2];

	// Calculate centroid
	mCentroid = (y0 + y1 + y2) / 3.0f;

	// Calculate edges
	Vec3 y10 = y1 - y0;
	Vec3 y20 = y2 - y0;
	Vec3 y21 = y2 - y1;

	// The most accurate normal is calculated by using the two shortest edges
	// See: https://box2d.org/posts/2014/01/troublesome-triangle/
	// The difference in normals is most pronounced when one edge is much smaller than the others (in which case the other 2 must have roughly the same length).
	// Therefore we can suffice by just picking the shortest from 2 edges and use that with the 3rd edge to calculate the normal.
	// We first check which of the edges is shorter.
	float y20_dot_y20 = y20.Dot(y20);
	float y21_dot_y21 = y21.Dot(y21);
	if (y20_dot_y20 < y21_dot_y21)
	{
		// We select the edges y10 and y20
		mNormal = y10.Cross(y20);

		// Check if triangle is degenerate
		float normal_len_sq = mNormal.LengthSq();
		if (normal_len_sq > cMinTriangleArea)
		{
			// Determine distance between triangle and origin: distance = (centroid - origin) . normal / |normal|
			// Note that this way of calculating the closest point is much more accurate than first calculating barycentric coordinates and then calculating the closest
			// point based on those coordinates. Note that we preserve the sign of the distance to check on which side the origin is.
			// abs(c_dot_n) * c_dot_n is a sign-preserving square: sign(c_dot_n) * c_dot_n^2.
			float c_dot_n = mCentroid.Dot(mNormal);
			mClosestLenSq = abs(c_dot_n) * c_dot_n / normal_len_sq;

			// Calculate closest point to origin using barycentric coordinates:
			//
			// v = y0 + l0 * (y1 - y0) + l1 * (y2 - y0)
			// v . (y1 - y0) = 0
			// v . (y2 - y0) = 0
			//
			// Written in matrix form:
			//
			// | y10.y10 y20.y10 | | l0 | = | -y0.y10 |
			// | y10.y20 y20.y20 | | l1 |   | -y0.y20 |
			//
			// (y10 = y1 - y0 etc.)
			//
			// Cramer's rule to invert matrix:
			float y10_dot_y10 = y10.LengthSq();
			float y10_dot_y20 = y10.Dot(y20);
			float determinant = y10_dot_y10 * y20_dot_y20 - y10_dot_y20 * y10_dot_y20;
			if (determinant > 0.0f) // If determinant == 0 then the system is linearly dependent and the triangle is degenerate, since y10.y10 * y20.y20 > y10.y20^2 it should also be > 0
			{
				float y0_dot_y10 = y0.Dot(y10);
				float y0_dot_y20 = y0.Dot(y20);
				float l0 = (y10_dot_y20 * y0_dot_y20 - y20_dot_y20 * y0_dot_y10) / determinant;
				float l1 = (y10_dot_y20 * y0_dot_y10 - y10_dot_y10 * y0_dot_y20) / determinant;
				mLambda[0] = l0;
				mLambda[1] = l1;
				mLambdaRelativeTo0 = true;

				// Check if closest point is interior to the triangle. For a convex hull which contains the origin each face must contain the origin, but because
				// our faces are triangles, we can have multiple coplanar triangles and only 1 will have the origin as an interior point. We want to use this triangle
				// to calculate the contact points because it gives the most accurate results, so we will only add these triangles to the priority queue.
				if (l0 > -cBarycentricEpsilon && l1 > -cBarycentricEpsilon && l0 + l1 < 1.0f + cBarycentricEpsilon)
					mClosestPointInterior = true;
			}
		}
	}
	else
	{
		// We select the edges y10 and y21
		mNormal = y10.Cross(y21);

		// Check if triangle is degenerate
		float normal_len_sq = mNormal.LengthSq();
		if (normal_len_sq > cMinTriangleArea)
		{
			// Again calculate distance between triangle and origin (sign-preserving square, see above)
			float c_dot_n = mCentroid.Dot(mNormal);
			mClosestLenSq = abs(c_dot_n) * c_dot_n / normal_len_sq;

			// Calculate closest point to origin using barycentric coordinates but this time using y1 as the reference vertex
			//
			// v = y1 + l0 * (y0 - y1) + l1 * (y2 - y1)
			// v . (y0 - y1) = 0
			// v . (y2 - y1) = 0
			//
			// Written in matrix form:
			//
			// | y10.y10  -y21.y10 | | l0 | = | y1.y10  |
			// | -y10.y21  y21.y21 | | l1 |   | -y1.y21 |
			//
			// Cramer's rule to invert matrix:
			float y10_dot_y10 = y10.LengthSq();
			float y10_dot_y21 = y10.Dot(y21);
			float determinant = y10_dot_y10 * y21_dot_y21 - y10_dot_y21 * y10_dot_y21;
			if (determinant > 0.0f)
			{
				float y1_dot_y10 = y1.Dot(y10);
				float y1_dot_y21 = y1.Dot(y21);
				float l0 = (y21_dot_y21 * y1_dot_y10 - y10_dot_y21 * y1_dot_y21) / determinant;
				float l1 = (y10_dot_y21 * y1_dot_y10 - y10_dot_y10 * y1_dot_y21) / determinant;
				mLambda[0] = l0;
				mLambda[1] = l1;
				mLambdaRelativeTo0 = false;

				// Again check if the closest point is inside the triangle
				if (l0 > -cBarycentricEpsilon && l1 > -cBarycentricEpsilon && l0 + l1 < 1.0f + cBarycentricEpsilon)
					mClosestPointInterior = true;
			}
		}
	}
}
JPH_PRECISE_MATH_OFF
JPH_NAMESPACE_END

View File

@@ -0,0 +1,559 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/StaticArray.h>
#include <Jolt/Core/Profiler.h>
#include <Jolt/Geometry/GJKClosestPoint.h>
#include <Jolt/Geometry/EPAConvexHullBuilder.h>
//#define JPH_EPA_PENETRATION_DEPTH_DEBUG
JPH_NAMESPACE_BEGIN
/// Implementation of Expanding Polytope Algorithm as described in:
///
/// Proximity Queries and Penetration Depth Computation on 3D Game Objects - Gino van den Bergen
///
/// The implementation of this algorithm does not completely follow the article, instead of splitting
/// triangles at each edge as in fig. 7 in the article, we build a convex hull (removing any triangles that
/// are facing the new point, thereby avoiding the problem of getting really oblong triangles as mentioned in
/// the article).
///
/// The algorithm roughly works like:
///
/// - Start with a simplex of the Minkowski sum (difference) of two objects that was calculated by GJK
/// - This simplex should contain the origin (or else GJK would have reported: no collision)
/// - In cases where the simplex consists of 1 - 3 points, find some extra support points (of the Minkowski sum) to get to at least 4 points
/// - Convert this into a convex hull with non-zero volume (which includes the origin)
/// - A: Calculate the closest point to the origin for all triangles of the hull and take the closest one
/// - Calculate a new support point (of the Minkowski sum) in this direction and add this point to the convex hull
/// - This will remove all faces that are facing the new point and will create new triangles to fill up the hole
/// - Loop to A until no closer point found
/// - The closest point indicates the position / direction of least penetration
class EPAPenetrationDepth
{
private:
	// Typedefs
	static constexpr int cMaxPoints = EPAConvexHullBuilder::cMaxPoints;
	static constexpr int cMaxPointsToIncludeOriginInHull = 32;
	static_assert(cMaxPointsToIncludeOriginInHull < cMaxPoints);

	using Triangle = EPAConvexHullBuilder::Triangle;
	using Points = EPAConvexHullBuilder::Points;

	/// The GJK algorithm, used to start the EPA algorithm
	GJKClosestPoint mGJK;

#ifdef JPH_ENABLE_ASSERTS
	/// Tolerance as passed to the GJK algorithm, used for asserting.
	float mGJKTolerance = 0.0f;
#endif // JPH_ENABLE_ASSERTS

	/// A list of support points for the EPA algorithm
	class SupportPoints
	{
	public:
		/// List of support points: mY[i] = mP[i] - mQ[i] (Minkowski difference),
		/// mP[i] is the support point on object A and mQ[i] the support point on object B
		Points mY;
		Vec3 mP[cMaxPoints];
		Vec3 mQ[cMaxPoints];

		/// Calculate and add new support point to the list of points.
		/// Outputs the index of the new point in outIndex and returns the new point on the Minkowski difference.
		/// NOTE(review): mY has a fixed capacity of cMaxPoints; callers appear to guard the size before calling Add — confirm.
		template <typename A, typename B>
		Vec3 Add(const A &inA, const B &inB, Vec3Arg inDirection, int &outIndex)
		{
			// Get support point of the minkowski sum A - B
			Vec3 p = inA.GetSupport(inDirection);
			Vec3 q = inB.GetSupport(-inDirection);
			Vec3 w = p - q;

			// Store new point
			outIndex = mY.size();
			mY.push_back(w);
			mP[outIndex] = p;
			mQ[outIndex] = q;

			return w;
		}
	};

public:
	/// Return code for GetPenetrationDepthStepGJK
	enum class EStatus
	{
		NotColliding,	///< Returned if the objects don't collide, in this case outPointA/outPointB are invalid
		Colliding,		///< Returned if the objects penetrate
		Indeterminate	///< Returned if the objects penetrate further than the convex radius. In this case you need to call GetPenetrationDepthStepEPA to get the actual penetration depth.
	};

	/// Calculates penetration depth between two objects, first step of two (the GJK step)
	///
	/// @param inAExcludingConvexRadius Object A without convex radius.
	/// @param inBExcludingConvexRadius Object B without convex radius.
	/// @param inConvexRadiusA Convex radius for A.
	/// @param inConvexRadiusB Convex radius for B.
	/// @param ioV Pass in previously returned value or (1, 0, 0). On return this value is changed to direction to move B out of collision along the shortest path (magnitude is meaningless).
	/// @param inTolerance Minimal distance before A and B are considered colliding.
	/// @param outPointA Position on A that has the least amount of penetration.
	/// @param outPointB Position on B that has the least amount of penetration.
	/// Use |outPointB - outPointA| to get the distance of penetration.
	template <typename AE, typename BE>
	EStatus GetPenetrationDepthStepGJK(const AE &inAExcludingConvexRadius, float inConvexRadiusA, const BE &inBExcludingConvexRadius, float inConvexRadiusB, float inTolerance, Vec3 &ioV, Vec3 &outPointA, Vec3 &outPointB)
	{
		JPH_PROFILE_FUNCTION();

		JPH_IF_ENABLE_ASSERTS(mGJKTolerance = inTolerance;)

		// Don't supply a zero ioV, we only want to get points on the hull of the Minkowsky sum and not internal points.
		//
		// Note that if the assert below triggers, it is very likely that you have a MeshShape that contains a degenerate triangle (e.g. a sliver).
		// Go up a couple of levels in the call stack to see if we're indeed testing a triangle and if it is degenerate.
		// If this is the case then fix the triangles you supply to the MeshShape.
		JPH_ASSERT(!ioV.IsNearZero());

		// Get closest points
		float combined_radius = inConvexRadiusA + inConvexRadiusB;
		float combined_radius_sq = combined_radius * combined_radius;
		float closest_points_dist_sq = mGJK.GetClosestPoints(inAExcludingConvexRadius, inBExcludingConvexRadius, inTolerance, combined_radius_sq, ioV, outPointA, outPointB);
		if (closest_points_dist_sq > combined_radius_sq)
		{
			// No collision
			return EStatus::NotColliding;
		}
		if (closest_points_dist_sq > 0.0f)
		{
			// Collision within convex radius, adjust points for convex radius
			float v_len = sqrt(closest_points_dist_sq); // GetClosestPoints function returns |ioV|^2 when return value < FLT_MAX
			outPointA += ioV * (inConvexRadiusA / v_len);
			outPointB -= ioV * (inConvexRadiusB / v_len);
			return EStatus::Colliding;
		}

		// Shapes (without their convex radius) overlap: the penetration depth cannot be determined here
		return EStatus::Indeterminate;
	}

	/// Calculates penetration depth between two objects, second step (the EPA step)
	///
	/// @param inAIncludingConvexRadius Object A with convex radius
	/// @param inBIncludingConvexRadius Object B with convex radius
	/// @param inTolerance A factor that determines the accuracy of the result. If the change of the squared distance is less than inTolerance * current_penetration_depth^2 the algorithm will terminate. Should be bigger or equal to FLT_EPSILON.
	/// @param outV Direction to move B out of collision along the shortest path (magnitude is meaningless)
	/// @param outPointA Position on A that has the least amount of penetration
	/// @param outPointB Position on B that has the least amount of penetration
	/// Use |outPointB - outPointA| to get the distance of penetration
	///
	/// @return False if the objects don't collide, in this case outPointA/outPointB are invalid.
	/// True if the objects penetrate
	template <typename AI, typename BI>
	bool GetPenetrationDepthStepEPA(const AI &inAIncludingConvexRadius, const BI &inBIncludingConvexRadius, float inTolerance, Vec3 &outV, Vec3 &outPointA, Vec3 &outPointB)
	{
		JPH_PROFILE_FUNCTION();

		// Check that the tolerance makes sense (smaller value than this will just result in needless iterations)
		JPH_ASSERT(inTolerance >= FLT_EPSILON);

		// Fetch the simplex from GJK algorithm
		SupportPoints support_points;
		mGJK.GetClosestPointsSimplex(support_points.mY.data(), support_points.mP, support_points.mQ, support_points.mY.GetSizeRef());

		// Fill up the amount of support points to 4
		switch (support_points.mY.size())
		{
		case 1:
			{
				// 1 vertex, which must be at the origin, which is useless for our purpose
				JPH_ASSERT(support_points.mY[0].IsNearZero(Square(mGJKTolerance)));
				support_points.mY.pop_back();

				// Add support points in 4 directions to form a tetrahedron around the origin
				int p1, p2, p3, p4;
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, Vec3(0, 1, 0), p1);
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, Vec3(-1, -1, -1), p2);
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, Vec3(1, -1, -1), p3);
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, Vec3(0, -1, 1), p4);
				JPH_ASSERT(p1 == 0);
				JPH_ASSERT(p2 == 1);
				JPH_ASSERT(p3 == 2);
				JPH_ASSERT(p4 == 3);
				break;
			}

		case 2:
			{
				// Two vertices, create 3 extra by taking perpendicular axis and rotating it around in 120 degree increments
				Vec3 axis = (support_points.mY[1] - support_points.mY[0]).Normalized();
				Mat44 rotation = Mat44::sRotation(axis, DegreesToRadians(120.0f));
				Vec3 dir1 = axis.GetNormalizedPerpendicular();
				Vec3 dir2 = rotation * dir1;
				Vec3 dir3 = rotation * dir2;
				int p1, p2, p3;
				// Indices 0 and 1 hold the 2 points from the GJK simplex, so the new points start at index 2
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, dir1, p1);
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, dir2, p2);
				(void)support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, dir3, p3);
				JPH_ASSERT(p1 == 2);
				JPH_ASSERT(p2 == 3);
				JPH_ASSERT(p3 == 4);
				break;
			}

		case 3:
		case 4:
			// We already have enough points
			break;
		}

		// Create hull out of the initial points
		JPH_ASSERT(support_points.mY.size() >= 3);
		EPAConvexHullBuilder hull(support_points.mY);
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
		hull.DrawLabel("Build initial hull");
#endif
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
		Trace("Init: num_points = %u", (uint)support_points.mY.size());
#endif
		hull.Initialize(0, 1, 2);
		for (typename Points::size_type i = 3; i < support_points.mY.size(); ++i)
		{
			float dist_sq;
			Triangle *t = hull.FindFacingTriangle(support_points.mY[i], dist_sq);
			if (t != nullptr)
			{
				EPAConvexHullBuilder::NewTriangles new_triangles;
				if (!hull.AddPoint(t, i, FLT_MAX, new_triangles))
				{
					// We can't recover from a failure to add a point to the hull because the old triangles have been unlinked already.
					// Assume no collision. This can happen if the shapes touch in 1 point (or plane) in which case the hull is degenerate.
					return false;
				}
			}
		}

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
		hull.DrawLabel("Complete hull");

		// Generate the hull of the Minkowski difference for visualization
		MinkowskiDifference diff(inAIncludingConvexRadius, inBIncludingConvexRadius);
		DebugRenderer::GeometryRef geometry = DebugRenderer::sInstance->CreateTriangleGeometryForConvex([&diff](Vec3Arg inDirection) { return diff.GetSupport(inDirection); });
		hull.DrawGeometry(geometry, Color::sYellow);

		hull.DrawLabel("Ensure origin in hull");
#endif

		// Loop until we are sure that the origin is inside the hull
		for (;;)
		{
			// Get the next closest triangle
			Triangle *t = hull.PeekClosestTriangleInQueue();

			// Don't process removed triangles, just free them (because they're in a heap we don't remove them earlier since we would have to rebuild the sorted heap)
			if (t->mRemoved)
			{
				hull.PopClosestTriangleFromQueue();

				// If we run out of triangles, we couldn't include the origin in the hull so there must be very little penetration and we report no collision.
				if (!hull.HasNextTriangle())
					return false;

				hull.FreeTriangle(t);
				continue;
			}

			// If the closest to the triangle is zero or positive, the origin is in the hull and we can proceed to the main algorithm
			if (t->mClosestLenSq >= 0.0f)
				break;

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
			hull.DrawLabel("Next iteration");
#endif
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
			Trace("EncapsulateOrigin: verts = (%d, %d, %d), closest_dist_sq = %g, centroid = (%g, %g, %g), normal = (%g, %g, %g)",
				t->mEdge[0].mStartIdx, t->mEdge[1].mStartIdx, t->mEdge[2].mStartIdx,
				t->mClosestLenSq,
				t->mCentroid.GetX(), t->mCentroid.GetY(), t->mCentroid.GetZ(),
				t->mNormal.GetX(), t->mNormal.GetY(), t->mNormal.GetZ());
#endif

			// Remove the triangle from the queue before we start adding new ones (which may result in a new closest triangle at the front of the queue)
			hull.PopClosestTriangleFromQueue();

			// Add a support point to get the origin inside the hull
			int new_index;
			Vec3 w = support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, t->mNormal, new_index);

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
			// Draw the point that we're adding
			hull.DrawMarker(w, Color::sRed, 1.0f);
			hull.DrawWireTriangle(*t, Color::sRed);
			hull.DrawState();
#endif

			// Add the point to the hull, if we fail we terminate and report no collision
			EPAConvexHullBuilder::NewTriangles new_triangles;
			if (!t->IsFacing(w) || !hull.AddPoint(t, new_index, FLT_MAX, new_triangles))
				return false;

			// The triangle is facing the support point "w" and can now be safely removed
			JPH_ASSERT(t->mRemoved);
			hull.FreeTriangle(t);

			// If we run out of triangles or points, we couldn't include the origin in the hull so there must be very little penetration and we report no collision.
			if (!hull.HasNextTriangle() || support_points.mY.size() >= cMaxPointsToIncludeOriginInHull)
				return false;
		}

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
		hull.DrawLabel("Main algorithm");
#endif

		// Current closest distance to origin
		float closest_dist_sq = FLT_MAX;

		// Remember last good triangle (used to extract the contact points after the loop)
		Triangle *last = nullptr;

		// If we want to flip the penetration depth
		bool flip_v_sign = false;

		// Loop until closest point found
		do
		{
			// Get closest triangle to the origin
			Triangle *t = hull.PopClosestTriangleFromQueue();

			// Don't process removed triangles, just free them (because they're in a heap we don't remove them earlier since we would have to rebuild the sorted heap)
			if (t->mRemoved)
			{
				hull.FreeTriangle(t);
				continue;
			}

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
			hull.DrawLabel("Next iteration");
#endif
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
			Trace("FindClosest: verts = (%d, %d, %d), closest_len_sq = %g, centroid = (%g, %g, %g), normal = (%g, %g, %g)",
				t->mEdge[0].mStartIdx, t->mEdge[1].mStartIdx, t->mEdge[2].mStartIdx,
				t->mClosestLenSq,
				t->mCentroid.GetX(), t->mCentroid.GetY(), t->mCentroid.GetZ(),
				t->mNormal.GetX(), t->mNormal.GetY(), t->mNormal.GetZ());
#endif

			// Check if next triangle is further away than closest point, we've found the closest point
			if (t->mClosestLenSq >= closest_dist_sq)
				break;

			// Replace last good with this triangle
			if (last != nullptr)
				hull.FreeTriangle(last);
			last = t;

			// Add support point in direction of normal of the plane
			// Note that the article uses the closest point between the origin and plane, but this always has the exact same direction as the normal (if the origin is behind the plane)
			// and this way we do less calculations and lose less precision
			int new_index;
			Vec3 w = support_points.Add(inAIncludingConvexRadius, inBIncludingConvexRadius, t->mNormal, new_index);

			// Project w onto the triangle normal
			float dot = t->mNormal.Dot(w);

			// Check if we just found a separating axis. This can happen if the shape shrunk by convex radius and then expanded by
			// convex radius is bigger than the original shape due to inaccuracies in the shrinking process.
			if (dot < 0.0f)
				return false;

			// Get the distance squared (along normal) to the support point
			float dist_sq = Square(dot) / t->mNormal.LengthSq();

#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
			Trace("FindClosest: w = (%g, %g, %g), dot = %g, dist_sq = %g",
				w.GetX(), w.GetY(), w.GetZ(),
				dot, dist_sq);
#endif
#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
			// Draw the point that we're adding
			hull.DrawMarker(w, Color::sPurple, 1.0f);
			hull.DrawWireTriangle(*t, Color::sPurple);
			hull.DrawState();
#endif

			// If the error became small enough, we've converged
			if (dist_sq - t->mClosestLenSq < t->mClosestLenSq * inTolerance)
			{
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
				Trace("Converged");
#endif // JPH_EPA_PENETRATION_DEPTH_DEBUG
				break;
			}

			// Keep track of the minimum distance
			closest_dist_sq = min(closest_dist_sq, dist_sq);

			// If the triangle thinks this point is not front facing, we've reached numerical precision and we're done
			if (!t->IsFacing(w))
			{
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
				Trace("Not facing triangle");
#endif // JPH_EPA_PENETRATION_DEPTH_DEBUG
				break;
			}

			// Add point to hull
			EPAConvexHullBuilder::NewTriangles new_triangles;
			if (!hull.AddPoint(t, new_index, closest_dist_sq, new_triangles))
			{
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
				Trace("Could not add point");
#endif // JPH_EPA_PENETRATION_DEPTH_DEBUG
				break;
			}

			// If the hull is starting to form defects then we're reaching numerical precision and we have to stop
			bool has_defect = false;
			for (const Triangle *nt : new_triangles)
				if (nt->IsFacingOrigin())
				{
					has_defect = true;
					break;
				}
			if (has_defect)
			{
#ifdef JPH_EPA_PENETRATION_DEPTH_DEBUG
				Trace("Has defect");
#endif // JPH_EPA_PENETRATION_DEPTH_DEBUG
				// When the hull has defects it is possible that the origin has been classified on the wrong side of the triangle
				// so we do an additional check to see if the penetration in the -triangle normal direction is smaller than
				// the penetration in the triangle normal direction. If so we must flip the sign of the penetration depth.
				Vec3 w2 = inAIncludingConvexRadius.GetSupport(-t->mNormal) - inBIncludingConvexRadius.GetSupport(t->mNormal);
				float dot2 = -t->mNormal.Dot(w2);
				if (dot2 < dot)
					flip_v_sign = true;
				break;
			}
		}
		while (hull.HasNextTriangle() && support_points.mY.size() < cMaxPoints);

		// Determine closest points, if last == null it means the hull was a plane so there's no penetration
		if (last == nullptr)
			return false;

#ifdef JPH_EPA_CONVEX_BUILDER_DRAW
		hull.DrawLabel("Closest found");
		hull.DrawWireTriangle(*last, Color::sWhite);
		hull.DrawArrow(last->mCentroid, last->mCentroid + last->mNormal.NormalizedOr(Vec3::sZero()), Color::sWhite, 0.1f);
		hull.DrawState();
#endif

		// Calculate penetration by getting the vector from the origin to the closest point on the triangle:
		// distance = (centroid - origin) . normal / |normal|, closest = origin + distance * normal / |normal|
		outV = (last->mCentroid.Dot(last->mNormal) / last->mNormal.LengthSq()) * last->mNormal;

		// If penetration is near zero, treat this as a non collision since we cannot find a good normal
		if (outV.IsNearZero())
			return false;

		// Check if we have to flip the sign of the penetration depth
		if (flip_v_sign)
			outV = -outV;

		// Use the barycentric coordinates for the closest point to the origin to find the contact points on A and B
		Vec3 p0 = support_points.mP[last->mEdge[0].mStartIdx];
		Vec3 p1 = support_points.mP[last->mEdge[1].mStartIdx];
		Vec3 p2 = support_points.mP[last->mEdge[2].mStartIdx];

		Vec3 q0 = support_points.mQ[last->mEdge[0].mStartIdx];
		Vec3 q1 = support_points.mQ[last->mEdge[1].mStartIdx];
		Vec3 q2 = support_points.mQ[last->mEdge[2].mStartIdx];

		if (last->mLambdaRelativeTo0)
		{
			// y0 was the reference vertex
			outPointA = p0 + last->mLambda[0] * (p1 - p0) + last->mLambda[1] * (p2 - p0);
			outPointB = q0 + last->mLambda[0] * (q1 - q0) + last->mLambda[1] * (q2 - q0);
		}
		else
		{
			// y1 was the reference vertex
			outPointA = p1 + last->mLambda[0] * (p0 - p1) + last->mLambda[1] * (p2 - p1);
			outPointB = q1 + last->mLambda[0] * (q0 - q1) + last->mLambda[1] * (q2 - q1);
		}

		return true;
	}

	/// This function combines the GJK and EPA steps and is provided as a convenience function.
	/// Note: less performant since you're providing all support functions in one go
	/// Note 2: You need to initialize ioV, see documentation at GetPenetrationDepthStepGJK!
	template <typename AE, typename AI, typename BE, typename BI>
	bool GetPenetrationDepth(const AE &inAExcludingConvexRadius, const AI &inAIncludingConvexRadius, float inConvexRadiusA, const BE &inBExcludingConvexRadius, const BI &inBIncludingConvexRadius, float inConvexRadiusB, float inCollisionToleranceSq, float inPenetrationTolerance, Vec3 &ioV, Vec3 &outPointA, Vec3 &outPointB)
	{
		// Check result of collision detection
		switch (GetPenetrationDepthStepGJK(inAExcludingConvexRadius, inConvexRadiusA, inBExcludingConvexRadius, inConvexRadiusB, inCollisionToleranceSq, ioV, outPointA, outPointB))
		{
		case EPAPenetrationDepth::EStatus::Colliding:
			return true;

		case EPAPenetrationDepth::EStatus::NotColliding:
			return false;

		case EPAPenetrationDepth::EStatus::Indeterminate:
			// Shapes overlap beyond their convex radius, run the EPA step to get the actual penetration depth
			return GetPenetrationDepthStepEPA(inAIncludingConvexRadius, inBIncludingConvexRadius, inPenetrationTolerance, ioV, outPointA, outPointB);
		}

		JPH_ASSERT(false);
		return false;
	}

	/// Test if a cast shape inA moving from inStart to lambda * inStart.GetTranslation() + inDirection where lambda e [0, ioLambda> intersects inB
	///
	/// @param inStart Start position and orientation of the convex object
	/// @param inDirection Direction of the sweep (ioLambda * inDirection determines length)
	/// @param inCollisionTolerance The minimal distance between A and B before they are considered colliding
	/// @param inPenetrationTolerance A factor that determines the accuracy of the result. If the change of the squared distance is less than inTolerance * current_penetration_depth^2 the algorithm will terminate. Should be bigger or equal to FLT_EPSILON.
	/// @param inA The convex object A, must support the GetSupport(Vec3) function.
	/// @param inB The convex object B, must support the GetSupport(Vec3) function.
	/// @param inConvexRadiusA The convex radius of A, this will be added on all sides to pad A.
	/// @param inConvexRadiusB The convex radius of B, this will be added on all sides to pad B.
	/// @param inReturnDeepestPoint If the shapes are initially intersecting this determines if the EPA algorithm will run to find the deepest point
	/// @param ioLambda The max fraction along the sweep, on output updated with the actual collision fraction.
	/// @param outPointA is the contact point on A
	/// @param outPointB is the contact point on B
	/// @param outContactNormal is either the contact normal when the objects are touching or the penetration axis when the objects are penetrating at the start of the sweep (pointing from A to B, length will not be 1)
	///
	/// @return true if the a hit was found, in which case ioLambda, outPointA, outPointB and outSurfaceNormal are updated.
	template <typename A, typename B>
	bool CastShape(Mat44Arg inStart, Vec3Arg inDirection, float inCollisionTolerance, float inPenetrationTolerance, const A &inA, const B &inB, float inConvexRadiusA, float inConvexRadiusB, bool inReturnDeepestPoint, float &ioLambda, Vec3 &outPointA, Vec3 &outPointB, Vec3 &outContactNormal)
	{
		JPH_IF_ENABLE_ASSERTS(mGJKTolerance = inCollisionTolerance;)

		// First determine if there's a collision at all
		if (!mGJK.CastShape(inStart, inDirection, inCollisionTolerance, inA, inB, inConvexRadiusA, inConvexRadiusB, ioLambda, outPointA, outPointB, outContactNormal))
			return false;

		// When our contact normal is too small, we don't have an accurate result
		bool contact_normal_invalid = outContactNormal.IsNearZero(Square(inCollisionTolerance));

		if (inReturnDeepestPoint
			&& ioLambda == 0.0f // Only when lambda = 0 we can have the bodies overlap
			&& (inConvexRadiusA + inConvexRadiusB == 0.0f // When no convex radius was provided we can never trust contact points at lambda = 0
				|| contact_normal_invalid))
		{
			// If we're initially intersecting, we need to run the EPA algorithm in order to find the deepest contact point
			AddConvexRadius add_convex_a(inA, inConvexRadiusA);
			AddConvexRadius add_convex_b(inB, inConvexRadiusB);
			TransformedConvexObject transformed_a(inStart, add_convex_a);
			if (!GetPenetrationDepthStepEPA(transformed_a, add_convex_b, inPenetrationTolerance, outContactNormal, outPointA, outPointB))
				return false;
		}
		else if (contact_normal_invalid)
		{
			// If we weren't able to calculate a contact normal, use the cast direction instead
			outContactNormal = inDirection;
		}

		return true;
	}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,77 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Math/Float2.h>
JPH_NAMESPACE_BEGIN
/// Ellipse centered around the origin
/// @see https://en.wikipedia.org/wiki/Ellipse
class Ellipse
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Construct ellipse with radius A along the X-axis and B along the Y-axis
	Ellipse(float inA, float inB) : mA(inA), mB(inB) { JPH_ASSERT(inA > 0.0f); JPH_ASSERT(inB > 0.0f); }

	/// Check if inPoint is inside the ellipse
	bool IsInside(const Float2 &inPoint) const
	{
		return Square(inPoint.x / mA) + Square(inPoint.y / mB) <= 1.0f;
	}

	/// Get the closest point on the ellipse to inPoint
	/// Assumes inPoint is outside the ellipse
	/// @see Rotation Joint Limits in Quaternion Space by Gino van den Bergen, section 10.1 in Game Engine Gems 3.
	Float2 GetClosestPoint(const Float2 &inPoint) const
	{
		float a_sq = Square(mA);
		float b_sq = Square(mB);

		// Equation of ellipse: f(x, y) = (x/a)^2 + (y/b)^2 - 1 = 0 [1]
		// Normal on surface: (df/dx, df/dy) = (2 x / a^2, 2 y / b^2)
		// Closest point (x', y') on ellipse to point (x, y): (x', y') + t (x / a^2, y / b^2) = (x, y)
		// <=> (x', y') = (a^2 x / (t + a^2), b^2 y / (t + b^2))
		// Requiring point to be on ellipse (substituting into [1]): g(t) = (a x / (t + a^2))^2 + (b y / (t + b^2))^2 - 1 = 0
		//
		// Newton Raphson iteration, starting at t = 0.
		// The iteration count is capped so the loop is guaranteed to terminate: if the iteration fails to
		// converge (e.g. when the 'inPoint is outside' precondition is violated), we return the best
		// estimate found so far instead of looping forever. For converging inputs the result is identical
		// to an uncapped iteration since we stop as soon as |g(t)| is small enough.
		float t = 0.0f;
		for (int iteration = 0; iteration < cMaxIterations; ++iteration)
		{
			// Calculate g(t)
			float t_plus_a_sq = t + a_sq;
			float t_plus_b_sq = t + b_sq;
			float gt = Square(mA * inPoint.x / t_plus_a_sq) + Square(mB * inPoint.y / t_plus_b_sq) - 1.0f;

			// Check if g(t) is close enough to zero
			if (abs(gt) < 1.0e-6f)
				break;

			// Get derivative dg/dt = g'(t) = -2 (b^2 y^2 / (t + b^2)^3 + a^2 x^2 / (t + a^2)^3)
			float gt_accent = -2.0f *
				(a_sq * Square(inPoint.x) / Cubed(t_plus_a_sq)
				+ b_sq * Square(inPoint.y) / Cubed(t_plus_b_sq));

			// Calculate t for next iteration: tn+1 = tn - g(t) / g'(t)
			t -= gt / gt_accent;
		}

		// Calculate the closest point from the final t
		return Float2(a_sq * inPoint.x / (t + a_sq), b_sq * inPoint.y / (t + b_sq));
	}

	/// Get normal at point inPoint (non-normalized vector)
	Float2 GetNormal(const Float2 &inPoint) const
	{
		// Calculated by [d/dx f(x, y), d/dy f(x, y)], where f(x, y) is the ellipse equation from above
		return Float2(inPoint.x / Square(mA), inPoint.y / Square(mB));
	}

private:
	static constexpr int cMaxIterations = 32;	///< Safety cap on the Newton Raphson iterations in GetClosestPoint

	float mA;									///< Radius along X-axis
	float mB;									///< Radius along Y-axis
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,945 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/NonCopyable.h>
#include <Jolt/Geometry/ClosestPoint.h>
#include <Jolt/Geometry/ConvexSupport.h>
//#define JPH_GJK_DEBUG
#ifdef JPH_GJK_DEBUG
#include <Jolt/Core/StringTools.h>
#include <Jolt/Renderer/DebugRenderer.h>
#endif
JPH_NAMESPACE_BEGIN
/// Convex vs convex collision detection
/// Based on: A Fast and Robust GJK Implementation for Collision Detection of Convex Objects - Gino van den Bergen
class GJKClosestPoint : public NonCopyable
{
private:
	/// Get new closest point to origin given simplex mY of mNumPoints points
	///
	/// @tparam LastPointPartOfClosestFeature If true then the last point added will be assumed to be part of the closest feature and the function will do less work.
	///
	/// @param inPrevVLenSq Length of |outV|^2 from the previous iteration, used as a maximum value when selecting a new closest point.
	/// @param outV Closest point
	/// @param outVLenSq |outV|^2
	/// @param outSet Set of points that form the new simplex closest to the origin (bit 1 = mY[0], bit 2 = mY[1], ...)
	///
	/// If LastPointPartOfClosestFeature is true then the last point added will be assumed to be part of the closest feature and the function will do less work.
	///
	/// @return True if new closest point was found.
	/// False if the function failed, in this case the output variables are not modified
	template <bool LastPointPartOfClosestFeature>
	bool GetClosest(float inPrevVLenSq, Vec3 &outV, float &outVLenSq, uint32 &outSet) const
	{
#ifdef JPH_GJK_DEBUG
		for (int i = 0; i < mNumPoints; ++i)
			Trace("y[%d] = [%s], |y[%d]| = %g", i, ConvertToString(mY[i]).c_str(), i, (double)mY[i].Length());
#endif

		uint32 set;
		Vec3 v;

		// Dispatch on simplex size: point, line segment, triangle or tetrahedron
		switch (mNumPoints)
		{
		case 1:
			// Single point
			set = 0b0001;
			v = mY[0];
			break;

		case 2:
			// Line segment
			v = ClosestPoint::GetClosestPointOnLine(mY[0], mY[1], set);
			break;

		case 3:
			// Triangle
			v = ClosestPoint::GetClosestPointOnTriangle<LastPointPartOfClosestFeature>(mY[0], mY[1], mY[2], set);
			break;

		case 4:
			// Tetrahedron
			v = ClosestPoint::GetClosestPointOnTetrahedron<LastPointPartOfClosestFeature>(mY[0], mY[1], mY[2], mY[3], set);
			break;

		default:
			JPH_ASSERT(false);
			return false;
		}

#ifdef JPH_GJK_DEBUG
		Trace("GetClosest: set = 0b%s, v = [%s], |v| = %g", NibbleToBinary(set), ConvertToString(v).c_str(), (double)v.Length());
#endif

		float v_len_sq = v.LengthSq();
		if (v_len_sq < inPrevVLenSq) // Note, comparison order important: If v_len_sq is NaN then this expression will be false so we will return false
		{
			// Return closest point
			outV = v;
			outVLenSq = v_len_sq;
			outSet = set;
			return true;
		}

		// No better match found
#ifdef JPH_GJK_DEBUG
		Trace("New closer point is further away, failed to converge");
#endif
		return false;
	}
// Get max(|Y_0|^2 .. |Y_n|^2)
float GetMaxYLengthSq() const
{
float y_len_sq = mY[0].LengthSq();
for (int i = 1; i < mNumPoints; ++i)
y_len_sq = max(y_len_sq, mY[i].LengthSq());
return y_len_sq;
}
// Remove points that are not in the set, only updates mY
void UpdatePointSetY(uint32 inSet)
{
int num_points = 0;
for (int i = 0; i < mNumPoints; ++i)
if ((inSet & (1 << i)) != 0)
{
mY[num_points] = mY[i];
++num_points;
}
mNumPoints = num_points;
}
// Remove points that are not in the set, only updates mP
void UpdatePointSetP(uint32 inSet)
{
int num_points = 0;
for (int i = 0; i < mNumPoints; ++i)
if ((inSet & (1 << i)) != 0)
{
mP[num_points] = mP[i];
++num_points;
}
mNumPoints = num_points;
}
// Remove points that are not in the set, only updates mP and mQ
void UpdatePointSetPQ(uint32 inSet)
{
int num_points = 0;
for (int i = 0; i < mNumPoints; ++i)
if ((inSet & (1 << i)) != 0)
{
mP[num_points] = mP[i];
mQ[num_points] = mQ[i];
++num_points;
}
mNumPoints = num_points;
}
// Remove points that are not in the set, updates mY, mP and mQ
void UpdatePointSetYPQ(uint32 inSet)
{
int num_points = 0;
for (int i = 0; i < mNumPoints; ++i)
if ((inSet & (1 << i)) != 0)
{
mY[num_points] = mY[i];
mP[num_points] = mP[i];
mQ[num_points] = mQ[i];
++num_points;
}
mNumPoints = num_points;
}
// Calculate closest points on A and B
void CalculatePointAAndB(Vec3 &outPointA, Vec3 &outPointB) const
{
switch (mNumPoints)
{
case 1:
outPointA = mP[0];
outPointB = mQ[0];
break;
case 2:
{
float u, v;
ClosestPoint::GetBaryCentricCoordinates(mY[0], mY[1], u, v);
outPointA = u * mP[0] + v * mP[1];
outPointB = u * mQ[0] + v * mQ[1];
}
break;
case 3:
{
float u, v, w;
ClosestPoint::GetBaryCentricCoordinates(mY[0], mY[1], mY[2], u, v, w);
outPointA = u * mP[0] + v * mP[1] + w * mP[2];
outPointB = u * mQ[0] + v * mQ[1] + w * mQ[2];
}
break;
case 4:
#ifdef JPH_DEBUG
memset(&outPointA, 0xcd, sizeof(outPointA));
memset(&outPointB, 0xcd, sizeof(outPointB));
#endif
break;
}
}
public:
/// Test if inA and inB intersect
///
/// @param inA The convex object A, must support the GetSupport(Vec3) function.
/// @param inB The convex object B, must support the GetSupport(Vec3) function.
/// @param inTolerance Minimal distance between objects when the objects are considered to be colliding
/// @param ioV is used as initial separating axis (provide a zero vector if you don't know yet)
///
/// @return True if they intersect (in which case ioV = (0, 0, 0)).
/// False if they don't intersect in which case ioV is a separating axis in the direction from A to B (magnitude is meaningless)
template <typename A, typename B>
bool Intersects(const A &inA, const B &inB, float inTolerance, Vec3 &ioV)
{
float tolerance_sq = Square(inTolerance);
// Reset state
mNumPoints = 0;
#ifdef JPH_GJK_DEBUG
for (int i = 0; i < 4; ++i)
mY[i] = Vec3::sZero();
#endif
// Previous length^2 of v
float prev_v_len_sq = FLT_MAX;
for (;;)
{
#ifdef JPH_GJK_DEBUG
Trace("v = [%s], num_points = %d", ConvertToString(ioV).c_str(), mNumPoints);
#endif
// Get support points for shape A and B in the direction of v
Vec3 p = inA.GetSupport(ioV);
Vec3 q = inB.GetSupport(-ioV);
// Get support point of the minkowski sum A - B of v
Vec3 w = p - q;
// If the support point sA-B(v) is in the opposite direction as v, then we have found a separating axis and there is no intersection
if (ioV.Dot(w) < 0.0f)
{
// Separating axis found
#ifdef JPH_GJK_DEBUG
Trace("Separating axis");
#endif
return false;
}
// Store the point for later use
mY[mNumPoints] = w;
++mNumPoints;
#ifdef JPH_GJK_DEBUG
Trace("w = [%s]", ConvertToString(w).c_str());
#endif
// Determine the new closest point
float v_len_sq; // Length^2 of v
uint32 set; // Set of points that form the new simplex
if (!GetClosest<true>(prev_v_len_sq, ioV, v_len_sq, set))
return false;
// If there are 4 points, the origin is inside the tetrahedron and we're done
if (set == 0xf)
{
#ifdef JPH_GJK_DEBUG
Trace("Full simplex");
#endif
ioV = Vec3::sZero();
return true;
}
// If v is very close to zero, we consider this a collision
if (v_len_sq <= tolerance_sq)
{
#ifdef JPH_GJK_DEBUG
Trace("Distance zero");
#endif
ioV = Vec3::sZero();
return true;
}
// If v is very small compared to the length of y, we also consider this a collision
if (v_len_sq <= FLT_EPSILON * GetMaxYLengthSq())
{
#ifdef JPH_GJK_DEBUG
Trace("Machine precision reached");
#endif
ioV = Vec3::sZero();
return true;
}
// The next separation axis to test is the negative of the closest point of the Minkowski sum to the origin
// Note: This must be done before terminating as converged since the separating axis is -v
ioV = -ioV;
// If the squared length of v is not changing enough, we've converged and there is no collision
JPH_ASSERT(prev_v_len_sq >= v_len_sq);
if (prev_v_len_sq - v_len_sq <= FLT_EPSILON * prev_v_len_sq)
{
// v is a separating axis
#ifdef JPH_GJK_DEBUG
Trace("Converged");
#endif
return false;
}
prev_v_len_sq = v_len_sq;
// Update the points of the simplex
UpdatePointSetY(set);
}
}
/// Get closest points between inA and inB
///
/// @param inA The convex object A, must support the GetSupport(Vec3) function.
/// @param inB The convex object B, must support the GetSupport(Vec3) function.
/// @param inTolerance The minimal distance between A and B before the objects are considered colliding and processing is terminated.
/// @param inMaxDistSq The maximum squared distance between A and B before the objects are considered infinitely far away and processing is terminated.
/// @param ioV Initial guess for the separating axis. Start with any non-zero vector if you don't know.
/// If return value is 0, ioV = (0, 0, 0).
/// If the return value is bigger than 0 but smaller than FLT_MAX, ioV will be the separating axis in the direction from A to B and its length the squared distance between A and B.
/// If the return value is FLT_MAX, ioV will be the separating axis in the direction from A to B and the magnitude of the vector is meaningless.
/// @param outPointA , outPointB
/// If the return value is 0 the points are invalid.
/// If the return value is bigger than 0 but smaller than FLT_MAX these will contain the closest point on A and B.
/// If the return value is FLT_MAX the points are invalid.
///
/// @return The squared distance between A and B or FLT_MAX when they are further away than inMaxDistSq.
template <typename A, typename B>
float GetClosestPoints(const A &inA, const B &inB, float inTolerance, float inMaxDistSq, Vec3 &ioV, Vec3 &outPointA, Vec3 &outPointB)
{
float tolerance_sq = Square(inTolerance);
// Reset state
mNumPoints = 0;
#ifdef JPH_GJK_DEBUG
// Generate the hull of the Minkowski difference for visualization
MinkowskiDifference diff(inA, inB);
mGeometry = DebugRenderer::sInstance->CreateTriangleGeometryForConvex([&diff](Vec3Arg inDirection) { return diff.GetSupport(inDirection); });
for (int i = 0; i < 4; ++i)
{
mY[i] = Vec3::sZero();
mP[i] = Vec3::sZero();
mQ[i] = Vec3::sZero();
}
#endif
// Length^2 of v
float v_len_sq = ioV.LengthSq();
// Previous length^2 of v
float prev_v_len_sq = FLT_MAX;
for (;;)
{
#ifdef JPH_GJK_DEBUG
Trace("v = [%s], num_points = %d", ConvertToString(ioV).c_str(), mNumPoints);
#endif
// Get support points for shape A and B in the direction of v
Vec3 p = inA.GetSupport(ioV);
Vec3 q = inB.GetSupport(-ioV);
// Get support point of the minkowski sum A - B of v
Vec3 w = p - q;
float dot = ioV.Dot(w);
#ifdef JPH_GJK_DEBUG
// Draw -ioV to show the closest point to the origin from the previous simplex
DebugRenderer::sInstance->DrawArrow(mOffset, mOffset - ioV, Color::sOrange, 0.05f);
// Draw ioV to show where we're probing next
DebugRenderer::sInstance->DrawArrow(mOffset, mOffset + ioV, Color::sCyan, 0.05f);
// Draw w, the support point
DebugRenderer::sInstance->DrawArrow(mOffset, mOffset + w, Color::sGreen, 0.05f);
DebugRenderer::sInstance->DrawMarker(mOffset + w, Color::sGreen, 1.0f);
// Draw the simplex and the Minkowski difference around it
DrawState();
#endif
// Test if we have a separation of more than inMaxDistSq, in which case we terminate early
if (dot < 0.0f && dot * dot > v_len_sq * inMaxDistSq)
{
#ifdef JPH_GJK_DEBUG
Trace("Distance bigger than max");
#endif
#ifdef JPH_DEBUG
memset(&outPointA, 0xcd, sizeof(outPointA));
memset(&outPointB, 0xcd, sizeof(outPointB));
#endif
return FLT_MAX;
}
// Store the point for later use
mY[mNumPoints] = w;
mP[mNumPoints] = p;
mQ[mNumPoints] = q;
++mNumPoints;
#ifdef JPH_GJK_DEBUG
Trace("w = [%s]", ConvertToString(w).c_str());
#endif
uint32 set;
if (!GetClosest<true>(prev_v_len_sq, ioV, v_len_sq, set))
{
--mNumPoints; // Undo add last point
break;
}
// If there are 4 points, the origin is inside the tetrahedron and we're done
if (set == 0xf)
{
#ifdef JPH_GJK_DEBUG
Trace("Full simplex");
#endif
ioV = Vec3::sZero();
v_len_sq = 0.0f;
break;
}
// Update the points of the simplex
UpdatePointSetYPQ(set);
// If v is very close to zero, we consider this a collision
if (v_len_sq <= tolerance_sq)
{
#ifdef JPH_GJK_DEBUG
Trace("Distance zero");
#endif
ioV = Vec3::sZero();
v_len_sq = 0.0f;
break;
}
// If v is very small compared to the length of y, we also consider this a collision
#ifdef JPH_GJK_DEBUG
Trace("Check v small compared to y: %g <= %g", (double)v_len_sq, (double)(FLT_EPSILON * GetMaxYLengthSq()));
#endif
if (v_len_sq <= FLT_EPSILON * GetMaxYLengthSq())
{
#ifdef JPH_GJK_DEBUG
Trace("Machine precision reached");
#endif
ioV = Vec3::sZero();
v_len_sq = 0.0f;
break;
}
// The next separation axis to test is the negative of the closest point of the Minkowski sum to the origin
// Note: This must be done before terminating as converged since the separating axis is -v
ioV = -ioV;
// If the squared length of v is not changing enough, we've converged and there is no collision
#ifdef JPH_GJK_DEBUG
Trace("Check v not changing enough: %g <= %g", (double)(prev_v_len_sq - v_len_sq), (double)(FLT_EPSILON * prev_v_len_sq));
#endif
JPH_ASSERT(prev_v_len_sq >= v_len_sq);
if (prev_v_len_sq - v_len_sq <= FLT_EPSILON * prev_v_len_sq)
{
// v is a separating axis
#ifdef JPH_GJK_DEBUG
Trace("Converged");
#endif
break;
}
prev_v_len_sq = v_len_sq;
}
// Get the closest points
CalculatePointAAndB(outPointA, outPointB);
#ifdef JPH_GJK_DEBUG
Trace("Return: v = [%s], |v| = %g", ConvertToString(ioV).c_str(), (double)ioV.Length());
// Draw -ioV to show the closest point to the origin from the previous simplex
DebugRenderer::sInstance->DrawArrow(mOffset, mOffset - ioV, Color::sOrange, 0.05f);
// Draw the closest points
DebugRenderer::sInstance->DrawMarker(mOffset + outPointA, Color::sGreen, 1.0f);
DebugRenderer::sInstance->DrawMarker(mOffset + outPointB, Color::sPurple, 1.0f);
// Draw the simplex and the Minkowski difference around it
DrawState();
#endif
JPH_ASSERT(ioV.LengthSq() == v_len_sq);
return v_len_sq;
}
/// Get the resulting simplex after the GetClosestPoints algorithm finishes.
/// If it returned a squared distance of 0, the origin will be contained in the simplex.
void GetClosestPointsSimplex(Vec3 *outY, Vec3 *outP, Vec3 *outQ, uint &outNumPoints) const
{
uint size = sizeof(Vec3) * mNumPoints;
memcpy(outY, mY, size);
memcpy(outP, mP, size);
memcpy(outQ, mQ, size);
outNumPoints = mNumPoints;
}
/// Test if a ray inRayOrigin + lambda * inRayDirection for lambda e [0, ioLambda> intersects inA
///
/// Code based upon: Ray Casting against General Convex Objects with Application to Continuous Collision Detection - Gino van den Bergen
///
/// @param inRayOrigin Origin of the ray
/// @param inRayDirection Direction of the ray (ioLambda * inDirection determines length)
/// @param inTolerance The minimal distance between the ray and A before it is considered colliding
/// @param inA A convex object that has the GetSupport(Vec3) function
/// @param ioLambda The max fraction along the ray, on output updated with the actual collision fraction.
///
/// @return true if a hit was found, ioLambda is the solution for lambda.
template <typename A>
bool CastRay(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, float inTolerance, const A &inA, float &ioLambda)
{
float tolerance_sq = Square(inTolerance);
// Reset state
mNumPoints = 0;
float lambda = 0.0f;
Vec3 x = inRayOrigin;
Vec3 v = x - inA.GetSupport(Vec3::sZero());
float v_len_sq = FLT_MAX;
bool allow_restart = false;
for (;;)
{
#ifdef JPH_GJK_DEBUG
Trace("v = [%s], num_points = %d", ConvertToString(v).c_str(), mNumPoints);
#endif
// Get new support point
Vec3 p = inA.GetSupport(v);
Vec3 w = x - p;
#ifdef JPH_GJK_DEBUG
Trace("w = [%s]", ConvertToString(w).c_str());
#endif
float v_dot_w = v.Dot(w);
#ifdef JPH_GJK_DEBUG
Trace("v . w = %g", (double)v_dot_w);
#endif
if (v_dot_w > 0.0f)
{
// If ray and normal are in the same direction, we've passed A and there's no collision
float v_dot_r = v.Dot(inRayDirection);
#ifdef JPH_GJK_DEBUG
Trace("v . r = %g", (double)v_dot_r);
#endif
if (v_dot_r >= 0.0f)
return false;
// Update the lower bound for lambda
float delta = v_dot_w / v_dot_r;
float old_lambda = lambda;
lambda -= delta;
#ifdef JPH_GJK_DEBUG
Trace("lambda = %g, delta = %g", (double)lambda, (double)delta);
#endif
// If lambda didn't change, we cannot converge any further and we assume a hit
if (old_lambda == lambda)
break;
// If lambda is bigger or equal than max, we don't have a hit
if (lambda >= ioLambda)
return false;
// Update x to new closest point on the ray
x = inRayOrigin + lambda * inRayDirection;
// We've shifted x, so reset v_len_sq so that it is not used as early out for GetClosest
v_len_sq = FLT_MAX;
// We allow rebuilding the simplex once after x changes because the simplex was built
// for another x and numerical round off builds up as you keep adding points to an
// existing simplex
allow_restart = true;
}
// Add p to set P: P = P U {p}
mP[mNumPoints] = p;
++mNumPoints;
// Calculate Y = {x} - P
for (int i = 0; i < mNumPoints; ++i)
mY[i] = x - mP[i];
// Determine the new closest point from Y to origin
uint32 set; // Set of points that form the new simplex
if (!GetClosest<false>(v_len_sq, v, v_len_sq, set))
{
#ifdef JPH_GJK_DEBUG
Trace("Failed to converge");
#endif
// Only allow 1 restart, if we still can't get a closest point
// we're so close that we return this as a hit
if (!allow_restart)
break;
// If we fail to converge, we start again with the last point as simplex
#ifdef JPH_GJK_DEBUG
Trace("Restarting");
#endif
allow_restart = false;
mP[0] = p;
mNumPoints = 1;
v = x - p;
v_len_sq = FLT_MAX;
continue;
}
else if (set == 0xf)
{
#ifdef JPH_GJK_DEBUG
Trace("Full simplex");
#endif
// We're inside the tetrahedron, we have a hit (verify that length of v is 0)
JPH_ASSERT(v_len_sq == 0.0f);
break;
}
// Update the points P to form the new simplex
// Note: We're not updating Y as Y will shift with x so we have to calculate it every iteration
UpdatePointSetP(set);
// Check if x is close enough to inA
if (v_len_sq <= tolerance_sq)
{
#ifdef JPH_GJK_DEBUG
Trace("Converged");
#endif
break;
}
}
// Store hit fraction
ioLambda = lambda;
return true;
}
/// Test if a cast shape inA moving from inStart to lambda * inStart.GetTranslation() + inDirection where lambda e [0, ioLambda> intersects inB
///
/// @param inStart Start position and orientation of the convex object
/// @param inDirection Direction of the sweep (ioLambda * inDirection determines length)
/// @param inTolerance The minimal distance between A and B before they are considered colliding
/// @param inA The convex object A, must support the GetSupport(Vec3) function.
/// @param inB The convex object B, must support the GetSupport(Vec3) function.
/// @param ioLambda The max fraction along the sweep, on output updated with the actual collision fraction.
///
/// @return true if a hit was found, ioLambda is the solution for lambda.
template <typename A, typename B>
bool CastShape(Mat44Arg inStart, Vec3Arg inDirection, float inTolerance, const A &inA, const B &inB, float &ioLambda)
{
// Transform the shape to be cast to the starting position
TransformedConvexObject transformed_a(inStart, inA);
// Calculate the minkowski difference inB - inA
// inA is moving, so we need to add the back side of inB to the front side of inA
MinkowskiDifference difference(inB, transformed_a);
// Do a raycast against the Minkowski difference
return CastRay(Vec3::sZero(), inDirection, inTolerance, difference, ioLambda);
}
/// Test if a cast shape inA moving from inStart to lambda * inStart.GetTranslation() + inDirection where lambda e [0, ioLambda> intersects inB
///
/// @param inStart Start position and orientation of the convex object
/// @param inDirection Direction of the sweep (ioLambda * inDirection determines length)
/// @param inTolerance The minimal distance between A and B before they are considered colliding
/// @param inA The convex object A, must support the GetSupport(Vec3) function.
/// @param inB The convex object B, must support the GetSupport(Vec3) function.
/// @param inConvexRadiusA The convex radius of A, this will be added on all sides to pad A.
/// @param inConvexRadiusB The convex radius of B, this will be added on all sides to pad B.
/// @param ioLambda The max fraction along the sweep, on output updated with the actual collision fraction.
/// @param outPointA is the contact point on A (if outSeparatingAxis is near zero, this may not be not the deepest point)
/// @param outPointB is the contact point on B (if outSeparatingAxis is near zero, this may not be not the deepest point)
/// @param outSeparatingAxis On return this will contain a vector that points from A to B along the smallest distance of separation.
/// The length of this vector indicates the separation of A and B without their convex radius.
/// If it is near zero, the direction may not be accurate as the bodies may overlap when lambda = 0.
///
/// @return true if a hit was found, ioLambda is the solution for lambda and outPoint and outSeparatingAxis are valid.
template <typename A, typename B>
bool CastShape(Mat44Arg inStart, Vec3Arg inDirection, float inTolerance, const A &inA, const B &inB, float inConvexRadiusA, float inConvexRadiusB, float &ioLambda, Vec3 &outPointA, Vec3 &outPointB, Vec3 &outSeparatingAxis)
{
float tolerance_sq = Square(inTolerance);
// Calculate how close A and B (without their convex radius) need to be to each other in order for us to consider this a collision
float sum_convex_radius = inConvexRadiusA + inConvexRadiusB;
// Transform the shape to be cast to the starting position
TransformedConvexObject transformed_a(inStart, inA);
// Reset state
mNumPoints = 0;
float lambda = 0.0f;
Vec3 x = Vec3::sZero(); // Since A is already transformed we can start the cast from zero
Vec3 v = -inB.GetSupport(Vec3::sZero()) + transformed_a.GetSupport(Vec3::sZero()); // See CastRay: v = x - inA.GetSupport(Vec3::sZero()) where inA is the Minkowski difference inB - transformed_a (see CastShape above) and x is zero
float v_len_sq = FLT_MAX;
bool allow_restart = false;
// Keeps track of separating axis of the previous iteration.
// Initialized at zero as we don't know if our first v is actually a separating axis.
Vec3 prev_v = Vec3::sZero();
for (;;)
{
#ifdef JPH_GJK_DEBUG
Trace("v = [%s], num_points = %d", ConvertToString(v).c_str(), mNumPoints);
#endif
// Calculate the minkowski difference inB - inA
// inA is moving, so we need to add the back side of inB to the front side of inA
// Keep the support points on A and B separate so that in the end we can calculate a contact point
Vec3 p = transformed_a.GetSupport(-v);
Vec3 q = inB.GetSupport(v);
Vec3 w = x - (q - p);
#ifdef JPH_GJK_DEBUG
Trace("w = [%s]", ConvertToString(w).c_str());
#endif
// Difference from article to this code:
// We did not include the convex radius in p and q in order to be able to calculate a good separating axis at the end of the algorithm.
// However when moving forward along inDirection we do need to take this into account so that we keep A and B separated by the sum of their convex radii.
// From p we have to subtract: inConvexRadiusA * v / |v|
// To q we have to add: inConvexRadiusB * v / |v|
// This means that to w we have to add: -(inConvexRadiusA + inConvexRadiusB) * v / |v|
// So to v . w we have to add: v . (-(inConvexRadiusA + inConvexRadiusB) * v / |v|) = -(inConvexRadiusA + inConvexRadiusB) * |v|
float v_dot_w = v.Dot(w) - sum_convex_radius * v.Length();
#ifdef JPH_GJK_DEBUG
Trace("v . w = %g", (double)v_dot_w);
#endif
if (v_dot_w > 0.0f)
{
// If ray and normal are in the same direction, we've passed A and there's no collision
float v_dot_r = v.Dot(inDirection);
#ifdef JPH_GJK_DEBUG
Trace("v . r = %g", (double)v_dot_r);
#endif
if (v_dot_r >= 0.0f)
return false;
// Update the lower bound for lambda
float delta = v_dot_w / v_dot_r;
float old_lambda = lambda;
lambda -= delta;
#ifdef JPH_GJK_DEBUG
Trace("lambda = %g, delta = %g", (double)lambda, (double)delta);
#endif
// If lambda didn't change, we cannot converge any further and we assume a hit
if (old_lambda == lambda)
break;
// If lambda is bigger or equal than max, we don't have a hit
if (lambda >= ioLambda)
return false;
// Update x to new closest point on the ray
x = lambda * inDirection;
// We've shifted x, so reset v_len_sq so that it is not used as early out when GetClosest returns false
v_len_sq = FLT_MAX;
// Now that we've moved, we know that A and B are not intersecting at lambda = 0, so we can update our tolerance to stop iterating
// as soon as A and B are inConvexRadiusA + inConvexRadiusB apart
tolerance_sq = Square(inTolerance + sum_convex_radius);
// We allow rebuilding the simplex once after x changes because the simplex was built
// for another x and numerical round off builds up as you keep adding points to an
// existing simplex
allow_restart = true;
}
// Add p to set P, q to set Q: P = P U {p}, Q = Q U {q}
mP[mNumPoints] = p;
mQ[mNumPoints] = q;
++mNumPoints;
// Calculate Y = {x} - (Q - P)
for (int i = 0; i < mNumPoints; ++i)
mY[i] = x - (mQ[i] - mP[i]);
// Determine the new closest point from Y to origin
uint32 set; // Set of points that form the new simplex
if (!GetClosest<false>(v_len_sq, v, v_len_sq, set))
{
#ifdef JPH_GJK_DEBUG
Trace("Failed to converge");
#endif
// Only allow 1 restart, if we still can't get a closest point
// we're so close that we return this as a hit
if (!allow_restart)
break;
// If we fail to converge, we start again with the last point as simplex
#ifdef JPH_GJK_DEBUG
Trace("Restarting");
#endif
allow_restart = false;
mP[0] = p;
mQ[0] = q;
mNumPoints = 1;
v = x - q;
v_len_sq = FLT_MAX;
continue;
}
else if (set == 0xf)
{
#ifdef JPH_GJK_DEBUG
Trace("Full simplex");
#endif
// We're inside the tetrahedron, we have a hit (verify that length of v is 0)
JPH_ASSERT(v_len_sq == 0.0f);
break;
}
// Update the points P and Q to form the new simplex
// Note: We're not updating Y as Y will shift with x so we have to calculate it every iteration
UpdatePointSetPQ(set);
// Check if A and B are touching according to our tolerance
if (v_len_sq <= tolerance_sq)
{
#ifdef JPH_GJK_DEBUG
Trace("Converged");
#endif
break;
}
// Store our v to return as separating axis
prev_v = v;
}
// Calculate Y = {x} - (Q - P) again so we can calculate the contact points
for (int i = 0; i < mNumPoints; ++i)
mY[i] = x - (mQ[i] - mP[i]);
// Calculate the offset we need to apply to A and B to correct for the convex radius
Vec3 normalized_v = v.NormalizedOr(Vec3::sZero());
Vec3 convex_radius_a = inConvexRadiusA * normalized_v;
Vec3 convex_radius_b = inConvexRadiusB * normalized_v;
// Get the contact point
// Note that A and B will coincide when lambda > 0. In this case we calculate only B as it is more accurate as it contains less terms.
switch (mNumPoints)
{
case 1:
outPointB = mQ[0] + convex_radius_b;
outPointA = lambda > 0.0f? outPointB : mP[0] - convex_radius_a;
break;
case 2:
{
float bu, bv;
ClosestPoint::GetBaryCentricCoordinates(mY[0], mY[1], bu, bv);
outPointB = bu * mQ[0] + bv * mQ[1] + convex_radius_b;
outPointA = lambda > 0.0f? outPointB : bu * mP[0] + bv * mP[1] - convex_radius_a;
}
break;
case 3:
case 4: // A full simplex, we can't properly determine a contact point! As contact point we take the closest point of the previous iteration.
{
float bu, bv, bw;
ClosestPoint::GetBaryCentricCoordinates(mY[0], mY[1], mY[2], bu, bv, bw);
outPointB = bu * mQ[0] + bv * mQ[1] + bw * mQ[2] + convex_radius_b;
outPointA = lambda > 0.0f? outPointB : bu * mP[0] + bv * mP[1] + bw * mP[2] - convex_radius_a;
}
break;
}
// Store separating axis, in case we have a convex radius we can just return v,
// otherwise v will be very small and we resort to returning previous v as an approximation.
outSeparatingAxis = sum_convex_radius > 0.0f? -v : -prev_v;
// Store hit fraction
ioLambda = lambda;
return true;
}
private:
#ifdef JPH_GJK_DEBUG
/// Draw state of algorithm
void DrawState()
{
RMat44 origin = RMat44::sTranslation(mOffset);
// Draw origin
DebugRenderer::sInstance->DrawCoordinateSystem(origin, 1.0f);
// Draw the hull
DebugRenderer::sInstance->DrawGeometry(origin, mGeometry->mBounds.Transformed(origin), mGeometry->mBounds.GetExtent().LengthSq(), Color::sYellow, mGeometry);
// Draw Y
for (int i = 0; i < mNumPoints; ++i)
{
// Draw support point
RVec3 y_i = origin * mY[i];
DebugRenderer::sInstance->DrawMarker(y_i, Color::sRed, 1.0f);
for (int j = i + 1; j < mNumPoints; ++j)
{
// Draw edge
RVec3 y_j = origin * mY[j];
DebugRenderer::sInstance->DrawLine(y_i, y_j, Color::sRed);
for (int k = j + 1; k < mNumPoints; ++k)
{
// Make sure triangle faces the origin
RVec3 y_k = origin * mY[k];
RVec3 center = (y_i + y_j + y_k) / Real(3);
RVec3 normal = (y_j - y_i).Cross(y_k - y_i);
if (normal.Dot(center) < Real(0))
DebugRenderer::sInstance->DrawTriangle(y_i, y_j, y_k, Color::sLightGrey);
else
DebugRenderer::sInstance->DrawTriangle(y_i, y_k, y_j, Color::sLightGrey);
}
}
}
// Offset to the right
mOffset += Vec3(mGeometry->mBounds.GetSize().GetX() + 2.0f, 0, 0);
}
#endif // JPH_GJK_DEBUG
Vec3 mY[4]; ///< Support points on A - B
Vec3 mP[4]; ///< Support point on A
Vec3 mQ[4]; ///< Support point on B
int mNumPoints = 0; ///< Number of points in mY, mP and mQ that are valid
#ifdef JPH_GJK_DEBUG
DebugRenderer::GeometryRef mGeometry; ///< A visualization of the minkowski difference for state drawing
RVec3 mOffset = RVec3::sZero(); ///< Offset to use for state drawing
#endif
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,130 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Core/HashCombine.h>
JPH_NAMESPACE_BEGIN
/// Triangle with 32-bit indices
class IndexedTriangleNoMaterial
{
public:
JPH_OVERRIDE_NEW_DELETE
/// Constructor
IndexedTriangleNoMaterial() = default;
constexpr IndexedTriangleNoMaterial(uint32 inI1, uint32 inI2, uint32 inI3) : mIdx { inI1, inI2, inI3 } { }
/// Check if two triangles are identical
bool operator == (const IndexedTriangleNoMaterial &inRHS) const
{
return mIdx[0] == inRHS.mIdx[0] && mIdx[1] == inRHS.mIdx[1] && mIdx[2] == inRHS.mIdx[2];
}
/// Check if two triangles are equivalent (using the same vertices)
bool IsEquivalent(const IndexedTriangleNoMaterial &inRHS) const
{
return (mIdx[0] == inRHS.mIdx[0] && mIdx[1] == inRHS.mIdx[1] && mIdx[2] == inRHS.mIdx[2])
|| (mIdx[0] == inRHS.mIdx[1] && mIdx[1] == inRHS.mIdx[2] && mIdx[2] == inRHS.mIdx[0])
|| (mIdx[0] == inRHS.mIdx[2] && mIdx[1] == inRHS.mIdx[0] && mIdx[2] == inRHS.mIdx[1]);
}
/// Check if two triangles are opposite (using the same vertices but in opposing order)
bool IsOpposite(const IndexedTriangleNoMaterial &inRHS) const
{
return (mIdx[0] == inRHS.mIdx[0] && mIdx[1] == inRHS.mIdx[2] && mIdx[2] == inRHS.mIdx[1])
|| (mIdx[0] == inRHS.mIdx[1] && mIdx[1] == inRHS.mIdx[0] && mIdx[2] == inRHS.mIdx[2])
|| (mIdx[0] == inRHS.mIdx[2] && mIdx[1] == inRHS.mIdx[1] && mIdx[2] == inRHS.mIdx[0]);
}
/// Check if triangle is degenerate
bool IsDegenerate(const VertexList &inVertices) const
{
Vec3 v0(inVertices[mIdx[0]]);
Vec3 v1(inVertices[mIdx[1]]);
Vec3 v2(inVertices[mIdx[2]]);
return (v1 - v0).Cross(v2 - v0).IsNearZero();
}
/// Rotate the vertices so that the second vertex becomes first etc. This does not change the represented triangle.
void Rotate()
{
uint32 tmp = mIdx[0];
mIdx[0] = mIdx[1];
mIdx[1] = mIdx[2];
mIdx[2] = tmp;
}
/// Get center of triangle
Vec3 GetCentroid(const VertexList &inVertices) const
{
return (Vec3(inVertices[mIdx[0]]) + Vec3(inVertices[mIdx[1]]) + Vec3(inVertices[mIdx[2]])) / 3.0f;
}
/// Get the hash value of this structure
uint64 GetHash() const
{
static_assert(sizeof(IndexedTriangleNoMaterial) == 3 * sizeof(uint32), "Class should have no padding");
return HashBytes(this, sizeof(IndexedTriangleNoMaterial));
}
uint32 mIdx[3];
};
/// Triangle with 32-bit indices and material index
class IndexedTriangle : public IndexedTriangleNoMaterial
{
public:
	using IndexedTriangleNoMaterial::IndexedTriangleNoMaterial;

	/// Constructor
	constexpr IndexedTriangle(uint32 inI1, uint32 inI2, uint32 inI3, uint32 inMaterialIndex, uint inUserData = 0) : IndexedTriangleNoMaterial(inI1, inI2, inI3), mMaterialIndex(inMaterialIndex), mUserData(inUserData) { }

	/// Check if two triangles are identical
	bool		operator == (const IndexedTriangle &inRHS) const
	{
		// Indices, material and user data must all match
		return IndexedTriangleNoMaterial::operator==(inRHS) && mMaterialIndex == inRHS.mMaterialIndex && mUserData == inRHS.mUserData;
	}

	/// Rotate the vertices so that the lowest vertex becomes the first. This does not change the represented triangle.
	IndexedTriangle	GetLowestIndexFirst() const
	{
		// Find the position of the lowest index (mirrors the original strict-less comparisons so ties resolve identically)
		int start = mIdx[0] < mIdx[1]?
			(mIdx[0] < mIdx[2]? 0 : 2) :
			(mIdx[1] < mIdx[2]? 1 : 2);

		// Return the cyclic rotation that starts at the lowest index
		return IndexedTriangle(mIdx[start], mIdx[(start + 1) % 3], mIdx[(start + 2) % 3], mMaterialIndex, mUserData);
	}

	/// Get the hash value of this structure
	uint64		GetHash() const
	{
		static_assert(sizeof(IndexedTriangle) == 5 * sizeof(uint32), "Class should have no padding");
		return HashBytes(this, sizeof(IndexedTriangle));
	}

	uint32		mMaterialIndex = 0;	///< Index of the material of this triangle
	uint32		mUserData = 0;		///< User data that can be used for anything by the application, e.g. for tracking the original index of the triangle
};
using IndexedTriangleNoMaterialList = Array<IndexedTriangleNoMaterial>;
using IndexedTriangleList = Array<IndexedTriangle>;
JPH_NAMESPACE_END
// Create a std::hash for IndexedTriangleNoMaterial and IndexedTriangle
JPH_MAKE_STD_HASH(JPH::IndexedTriangleNoMaterial)
JPH_MAKE_STD_HASH(JPH::IndexedTriangle)

View File

@@ -0,0 +1,222 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Geometry/Indexify.h>
#include <Jolt/Geometry/AABox.h>
JPH_NAMESPACE_BEGIN
// Look up the Float3 for a global vertex index: index i addresses vertex i % 3 of triangle i / 3
static JPH_INLINE const Float3 &sIndexifyGetFloat3(const TriangleList &inTriangles, uint32 inVertexIndex)
{
	uint32 triangle_idx = inVertexIndex / 3;
	uint32 vertex_in_triangle = inVertexIndex % 3;
	return inTriangles[triangle_idx].mV[vertex_in_triangle];
}
// Same lookup as sIndexifyGetFloat3 but loads the vertex into a Vec3
static JPH_INLINE Vec3 sIndexifyGetVec3(const TriangleList &inTriangles, uint32 inVertexIndex)
{
	const Float3 &vertex = sIndexifyGetFloat3(inTriangles, inVertexIndex);
	return Vec3::sLoadFloat3Unsafe(vertex);
}
// Brute force O(n^2) welding pass over the vertex indices in [inVertexIndices, inVertexIndicesEnd).
// ioWeldedVertices is a union-find style link table over all vertices: an entry always points to an equal or
// lower index, and chains terminate at a self-linked representative. Vertices closer than inVertexWeldDistance
// get their chains linked together under the lowest index of the two chains.
static void sIndexifyVerticesBruteForce(const TriangleList &inTriangles, const uint32 *inVertexIndices, const uint32 *inVertexIndicesEnd, Array<uint32> &ioWeldedVertices, float inVertexWeldDistance)
{
	// Compare squared distances to avoid a square root per vertex pair
	float weld_dist_sq = Square(inVertexWeldDistance);

	// Compare every vertex
	for (const uint32 *v1_idx = inVertexIndices; v1_idx < inVertexIndicesEnd; ++v1_idx)
	{
		Vec3 v1 = sIndexifyGetVec3(inTriangles, *v1_idx);

		// with every other vertex...
		for (const uint32 *v2_idx = v1_idx + 1; v2_idx < inVertexIndicesEnd; ++v2_idx)
		{
			Vec3 v2 = sIndexifyGetVec3(inTriangles, *v2_idx);

			// If they're weldable
			if ((v2 - v1).LengthSq() <= weld_dist_sq)
			{
				// Find the lowest indices both indices link to (walk each chain down to its representative)
				uint32 idx1 = *v1_idx;
				for (;;)
				{
					uint32 new_idx1 = ioWeldedVertices[idx1];
					if (new_idx1 >= idx1)
						break;
					idx1 = new_idx1;
				}
				uint32 idx2 = *v2_idx;
				for (;;)
				{
					uint32 new_idx2 = ioWeldedVertices[idx2];
					if (new_idx2 >= idx2)
						break;
					idx2 = new_idx2;
				}

				// Order the vertices
				uint32 lowest = min(idx1, idx2);
				uint32 highest = max(idx1, idx2);

				// Link highest to lowest
				ioWeldedVertices[highest] = lowest;

				// Also update the vertices we started from to avoid creating long chains
				ioWeldedVertices[*v1_idx] = lowest;
				ioWeldedVertices[*v2_idx] = lowest;

				// NOTE(review): this stops at the first weldable partner for v1 rather than scanning all remaining
				// vertices; clusters are normally completed by later iterations linking the other members, which looks
				// like a deliberate speed/precision trade-off for tiny weld distances — confirm against upstream intent
				break;
			}
		}
	}
}
// Divide and conquer welding: splits the working set along the longest axis of its bounding box and recurses on
// both halves. Vertices within inVertexWeldDistance of the split plane are duplicated into both halves so that
// welds across the plane are not missed. Small sets and exhausted recursion fall back to the brute force pass.
// ioVertexIndices/inNumVertices describe the working set (reordered in place), ioScratch must be able to hold
// inNumVertices entries, ioWeldedVertices is the global link table (see sIndexifyVerticesBruteForce).
static void sIndexifyVerticesRecursively(const TriangleList &inTriangles, uint32 *ioVertexIndices, uint inNumVertices, uint32 *ioScratch, Array<uint32> &ioWeldedVertices, float inVertexWeldDistance, uint inMaxRecursion)
{
	// Check if we have few enough vertices to do a brute force search
	// Or if we've recursed too deep (this means we chipped off a few vertices each iteration because all points are very close)
	if (inNumVertices <= 8 || inMaxRecursion == 0)
	{
		sIndexifyVerticesBruteForce(inTriangles, ioVertexIndices, ioVertexIndices + inNumVertices, ioWeldedVertices, inVertexWeldDistance);
		return;
	}

	// Calculate bounds
	AABox bounds;
	for (const uint32 *v = ioVertexIndices, *v_end = ioVertexIndices + inNumVertices; v < v_end; ++v)
		bounds.Encapsulate(sIndexifyGetVec3(inTriangles, *v));

	// Determine split plane
	int split_axis = bounds.GetExtent().GetHighestComponentIndex();
	float split_value = bounds.GetCenter()[split_axis];

	// Three-way partition in place: [ioVertexIndices, v_write) ends up on one side of the plane,
	// [v_end, ioVertexIndices + inNumVertices) on the other, and ioScratch collects vertices within
	// weld distance of the plane (they must participate on both sides)
	uint32 *v_read = ioVertexIndices, *v_write = ioVertexIndices, *v_end = ioVertexIndices + inNumVertices;
	uint32 *scratch = ioScratch;
	while (v_read < v_end)
	{
		// Calculate distance to plane
		float distance_to_split_plane = sIndexifyGetFloat3(inTriangles, *v_read)[split_axis] - split_value;
		if (distance_to_split_plane < -inVertexWeldDistance)
		{
			// Vertex is clearly below the plane, keep it in the front partition
			*v_write = *v_read;
			++v_read;
			++v_write;
		}
		else if (distance_to_split_plane > inVertexWeldDistance)
		{
			// Vertex is clearly above the plane, swap it to the back partition (the swapped-in vertex is re-examined)
			--v_end;
			std::swap(*v_read, *v_end);
		}
		else
		{
			// Vertex is too close to the split plane, it goes on both sides
			*scratch++ = *v_read++;
		}
	}

	// Check if we made any progress; if every vertex straddles the plane, splitting further is pointless
	uint num_vertices_on_both_sides = (uint)(scratch - ioScratch);
	if (num_vertices_on_both_sides == inNumVertices)
	{
		sIndexifyVerticesBruteForce(inTriangles, ioVertexIndices, ioVertexIndices + inNumVertices, ioWeldedVertices, inVertexWeldDistance);
		return;
	}

	// Calculate how we classified the vertices
	uint num_vertices_left = (uint)(v_write - ioVertexIndices);
	uint num_vertices_right = (uint)(ioVertexIndices + inNumVertices - v_end);
	JPH_ASSERT(num_vertices_left + num_vertices_right + num_vertices_on_both_sides == inNumVertices);

	// Copy the straddling vertices into the gap between the two partitions; the ranges passed to the two
	// recursive calls below overlap on exactly these vertices
	memcpy(v_write, ioScratch, num_vertices_on_both_sides * sizeof(uint32));

	// Recurse
	uint max_recursion = inMaxRecursion - 1;
	sIndexifyVerticesRecursively(inTriangles, ioVertexIndices, num_vertices_left + num_vertices_on_both_sides, ioScratch, ioWeldedVertices, inVertexWeldDistance, max_recursion);
	sIndexifyVerticesRecursively(inTriangles, ioVertexIndices + num_vertices_left, num_vertices_right + num_vertices_on_both_sides, ioScratch, ioWeldedVertices, inVertexWeldDistance, max_recursion);
}
// Take a list of triangles, weld vertices that are within inVertexWeldDistance of each other and produce the
// unique vertex list plus indexed triangles. Triangles that become degenerate after welding are dropped.
void Indexify(const TriangleList &inTriangles, VertexList &outVertices, IndexedTriangleList &outTriangles, float inVertexWeldDistance)
{
	uint num_triangles = (uint)inTriangles.size();
	uint num_vertices = num_triangles * 3; // Every input triangle contributes 3 (possibly duplicated) vertices

	// Create a list of all vertex indices
	Array<uint32> vertex_indices;
	vertex_indices.resize(num_vertices);
	for (uint i = 0; i < num_vertices; ++i)
		vertex_indices[i] = i;

	// Link each vertex to itself (union-find style link table: entries always point to an equal or lower index)
	Array<uint32> welded_vertices;
	welded_vertices.resize(num_vertices);
	for (uint i = 0; i < num_vertices; ++i)
		welded_vertices[i] = i;

	// A scope to free memory used by the scratch array
	{
		// Some scratch memory, used for the vertices that fall in both partitions
		Array<uint32> scratch;
		scratch.resize(num_vertices);

		// Recursively split the vertices
		sIndexifyVerticesRecursively(inTriangles, vertex_indices.data(), num_vertices, scratch.data(), welded_vertices, inVertexWeldDistance, 32);
	}

	// Do a pass to complete the welding, linking each vertex to the vertex it is welded to
	// (and since we're going from 0 to N we can be sure that the vertex we're linking to is already linked to the lowest vertex)
	uint num_resulting_vertices = 0;
	for (uint i = 0; i < num_vertices; ++i)
	{
		JPH_ASSERT(welded_vertices[welded_vertices[i]] <= welded_vertices[i]);
		welded_vertices[i] = welded_vertices[welded_vertices[i]];
		if (welded_vertices[i] == i)
			++num_resulting_vertices; // Self-linked vertices are the representatives that will be emitted
	}

	// Collect the vertices
	// From here on welded_vertices is reused to hold the OUTPUT index of each vertex: representatives are remapped
	// first, and since a non-representative always links to a lower (already remapped) index the double lookup below works
	outVertices.clear();
	outVertices.reserve(num_resulting_vertices);
	for (uint i = 0; i < num_vertices; ++i)
		if (welded_vertices[i] == i)
		{
			// New vertex
			welded_vertices[i] = (uint32)outVertices.size();
			outVertices.push_back(sIndexifyGetFloat3(inTriangles, i));
		}
		else
		{
			// Reused vertex, remap index
			welded_vertices[i] = welded_vertices[welded_vertices[i]];
		}

	// Create indexed triangles
	outTriangles.clear();
	outTriangles.reserve(num_triangles);
	for (uint t = 0; t < num_triangles; ++t)
	{
		IndexedTriangle it;
		it.mMaterialIndex = inTriangles[t].mMaterialIndex;
		it.mUserData = inTriangles[t].mUserData;
		for (int v = 0; v < 3; ++v)
			it.mIdx[v] = welded_vertices[t * 3 + v];
		if (!it.IsDegenerate(outVertices)) // Welding can collapse a triangle; skip those
			outTriangles.push_back(it);
	}
}
void Deindexify(const VertexList &inVertices, const IndexedTriangleList &inTriangles, TriangleList &outTriangles)
{
outTriangles.resize(inTriangles.size());
for (size_t t = 0; t < inTriangles.size(); ++t)
{
const IndexedTriangle &in = inTriangles[t];
Triangle &out = outTriangles[t];
out.mMaterialIndex = in.mMaterialIndex;
out.mUserData = in.mUserData;
for (int v = 0; v < 3; ++v)
out.mV[v] = inVertices[in.mIdx[v]];
}
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,19 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/Triangle.h>
#include <Jolt/Geometry/IndexedTriangle.h>
JPH_NAMESPACE_BEGIN
/// Take a list of triangles and get the unique set of vertices and use them to create indexed triangles.
/// Vertices that are less than inVertexWeldDistance apart will be combined to a single vertex.
JPH_EXPORT void Indexify(const TriangleList &inTriangles, VertexList &outVertices, IndexedTriangleList &outTriangles, float inVertexWeldDistance = 1.0e-4f);
/// Take a list of indexed triangles and unpack them
JPH_EXPORT void Deindexify(const VertexList &inVertices, const IndexedTriangleList &inTriangles, TriangleList &outTriangles);
JPH_NAMESPACE_END

View File

@@ -0,0 +1,40 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/AABox.h>
JPH_NAMESPACE_BEGIN
class MortonCode
{
public:
	/// First converts a floating point value in the range [0, 1] to a 10 bit fixed point integer.
	/// Then expands a 10-bit integer into 30 bits by inserting 2 zeros after each bit.
	static uint32 sExpandBits(float inV)
	{
		JPH_ASSERT(inV >= 0.0f && inV <= 1.0f);

		// Round to 10 bit fixed point
		uint32 v = uint32(inV * 1023.0f + 0.5f);
		JPH_ASSERT(v < 1024);

		// Spread the 10 bits out over 30 bits. The shift/or form is equivalent to the classic multiply form
		// because at every step the shifted copy and the original have no overlapping set bits.
		v = (v | (v << 16)) & 0xFF0000FFu;
		v = (v | (v << 8)) & 0x0F00F00Fu;
		v = (v | (v << 4)) & 0xC30C30C3u;
		v = (v | (v << 2)) & 0x49249249u;
		return v;
	}

	/// Calculate the morton code for inVector, given that all vectors lie in inVectorBounds
	static uint32 sGetMortonCode(Vec3Arg inVector, const AABox &inVectorBounds)
	{
		// Normalize the position to [0, 1] within the bounds
		// NOTE(review): assumes inVectorBounds has a non-zero extent on every axis; a degenerate box would divide by zero — confirm callers guarantee this
		Vec3 scaled = (inVector - inVectorBounds.mMin) / inVectorBounds.GetSize();

		// Interleave the expanded bits: x occupies bit 2, y bit 1 and z bit 0 of every 3-bit group
		// (the expanded values have disjoint bits after shifting, so or-ing them equals adding them)
		return (sExpandBits(scaled.GetX()) << 2) | (sExpandBits(scaled.GetY()) << 1) | sExpandBits(scaled.GetZ());
	}
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,178 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#include <Jolt/Jolt.h>
#include <Jolt/Geometry/OrientedBox.h>
#include <Jolt/Geometry/AABox.h>
JPH_NAMESPACE_BEGIN
// Separating axis test between this oriented box and an axis aligned box: tests the 3 AABox face normals,
// the 3 OBB face normals and the 9 edge-edge cross products. Returns true when no separating axis exists.
bool OrientedBox::Overlaps(const AABox &inBox, float inEpsilon) const
{
	// Taken from: Real Time Collision Detection - Christer Ericson
	// Chapter 4.4.1, page 103-105.
	// Note that the code is swapped around: A is the aabox and B is the oriented box (this saves us from having to invert the orientation of the oriented box)

	// Convert AABox to center / extent representation
	Vec3 a_center = inBox.GetCenter();
	Vec3 a_half_extents = inBox.GetExtent();

	// Compute rotation matrix expressing b in a's coordinate frame
	// The translation column is offset by a_center so that rot(i, 3) is the center-to-center vector along a's axis i
	Mat44 rot(mOrientation.GetColumn4(0), mOrientation.GetColumn4(1), mOrientation.GetColumn4(2), mOrientation.GetColumn4(3) - Vec4(a_center, 0));

	// Compute common subexpressions. Add in an epsilon term to
	// counteract arithmetic errors when two edges are parallel and
	// their cross product is (near) null (see text for details)
	Vec3 epsilon = Vec3::sReplicate(inEpsilon);
	Vec3 abs_r[3] { rot.GetAxisX().Abs() + epsilon, rot.GetAxisY().Abs() + epsilon, rot.GetAxisZ().Abs() + epsilon }; // abs_r[j] = |axis j of b in a's frame|, so abs_r[j][i] = |rot(i, j)|

	// Test axes L = A0, L = A1, L = A2
	float ra, rb;
	for (int i = 0; i < 3; i++)
	{
		ra = a_half_extents[i];
		rb = mHalfExtents[0] * abs_r[0][i] + mHalfExtents[1] * abs_r[1][i] + mHalfExtents[2] * abs_r[2][i];
		if (abs(rot(i, 3)) > ra + rb) return false;
	}

	// Test axes L = B0, L = B1, L = B2
	for (int i = 0; i < 3; i++)
	{
		ra = a_half_extents.Dot(abs_r[i]);
		rb = mHalfExtents[i];
		if (abs(rot.GetTranslation().Dot(rot.GetColumn3(i))) > ra + rb) return false;
	}

	// The remaining 9 tests project both boxes and the center-to-center vector onto the cross products Ai x Bj

	// Test axis L = A0 x B0
	ra = a_half_extents[1] * abs_r[0][2] + a_half_extents[2] * abs_r[0][1];
	rb = mHalfExtents[1] * abs_r[2][0] + mHalfExtents[2] * abs_r[1][0];
	if (abs(rot(2, 3) * rot(1, 0) - rot(1, 3) * rot(2, 0)) > ra + rb) return false;

	// Test axis L = A0 x B1
	ra = a_half_extents[1] * abs_r[1][2] + a_half_extents[2] * abs_r[1][1];
	rb = mHalfExtents[0] * abs_r[2][0] + mHalfExtents[2] * abs_r[0][0];
	if (abs(rot(2, 3) * rot(1, 1) - rot(1, 3) * rot(2, 1)) > ra + rb) return false;

	// Test axis L = A0 x B2
	ra = a_half_extents[1] * abs_r[2][2] + a_half_extents[2] * abs_r[2][1];
	rb = mHalfExtents[0] * abs_r[1][0] + mHalfExtents[1] * abs_r[0][0];
	if (abs(rot(2, 3) * rot(1, 2) - rot(1, 3) * rot(2, 2)) > ra + rb) return false;

	// Test axis L = A1 x B0
	ra = a_half_extents[0] * abs_r[0][2] + a_half_extents[2] * abs_r[0][0];
	rb = mHalfExtents[1] * abs_r[2][1] + mHalfExtents[2] * abs_r[1][1];
	if (abs(rot(0, 3) * rot(2, 0) - rot(2, 3) * rot(0, 0)) > ra + rb) return false;

	// Test axis L = A1 x B1
	ra = a_half_extents[0] * abs_r[1][2] + a_half_extents[2] * abs_r[1][0];
	rb = mHalfExtents[0] * abs_r[2][1] + mHalfExtents[2] * abs_r[0][1];
	if (abs(rot(0, 3) * rot(2, 1) - rot(2, 3) * rot(0, 1)) > ra + rb) return false;

	// Test axis L = A1 x B2
	ra = a_half_extents[0] * abs_r[2][2] + a_half_extents[2] * abs_r[2][0];
	rb = mHalfExtents[0] * abs_r[1][1] + mHalfExtents[1] * abs_r[0][1];
	if (abs(rot(0, 3) * rot(2, 2) - rot(2, 3) * rot(0, 2)) > ra + rb) return false;

	// Test axis L = A2 x B0
	ra = a_half_extents[0] * abs_r[0][1] + a_half_extents[1] * abs_r[0][0];
	rb = mHalfExtents[1] * abs_r[2][2] + mHalfExtents[2] * abs_r[1][2];
	if (abs(rot(1, 3) * rot(0, 0) - rot(0, 3) * rot(1, 0)) > ra + rb) return false;

	// Test axis L = A2 x B1
	ra = a_half_extents[0] * abs_r[1][1] + a_half_extents[1] * abs_r[1][0];
	rb = mHalfExtents[0] * abs_r[2][2] + mHalfExtents[2] * abs_r[0][2];
	if (abs(rot(1, 3) * rot(0, 1) - rot(0, 3) * rot(1, 1)) > ra + rb) return false;

	// Test axis L = A2 x B2
	ra = a_half_extents[0] * abs_r[2][1] + a_half_extents[1] * abs_r[2][0];
	rb = mHalfExtents[0] * abs_r[1][2] + mHalfExtents[1] * abs_r[0][2];
	if (abs(rot(1, 3) * rot(0, 2) - rot(0, 3) * rot(1, 2)) > ra + rb) return false;

	// Since no separating axis is found, the OBB and AAB must be intersecting
	return true;
}
// Separating axis test between two oriented boxes: tests the 3 face normals of each box and the
// 9 edge-edge cross products. Returns true when no separating axis exists.
bool OrientedBox::Overlaps(const OrientedBox &inBox, float inEpsilon) const
{
	// Taken from: Real Time Collision Detection - Christer Ericson
	// Chapter 4.4.1, page 103-105.
	// Note that A is this, B is inBox

	// Compute rotation matrix expressing b in a's coordinate frame
	// (rot's translation column is then the center-to-center vector in a's frame)
	Mat44 rot = mOrientation.InversedRotationTranslation() * inBox.mOrientation;

	// Compute common subexpressions. Add in an epsilon term to
	// counteract arithmetic errors when two edges are parallel and
	// their cross product is (near) null (see text for details)
	Vec3 epsilon = Vec3::sReplicate(inEpsilon);
	Vec3 abs_r[3] { rot.GetAxisX().Abs() + epsilon, rot.GetAxisY().Abs() + epsilon, rot.GetAxisZ().Abs() + epsilon }; // abs_r[j] = |axis j of b in a's frame|, so abs_r[j][i] = |rot(i, j)|

	// Test axes L = A0, L = A1, L = A2
	float ra, rb;
	for (int i = 0; i < 3; i++)
	{
		ra = mHalfExtents[i];
		rb = inBox.mHalfExtents[0] * abs_r[0][i] + inBox.mHalfExtents[1] * abs_r[1][i] + inBox.mHalfExtents[2] * abs_r[2][i];
		if (abs(rot(i, 3)) > ra + rb) return false;
	}

	// Test axes L = B0, L = B1, L = B2
	for (int i = 0; i < 3; i++)
	{
		ra = mHalfExtents.Dot(abs_r[i]);
		rb = inBox.mHalfExtents[i];
		if (abs(rot.GetTranslation().Dot(rot.GetColumn3(i))) > ra + rb) return false;
	}

	// The remaining 9 tests project both boxes and the center-to-center vector onto the cross products Ai x Bj

	// Test axis L = A0 x B0
	ra = mHalfExtents[1] * abs_r[0][2] + mHalfExtents[2] * abs_r[0][1];
	rb = inBox.mHalfExtents[1] * abs_r[2][0] + inBox.mHalfExtents[2] * abs_r[1][0];
	if (abs(rot(2, 3) * rot(1, 0) - rot(1, 3) * rot(2, 0)) > ra + rb) return false;

	// Test axis L = A0 x B1
	ra = mHalfExtents[1] * abs_r[1][2] + mHalfExtents[2] * abs_r[1][1];
	rb = inBox.mHalfExtents[0] * abs_r[2][0] + inBox.mHalfExtents[2] * abs_r[0][0];
	if (abs(rot(2, 3) * rot(1, 1) - rot(1, 3) * rot(2, 1)) > ra + rb) return false;

	// Test axis L = A0 x B2
	ra = mHalfExtents[1] * abs_r[2][2] + mHalfExtents[2] * abs_r[2][1];
	rb = inBox.mHalfExtents[0] * abs_r[1][0] + inBox.mHalfExtents[1] * abs_r[0][0];
	if (abs(rot(2, 3) * rot(1, 2) - rot(1, 3) * rot(2, 2)) > ra + rb) return false;

	// Test axis L = A1 x B0
	ra = mHalfExtents[0] * abs_r[0][2] + mHalfExtents[2] * abs_r[0][0];
	rb = inBox.mHalfExtents[1] * abs_r[2][1] + inBox.mHalfExtents[2] * abs_r[1][1];
	if (abs(rot(0, 3) * rot(2, 0) - rot(2, 3) * rot(0, 0)) > ra + rb) return false;

	// Test axis L = A1 x B1
	ra = mHalfExtents[0] * abs_r[1][2] + mHalfExtents[2] * abs_r[1][0];
	rb = inBox.mHalfExtents[0] * abs_r[2][1] + inBox.mHalfExtents[2] * abs_r[0][1];
	if (abs(rot(0, 3) * rot(2, 1) - rot(2, 3) * rot(0, 1)) > ra + rb) return false;

	// Test axis L = A1 x B2
	ra = mHalfExtents[0] * abs_r[2][2] + mHalfExtents[2] * abs_r[2][0];
	rb = inBox.mHalfExtents[0] * abs_r[1][1] + inBox.mHalfExtents[1] * abs_r[0][1];
	if (abs(rot(0, 3) * rot(2, 2) - rot(2, 3) * rot(0, 2)) > ra + rb) return false;

	// Test axis L = A2 x B0
	ra = mHalfExtents[0] * abs_r[0][1] + mHalfExtents[1] * abs_r[0][0];
	rb = inBox.mHalfExtents[1] * abs_r[2][2] + inBox.mHalfExtents[2] * abs_r[1][2];
	if (abs(rot(1, 3) * rot(0, 0) - rot(0, 3) * rot(1, 0)) > ra + rb) return false;

	// Test axis L = A2 x B1
	ra = mHalfExtents[0] * abs_r[1][1] + mHalfExtents[1] * abs_r[1][0];
	rb = inBox.mHalfExtents[0] * abs_r[2][2] + inBox.mHalfExtents[2] * abs_r[0][2];
	if (abs(rot(1, 3) * rot(0, 1) - rot(0, 3) * rot(1, 1)) > ra + rb) return false;

	// Test axis L = A2 x B2
	ra = mHalfExtents[0] * abs_r[2][1] + mHalfExtents[1] * abs_r[2][0];
	rb = inBox.mHalfExtents[0] * abs_r[1][2] + inBox.mHalfExtents[1] * abs_r[0][2];
	if (abs(rot(1, 3) * rot(0, 2) - rot(0, 3) * rot(1, 2)) > ra + rb) return false;

	// Since no separating axis is found, the OBBs must be intersecting
	return true;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,39 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/Triangle.h>
#include <Jolt/Geometry/IndexedTriangle.h>
#include <Jolt/Geometry/AABox.h>
#include <Jolt/Math/Mat44.h>
JPH_NAMESPACE_BEGIN
class AABox;
/// Oriented box: an axis aligned box of half extents mHalfExtents, positioned and rotated by mOrientation
class JPH_EXPORT_GCC_BUG_WORKAROUND [[nodiscard]] OrientedBox
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructor
	OrientedBox() = default;
	OrientedBox(Mat44Arg inOrientation, Vec3Arg inHalfExtents) : mOrientation(inOrientation), mHalfExtents(inHalfExtents) { }

	/// Construct from axis aligned box and transform. Only works for rotation/translation matrix (no scaling / shearing).
	OrientedBox(Mat44Arg inOrientation, const AABox &inBox) : OrientedBox(inOrientation.PreTranslated(inBox.GetCenter()), inBox.GetExtent()) { }

	/// Test if oriented box overlaps with axis aligned box each other
	/// @param inBox Box to test against
	/// @param inEpsilon Tolerance added to the cross product axes to avoid false negatives when edges are (near) parallel
	bool Overlaps(const AABox &inBox, float inEpsilon = 1.0e-6f) const;

	/// Test if two oriented boxes overlap each other
	/// @param inBox Box to test against
	/// @param inEpsilon Tolerance added to the cross product axes to avoid false negatives when edges are (near) parallel
	bool Overlaps(const OrientedBox &inBox, float inEpsilon = 1.0e-6f) const;

	Mat44 mOrientation; ///< Transform that positions and rotates the local space axis aligned box into world space
	Vec3 mHalfExtents; ///< Half extents (half the size of the edge) of the local space axis aligned box
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,101 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// An infinite plane described by the formula X . Normal + Constant = 0.
class [[nodiscard]] Plane
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructor
	Plane() = default;
	explicit Plane(Vec4Arg inNormalAndConstant) : mNormalAndConstant(inNormalAndConstant) { }
	Plane(Vec3Arg inNormal, float inConstant) : mNormalAndConstant(inNormal, inConstant) { }

	/// Create from point and normal
	static Plane sFromPointAndNormal(Vec3Arg inPoint, Vec3Arg inNormal) { return Plane(Vec4(inNormal, -inNormal.Dot(inPoint))); }

	/// Create from point and normal, double precision version that more accurately calculates the plane constant
	static Plane sFromPointAndNormal(DVec3Arg inPoint, Vec3Arg inNormal) { return Plane(Vec4(inNormal, -float(DVec3(inNormal).Dot(inPoint)))); }

	/// Create from 3 counter clockwise points
	static Plane sFromPointsCCW(Vec3Arg inV1, Vec3Arg inV2, Vec3Arg inV3) { return sFromPointAndNormal(inV1, (inV2 - inV1).Cross(inV3 - inV1).Normalized()); }

	// Properties
	Vec3 GetNormal() const { return Vec3(mNormalAndConstant); }
	void SetNormal(Vec3Arg inNormal) { mNormalAndConstant = Vec4(inNormal, mNormalAndConstant.GetW()); }
	float GetConstant() const { return mNormalAndConstant.GetW(); }
	void SetConstant(float inConstant) { mNormalAndConstant.SetW(inConstant); }

	/// Offset the plane (positive value means move it in the direction of the plane normal)
	Plane Offset(float inDistance) const { return Plane(mNormalAndConstant - Vec4(Vec3::sZero(), inDistance)); }

	/// Transform the plane by a matrix
	/// NOTE(review): transforms the normal with the 3x3 part directly, so this assumes inTransform is a rotation/translation
	/// matrix (no scale/shear) — for scaling, use Scaled. Confirm callers obey this.
	inline Plane GetTransformed(Mat44Arg inTransform) const
	{
		Vec3 transformed_normal = inTransform.Multiply3x3(GetNormal());
		return Plane(transformed_normal, GetConstant() - inTransform.GetTranslation().Dot(transformed_normal));
	}

	/// Scale the plane, can handle non-uniform and negative scaling
	inline Plane Scaled(Vec3Arg inScale) const
	{
		// If x . n + c = 0 then (s * x) . (n / s) + c = 0, so divide the normal by the scale and renormalize,
		// dividing the constant by the same length to keep the plane equation valid
		Vec3 scaled_normal = GetNormal() / inScale;
		float scaled_normal_length = scaled_normal.Length();
		return Plane(scaled_normal / scaled_normal_length, GetConstant() / scaled_normal_length);
	}

	/// Distance point to plane (positive when inPoint is on the side the normal points to)
	float SignedDistance(Vec3Arg inPoint) const { return inPoint.Dot(GetNormal()) + GetConstant(); }

	/// Project inPoint onto the plane
	Vec3 ProjectPointOnPlane(Vec3Arg inPoint) const { return inPoint - GetNormal() * SignedDistance(inPoint); }

	/// Returns intersection point between 3 planes
	/// Returns false when there is no unique intersection point (the normals are linearly dependent)
	static bool sIntersectPlanes(const Plane &inP1, const Plane &inP2, const Plane &inP3, Vec3 &outPoint)
	{
		// We solve the equation:
		// |ax, ay, az, aw| | x |   | 0 |
		// |bx, by, bz, bw| * | y | = | 0 |
		// |cx, cy, cz, cw| | z |   | 0 |
		// | 0,  0,  0,  1| | 1 |   | 1 |
		// Where normal of plane 1 = (ax, ay, az), plane constant of 1 = aw, normal of plane 2 = (bx, by, bz) etc.
		// This involves inverting the matrix and multiplying it with [0, 0, 0, 1]

		// Fetch the normals and plane constants for the three planes
		Vec4 a = inP1.mNormalAndConstant;
		Vec4 b = inP2.mNormalAndConstant;
		Vec4 c = inP3.mNormalAndConstant;

		// Result is a vector that we have to divide by:
		// (the scalar triple product of the three normals = determinant of the system; zero means no unique solution)
		float denominator = Vec3(a).Dot(Vec3(b).Cross(Vec3(c)));
		if (denominator == 0.0f)
			return false;

		// The numerator is:
		// [aw*(bz*cy-by*cz)+ay*(bw*cz-bz*cw)+az*(by*cw-bw*cy)]
		// [aw*(bx*cz-bz*cx)+ax*(bz*cw-bw*cz)+az*(bw*cx-bx*cw)]
		// [aw*(by*cx-bx*cy)+ax*(bw*cy-by*cw)+ay*(bx*cw-bw*cx)]
		// (evaluated with swizzles so all three components are computed in parallel)
		Vec4 numerator =
			a.SplatW() * (b.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_UNUSED>() - b.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_UNUSED>())
			+ a.Swizzle<SWIZZLE_Y, SWIZZLE_X, SWIZZLE_X, SWIZZLE_UNUSED>() * (b.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Y, SWIZZLE_UNUSED>() - b.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Y, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED>())
			+ a.Swizzle<SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_UNUSED>() * (b.Swizzle<SWIZZLE_Y, SWIZZLE_W, SWIZZLE_X, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_W, SWIZZLE_X, SWIZZLE_W, SWIZZLE_UNUSED>() - b.Swizzle<SWIZZLE_W, SWIZZLE_X, SWIZZLE_W, SWIZZLE_UNUSED>() * c.Swizzle<SWIZZLE_Y, SWIZZLE_W, SWIZZLE_X, SWIZZLE_UNUSED>());

		outPoint = Vec3(numerator) / denominator;
		return true;
	}

private:
#ifdef JPH_OBJECT_STREAM
	friend void CreateRTTIPlane(class RTTI &); // For JPH_IMPLEMENT_SERIALIZABLE_OUTSIDE_CLASS
#endif

	Vec4 mNormalAndConstant; ///< XYZ = normal, W = constant, plane: x . normal + constant = 0
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,241 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Helper structure holding the reciprocal of a ray for Ray vs AABox testing
class RayInvDirection
{
public:
	/// Constructors
	inline RayInvDirection() = default;
	inline explicit RayInvDirection(Vec3Arg inDirection) { Set(inDirection); }

	/// Set reciprocal from ray direction
	inline void Set(Vec3Arg inDirection)
	{
		// if (abs(inDirection) <= Epsilon) the ray is nearly parallel to the slab.
		mIsParallel = Vec3::sLessOrEqual(inDirection.Abs(), Vec3::sReplicate(1.0e-20f));

		// Calculate 1 / direction while avoiding division by zero
		// (parallel components are replaced by 1 before taking the reciprocal; the tests mask them out via mIsParallel)
		mInvDirection = Vec3::sSelect(inDirection, Vec3::sOne(), mIsParallel).Reciprocal();
	}

	Vec3 mInvDirection; ///< 1 / ray direction
	UVec4 mIsParallel; ///< for each component if it is parallel to the coordinate axis
};
/// Intersect AABB with ray, returns minimal distance along ray or FLT_MAX if no hit
/// Note: Can return negative value if ray starts in box
/// Branchless slab test: all rejection conditions are gathered in a mask and applied with a single select.
JPH_INLINE float RayAABox(Vec3Arg inOrigin, const RayInvDirection &inInvDirection, Vec3Arg inBoundsMin, Vec3Arg inBoundsMax)
{
	// Constants
	Vec3 flt_min = Vec3::sReplicate(-FLT_MAX);
	Vec3 flt_max = Vec3::sReplicate(FLT_MAX);

	// Test against all three axes simultaneously.
	Vec3 t1 = (inBoundsMin - inOrigin) * inInvDirection.mInvDirection;
	Vec3 t2 = (inBoundsMax - inOrigin) * inInvDirection.mInvDirection;

	// Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
	// use the results from any directions parallel to the slab.
	Vec3 t_min = Vec3::sSelect(Vec3::sMin(t1, t2), flt_min, inInvDirection.mIsParallel);
	Vec3 t_max = Vec3::sSelect(Vec3::sMax(t1, t2), flt_max, inInvDirection.mIsParallel);

	// t_min.xyz = maximum(t_min.x, t_min.y, t_min.z);
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// t_max.xyz = minimum(t_max.x, t_max.y, t_max.z);
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// if (t_min > t_max) return FLT_MAX;
	UVec4 no_intersection = Vec3::sGreater(t_min, t_max);

	// if (t_max < 0.0f) return FLT_MAX;
	no_intersection = UVec4::sOr(no_intersection, Vec3::sLess(t_max, Vec3::sZero()));

	// if (inInvDirection.mIsParallel && !(Min <= inOrigin && inOrigin <= Max)) return FLT_MAX; else return t_min;
	UVec4 no_parallel_overlap = UVec4::sOr(Vec3::sLess(inOrigin, inBoundsMin), Vec3::sGreater(inOrigin, inBoundsMax));
	no_intersection = UVec4::sOr(no_intersection, UVec4::sAnd(inInvDirection.mIsParallel, no_parallel_overlap));

	// Fold the per-component rejection flags into every component so one select covers all cases
	no_intersection = UVec4::sOr(no_intersection, no_intersection.SplatY());
	no_intersection = UVec4::sOr(no_intersection, no_intersection.SplatZ());
	return Vec3::sSelect(t_min, flt_max, no_intersection).GetX();
}
/// Intersect 4 AABBs with ray, returns minimal distance along ray or FLT_MAX if no hit
/// Note: Can return negative value if ray starts in box
/// The 4 boxes are passed in structure-of-arrays form (one Vec4 per coordinate component) so all 4 slab tests run in parallel.
JPH_INLINE Vec4 RayAABox4(Vec3Arg inOrigin, const RayInvDirection &inInvDirection, Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ)
{
	// Constants
	Vec4 flt_min = Vec4::sReplicate(-FLT_MAX);
	Vec4 flt_max = Vec4::sReplicate(FLT_MAX);

	// Origin
	Vec4 originx = inOrigin.SplatX();
	Vec4 originy = inOrigin.SplatY();
	Vec4 originz = inOrigin.SplatZ();

	// Parallel
	UVec4 parallelx = inInvDirection.mIsParallel.SplatX();
	UVec4 parallely = inInvDirection.mIsParallel.SplatY();
	UVec4 parallelz = inInvDirection.mIsParallel.SplatZ();

	// Inverse direction
	Vec4 invdirx = inInvDirection.mInvDirection.SplatX();
	Vec4 invdiry = inInvDirection.mInvDirection.SplatY();
	Vec4 invdirz = inInvDirection.mInvDirection.SplatZ();

	// Test against all three axes simultaneously.
	Vec4 t1x = (inBoundsMinX - originx) * invdirx;
	Vec4 t1y = (inBoundsMinY - originy) * invdiry;
	Vec4 t1z = (inBoundsMinZ - originz) * invdirz;
	Vec4 t2x = (inBoundsMaxX - originx) * invdirx;
	Vec4 t2y = (inBoundsMaxY - originy) * invdiry;
	Vec4 t2z = (inBoundsMaxZ - originz) * invdirz;

	// Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
	// use the results from any directions parallel to the slab.
	Vec4 t_minx = Vec4::sSelect(Vec4::sMin(t1x, t2x), flt_min, parallelx);
	Vec4 t_miny = Vec4::sSelect(Vec4::sMin(t1y, t2y), flt_min, parallely);
	Vec4 t_minz = Vec4::sSelect(Vec4::sMin(t1z, t2z), flt_min, parallelz);
	Vec4 t_maxx = Vec4::sSelect(Vec4::sMax(t1x, t2x), flt_max, parallelx);
	Vec4 t_maxy = Vec4::sSelect(Vec4::sMax(t1y, t2y), flt_max, parallely);
	Vec4 t_maxz = Vec4::sSelect(Vec4::sMax(t1z, t2z), flt_max, parallelz);

	// t_min.xyz = maximum(t_min.x, t_min.y, t_min.z);
	Vec4 t_min = Vec4::sMax(Vec4::sMax(t_minx, t_miny), t_minz);

	// t_max.xyz = minimum(t_max.x, t_max.y, t_max.z);
	Vec4 t_max = Vec4::sMin(Vec4::sMin(t_maxx, t_maxy), t_maxz);

	// if (t_min > t_max) return FLT_MAX;
	UVec4 no_intersection = Vec4::sGreater(t_min, t_max);

	// if (t_max < 0.0f) return FLT_MAX;
	no_intersection = UVec4::sOr(no_intersection, Vec4::sLess(t_max, Vec4::sZero()));

	// if bounds are invalid return FLOAT_MAX;
	// (a box with min > max on any axis is treated as empty, e.g. an unused slot in a node of 4)
	UVec4 bounds_invalid = UVec4::sOr(UVec4::sOr(Vec4::sGreater(inBoundsMinX, inBoundsMaxX), Vec4::sGreater(inBoundsMinY, inBoundsMaxY)), Vec4::sGreater(inBoundsMinZ, inBoundsMaxZ));
	no_intersection = UVec4::sOr(no_intersection, bounds_invalid);

	// if (inInvDirection.mIsParallel && !(Min <= inOrigin && inOrigin <= Max)) return FLT_MAX; else return t_min;
	UVec4 no_parallel_overlapx = UVec4::sAnd(parallelx, UVec4::sOr(Vec4::sLess(originx, inBoundsMinX), Vec4::sGreater(originx, inBoundsMaxX)));
	UVec4 no_parallel_overlapy = UVec4::sAnd(parallely, UVec4::sOr(Vec4::sLess(originy, inBoundsMinY), Vec4::sGreater(originy, inBoundsMaxY)));
	UVec4 no_parallel_overlapz = UVec4::sAnd(parallelz, UVec4::sOr(Vec4::sLess(originz, inBoundsMinZ), Vec4::sGreater(originz, inBoundsMaxZ)));
	no_intersection = UVec4::sOr(no_intersection, UVec4::sOr(UVec4::sOr(no_parallel_overlapx, no_parallel_overlapy), no_parallel_overlapz));
	return Vec4::sSelect(t_min, flt_max, no_intersection);
}
/// Intersect AABB with ray, returns minimal and maximal distance along ray or FLT_MAX, -FLT_MAX if no hit
/// Note: Can return negative value for outMin if ray starts in box
/// Same branchless slab test as RayAABox above, but also reports the exit distance.
JPH_INLINE void RayAABox(Vec3Arg inOrigin, const RayInvDirection &inInvDirection, Vec3Arg inBoundsMin, Vec3Arg inBoundsMax, float &outMin, float &outMax)
{
	// Constants
	Vec3 flt_min = Vec3::sReplicate(-FLT_MAX);
	Vec3 flt_max = Vec3::sReplicate(FLT_MAX);

	// Test against all three axes simultaneously.
	Vec3 t1 = (inBoundsMin - inOrigin) * inInvDirection.mInvDirection;
	Vec3 t2 = (inBoundsMax - inOrigin) * inInvDirection.mInvDirection;

	// Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
	// use the results from any directions parallel to the slab.
	Vec3 t_min = Vec3::sSelect(Vec3::sMin(t1, t2), flt_min, inInvDirection.mIsParallel);
	Vec3 t_max = Vec3::sSelect(Vec3::sMax(t1, t2), flt_max, inInvDirection.mIsParallel);

	// t_min.xyz = maximum(t_min.x, t_min.y, t_min.z);
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// t_max.xyz = minimum(t_max.x, t_max.y, t_max.z);
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// if (t_min > t_max) return FLT_MAX;
	UVec4 no_intersection = Vec3::sGreater(t_min, t_max);

	// if (t_max < 0.0f) return FLT_MAX;
	no_intersection = UVec4::sOr(no_intersection, Vec3::sLess(t_max, Vec3::sZero()));

	// if (inInvDirection.mIsParallel && !(Min <= inOrigin && inOrigin <= Max)) return FLT_MAX; else return t_min;
	UVec4 no_parallel_overlap = UVec4::sOr(Vec3::sLess(inOrigin, inBoundsMin), Vec3::sGreater(inOrigin, inBoundsMax));
	no_intersection = UVec4::sOr(no_intersection, UVec4::sAnd(inInvDirection.mIsParallel, no_parallel_overlap));

	// Fold the per-component rejection flags into every component so one select covers all cases
	no_intersection = UVec4::sOr(no_intersection, no_intersection.SplatY());
	no_intersection = UVec4::sOr(no_intersection, no_intersection.SplatZ());
	outMin = Vec3::sSelect(t_min, flt_max, no_intersection).GetX();
	outMax = Vec3::sSelect(t_max, flt_min, no_intersection).GetX();
}
/// Intersect AABB with ray, returns true if there is a hit closer than inClosest
/// Same branchless slab test as RayAABox above, specialized to a boolean answer with an early-out distance.
JPH_INLINE bool RayAABoxHits(Vec3Arg inOrigin, const RayInvDirection &inInvDirection, Vec3Arg inBoundsMin, Vec3Arg inBoundsMax, float inClosest)
{
	// Constants
	Vec3 flt_min = Vec3::sReplicate(-FLT_MAX);
	Vec3 flt_max = Vec3::sReplicate(FLT_MAX);

	// Test against all three axes simultaneously.
	Vec3 t1 = (inBoundsMin - inOrigin) * inInvDirection.mInvDirection;
	Vec3 t2 = (inBoundsMax - inOrigin) * inInvDirection.mInvDirection;

	// Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
	// use the results from any directions parallel to the slab.
	Vec3 t_min = Vec3::sSelect(Vec3::sMin(t1, t2), flt_min, inInvDirection.mIsParallel);
	Vec3 t_max = Vec3::sSelect(Vec3::sMax(t1, t2), flt_max, inInvDirection.mIsParallel);

	// t_min.xyz = maximum(t_min.x, t_min.y, t_min.z);
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_min = Vec3::sMax(t_min, t_min.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// t_max.xyz = minimum(t_max.x, t_max.y, t_max.z);
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>());
	t_max = Vec3::sMin(t_max, t_max.Swizzle<SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y>());

	// if (t_min > t_max) return false;
	UVec4 no_intersection = Vec3::sGreater(t_min, t_max);

	// if (t_max < 0.0f) return false;
	no_intersection = UVec4::sOr(no_intersection, Vec3::sLess(t_max, Vec3::sZero()));

	// if (t_min > inClosest) return false;
	no_intersection = UVec4::sOr(no_intersection, Vec3::sGreater(t_min, Vec3::sReplicate(inClosest)));

	// if (inInvDirection.mIsParallel && !(Min <= inOrigin && inOrigin <= Max)) return false; else return true;
	UVec4 no_parallel_overlap = UVec4::sOr(Vec3::sLess(inOrigin, inBoundsMin), Vec3::sGreater(inOrigin, inBoundsMax));
	no_intersection = UVec4::sOr(no_intersection, UVec4::sAnd(inInvDirection.mIsParallel, no_parallel_overlap));

	// A hit only when none of the three components flagged a rejection
	return !no_intersection.TestAnyXYZTrue();
}
/// Intersect AABB with ray without hit fraction, based on separating axis test
/// @see http://www.codercorner.com/RayAABB.cpp
/// @param inOrigin Origin of the ray
/// @param inDirection Direction of the ray. Does not need to be normalized.
/// @param inBoundsMin Minimum corner of the box
/// @param inBoundsMax Maximum corner of the box
/// @return True if the (infinite) ray intersects the box
JPH_INLINE bool RayAABoxHits(Vec3Arg inOrigin, Vec3Arg inDirection, Vec3Arg inBoundsMin, Vec3Arg inBoundsMax)
{
	// Size of the box and vector from box center to ray origin, both scaled by 2 to avoid a 0.5f multiply
	Vec3 extents = inBoundsMax - inBoundsMin;
	Vec3 diff = 2.0f * inOrigin - inBoundsMin - inBoundsMax;
	Vec3 abs_diff = diff.Abs();

	// Separating axis test on the box' face normals: no hit if the origin lies outside a slab
	// and the ray points away from the box on that axis
	UVec4 no_intersection = UVec4::sAnd(Vec3::sGreater(abs_diff, extents), Vec3::sGreaterOrEqual(diff * inDirection, Vec3::sZero()));

	// Separating axis test on the cross products of the ray direction and the box edge directions
	Vec3 abs_dir = inDirection.Abs();
	Vec3 abs_dir_yzz = abs_dir.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z>();
	Vec3 abs_dir_xyx = abs_dir.Swizzle<SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X>();
	Vec3 extents_yzz = extents.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z>();
	Vec3 extents_xyx = extents.Swizzle<SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X>();
	Vec3 diff_yzx = diff.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>();
	Vec3 dir_yzx = inDirection.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X>();
	no_intersection = UVec4::sOr(no_intersection, Vec3::sGreater((inDirection * diff_yzx - dir_yzx * diff).Abs(), extents_xyx * abs_dir_yzz + extents_yzz * abs_dir_xyx));
	return !no_intersection.TestAnyXYZTrue();
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,37 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/RayCylinder.h>
#include <Jolt/Geometry/RaySphere.h>
JPH_NAMESPACE_BEGIN
/// Tests a ray starting at inRayOrigin and extending infinitely in inRayDirection
/// against a capsule centered around the origin with its axis along the Y axis and half height specified.
/// @return FLT_MAX if there is no intersection, otherwise the fraction along the ray.
/// @param inRayOrigin Origin of the ray. If the ray starts inside the capsule, the returned fraction will be 0.
/// @param inRayDirection Ray direction. Does not need to be normalized.
/// @param inCapsuleHalfHeight Distance from the origin to the center of the top sphere (or that of the bottom)
/// @param inCapsuleRadius Radius of the top/bottom sphere
JPH_INLINE float RayCapsule(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, float inCapsuleHalfHeight, float inCapsuleRadius)
{
	// First intersect with the infinite cylinder that contains the capsule's straight section
	float wall_fraction = RayCylinder(inRayOrigin, inRayDirection, inCapsuleRadius);
	if (wall_fraction == FLT_MAX)
		return FLT_MAX; // Missing the infinite cylinder means missing the capsule too

	// Accept the hit when it lands within the cylindrical part of the capsule
	float hit_y = inRayOrigin.GetY() + wall_fraction * inRayDirection.GetY();
	if (abs(hit_y) <= inCapsuleHalfHeight)
		return wall_fraction;

	// Otherwise the closest hit (if any) is on one of the two spherical end caps
	Vec3 top_center(0, inCapsuleHalfHeight, 0);
	float top = RaySphere(inRayOrigin, inRayDirection, top_center, inCapsuleRadius);
	float bottom = RaySphere(inRayOrigin, inRayDirection, -top_center, inCapsuleRadius);
	return min(top, bottom);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,101 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Math/FindRoot.h>
JPH_NAMESPACE_BEGIN
/// Tests a ray starting at inRayOrigin and extending infinitely in inRayDirection
/// against an infinite cylinder centered along the Y axis
/// @return FLT_MAX if there is no intersection, otherwise the fraction along the ray.
/// @param inRayOrigin Origin of the ray. If the ray starts inside the cylinder, the returned fraction will be 0.
/// @param inRayDirection Direction of the ray. Does not need to be normalized.
/// @param inCylinderRadius Radius of the infinite cylinder
JPH_INLINE float RayCylinder(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, float inCylinderRadius)
{
	// Project everything onto the XZ plane; the cylinder is infinite along Y so Y is irrelevant here
	UVec4 mask_y = UVec4(0, 0xffffffff, 0, 0);
	Vec3 origin_xz = Vec3::sSelect(inRayOrigin, Vec3::sZero(), mask_y);
	float origin_dist_sq = origin_xz.LengthSq();
	float radius_sq = Square(inCylinderRadius);

	// A ray that starts within the infinite cylinder hits immediately at fraction 0
	if (origin_dist_sq <= radius_sq)
		return 0.0f;

	// Ray starts outside, solve |origin_xz + t * direction_xz|^2 = radius^2 for t
	Vec3 direction_xz = Vec3::sSelect(inRayDirection, Vec3::sZero(), mask_y);
	float a = direction_xz.LengthSq();
	float b = 2.0f * origin_xz.Dot(direction_xz);
	float c = origin_dist_sq - radius_sq;
	float root1, root2;
	if (FindRoot(a, b, c, root1, root2) == 0)
		return FLT_MAX; // The ray line never crosses the cylinder wall

	// The smaller root is where the ray enters the circle in the XZ plane;
	// a negative entry means the cylinder lies entirely behind the ray start
	float entry = min(root1, root2);
	return entry >= 0.0f? entry : FLT_MAX;
}
/// Test a ray against a cylinder centered around the origin with its axis along the Y axis and half height specified.
/// @return FLT_MAX if there is no intersection, otherwise the fraction along the ray.
/// @param inRayOrigin Origin of the ray. If the ray starts inside the cylinder, the returned fraction will be 0.
/// @param inRayDirection Ray direction. Does not need to be normalized.
/// @param inCylinderHalfHeight Distance from the origin to the top (or bottom) of the cylinder
/// @param inCylinderRadius Radius of the cylinder
JPH_INLINE float RayCylinder(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, float inCylinderHalfHeight, float inCylinderRadius)
{
	// Start with the infinite cylinder around the Y axis
	float wall_fraction = RayCylinder(inRayOrigin, inRayDirection, inCylinderRadius);
	if (wall_fraction == FLT_MAX)
		return FLT_MAX; // No hit on the infinite cylinder: no hit on the finite one either

	// The wall hit counts when it falls between the top and bottom planes
	if (abs(inRayOrigin.GetY() + wall_fraction * inRayDirection.GetY()) <= inCylinderHalfHeight)
		return wall_fraction;

	// The wall hit was outside [-half_height, half_height], so the only remaining candidates
	// are the top and bottom caps. A ray with no Y motion can never reach either cap plane.
	float direction_y = inRayDirection.GetY();
	if (direction_y == 0.0f)
		return FLT_MAX;

	// Intersect with the cap plane that faces the ray: moving down (direction_y < 0) the ray can
	// only enter through the top plane y = +half_height, moving up only through y = -half_height.
	// Derived from the line equation x = ray_origin + fraction * ray_direction and the
	// plane equation plane_normal . x + plane_constant = 0.
	float origin_y = inRayOrigin.GetY();
	float cap_fraction = direction_y < 0.0f?
		(inCylinderHalfHeight - origin_y) / direction_y :
		-(inCylinderHalfHeight + origin_y) / direction_y;

	// Reject cap hits behind the ray start
	if (cap_fraction < 0.0f)
		return FLT_MAX;

	// Accept only when the plane hit lies within the circular cap
	Vec3 hit_point = inRayOrigin + cap_fraction * inRayDirection;
	if (Square(hit_point.GetX()) + Square(hit_point.GetZ()) <= Square(inCylinderRadius))
		return cap_fraction;

	// Plane hit was outside the cap circle
	return FLT_MAX;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,96 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Math/FindRoot.h>
JPH_NAMESPACE_BEGIN
/// Tests a ray starting at inRayOrigin and extending infinitely in inRayDirection against a sphere.
/// @return FLT_MAX if there is no intersection, otherwise the fraction along the ray.
/// @param inRayOrigin Ray origin. If the ray starts inside the sphere, the returned fraction will be 0.
/// @param inRayDirection Ray direction. Does not need to be normalized.
/// @param inSphereCenter Position of the center of the sphere
/// @param inSphereRadius Radius of the sphere
JPH_INLINE float RaySphere(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, Vec3Arg inSphereCenter, float inSphereRadius)
{
	// Solve: |RayOrigin + fraction * RayDirection - SphereCenter|^2 = SphereRadius^2 for fraction
	Vec3 center_to_origin = inRayOrigin - inSphereCenter;
	float a = inRayDirection.LengthSq();
	float b = 2.0f * inRayDirection.Dot(center_to_origin);
	float c = center_to_origin.LengthSq() - inSphereRadius * inSphereRadius;
	float root1, root2;
	if (FindRoot(a, b, c, root1, root2) == 0)
	{
		// No roots: either the ray misses entirely, or it starts inside the
		// sphere (c <= 0) with a (near) zero length direction
		return c <= 0.0f? 0.0f : FLT_MAX;
	}

	// Order the roots so that the entry point comes before the exit point
	float entry = min(root1, root2);
	float exit = max(root1, root2);

	// Entry point in front of the ray start: that is our hit
	if (entry >= 0.0f)
		return entry;

	// Entry behind us but exit in front: the ray starts inside the sphere
	if (exit >= 0.0f)
		return 0.0f;

	// Both intersections lie behind the ray start
	return FLT_MAX;
}
/// Tests a ray starting at inRayOrigin and extending infinitely in inRayDirection against a sphere.
/// Outputs entry and exit points (outMinFraction and outMaxFraction) along the ray (which could be negative if the hit point is before the start of the ray).
/// @param inRayOrigin Ray origin. If the ray starts inside the sphere, the returned fraction will be 0.
/// @param inRayDirection Ray direction. Does not need to be normalized.
/// @param inSphereCenter Position of the center of the sphere.
/// @param inSphereRadius Radius of the sphere.
/// @param outMinFraction Returned lowest intersection fraction
/// @param outMaxFraction Returned highest intersection fraction
/// @return The amount of intersections with the sphere.
/// If 1 intersection is returned outMinFraction will be equal to outMaxFraction
JPH_INLINE int RaySphere(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, Vec3Arg inSphereCenter, float inSphereRadius, float &outMinFraction, float &outMaxFraction)
{
	// Solve: |RayOrigin + fraction * RayDirection - SphereCenter|^2 = SphereRadius^2 for fraction
	Vec3 center_to_origin = inRayOrigin - inSphereCenter;
	float a = inRayDirection.LengthSq();
	float b = 2.0f * inRayDirection.Dot(center_to_origin);
	float c = center_to_origin.LengthSq() - inSphereRadius * inSphereRadius;
	float root1, root2;
	int num_roots = FindRoot(a, b, c, root1, root2);

	if (num_roots >= 2)
	{
		// Ray enters and exits the sphere, report the entry point first
		outMinFraction = min(root1, root2);
		outMaxFraction = max(root1, root2);
		return 2;
	}

	if (num_roots == 1)
	{
		// Ray grazes the sphere in a single point
		outMinFraction = outMaxFraction = root1;
		return 1;
	}

	// No roots but origin inside the sphere: report a single hit at fraction 0
	// (happens when the direction has (near) zero length)
	if (c <= 0.0f)
	{
		outMinFraction = outMaxFraction = 0.0f;
		return 1;
	}

	// Origin outside of the sphere and no intersection; out parameters are left untouched
	return 0;
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,158 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// Intersect ray with triangle, returns closest point or FLT_MAX if no hit (branch less version)
/// Adapted from: http://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm
/// All intermediate scalars are splatted across Vec3 lanes so that the rejection
/// conditions can be combined with vector compares/ors without any branches.
/// @param inOrigin Origin of the ray
/// @param inDirection Direction of the ray. Does not need to be normalized.
/// @param inV0 First vertex of the triangle
/// @param inV1 Second vertex of the triangle
/// @param inV2 Third vertex of the triangle
/// @return Fraction along the ray where the triangle is hit, FLT_MAX if there is no hit
JPH_INLINE float RayTriangle(Vec3Arg inOrigin, Vec3Arg inDirection, Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2)
{
	// Epsilon
	Vec3 epsilon = Vec3::sReplicate(1.0e-12f);

	// Zero & one
	Vec3 zero = Vec3::sZero();
	Vec3 one = Vec3::sOne();

	// Find vectors for two edges sharing inV0
	Vec3 e1 = inV1 - inV0;
	Vec3 e2 = inV2 - inV0;

	// Begin calculating determinant - also used to calculate u parameter
	Vec3 p = inDirection.Cross(e2);

	// if determinant is near zero, ray lies in plane of triangle
	Vec3 det = Vec3::sReplicate(e1.Dot(p));

	// Check if determinant is near zero
	UVec4 det_near_zero = Vec3::sLess(det.Abs(), epsilon);

	// When the determinant is near zero, set it to one to avoid dividing by zero
	// (the result is discarded later through the det_near_zero mask)
	det = Vec3::sSelect(det, Vec3::sOne(), det_near_zero);

	// Calculate distance from inV0 to ray origin
	Vec3 s = inOrigin - inV0;

	// Calculate u parameter (first barycentric coordinate)
	Vec3 u = Vec3::sReplicate(s.Dot(p)) / det;

	// Prepare to test v parameter
	Vec3 q = s.Cross(e1);

	// Calculate v parameter (second barycentric coordinate)
	Vec3 v = Vec3::sReplicate(inDirection.Dot(q)) / det;

	// Get intersection point (fraction along the ray)
	Vec3 t = Vec3::sReplicate(e2.Dot(q)) / det;

	// Check if there is an intersection: reject when the ray is (near) parallel to the
	// triangle plane, the barycentric coordinates fall outside the triangle, or the
	// intersection lies behind the ray origin
	UVec4 no_intersection =
		UVec4::sOr
		(
			UVec4::sOr
			(
				UVec4::sOr
				(
					det_near_zero,
					Vec3::sLess(u, zero)
				),
				UVec4::sOr
				(
					Vec3::sLess(v, zero),
					Vec3::sGreater(u + v, one)
				)
			),
			Vec3::sLess(t, zero)
		);

	// Select intersection point or FLT_MAX based on if there is an intersection or not
	return Vec3::sSelect(t, Vec3::sReplicate(FLT_MAX), no_intersection).GetX();
}
/// Intersect ray with 4 triangles in SOA format, returns 4 vector of closest points or FLT_MAX if no hit (uses bit tricks to do less divisions)
/// The triangles are passed as separate X/Y/Z component vectors, one lane per triangle.
/// @param inOrigin Origin of the ray
/// @param inDirection Direction of the ray. Does not need to be normalized.
/// @param inV0X,inV0Y,inV0Z First vertex of each of the 4 triangles
/// @param inV1X,inV1Y,inV1Z Second vertex of each of the 4 triangles
/// @param inV2X,inV2Y,inV2Z Third vertex of each of the 4 triangles
/// @return Per-lane fraction along the ray, FLT_MAX in lanes with no hit
JPH_INLINE Vec4 RayTriangle4(Vec3Arg inOrigin, Vec3Arg inDirection, Vec4Arg inV0X, Vec4Arg inV0Y, Vec4Arg inV0Z, Vec4Arg inV1X, Vec4Arg inV1Y, Vec4Arg inV1Z, Vec4Arg inV2X, Vec4Arg inV2Y, Vec4Arg inV2Z)
{
	// Epsilon
	Vec4 epsilon = Vec4::sReplicate(1.0e-12f);

	// Zero
	Vec4 zero = Vec4::sZero();

	// Find vectors for two edges sharing inV0
	Vec4 e1x = inV1X - inV0X;
	Vec4 e1y = inV1Y - inV0Y;
	Vec4 e1z = inV1Z - inV0Z;
	Vec4 e2x = inV2X - inV0X;
	Vec4 e2y = inV2Y - inV0Y;
	Vec4 e2z = inV2Z - inV0Z;

	// Get direction vector components
	Vec4 dx = inDirection.SplatX();
	Vec4 dy = inDirection.SplatY();
	Vec4 dz = inDirection.SplatZ();

	// Begin calculating determinant - also used to calculate u parameter (p = direction x e2)
	Vec4 px = dy * e2z - dz * e2y;
	Vec4 py = dz * e2x - dx * e2z;
	Vec4 pz = dx * e2y - dy * e2x;

	// if determinant is near zero, ray lies in plane of triangle
	Vec4 det = e1x * px + e1y * py + e1z * pz;

	// Get sign bit for determinant and make positive.
	// Working with |det| and sign-flipped u/v/t postpones all divisions to a single one at the end.
	Vec4 det_sign = Vec4::sAnd(det, UVec4::sReplicate(0x80000000).ReinterpretAsFloat());
	det = Vec4::sXor(det, det_sign);

	// Check which determinants are near zero
	UVec4 det_near_zero = Vec4::sLess(det, epsilon);

	// Set components of the determinant to 1 that are near zero to avoid dividing by zero
	// (those lanes are discarded later through the det_near_zero mask)
	det = Vec4::sSelect(det, Vec4::sOne(), det_near_zero);

	// Calculate distance from inV0 to ray origin
	Vec4 sx = inOrigin.SplatX() - inV0X;
	Vec4 sy = inOrigin.SplatY() - inV0Y;
	Vec4 sz = inOrigin.SplatZ() - inV0Z;

	// Calculate u parameter (scaled by det) and flip sign if determinant was negative
	Vec4 u = Vec4::sXor(sx * px + sy * py + sz * pz, det_sign);

	// Prepare to test v parameter (q = s x e1)
	Vec4 qx = sy * e1z - sz * e1y;
	Vec4 qy = sz * e1x - sx * e1z;
	Vec4 qz = sx * e1y - sy * e1x;

	// Calculate v parameter (scaled by det) and flip sign if determinant was negative
	Vec4 v = Vec4::sXor(dx * qx + dy * qy + dz * qz, det_sign);

	// Get intersection point (scaled by det) and flip sign if determinant was negative
	Vec4 t = Vec4::sXor(e2x * qx + e2y * qy + e2z * qz, det_sign);

	// Check if there is an intersection: since u and v are still scaled by det,
	// the barycentric check u + v <= 1 becomes u + v <= det
	UVec4 no_intersection =
		UVec4::sOr
		(
			UVec4::sOr
			(
				UVec4::sOr
				(
					det_near_zero,
					Vec4::sLess(u, zero)
				),
				UVec4::sOr
				(
					Vec4::sLess(v, zero),
					Vec4::sGreater(u + v, det)
				)
			),
			Vec4::sLess(t, zero)
		);

	// Select intersection point (dividing out det at last) or FLT_MAX based on if there is an intersection or not
	return Vec4::sSelect(t / det, Vec4::sReplicate(FLT_MAX), no_intersection);
}
JPH_NAMESPACE_END

View File

@@ -0,0 +1,72 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
#include <Jolt/Geometry/AABox.h>
JPH_NAMESPACE_BEGIN
class [[nodiscard]] Sphere
{
public:
JPH_OVERRIDE_NEW_DELETE
/// Constructor
inline Sphere() = default;
inline Sphere(const Float3 &inCenter, float inRadius) : mCenter(inCenter), mRadius(inRadius) { }
inline Sphere(Vec3Arg inCenter, float inRadius) : mRadius(inRadius) { inCenter.StoreFloat3(&mCenter); }
/// Calculate the support vector for this convex shape.
inline Vec3 GetSupport(Vec3Arg inDirection) const
{
float length = inDirection.Length();
return length > 0.0f ? Vec3::sLoadFloat3Unsafe(mCenter) + (mRadius/ length) * inDirection : Vec3::sLoadFloat3Unsafe(mCenter);
}
// Properties
inline Vec3 GetCenter() const { return Vec3::sLoadFloat3Unsafe(mCenter); }
inline float GetRadius() const { return mRadius; }
/// Test if two spheres overlap
inline bool Overlaps(const Sphere &inB) const
{
return (Vec3::sLoadFloat3Unsafe(mCenter) - Vec3::sLoadFloat3Unsafe(inB.mCenter)).LengthSq() <= Square(mRadius + inB.mRadius);
}
/// Check if this sphere overlaps with a box
inline bool Overlaps(const AABox &inOther) const
{
return inOther.GetSqDistanceTo(GetCenter()) <= Square(mRadius);
}
/// Create the minimal sphere that encapsulates this sphere and inPoint
inline void EncapsulatePoint(Vec3Arg inPoint)
{
// Calculate distance between point and center
Vec3 center = GetCenter();
Vec3 d_vec = inPoint - center;
float d_sq = d_vec.LengthSq();
if (d_sq > Square(mRadius))
{
// It is further away than radius, we need to widen the sphere
// The diameter of the new sphere is radius + d, so the new radius is half of that
float d = sqrt(d_sq);
float radius = 0.5f * (mRadius + d);
// The center needs to shift by new radius - old radius in the direction of d
center += (radius - mRadius) / d * d_vec;
// Store new sphere
center.StoreFloat3(&mCenter);
mRadius = radius;
}
}
private:
Float3 mCenter;
float mRadius;
};
JPH_NAMESPACE_END

View File

@@ -0,0 +1,34 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
JPH_NAMESPACE_BEGIN
/// A simple triangle and its material
class Triangle
{
public:
	JPH_OVERRIDE_NEW_DELETE

	/// Constructor
	Triangle() = default;
	/// Construct from Float3 vertices (copied directly into mV)
	Triangle(const Float3 &inV1, const Float3 &inV2, const Float3 &inV3, uint32 inMaterialIndex = 0, uint32 inUserData = 0) : mV { inV1, inV2, inV3 }, mMaterialIndex(inMaterialIndex), mUserData(inUserData) { }
	/// Construct from Vec3 vertices (stored into mV through StoreFloat3)
	Triangle(Vec3Arg inV1, Vec3Arg inV2, Vec3Arg inV3, uint32 inMaterialIndex = 0, uint32 inUserData = 0) : mMaterialIndex(inMaterialIndex), mUserData(inUserData) { inV1.StoreFloat3(&mV[0]); inV2.StoreFloat3(&mV[1]); inV3.StoreFloat3(&mV[2]); }

	/// Get center of triangle (average of the three vertices)
	Vec3 GetCentroid() const
	{
		return (Vec3::sLoadFloat3Unsafe(mV[0]) + Vec3::sLoadFloat3Unsafe(mV[1]) + Vec3::sLoadFloat3Unsafe(mV[2])) * (1.0f / 3.0f);
	}

	/// Vertices
	Float3 mV[3];
	uint32 mMaterialIndex = 0; ///< Follows mV[3] so that we can read mV as 4 vectors
	uint32 mUserData = 0; ///< User data that can be used for anything by the application, e.g. for tracking the original index of the triangle
};

/// A list of triangles
using TriangleList = Array<Triangle>;
JPH_NAMESPACE_END

16
thirdparty/jolt_physics/Jolt/Jolt.h vendored Normal file
View File

@@ -0,0 +1,16 @@
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
#pragma once
// Project includes
#include <Jolt/Core/Core.h>
#include <Jolt/Core/ARMNeon.h>
#include <Jolt/Core/Memory.h>
#include <Jolt/Core/IssueReporting.h>
#include <Jolt/Core/Array.h>
#include <Jolt/Math/Math.h>
#include <Jolt/Math/Vec4.h>
#include <Jolt/Math/Mat44.h>
#include <Jolt/Math/Real.h>

116
thirdparty/jolt_physics/Jolt/Jolt.natvis vendored Normal file
View File

@@ -0,0 +1,116 @@
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="JPH::Color">
<DisplayString>r={(int)r}, g={(int)g}, b={(int)b}, a={(int)a}</DisplayString>
</Type>
<Type Name="JPH::Float2">
<DisplayString>{x}, {y}</DisplayString>
</Type>
<Type Name="JPH::Float3">
<DisplayString>{x}, {y}, {z}</DisplayString>
</Type>
<Type Name="JPH::Float4">
<DisplayString>{x}, {y}, {z}, {w}</DisplayString>
</Type>
<Type Name="JPH::Vec3">
<DisplayString>{mF32[0]}, {mF32[1]}, {mF32[2]}, L^2={mF32[0]*mF32[0]+mF32[1]*mF32[1]+mF32[2]*mF32[2]}</DisplayString>
</Type>
<Type Name="JPH::DVec3">
<DisplayString>{mF64[0]}, {mF64[1]}, {mF64[2]}, L^2={mF64[0]*mF64[0]+mF64[1]*mF64[1]+mF64[2]*mF64[2]}</DisplayString>
</Type>
<Type Name="JPH::Vec4">
<DisplayString>{mF32[0]}, {mF32[1]}, {mF32[2]}, {mF32[3]}, L^2={mF32[0]*mF32[0]+mF32[1]*mF32[1]+mF32[2]*mF32[2]+mF32[3]*mF32[3]}</DisplayString>
</Type>
<Type Name="JPH::UVec4">
<DisplayString>{mU32[0]}, {mU32[1]}, {mU32[2]}, {mU32[3]}</DisplayString>
</Type>
<Type Name="JPH::BVec16">
<DisplayString>{uint(mU8[0])}, {uint(mU8[1])}, {uint(mU8[2])}, {uint(mU8[3])}, {uint(mU8[4])}, {uint(mU8[5])}, {uint(mU8[6])}, {uint(mU8[7])}, {uint(mU8[8])}, {uint(mU8[9])}, {uint(mU8[10])}, {uint(mU8[11])}, {uint(mU8[12])}, {uint(mU8[13])}, {uint(mU8[14])}, {uint(mU8[15])}</DisplayString>
</Type>
<Type Name="JPH::Quat">
<DisplayString>{mValue}</DisplayString>
</Type>
<Type Name="JPH::Mat44">
<DisplayString>{mCol[0].mF32[0]}, {mCol[1].mF32[0]}, {mCol[2].mF32[0]}, {mCol[3].mF32[0]} | {mCol[0].mF32[1]}, {mCol[1].mF32[1]}, {mCol[2].mF32[1]}, {mCol[3].mF32[1]} | {mCol[0].mF32[2]}, {mCol[1].mF32[2]}, {mCol[2].mF32[2]}, {mCol[3].mF32[2]}</DisplayString>
<Expand>
<Synthetic Name="[Row 0]">
<DisplayString>{mCol[0].mF32[0]}, {mCol[1].mF32[0]}, {mCol[2].mF32[0]}, {mCol[3].mF32[0]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 1]">
<DisplayString>{mCol[0].mF32[1]}, {mCol[1].mF32[1]}, {mCol[2].mF32[1]}, {mCol[3].mF32[1]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 2]">
<DisplayString>{mCol[0].mF32[2]}, {mCol[1].mF32[2]}, {mCol[2].mF32[2]}, {mCol[3].mF32[2]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 3]">
<DisplayString>{mCol[0].mF32[3]}, {mCol[1].mF32[3]}, {mCol[2].mF32[3]}, {mCol[3].mF32[3]}</DisplayString>
</Synthetic>
</Expand>
</Type>
<Type Name="JPH::DMat44">
<DisplayString>{mCol[0].mF32[0]}, {mCol[1].mF32[0]}, {mCol[2].mF32[0]}, {mCol3.mF64[0]} | {mCol[0].mF32[1]}, {mCol[1].mF32[1]}, {mCol[2].mF32[1]}, {mCol3.mF64[1]} | {mCol[0].mF32[2]}, {mCol[1].mF32[2]}, {mCol[2].mF32[2]}, {mCol3.mF64[2]}</DisplayString>
<Expand>
<Synthetic Name="[Row 0]">
<DisplayString>{mCol[0].mF32[0]}, {mCol[1].mF32[0]}, {mCol[2].mF32[0]}, {mCol3.mF64[0]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 1]">
<DisplayString>{mCol[0].mF32[1]}, {mCol[1].mF32[1]}, {mCol[2].mF32[1]}, {mCol3.mF64[1]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 2]">
<DisplayString>{mCol[0].mF32[2]}, {mCol[1].mF32[2]}, {mCol[2].mF32[2]}, {mCol3.mF64[2]}</DisplayString>
</Synthetic>
<Synthetic Name="[Row 3]">
<DisplayString>{mCol[0].mF32[3]}, {mCol[1].mF32[3]}, {mCol[2].mF32[3]}, 1</DisplayString>
</Synthetic>
</Expand>
</Type>
<Type Name="JPH::AABox">
<DisplayString>min=({mMin}), max=({mMax})</DisplayString>
</Type>
<Type Name="JPH::BodyID">
<DisplayString>idx={mID &amp; 0x007fffff}, seq={(mID >> 23) &amp; 0xff}, in_bp={mID >> 24,d}</DisplayString>
</Type>
<Type Name="JPH::Body">
<DisplayString>{mDebugName}: p=({mPosition.mF32[0],g}, {mPosition.mF32[1],g}, {mPosition.mF32[2],g}), r=({mRotation.mValue.mF32[0],g}, {mRotation.mValue.mF32[1],g}, {mRotation.mValue.mF32[2],g}, {mRotation.mValue.mF32[3],g}), v=({mLinearVelocity.mF32[0],g}, {mLinearVelocity.mF32[1],g}, {mLinearVelocity.mF32[2],g}), w=({mAngularVelocity.mF32[0],g}, {mAngularVelocity.mF32[1],g}, {mAngularVelocity.mF32[2],g})</DisplayString>
</Type>
<Type Name="JPH::BodyManager">
<DisplayString>bodies={mBodies._Mypair._Myval2._Mylast - mBodies._Mypair._Myval2._Myfirst}, active={mActiveBodies._Mypair._Myval2._Mylast - mActiveBodies._Mypair._Myval2._Myfirst}</DisplayString>
</Type>
<Type Name="JPH::StaticArray&lt;*&gt;">
<DisplayString>size={mSize}</DisplayString>
<Expand>
<Item Name="[size]" ExcludeView="simple">mSize</Item>
<ArrayItems>
<Size>mSize</Size>
<ValuePointer>(value_type *)mElements</ValuePointer>
</ArrayItems>
</Expand>
</Type>
<Type Name="JPH::Array&lt;*&gt;">
<DisplayString>size={mSize}</DisplayString>
<Expand>
<Item Name="[size]" ExcludeView="simple">mSize</Item>
<Item Name="[capacity]" ExcludeView="simple">mCapacity</Item>
<ArrayItems>
<Size>mSize</Size>
<ValuePointer>mElements</ValuePointer>
</ArrayItems>
</Expand>
</Type>
<Type Name="JPH::HashTable&lt;*&gt;">
<DisplayString>size={mSize}</DisplayString>
<Expand>
<Item Name="[size]" ExcludeView="simple">mSize</Item>
<Item Name="[bucket_count]" ExcludeView="simple">mMaxSize</Item>
<IndexListItems Condition="mData != nullptr">
<Size>mMaxSize</Size>
<ValueNode Condition="mControl[$i] &amp; 0x80">mData[$i]</ValueNode>
<ValueNode Condition="mControl[$i] == 0">"--Empty--"</ValueNode>
<ValueNode Condition="mControl[$i] == 0x7f">"--Deleted--"</ValueNode>
</IndexListItems>
</Expand>
</Type>
<Type Name="JPH::StridedPtr&lt;*&gt;">
<DisplayString>{(value_type *)mPtr}, stride={mStride}</DisplayString>
</Type>
</AutoVisualizer>

Some files were not shown because too many files have changed in this diff Show More