initial commit, 4.5 stable
thirdparty/meshoptimizer/LICENSE.md (vendored, new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2016-2025 Arseny Kapoulkine

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
thirdparty/meshoptimizer/allocator.cpp (vendored, new file, 8 lines)
@@ -0,0 +1,8 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*))
{
    meshopt_Allocator::Storage::allocate = allocate;
    meshopt_Allocator::Storage::deallocate = deallocate;
}
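Aside (not part of the diff): a minimal sketch of how an embedder could hook these global allocation callbacks. The counting wrapper and its names below are illustrative assumptions; only meshopt_setAllocator and MESHOPTIMIZER_ALLOC_CALLCONV come from the header being vendored.

#include <stdlib.h>

#include "meshoptimizer.h"

// hypothetical wrapper that tracks how many bytes meshoptimizer has requested (not thread-safe)
static size_t total_allocated = 0;

static void* MESHOPTIMIZER_ALLOC_CALLCONV counting_allocate(size_t size)
{
    total_allocated += size;
    return malloc(size);
}

static void MESHOPTIMIZER_ALLOC_CALLCONV counting_deallocate(void* ptr)
{
    free(ptr);
}

void install_counting_allocator()
{
    // all subsequent meshopt_* calls use these callbacks for their temporary storage
    meshopt_setAllocator(counting_allocate, counting_deallocate);
}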
thirdparty/meshoptimizer/clusterizer.cpp (vendored, new file, 1696 lines)
File diff suppressed because it is too large.
thirdparty/meshoptimizer/indexanalyzer.cpp (vendored, new file, 126 lines)
@@ -0,0 +1,126 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
{
    assert(index_count % 3 == 0);
    assert(cache_size >= 3);
    assert(warp_size == 0 || warp_size >= 3);

    meshopt_Allocator allocator;

    meshopt_VertexCacheStatistics result = {};

    unsigned int warp_offset = 0;
    unsigned int primgroup_offset = 0;

    unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
    memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

    unsigned int timestamp = cache_size + 1;

    for (size_t i = 0; i < index_count; i += 3)
    {
        unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        bool ac = (timestamp - cache_timestamps[a]) > cache_size;
        bool bc = (timestamp - cache_timestamps[b]) > cache_size;
        bool cc = (timestamp - cache_timestamps[c]) > cache_size;

        // flush cache if triangle doesn't fit into warp or into the primitive buffer
        if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
        {
            result.warps_executed += warp_offset > 0;

            warp_offset = 0;
            primgroup_offset = 0;

            // reset cache
            timestamp += cache_size + 1;
        }

        // update cache and add vertices to warp
        for (int j = 0; j < 3; ++j)
        {
            unsigned int index = indices[i + j];

            if (timestamp - cache_timestamps[index] > cache_size)
            {
                cache_timestamps[index] = timestamp++;
                result.vertices_transformed++;
                warp_offset++;
            }
        }

        primgroup_offset++;
    }

    size_t unique_vertex_count = 0;

    for (size_t i = 0; i < vertex_count; ++i)
        unique_vertex_count += cache_timestamps[i] > 0;

    result.warps_executed += warp_offset > 0;

    result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
    result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);

    return result;
}

meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
    assert(index_count % 3 == 0);
    assert(vertex_size > 0 && vertex_size <= 256);

    meshopt_Allocator allocator;

    meshopt_VertexFetchStatistics result = {};

    unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
    memset(vertex_visited, 0, vertex_count);

    const size_t kCacheLine = 64;
    const size_t kCacheSize = 128 * 1024;

    // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
    size_t cache[kCacheSize / kCacheLine] = {};

    for (size_t i = 0; i < index_count; ++i)
    {
        unsigned int index = indices[i];
        assert(index < vertex_count);

        vertex_visited[index] = 1;

        size_t start_address = index * vertex_size;
        size_t end_address = start_address + vertex_size;

        size_t start_tag = start_address / kCacheLine;
        size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;

        assert(start_tag < end_tag);

        for (size_t tag = start_tag; tag < end_tag; ++tag)
        {
            size_t line = tag % (sizeof(cache) / sizeof(cache[0]));

            // we store +1 since cache is filled with 0 by default
            result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
            cache[line] = tag + 1;
        }
    }

    size_t unique_vertex_count = 0;

    for (size_t i = 0; i < vertex_count; ++i)
        unique_vertex_count += vertex_visited[i];

    result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);

    return result;
}
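Aside (not part of the diff): a minimal usage sketch for the two analyzers above. The 16-entry cache size is an assumed baseline, and warp/primgroup modeling is disabled by passing 0.

#include <stdio.h>

#include <vector>

#include "meshoptimizer.h"

void report_mesh_stats(const std::vector<unsigned int>& indices, size_t vertex_count, size_t vertex_size)
{
    // 16-entry FIFO is a common baseline for ACMR measurements; 0/0 disables warp and primgroup modeling
    meshopt_VertexCacheStatistics vcs = meshopt_analyzeVertexCache(indices.data(), indices.size(), vertex_count, 16, 0, 0);

    // models the 128 KB direct-mapped cache with 64-byte lines implemented above
    meshopt_VertexFetchStatistics vfs = meshopt_analyzeVertexFetch(indices.data(), indices.size(), vertex_count, vertex_size);

    printf("ACMR %.3f, ATVR %.3f, overfetch %.3f\n", vcs.acmr, vcs.atvr, vfs.overfetch);
}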
thirdparty/meshoptimizer/indexcodec.cpp (vendored, new file, 688 lines)
@@ -0,0 +1,688 @@
|
||||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
// This work is based on:
|
||||
// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
|
||||
// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
const unsigned char kIndexHeader = 0xe0;
|
||||
const unsigned char kSequenceHeader = 0xd0;
|
||||
|
||||
static int gEncodeIndexVersion = 1;
|
||||
const int kDecodeIndexVersion = 1;
|
||||
|
||||
typedef unsigned int VertexFifo[16];
|
||||
typedef unsigned int EdgeFifo[16][2];
|
||||
|
||||
static const unsigned int kTriangleIndexOrder[3][3] = {
|
||||
{0, 1, 2},
|
||||
{1, 2, 0},
|
||||
{2, 0, 1},
|
||||
};
|
||||
|
||||
static const unsigned char kCodeAuxEncodingTable[16] = {
|
||||
0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
|
||||
0, 0, // last two entries aren't used for encoding
|
||||
};
|
||||
|
||||
static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
|
||||
{
|
||||
(void)a;
|
||||
|
||||
return (b == next) ? 1 : (c == next ? 2 : 0);
|
||||
}
|
||||
|
||||
static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
|
||||
{
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
size_t index = (offset - 1 - i) & 15;
|
||||
|
||||
unsigned int e0 = fifo[index][0];
|
||||
unsigned int e1 = fifo[index][1];
|
||||
|
||||
if (e0 == a && e1 == b)
|
||||
return (i << 2) | 0;
|
||||
if (e0 == b && e1 == c)
|
||||
return (i << 2) | 1;
|
||||
if (e0 == c && e1 == a)
|
||||
return (i << 2) | 2;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
|
||||
{
|
||||
fifo[offset][0] = a;
|
||||
fifo[offset][1] = b;
|
||||
offset = (offset + 1) & 15;
|
||||
}
|
||||
|
||||
static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
|
||||
{
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
size_t index = (offset - 1 - i) & 15;
|
||||
|
||||
if (fifo[index] == v)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
|
||||
{
|
||||
fifo[offset] = v;
|
||||
offset = (offset + cond) & 15;
|
||||
}
|
||||
|
||||
static void encodeVByte(unsigned char*& data, unsigned int v)
|
||||
{
|
||||
// encode 32-bit value in up to 5 7-bit groups
|
||||
do
|
||||
{
|
||||
*data++ = (v & 127) | (v > 127 ? 128 : 0);
|
||||
v >>= 7;
|
||||
} while (v);
|
||||
}
|
||||
|
||||
static unsigned int decodeVByte(const unsigned char*& data)
|
||||
{
|
||||
unsigned char lead = *data++;
|
||||
|
||||
// fast path: single byte
|
||||
if (lead < 128)
|
||||
return lead;
|
||||
|
||||
// slow path: up to 4 extra bytes
|
||||
// note that this loop always terminates, which is important for malformed data
|
||||
unsigned int result = lead & 127;
|
||||
unsigned int shift = 7;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
unsigned char group = *data++;
|
||||
result |= unsigned(group & 127) << shift;
|
||||
shift += 7;
|
||||
|
||||
if (group < 128)
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
|
||||
{
|
||||
unsigned int d = index - last;
|
||||
unsigned int v = (d << 1) ^ (int(d) >> 31);
|
||||
|
||||
encodeVByte(data, v);
|
||||
}
|
||||
|
||||
static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
|
||||
{
|
||||
unsigned int v = decodeVByte(data);
|
||||
unsigned int d = (v >> 1) ^ -int(v & 1);
|
||||
|
||||
return last + d;
|
||||
}
|
||||
|
||||
static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
|
||||
{
|
||||
for (int i = 0; i < 16; ++i)
|
||||
if (table[i] == v)
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
|
||||
{
|
||||
if (index_size == 2)
|
||||
{
|
||||
static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
|
||||
static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
|
||||
static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_cast<unsigned int*>(destination)[offset + 0] = a;
|
||||
static_cast<unsigned int*>(destination)[offset + 1] = b;
|
||||
static_cast<unsigned int*>(destination)[offset + 2] = c;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
|
||||
// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
|
||||
if (buffer_size < 1 + index_count / 3 + 16)
|
||||
return 0;
|
||||
|
||||
int version = gEncodeIndexVersion;
|
||||
|
||||
buffer[0] = (unsigned char)(kIndexHeader | version);
|
||||
|
||||
EdgeFifo edgefifo;
|
||||
memset(edgefifo, -1, sizeof(edgefifo));
|
||||
|
||||
VertexFifo vertexfifo;
|
||||
memset(vertexfifo, -1, sizeof(vertexfifo));
|
||||
|
||||
size_t edgefifooffset = 0;
|
||||
size_t vertexfifooffset = 0;
|
||||
|
||||
unsigned int next = 0;
|
||||
unsigned int last = 0;
|
||||
|
||||
unsigned char* code = buffer + 1;
|
||||
unsigned char* data = code + index_count / 3;
|
||||
unsigned char* data_safe_end = buffer + buffer_size - 16;
|
||||
|
||||
int fecmax = version >= 1 ? 13 : 15;
|
||||
|
||||
// use static encoding table; it's possible to pack the result and then build an optimal table and repack
|
||||
// for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
|
||||
const unsigned char* codeaux_table = kCodeAuxEncodingTable;
|
||||
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
// make sure we have enough space to write a triangle
|
||||
// each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
|
||||
// after this we can be sure we can write without extra bounds checks
|
||||
if (data > data_safe_end)
|
||||
return 0;
|
||||
|
||||
int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
|
||||
|
||||
if (fer >= 0 && (fer >> 2) < 15)
|
||||
{
|
||||
// note: getEdgeFifo implicitly rotates triangles by matching a/b to existing edge
|
||||
const unsigned int* order = kTriangleIndexOrder[fer & 3];
|
||||
|
||||
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
|
||||
|
||||
// encode edge index and vertex fifo index, next or free index
|
||||
int fe = fer >> 2;
|
||||
int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
|
||||
|
||||
int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next ? (next++, 0) : 15);
|
||||
|
||||
if (fec == 15 && version >= 1)
|
||||
{
|
||||
// encode last-1 and last+1 to optimize strip-like sequences
|
||||
if (c + 1 == last)
|
||||
fec = 13, last = c;
|
||||
if (c == last + 1)
|
||||
fec = 14, last = c;
|
||||
}
|
||||
|
||||
*code++ = (unsigned char)((fe << 4) | fec);
|
||||
|
||||
// note that we need to update the last index since free indices are delta-encoded
|
||||
if (fec == 15)
|
||||
encodeIndex(data, c, last), last = c;
|
||||
|
||||
// we only need to push third vertex since first two are likely already in the vertex fifo
|
||||
if (fec == 0 || fec >= fecmax)
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset);
|
||||
|
||||
// we only need to push two new edges to edge fifo since the third one is already there
|
||||
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
|
||||
const unsigned int* order = kTriangleIndexOrder[rotation];
|
||||
|
||||
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
|
||||
|
||||
// if a/b/c are 0/1/2, we emit a reset code
|
||||
bool reset = false;
|
||||
|
||||
if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
|
||||
{
|
||||
reset = true;
|
||||
next = 0;
|
||||
|
||||
// reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
|
||||
// this makes sure next continues to get incremented instead of being stuck
|
||||
memset(vertexfifo, -1, sizeof(vertexfifo));
|
||||
}
|
||||
|
||||
int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
|
||||
int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
|
||||
|
||||
// after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
|
||||
// note: decoder implicitly assumes that if feb=fec=0, then fea=0 (reset code); this is enforced by rotation
|
||||
int fea = (a == next) ? (next++, 0) : 15;
|
||||
int feb = (fb >= 0 && fb < 14) ? fb + 1 : (b == next ? (next++, 0) : 15);
|
||||
int fec = (fc >= 0 && fc < 14) ? fc + 1 : (c == next ? (next++, 0) : 15);
|
||||
|
||||
// we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
|
||||
unsigned char codeaux = (unsigned char)((feb << 4) | fec);
|
||||
int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
|
||||
|
||||
// <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
|
||||
if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
|
||||
{
|
||||
*code++ = (unsigned char)((15 << 4) | codeauxindex);
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = (unsigned char)((15 << 4) | 14 | fea);
|
||||
*data++ = codeaux;
|
||||
}
|
||||
|
||||
// note that we need to update the last index since free indices are delta-encoded
|
||||
if (fea == 15)
|
||||
encodeIndex(data, a, last), last = a;
|
||||
|
||||
if (feb == 15)
|
||||
encodeIndex(data, b, last), last = b;
|
||||
|
||||
if (fec == 15)
|
||||
encodeIndex(data, c, last), last = c;
|
||||
|
||||
// only push vertices that weren't already in fifo
|
||||
if (fea == 0 || fea == 15)
|
||||
pushVertexFifo(vertexfifo, a, vertexfifooffset);
|
||||
|
||||
if (feb == 0 || feb == 15)
|
||||
pushVertexFifo(vertexfifo, b, vertexfifooffset);
|
||||
|
||||
if (fec == 0 || fec == 15)
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset);
|
||||
|
||||
// all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
|
||||
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
|
||||
}
|
||||
}
|
||||
|
||||
// make sure we have enough space to write codeaux table
|
||||
if (data > data_safe_end)
|
||||
return 0;
|
||||
|
||||
// add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
|
||||
// we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
|
||||
// this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
|
||||
for (size_t i = 0; i < 16; ++i)
|
||||
{
|
||||
// decoder assumes that table entries never refer to separately encoded indices
|
||||
assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
|
||||
|
||||
*data++ = codeaux_table[i];
|
||||
}
|
||||
|
||||
// since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
|
||||
assert(codeaux_table[0] == 0);
|
||||
|
||||
assert(data >= buffer + index_count / 3 + 16);
|
||||
assert(data <= buffer + buffer_size);
|
||||
|
||||
return data - buffer;
|
||||
}
|
||||
|
||||
size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
|
||||
{
|
||||
assert(index_count % 3 == 0);
|
||||
|
||||
// compute number of bits required for each index
|
||||
unsigned int vertex_bits = 1;
|
||||
|
||||
while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
|
||||
vertex_bits++;
|
||||
|
||||
// worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
|
||||
unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;
|
||||
|
||||
return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
|
||||
}
|
||||
|
||||
void meshopt_encodeIndexVersion(int version)
|
||||
{
|
||||
assert(unsigned(version) <= unsigned(meshopt::kDecodeIndexVersion));
|
||||
|
||||
meshopt::gEncodeIndexVersion = version;
|
||||
}
|
||||
|
||||
int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size)
|
||||
{
|
||||
if (buffer_size < 1)
|
||||
return -1;
|
||||
|
||||
unsigned char header = buffer[0];
|
||||
|
||||
if ((header & 0xf0) != meshopt::kIndexHeader && (header & 0xf0) != meshopt::kSequenceHeader)
|
||||
return -1;
|
||||
|
||||
int version = header & 0x0f;
|
||||
if (version > meshopt::kDecodeIndexVersion)
|
||||
return -1;
|
||||
|
||||
return version;
|
||||
}
|
||||
|
||||
int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(index_size == 2 || index_size == 4);
|
||||
|
||||
// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
|
||||
if (buffer_size < 1 + index_count / 3 + 16)
|
||||
return -2;
|
||||
|
||||
if ((buffer[0] & 0xf0) != kIndexHeader)
|
||||
return -1;
|
||||
|
||||
int version = buffer[0] & 0x0f;
|
||||
if (version > kDecodeIndexVersion)
|
||||
return -1;
|
||||
|
||||
EdgeFifo edgefifo;
|
||||
memset(edgefifo, -1, sizeof(edgefifo));
|
||||
|
||||
VertexFifo vertexfifo;
|
||||
memset(vertexfifo, -1, sizeof(vertexfifo));
|
||||
|
||||
size_t edgefifooffset = 0;
|
||||
size_t vertexfifooffset = 0;
|
||||
|
||||
unsigned int next = 0;
|
||||
unsigned int last = 0;
|
||||
|
||||
int fecmax = version >= 1 ? 13 : 15;
|
||||
|
||||
// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
|
||||
const unsigned char* code = buffer + 1;
|
||||
const unsigned char* data = code + index_count / 3;
|
||||
const unsigned char* data_safe_end = buffer + buffer_size - 16;
|
||||
|
||||
const unsigned char* codeaux_table = data_safe_end;
|
||||
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
// make sure we have enough data to read for a triangle
|
||||
// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
|
||||
// after this we can be sure we can read without extra bounds checks
|
||||
if (data > data_safe_end)
|
||||
return -2;
|
||||
|
||||
unsigned char codetri = *code++;
|
||||
|
||||
if (codetri < 0xf0)
|
||||
{
|
||||
int fe = codetri >> 4;
|
||||
|
||||
// fifo reads are wrapped around 16 entry buffer
|
||||
unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
|
||||
unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
|
||||
unsigned int c = 0;
|
||||
|
||||
int fec = codetri & 15;
|
||||
|
||||
// note: this is the most common path in the entire decoder
|
||||
// inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
|
||||
if (fec < fecmax)
|
||||
{
|
||||
// fifo reads are wrapped around 16 entry buffer
|
||||
unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
|
||||
c = (fec == 0) ? next : cf;
|
||||
|
||||
int fec0 = fec == 0;
|
||||
next += fec0;
|
||||
|
||||
// push vertex fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// fec - (fec ^ 3) decodes 13, 14 into -1, 1
|
||||
// note that we need to update the last index since free indices are delta-encoded
|
||||
last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
|
||||
|
||||
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset);
|
||||
}
|
||||
|
||||
// push edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
|
||||
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
|
||||
|
||||
// output triangle
|
||||
writeTriangle(destination, i, index_size, a, b, c);
|
||||
}
|
||||
else
|
||||
{
|
||||
// fast path: read codeaux from the table
|
||||
if (codetri < 0xfe)
|
||||
{
|
||||
unsigned char codeaux = codeaux_table[codetri & 15];
|
||||
|
||||
// note: table can't contain feb/fec=15
|
||||
int feb = codeaux >> 4;
|
||||
int fec = codeaux & 15;
|
||||
|
||||
// fifo reads are wrapped around 16 entry buffer
|
||||
// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
|
||||
unsigned int a = next++;
|
||||
|
||||
unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
|
||||
unsigned int b = (feb == 0) ? next : bf;
|
||||
|
||||
int feb0 = feb == 0;
|
||||
next += feb0;
|
||||
|
||||
unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
|
||||
unsigned int c = (fec == 0) ? next : cf;
|
||||
|
||||
int fec0 = fec == 0;
|
||||
next += fec0;
|
||||
|
||||
// output triangle
|
||||
writeTriangle(destination, i, index_size, a, b, c);
|
||||
|
||||
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
|
||||
pushVertexFifo(vertexfifo, a, vertexfifooffset);
|
||||
pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
|
||||
|
||||
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// slow path: read a full byte for codeaux instead of using a table lookup
|
||||
unsigned char codeaux = *data++;
|
||||
|
||||
int fea = codetri == 0xfe ? 0 : 15;
|
||||
int feb = codeaux >> 4;
|
||||
int fec = codeaux & 15;
|
||||
|
||||
// reset: codeaux is 0 but encoded as not-a-table
|
||||
if (codeaux == 0)
|
||||
next = 0;
|
||||
|
||||
// fifo reads are wrapped around 16 entry buffer
|
||||
// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
|
||||
unsigned int a = (fea == 0) ? next++ : 0;
|
||||
unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
|
||||
unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
|
||||
|
||||
// note that we need to update the last index since free indices are delta-encoded
|
||||
if (fea == 15)
|
||||
last = a = decodeIndex(data, last);
|
||||
|
||||
if (feb == 15)
|
||||
last = b = decodeIndex(data, last);
|
||||
|
||||
if (fec == 15)
|
||||
last = c = decodeIndex(data, last);
|
||||
|
||||
// output triangle
|
||||
writeTriangle(destination, i, index_size, a, b, c);
|
||||
|
||||
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
|
||||
pushVertexFifo(vertexfifo, a, vertexfifooffset);
|
||||
pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
|
||||
pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
|
||||
|
||||
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
|
||||
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we should've read all data bytes and stopped at the boundary between data and codeaux table
|
||||
if (data != data_safe_end)
|
||||
return -3;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
|
||||
if (buffer_size < 1 + index_count + 4)
|
||||
return 0;
|
||||
|
||||
int version = gEncodeIndexVersion;
|
||||
|
||||
buffer[0] = (unsigned char)(kSequenceHeader | version);
|
||||
|
||||
unsigned int last[2] = {};
|
||||
unsigned int current = 0;
|
||||
|
||||
unsigned char* data = buffer + 1;
|
||||
unsigned char* data_safe_end = buffer + buffer_size - 4;
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
// make sure we have enough data to write
|
||||
// each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
|
||||
// after this we can be sure we can write without extra bounds checks
|
||||
if (data >= data_safe_end)
|
||||
return 0;
|
||||
|
||||
unsigned int index = indices[i];
|
||||
|
||||
// this is a heuristic that switches between baselines when the delta grows too large
|
||||
// we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
|
||||
// for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
|
||||
int cd = int(index - last[current]);
|
||||
current ^= ((cd < 0 ? -cd : cd) >= 30);
|
||||
|
||||
// encode delta from the last index
|
||||
unsigned int d = index - last[current];
|
||||
unsigned int v = (d << 1) ^ (int(d) >> 31);
|
||||
|
||||
// note: low bit encodes the index of the last baseline which will be used for reconstruction
|
||||
encodeVByte(data, (v << 1) | current);
|
||||
|
||||
// update last for the next iteration that uses it
|
||||
last[current] = index;
|
||||
}
|
||||
|
||||
// make sure we have enough space to write tail
|
||||
if (data > data_safe_end)
|
||||
return 0;
|
||||
|
||||
for (int k = 0; k < 4; ++k)
|
||||
*data++ = 0;
|
||||
|
||||
return data - buffer;
|
||||
}
|
||||
|
||||
size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
|
||||
{
|
||||
// compute number of bits required for each index
|
||||
unsigned int vertex_bits = 1;
|
||||
|
||||
while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
|
||||
vertex_bits++;
|
||||
|
||||
// worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
|
||||
unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;
|
||||
|
||||
return 1 + index_count * vertex_groups + 4;
|
||||
}
|
||||
|
||||
int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
|
||||
if (buffer_size < 1 + index_count + 4)
|
||||
return -2;
|
||||
|
||||
if ((buffer[0] & 0xf0) != kSequenceHeader)
|
||||
return -1;
|
||||
|
||||
int version = buffer[0] & 0x0f;
|
||||
if (version > kDecodeIndexVersion)
|
||||
return -1;
|
||||
|
||||
const unsigned char* data = buffer + 1;
|
||||
const unsigned char* data_safe_end = buffer + buffer_size - 4;
|
||||
|
||||
unsigned int last[2] = {};
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
// make sure we have enough data to read
|
||||
// each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
|
||||
// after this we can be sure we can read without extra bounds checks
|
||||
if (data >= data_safe_end)
|
||||
return -2;
|
||||
|
||||
unsigned int v = decodeVByte(data);
|
||||
|
||||
// decode the index of the last baseline
|
||||
unsigned int current = v & 1;
|
||||
v >>= 1;
|
||||
|
||||
// reconstruct index as a delta
|
||||
unsigned int d = (v >> 1) ^ -int(v & 1);
|
||||
unsigned int index = last[current] + d;
|
||||
|
||||
// update last for the next iteration that uses it
|
||||
last[current] = index;
|
||||
|
||||
if (index_size == 2)
|
||||
{
|
||||
static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_cast<unsigned int*>(destination)[i] = index;
|
||||
}
|
||||
}
|
||||
|
||||
// we should've read all data bytes and stopped at the boundary between data and tail
|
||||
if (data != data_safe_end)
|
||||
return -3;
|
||||
|
||||
return 0;
|
||||
}
|
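Aside (not part of the diff): a minimal round-trip sketch for the index codec defined in this file. Note that the encoder is free to rotate individual triangles, so the decoded buffer contains the same triangles but not necessarily the same vertex order within each triangle.

#include <assert.h>

#include <vector>

#include "meshoptimizer.h"

std::vector<unsigned char> compress_indices(const std::vector<unsigned int>& indices, size_t vertex_count)
{
    // size the destination with the worst-case bound, then shrink to the actual encoded size
    std::vector<unsigned char> encoded(meshopt_encodeIndexBufferBound(indices.size(), vertex_count));
    encoded.resize(meshopt_encodeIndexBuffer(encoded.data(), encoded.size(), indices.data(), indices.size()));
    assert(!encoded.empty()); // 0 means the destination buffer was too small

    // decode back into 32-bit indices; 0 indicates success
    std::vector<unsigned int> decoded(indices.size());
    int res = meshopt_decodeIndexBuffer(decoded.data(), decoded.size(), sizeof(unsigned int), encoded.data(), encoded.size());
    assert(res == 0);
    (void)res;

    return encoded;
}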
thirdparty/meshoptimizer/indexgenerator.cpp (vendored, new file, 679 lines)
@@ -0,0 +1,679 @@
|
||||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
// This work is based on:
|
||||
// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
|
||||
// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
|
||||
// John Hable. Variable Rate Shading with Visibility Buffer Rendering. 2024
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
|
||||
{
|
||||
// MurmurHash2
|
||||
const unsigned int m = 0x5bd1e995;
|
||||
const int r = 24;
|
||||
|
||||
while (len >= 4)
|
||||
{
|
||||
unsigned int k = *reinterpret_cast<const unsigned int*>(key);
|
||||
|
||||
k *= m;
|
||||
k ^= k >> r;
|
||||
k *= m;
|
||||
|
||||
h *= m;
|
||||
h ^= k;
|
||||
|
||||
key += 4;
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
struct VertexHasher
|
||||
{
|
||||
const unsigned char* vertices;
|
||||
size_t vertex_size;
|
||||
size_t vertex_stride;
|
||||
|
||||
size_t hash(unsigned int index) const
|
||||
{
|
||||
return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
|
||||
}
|
||||
|
||||
bool equal(unsigned int lhs, unsigned int rhs) const
|
||||
{
|
||||
return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct VertexStreamHasher
|
||||
{
|
||||
const meshopt_Stream* streams;
|
||||
size_t stream_count;
|
||||
|
||||
size_t hash(unsigned int index) const
|
||||
{
|
||||
unsigned int h = 0;
|
||||
|
||||
for (size_t i = 0; i < stream_count; ++i)
|
||||
{
|
||||
const meshopt_Stream& s = streams[i];
|
||||
const unsigned char* data = static_cast<const unsigned char*>(s.data);
|
||||
|
||||
h = hashUpdate4(h, data + index * s.stride, s.size);
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
bool equal(unsigned int lhs, unsigned int rhs) const
|
||||
{
|
||||
for (size_t i = 0; i < stream_count; ++i)
|
||||
{
|
||||
const meshopt_Stream& s = streams[i];
|
||||
const unsigned char* data = static_cast<const unsigned char*>(s.data);
|
||||
|
||||
if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct VertexCustomHasher
|
||||
{
|
||||
const float* vertex_positions;
|
||||
size_t vertex_stride_float;
|
||||
|
||||
int (*callback)(void*, unsigned int, unsigned int);
|
||||
void* context;
|
||||
|
||||
size_t hash(unsigned int index) const
|
||||
{
|
||||
const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float);
|
||||
|
||||
unsigned int x = key[0], y = key[1], z = key[2];
|
||||
|
||||
// replace negative zero with zero
|
||||
x = (x == 0x80000000) ? 0 : x;
|
||||
y = (y == 0x80000000) ? 0 : y;
|
||||
z = (z == 0x80000000) ? 0 : z;
|
||||
|
||||
// scramble bits to make sure that integer coordinates have entropy in lower bits
|
||||
x ^= x >> 17;
|
||||
y ^= y >> 17;
|
||||
z ^= z >> 17;
|
||||
|
||||
// Optimized Spatial Hashing for Collision Detection of Deformable Objects
|
||||
return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
|
||||
}
|
||||
|
||||
bool equal(unsigned int lhs, unsigned int rhs) const
|
||||
{
|
||||
const float* lp = vertex_positions + lhs * vertex_stride_float;
|
||||
const float* rp = vertex_positions + rhs * vertex_stride_float;
|
||||
|
||||
if (lp[0] != rp[0] || lp[1] != rp[1] || lp[2] != rp[2])
|
||||
return false;
|
||||
|
||||
return callback ? callback(context, lhs, rhs) : true;
|
||||
}
|
||||
};
|
||||
|
||||
struct EdgeHasher
|
||||
{
|
||||
const unsigned int* remap;
|
||||
|
||||
size_t hash(unsigned long long edge) const
|
||||
{
|
||||
unsigned int e0 = unsigned(edge >> 32);
|
||||
unsigned int e1 = unsigned(edge);
|
||||
|
||||
unsigned int h1 = remap[e0];
|
||||
unsigned int h2 = remap[e1];
|
||||
|
||||
const unsigned int m = 0x5bd1e995;
|
||||
|
||||
// MurmurHash64B finalizer
|
||||
h1 ^= h2 >> 18;
|
||||
h1 *= m;
|
||||
h2 ^= h1 >> 22;
|
||||
h2 *= m;
|
||||
h1 ^= h2 >> 17;
|
||||
h1 *= m;
|
||||
h2 ^= h1 >> 19;
|
||||
h2 *= m;
|
||||
|
||||
return h2;
|
||||
}
|
||||
|
||||
bool equal(unsigned long long lhs, unsigned long long rhs) const
|
||||
{
|
||||
unsigned int l0 = unsigned(lhs >> 32);
|
||||
unsigned int l1 = unsigned(lhs);
|
||||
|
||||
unsigned int r0 = unsigned(rhs >> 32);
|
||||
unsigned int r1 = unsigned(rhs);
|
||||
|
||||
return remap[l0] == remap[r0] && remap[l1] == remap[r1];
|
||||
}
|
||||
};
|
||||
|
||||
static size_t hashBuckets(size_t count)
|
||||
{
|
||||
size_t buckets = 1;
|
||||
while (buckets < count + count / 4)
|
||||
buckets *= 2;
|
||||
|
||||
return buckets;
|
||||
}
|
||||
|
||||
template <typename T, typename Hash>
|
||||
static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
|
||||
{
|
||||
assert(buckets > 0);
|
||||
assert((buckets & (buckets - 1)) == 0);
|
||||
|
||||
size_t hashmod = buckets - 1;
|
||||
size_t bucket = hash.hash(key) & hashmod;
|
||||
|
||||
for (size_t probe = 0; probe <= hashmod; ++probe)
|
||||
{
|
||||
T& item = table[bucket];
|
||||
|
||||
if (item == empty)
|
||||
return &item;
|
||||
|
||||
if (hash.equal(item, key))
|
||||
return &item;
|
||||
|
||||
// hash collision, quadratic probing
|
||||
bucket = (bucket + probe + 1) & hashmod;
|
||||
}
|
||||
|
||||
assert(false && "Hash table is full"); // unreachable
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
|
||||
{
|
||||
VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
|
||||
|
||||
size_t vertex_table_size = hashBuckets(vertex_count);
|
||||
unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
|
||||
memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
unsigned int index = unsigned(i);
|
||||
unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
|
||||
|
||||
if (*entry == ~0u)
|
||||
*entry = index;
|
||||
|
||||
remap[index] = *entry;
|
||||
}
|
||||
|
||||
allocator.deallocate(vertex_table);
|
||||
}
|
||||
|
||||
template <typename Hash>
|
||||
static size_t generateVertexRemap(unsigned int* remap, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
|
||||
{
|
||||
memset(remap, -1, vertex_count * sizeof(unsigned int));
|
||||
|
||||
size_t table_size = hashBuckets(vertex_count);
|
||||
unsigned int* table = allocator.allocate<unsigned int>(table_size);
|
||||
memset(table, -1, table_size * sizeof(unsigned int));
|
||||
|
||||
unsigned int next_vertex = 0;
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
unsigned int index = indices ? indices[i] : unsigned(i);
|
||||
assert(index < vertex_count);
|
||||
|
||||
if (remap[index] != ~0u)
|
||||
continue;
|
||||
|
||||
unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
|
||||
|
||||
if (*entry == ~0u)
|
||||
{
|
||||
*entry = index;
|
||||
remap[index] = next_vertex++;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(remap[*entry] != ~0u);
|
||||
remap[index] = remap[*entry];
|
||||
}
|
||||
}
|
||||
|
||||
assert(next_vertex <= vertex_count);
|
||||
return next_vertex;
|
||||
}
|
||||
|
||||
template <size_t BlockSize>
|
||||
static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
|
||||
{
|
||||
size_t block_size = BlockSize == 0 ? vertex_size : BlockSize;
|
||||
assert(block_size == vertex_size);
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
if (remap[i] != ~0u)
|
||||
{
|
||||
assert(remap[i] < vertex_count);
|
||||
memcpy(static_cast<unsigned char*>(destination) + remap[i] * block_size, static_cast<const unsigned char*>(vertices) + i * block_size, block_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Hash>
|
||||
static void generateShadowBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
|
||||
{
|
||||
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
|
||||
memset(remap, -1, vertex_count * sizeof(unsigned int));
|
||||
|
||||
size_t table_size = hashBuckets(vertex_count);
|
||||
unsigned int* table = allocator.allocate<unsigned int>(table_size);
|
||||
memset(table, -1, table_size * sizeof(unsigned int));
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
unsigned int index = indices[i];
|
||||
assert(index < vertex_count);
|
||||
|
||||
if (remap[index] == ~0u)
|
||||
{
|
||||
unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
|
||||
|
||||
if (*entry == ~0u)
|
||||
*entry = index;
|
||||
|
||||
remap[index] = *entry;
|
||||
}
|
||||
|
||||
destination[i] = remap[index];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(indices || index_count == vertex_count);
|
||||
assert(!indices || index_count % 3 == 0);
|
||||
assert(vertex_size > 0 && vertex_size <= 256);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};
|
||||
|
||||
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(indices || index_count == vertex_count);
|
||||
assert(index_count % 3 == 0);
|
||||
assert(stream_count > 0 && stream_count <= 16);
|
||||
|
||||
for (size_t i = 0; i < stream_count; ++i)
|
||||
{
|
||||
assert(streams[i].size > 0 && streams[i].size <= 256);
|
||||
assert(streams[i].size <= streams[i].stride);
|
||||
}
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexStreamHasher hasher = {streams, stream_count};
|
||||
|
||||
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(indices || index_count == vertex_count);
|
||||
assert(!indices || index_count % 3 == 0);
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), callback, context};
|
||||
|
||||
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(vertex_size > 0 && vertex_size <= 256);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
// support in-place remap
|
||||
if (destination == vertices)
|
||||
{
|
||||
unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
|
||||
memcpy(vertices_copy, vertices, vertex_count * vertex_size);
|
||||
vertices = vertices_copy;
|
||||
}
|
||||
|
||||
// specialize the loop for common vertex sizes to ensure memcpy is compiled as an inlined intrinsic
|
||||
switch (vertex_size)
|
||||
{
|
||||
case 4:
|
||||
return remapVertices<4>(destination, vertices, vertex_count, vertex_size, remap);
|
||||
|
||||
case 8:
|
||||
return remapVertices<8>(destination, vertices, vertex_count, vertex_size, remap);
|
||||
|
||||
case 12:
|
||||
return remapVertices<12>(destination, vertices, vertex_count, vertex_size, remap);
|
||||
|
||||
case 16:
|
||||
return remapVertices<16>(destination, vertices, vertex_count, vertex_size, remap);
|
||||
|
||||
default:
|
||||
return remapVertices<0>(destination, vertices, vertex_count, vertex_size, remap);
|
||||
}
|
||||
}
|
||||
|
||||
void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
|
||||
{
|
||||
assert(index_count % 3 == 0);
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
unsigned int index = indices ? indices[i] : unsigned(i);
|
||||
assert(remap[index] != ~0u);
|
||||
|
||||
destination[i] = remap[index];
|
||||
}
|
||||
}
|
||||
|
||||
void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(indices);
|
||||
assert(index_count % 3 == 0);
|
||||
assert(vertex_size > 0 && vertex_size <= 256);
|
||||
assert(vertex_size <= vertex_stride);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};
|
||||
|
||||
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(indices);
|
||||
assert(index_count % 3 == 0);
|
||||
assert(stream_count > 0 && stream_count <= 16);
|
||||
|
||||
for (size_t i = 0; i < stream_count; ++i)
|
||||
{
|
||||
assert(streams[i].size > 0 && streams[i].size <= 256);
|
||||
assert(streams[i].size <= streams[i].stride);
|
||||
}
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexStreamHasher hasher = {streams, stream_count};
|
||||
|
||||
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
static const int next[4] = {1, 2, 0, 1};
|
||||
|
||||
// build position remap: for each vertex, which other (canonical) vertex does it map to?
|
||||
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
|
||||
buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
|
||||
|
||||
// build edge set; this stores all triangle edges but we can look these up by any other wedge
|
||||
EdgeHasher edge_hasher = {remap};
|
||||
|
||||
size_t edge_table_size = hashBuckets(index_count);
|
||||
unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
|
||||
unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);
|
||||
|
||||
memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
|
||||
memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
|
||||
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
for (int e = 0; e < 3; ++e)
|
||||
{
|
||||
unsigned int i0 = indices[i + e];
|
||||
unsigned int i1 = indices[i + next[e]];
|
||||
unsigned int i2 = indices[i + next[e + 1]];
|
||||
assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
|
||||
|
||||
unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
|
||||
unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
|
||||
|
||||
if (*entry == ~0ull)
|
||||
{
|
||||
*entry = edge;
|
||||
|
||||
// store vertex opposite to the edge
|
||||
edge_vertex_table[entry - edge_table] = i2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build resulting index buffer: 6 indices for each input triangle
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
unsigned int patch[6];
|
||||
|
||||
for (int e = 0; e < 3; ++e)
|
||||
{
|
||||
unsigned int i0 = indices[i + e];
|
||||
unsigned int i1 = indices[i + next[e]];
|
||||
assert(i0 < vertex_count && i1 < vertex_count);
|
||||
|
||||
// note: this refers to the opposite edge!
|
||||
unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
|
||||
unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
|
||||
|
||||
patch[e * 2 + 0] = i0;
|
||||
patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
|
||||
}
|
||||
|
||||
memcpy(destination + i * 2, patch, sizeof(patch));
|
||||
}
|
||||
}
|
||||
|
||||
void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
static const int next[3] = {1, 2, 0};
|
||||
|
||||
// build position remap: for each vertex, which other (canonical) vertex does it map to?
|
||||
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
|
||||
buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
|
||||
|
||||
// build edge set; this stores all triangle edges but we can look these up by any other wedge
|
||||
EdgeHasher edge_hasher = {remap};
|
||||
|
||||
size_t edge_table_size = hashBuckets(index_count);
|
||||
unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
|
||||
memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
|
||||
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
for (int e = 0; e < 3; ++e)
|
||||
{
|
||||
unsigned int i0 = indices[i + e];
|
||||
unsigned int i1 = indices[i + next[e]];
|
||||
assert(i0 < vertex_count && i1 < vertex_count);
|
||||
|
||||
unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
|
||||
unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
|
||||
|
||||
if (*entry == ~0ull)
|
||||
*entry = edge;
|
||||
}
|
||||
}
|
||||
|
||||
// build resulting index buffer: 12 indices for each input triangle
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
unsigned int patch[12];
|
||||
|
||||
for (int e = 0; e < 3; ++e)
|
||||
{
|
||||
unsigned int i0 = indices[i + e];
|
||||
unsigned int i1 = indices[i + next[e]];
|
||||
assert(i0 < vertex_count && i1 < vertex_count);
|
||||
|
||||
// note: this refers to the opposite edge!
|
||||
unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
|
||||
unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
|
||||
|
||||
// use the same edge if opposite edge doesn't exist (border)
|
||||
oppe = (oppe == ~0ull) ? edge : oppe;
|
||||
|
||||
// triangle index (0, 1, 2)
|
||||
patch[e] = i0;
|
||||
|
||||
// opposite edge (3, 4; 5, 6; 7, 8)
|
||||
patch[3 + e * 2 + 0] = unsigned(oppe);
|
||||
patch[3 + e * 2 + 1] = unsigned(oppe >> 32);
|
||||
|
||||
// dominant vertex (9, 10, 11)
|
||||
patch[9 + e] = remap[i0];
|
||||
}
|
||||
|
||||
memcpy(destination + i * 4, patch, sizeof(patch));
|
||||
}
|
||||
}
|
||||
|
||||
size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count)
|
||||
{
|
||||
assert(index_count % 3 == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
|
||||
memset(remap, -1, vertex_count * sizeof(unsigned int));
|
||||
|
||||
// compute vertex valence; this is used to prioritize least used corner
|
||||
// note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic
|
||||
unsigned char* valence = allocator.allocate<unsigned char>(vertex_count);
|
||||
memset(valence, 0, vertex_count);
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
unsigned int index = indices[i];
|
||||
assert(index < vertex_count);
|
||||
|
||||
valence[index]++;
|
||||
}
|
||||
|
||||
unsigned int reorder_offset = 0;
|
||||
|
||||
// assign provoking vertices; leave the rest for the next pass
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
|
||||
assert(a < vertex_count && b < vertex_count && c < vertex_count);
|
||||
|
||||
// try to rotate triangle such that provoking vertex hasn't been seen before
|
||||
// if multiple vertices are new, prioritize the one with least valence
|
||||
// this reduces the risk that a future triangle will have all three vertices seen
|
||||
unsigned int va = remap[a] == ~0u ? valence[a] : ~0u;
|
||||
unsigned int vb = remap[b] == ~0u ? valence[b] : ~0u;
|
||||
unsigned int vc = remap[c] == ~0u ? valence[c] : ~0u;
|
||||
|
||||
if (vb != ~0u && vb <= va && vb <= vc)
|
||||
{
|
||||
// abc -> bca
|
||||
unsigned int t = a;
|
||||
a = b, b = c, c = t;
|
||||
}
|
||||
else if (vc != ~0u && vc <= va && vc <= vb)
|
||||
{
|
||||
// abc -> cab
|
||||
unsigned int t = c;
|
||||
c = b, b = a, a = t;
|
||||
}
|
||||
|
||||
unsigned int newidx = reorder_offset;
|
||||
|
||||
// now remap[a] = ~0u or all three vertices are old
|
||||
// recording remap[a] makes it possible to remap future references to the same index, conserving space
|
||||
if (remap[a] == ~0u)
|
||||
remap[a] = newidx;
|
||||
|
||||
// we need to clone the provoking vertex to get a unique index
|
||||
// if all three are used the choice is arbitrary since no future triangle will be able to reuse any of these
|
||||
reorder[reorder_offset++] = a;
|
||||
|
||||
// note: first vertex is final, the other two will be fixed up in next pass
|
||||
destination[i + 0] = newidx;
|
||||
destination[i + 1] = b;
|
||||
destination[i + 2] = c;
|
||||
|
||||
// update vertex valences for corner heuristic
|
||||
valence[a]--;
|
||||
valence[b]--;
|
||||
valence[c]--;
|
||||
}
|
||||
|
||||
// remap or clone non-provoking vertices (iterating to skip provoking vertices)
|
||||
int step = 1;
|
||||
|
||||
for (size_t i = 1; i < index_count; i += step, step ^= 3)
|
||||
{
|
||||
unsigned int index = destination[i];
|
||||
|
||||
if (remap[index] == ~0u)
|
||||
{
|
||||
// we haven't seen the vertex before as a provoking vertex
|
||||
// to maintain the reference to the original vertex we need to clone it
|
||||
unsigned int newidx = reorder_offset;
|
||||
|
||||
remap[index] = newidx;
|
||||
reorder[reorder_offset++] = index;
|
||||
}
|
||||
|
||||
destination[i] = remap[index];
|
||||
}
|
||||
|
||||
assert(reorder_offset <= vertex_count + index_count / 3);
|
||||
return reorder_offset;
|
||||
}
|
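Aside (not part of the diff): a minimal sketch of the typical remap workflow built on the functions above, indexing an unindexed triangle soup. The Vertex layout is an assumption for illustration; the input count must be a multiple of 3.

#include <vector>

#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, u, v; }; // hypothetical interleaved layout

void index_mesh(const std::vector<Vertex>& soup, std::vector<Vertex>& vertices, std::vector<unsigned int>& indices)
{
    // passing NULL indices treats the input as an unindexed soup (index i maps to vertex i)
    std::vector<unsigned int> remap(soup.size());
    size_t unique = meshopt_generateVertexRemap(remap.data(), NULL, soup.size(), soup.data(), soup.size(), sizeof(Vertex));

    // compact the vertex buffer down to the unique vertices
    vertices.resize(unique);
    meshopt_remapVertexBuffer(vertices.data(), soup.data(), soup.size(), sizeof(Vertex), remap.data());

    // build the index buffer that references the compacted vertices
    indices.resize(soup.size());
    meshopt_remapIndexBuffer(indices.data(), NULL, soup.size(), remap.data());
}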
thirdparty/meshoptimizer/meshoptimizer.h (vendored, new file, 1349 lines)
File diff suppressed because it is too large.
thirdparty/meshoptimizer/overdrawoptimizer.cpp (vendored, new file, 333 lines)
@@ -0,0 +1,333 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{

static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	float mesh_centroid[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* p = vertex_positions + vertex_stride_float * indices[i];

		mesh_centroid[0] += p[0];
		mesh_centroid[1] += p[1];
		mesh_centroid[2] += p[2];
	}

	mesh_centroid[0] /= index_count;
	mesh_centroid[1] /= index_count;
	mesh_centroid[2] /= index_count;

	for (size_t cluster = 0; cluster < cluster_count; ++cluster)
	{
		size_t cluster_begin = clusters[cluster] * 3;
		size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		float cluster_area = 0;
		float cluster_centroid[3] = {};
		float cluster_normal[3] = {};

		for (size_t i = cluster_begin; i < cluster_end; i += 3)
		{
			const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
			const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
			const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];

			float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
			float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

			float normalx = p10[1] * p20[2] - p10[2] * p20[1];
			float normaly = p10[2] * p20[0] - p10[0] * p20[2];
			float normalz = p10[0] * p20[1] - p10[1] * p20[0];

			float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

			cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
			cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
			cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
			cluster_normal[0] += normalx;
			cluster_normal[1] += normaly;
			cluster_normal[2] += normalz;
			cluster_area += area;
		}

		float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

		cluster_centroid[0] *= inv_cluster_area;
		cluster_centroid[1] *= inv_cluster_area;
		cluster_centroid[2] *= inv_cluster_area;

		float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
		float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

		cluster_normal[0] *= inv_cluster_normal_length;
		cluster_normal[1] *= inv_cluster_normal_length;
		cluster_normal[2] *= inv_cluster_normal_length;

		float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

		sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
	}
}

static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
{
	// compute sort data bounds and renormalize, using fixed point snorm
	float sort_data_max = 1e-3f;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		float dpa = fabsf(sort_data[i]);

		sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
	}

	const int sort_bits = 11;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		// note that we flip distribution since high dot product should come first
		float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max);

		sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1);
	}

	// fill histogram for counting sort
	unsigned int histogram[1 << sort_bits];
	memset(histogram, 0, sizeof(histogram));

	for (size_t i = 0; i < cluster_count; ++i)
	{
		histogram[sort_keys[i]]++;
	}

	// compute offsets based on histogram data
	size_t histogram_sum = 0;

	for (size_t i = 0; i < 1 << sort_bits; ++i)
	{
		size_t count = histogram[i];
		histogram[i] = unsigned(histogram_sum);
		histogram_sum += count;
	}

	assert(histogram_sum == cluster_count);

	// compute sort order based on offsets
	for (size_t i = 0; i < cluster_count; ++i)
	{
		sort_order[histogram[sort_keys[i]]++] = unsigned(i);
	}
}

static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
	unsigned int cache_misses = 0;

	// if vertex is not in cache, put it in cache
	if (timestamp - cache_timestamps[a] > cache_size)
	{
		cache_timestamps[a] = timestamp++;
		cache_misses++;
	}

	if (timestamp - cache_timestamps[b] > cache_size)
	{
		cache_timestamps[b] = timestamp++;
		cache_misses++;
	}

	if (timestamp - cache_timestamps[c] > cache_size)
	{
		cache_timestamps[c] = timestamp++;
		cache_misses++;
	}

	return cache_misses;
}

static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
{
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	unsigned int timestamp = cache_size + 1;

	size_t face_count = index_count / 3;

	size_t result = 0;

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

		// when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
		// that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
		// suggests an inefficiency in the vertex cache optimization algorithm
		// usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
		if (i == 0 || m == 3)
		{
			destination[result++] = unsigned(i);
		}
	}

	assert(result <= index_count / 3);

	return result;
}

static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
{
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	unsigned int timestamp = 0;

	size_t result = 0;

	for (size_t it = 0; it < cluster_count; ++it)
	{
		size_t start = clusters[it];
		size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
		assert(start < end);

		// reset cache
		timestamp += cache_size + 1;

		// measure cluster ACMR
		unsigned int cluster_misses = 0;

		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

			cluster_misses += m;
		}

		float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));

		// first cluster always starts from the hard cluster boundary
		destination[result++] = unsigned(start);

		// reset cache
		timestamp += cache_size + 1;

		unsigned int running_misses = 0;
		unsigned int running_faces = 0;

		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

			running_misses += m;
			running_faces += 1;

			if (float(running_misses) / float(running_faces) <= cluster_threshold)
			{
				// we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
				// note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last
				// cluster is empty; however, the 'pop_back' after the loop will clean it up
				destination[result++] = unsigned(i + 1);

				// reset cache
				timestamp += cache_size + 1;

				running_misses = 0;
				running_faces = 0;
			}
		}

		// each time we reach the target ACMR we flush the cluster
		// this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
		// in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
		// thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
		// there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
		// to the cluster boundary array which we need to remove anyway - this code will do that automatically
		if (destination[result - 1] != start)
		{
			result--;
		}
	}

	assert(result >= cluster_count);
	assert(result <= index_count / 3);

	return result;
}

} // namespace meshopt

void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	unsigned int cache_size = 16;

	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);

	// generate hard boundaries from full-triangle cache misses
	unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
	size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);

	// generate soft boundaries
	unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
	size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);

	const unsigned int* clusters = soft_clusters;
	size_t cluster_count = soft_cluster_count;

	// fill sort data
	float* sort_data = allocator.allocate<float>(cluster_count);
	calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);

	// sort clusters using sort data
	unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
	unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
	calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);

	// fill output buffer
	size_t offset = 0;

	for (size_t it = 0; it < cluster_count; ++it)
	{
		unsigned int cluster = sort_order[it];
		assert(cluster < cluster_count);

		size_t cluster_begin = clusters[cluster] * 3;
		size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int));
		offset += cluster_end - cluster_begin;
	}

	assert(offset == index_count);
}
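For orientation, a minimal usage sketch of the meshopt_optimizeOverdraw entry point added above; the vertex and index buffers and the 1.05 threshold are illustrative assumptions, not values taken from this commit.

// Hypothetical caller: reorder an indexed triangle mesh to reduce overdraw.
#include "meshoptimizer.h"
#include <vector>

void reorder_for_overdraw(std::vector<unsigned int>& indices, const std::vector<float>& positions)
{
	// positions is assumed to be a tightly packed float3 array, so the stride is 3 floats;
	// in-place operation is supported (the function copies the indices internally)
	meshopt_optimizeOverdraw(indices.data(), indices.data(), indices.size(),
	    positions.data(), positions.size() / 3, sizeof(float) * 3,
	    /* threshold= */ 1.05f); // allow a small ACMR regression in exchange for less overdraw
}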
499
thirdparty/meshoptimizer/partition.cpp
vendored
Normal file
499
thirdparty/meshoptimizer/partition.cpp
vendored
Normal file
@@ -0,0 +1,499 @@
|
||||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
// This work is based on:
|
||||
// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
struct ClusterAdjacency
|
||||
{
|
||||
unsigned int* offsets;
|
||||
unsigned int* clusters;
|
||||
unsigned int* shared;
|
||||
};
|
||||
|
||||
static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count)
|
||||
{
|
||||
(void)vertex_count;
|
||||
(void)total_index_count;
|
||||
|
||||
size_t cluster_start = 0;
|
||||
size_t cluster_write = 0;
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
offsets[i] = unsigned(cluster_write);
|
||||
|
||||
// copy cluster indices, skipping duplicates
|
||||
for (size_t j = 0; j < cluster_index_counts[i]; ++j)
|
||||
{
|
||||
unsigned int v = cluster_indices[cluster_start + j];
|
||||
assert(v < vertex_count);
|
||||
|
||||
data[cluster_write] = v;
|
||||
cluster_write += 1 - used[v];
|
||||
used[v] = 1;
|
||||
}
|
||||
|
||||
// reset used flags for the next cluster
|
||||
for (size_t j = offsets[i]; j < cluster_write; ++j)
|
||||
used[data[j]] = 0;
|
||||
|
||||
cluster_start += cluster_index_counts[i];
|
||||
}
|
||||
|
||||
assert(cluster_start == total_index_count);
|
||||
assert(cluster_write <= total_index_count);
|
||||
offsets[cluster_count] = unsigned(cluster_write);
|
||||
}
|
||||
|
||||
static void computeClusterBounds(float* cluster_bounds, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, const float* vertex_positions, size_t vertex_positions_stride)
|
||||
{
|
||||
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
float center[3] = {0, 0, 0};
|
||||
|
||||
// approximate center of the cluster by averaging all vertex positions
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
{
|
||||
const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
|
||||
|
||||
center[0] += p[0];
|
||||
center[1] += p[1];
|
||||
center[2] += p[2];
|
||||
}
|
||||
|
||||
// note: technically clusters can't be empty per meshopt_partitionCluster but we check for a division by zero in case that changes
|
||||
if (size_t cluster_size = cluster_offsets[i + 1] - cluster_offsets[i])
|
||||
{
|
||||
center[0] /= float(cluster_size);
|
||||
center[1] /= float(cluster_size);
|
||||
center[2] /= float(cluster_size);
|
||||
}
|
||||
|
||||
// compute radius of the bounding sphere for each cluster
|
||||
float radiussq = 0;
|
||||
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
{
|
||||
const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
|
||||
|
||||
float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
|
||||
|
||||
radiussq = radiussq < d2 ? d2 : radiussq;
|
||||
}
|
||||
|
||||
cluster_bounds[i * 4 + 0] = center[0];
|
||||
cluster_bounds[i * 4 + 1] = center[1];
|
||||
cluster_bounds[i * 4 + 2] = center[2];
|
||||
cluster_bounds[i * 4 + 3] = sqrtf(radiussq);
|
||||
}
|
||||
}
|
||||
|
||||
static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator)
|
||||
{
|
||||
unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);
|
||||
|
||||
// compute number of clusters referenced by each vertex
|
||||
memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
ref_offsets[cluster_indices[j]]++;
|
||||
}
|
||||
|
||||
// compute (worst-case) number of adjacent clusters for each cluster
|
||||
size_t total_adjacency = 0;
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
// worst case is every vertex has a disjoint cluster list
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
count += ref_offsets[cluster_indices[j]] - 1;
|
||||
|
||||
// ... but only every other cluster can be adjacent in the end
|
||||
total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1;
|
||||
}
|
||||
|
||||
// we can now allocate adjacency buffers
|
||||
adjacency.offsets = allocator.allocate<unsigned int>(cluster_count + 1);
|
||||
adjacency.clusters = allocator.allocate<unsigned int>(total_adjacency);
|
||||
adjacency.shared = allocator.allocate<unsigned int>(total_adjacency);
|
||||
|
||||
// convert ref counts to offsets
|
||||
size_t total_refs = 0;
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
size_t count = ref_offsets[i];
|
||||
ref_offsets[i] = unsigned(total_refs);
|
||||
total_refs += count;
|
||||
}
|
||||
|
||||
unsigned int* ref_data = allocator.allocate<unsigned int>(total_refs);
|
||||
|
||||
// fill cluster refs for each vertex
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i);
|
||||
}
|
||||
|
||||
// after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start
|
||||
memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int));
|
||||
ref_offsets[0] = 0;
|
||||
|
||||
// fill cluster adjacency for each cluster...
|
||||
adjacency.offsets[0] = 0;
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
unsigned int* adj = adjacency.clusters + adjacency.offsets[i];
|
||||
unsigned int* shd = adjacency.shared + adjacency.offsets[i];
|
||||
size_t count = 0;
|
||||
|
||||
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
|
||||
{
|
||||
unsigned int v = cluster_indices[j];
|
||||
|
||||
// merge the entire cluster list of each vertex into current list
|
||||
for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k)
|
||||
{
|
||||
unsigned int c = ref_data[k];
|
||||
assert(c < cluster_count);
|
||||
|
||||
if (c == unsigned(i))
|
||||
continue;
|
||||
|
||||
// if the cluster is already in the list, increment the shared count
|
||||
bool found = false;
|
||||
for (size_t l = 0; l < count; ++l)
|
||||
if (adj[l] == c)
|
||||
{
|
||||
found = true;
|
||||
shd[l]++;
|
||||
break;
|
||||
}
|
||||
|
||||
// .. or append a new cluster
|
||||
if (!found)
|
||||
{
|
||||
adj[count] = c;
|
||||
shd[count] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mark the end of the adjacency list; the next cluster will start there as well
|
||||
adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count);
|
||||
}
|
||||
|
||||
assert(adjacency.offsets[cluster_count] <= total_adjacency);
|
||||
|
||||
// ref_offsets can't be deallocated as it was allocated before adjacency
|
||||
allocator.deallocate(ref_data);
|
||||
}
|
||||
|
||||
struct ClusterGroup
|
||||
{
|
||||
int group;
|
||||
int next;
|
||||
unsigned int size; // 0 unless root
|
||||
unsigned int vertices;
|
||||
};
|
||||
|
||||
struct GroupOrder
|
||||
{
|
||||
unsigned int id;
|
||||
int order;
|
||||
};
|
||||
|
||||
static void heapPush(GroupOrder* heap, size_t size, GroupOrder item)
|
||||
{
|
||||
// insert a new element at the end (breaks heap invariant)
|
||||
heap[size++] = item;
|
||||
|
||||
// bubble up the new element to its correct position
|
||||
size_t i = size - 1;
|
||||
while (i > 0 && heap[i].order < heap[(i - 1) / 2].order)
|
||||
{
|
||||
size_t p = (i - 1) / 2;
|
||||
|
||||
GroupOrder temp = heap[i];
|
||||
heap[i] = heap[p];
|
||||
heap[p] = temp;
|
||||
i = p;
|
||||
}
|
||||
}
|
||||
|
||||
static GroupOrder heapPop(GroupOrder* heap, size_t size)
|
||||
{
|
||||
assert(size > 0);
|
||||
GroupOrder top = heap[0];
|
||||
|
||||
// move the last element to the top (breaks heap invariant)
|
||||
heap[0] = heap[--size];
|
||||
|
||||
// bubble down the new top element to its correct position
|
||||
size_t i = 0;
|
||||
while (i * 2 + 1 < size)
|
||||
{
|
||||
// find the smallest child
|
||||
size_t j = i * 2 + 1;
|
||||
j += (j + 1 < size && heap[j + 1].order < heap[j].order);
|
||||
|
||||
// if the parent is already smaller than both children, we're done
|
||||
if (heap[j].order >= heap[i].order)
|
||||
break;
|
||||
|
||||
// otherwise, swap the parent and child and continue
|
||||
GroupOrder temp = heap[i];
|
||||
heap[i] = heap[j];
|
||||
heap[j] = temp;
|
||||
i = j;
|
||||
}
|
||||
|
||||
return top;
|
||||
}
|
||||
|
||||
static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency)
|
||||
{
|
||||
unsigned int total = 0;
|
||||
|
||||
for (int i1 = group1; i1 >= 0; i1 = groups[i1].next)
|
||||
for (int i2 = group2; i2 >= 0; i2 = groups[i2].next)
|
||||
{
|
||||
for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj)
|
||||
if (adjacency.clusters[adj] == unsigned(i2))
|
||||
{
|
||||
total += adjacency.shared[adj];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static void mergeBounds(float* target, const float* source)
|
||||
{
|
||||
float r1 = target[3], r2 = source[3];
|
||||
float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
|
||||
float d = sqrtf(dx * dx + dy * dy + dz * dz);
|
||||
|
||||
if (d + r1 < r2)
|
||||
{
|
||||
memcpy(target, source, 4 * sizeof(float));
|
||||
return;
|
||||
}
|
||||
|
||||
if (d + r2 > r1)
|
||||
{
|
||||
float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f;
|
||||
|
||||
target[0] += dx * k;
|
||||
target[1] += dy * k;
|
||||
target[2] += dz * k;
|
||||
target[3] = (d + r2 + r1) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
static float boundsScore(const float* target, const float* source)
|
||||
{
|
||||
float r1 = target[3], r2 = source[3];
|
||||
float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
|
||||
float d = sqrtf(dx * dx + dy * dy + dz * dz);
|
||||
|
||||
float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2);
|
||||
|
||||
return mr > 0 ? r1 / mr : 0.f;
|
||||
}
|
||||
|
||||
static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, const float* cluster_bounds)
|
||||
{
|
||||
assert(groups[id].size > 0);
|
||||
|
||||
float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices)));
|
||||
|
||||
int best_group = -1;
|
||||
float best_score = 0;
|
||||
|
||||
for (int ci = id; ci >= 0; ci = groups[ci].next)
|
||||
{
|
||||
for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj)
|
||||
{
|
||||
int other = groups[adjacency.clusters[adj]].group;
|
||||
if (other < 0)
|
||||
continue;
|
||||
|
||||
assert(groups[other].size > 0);
|
||||
if (groups[id].size + groups[other].size > max_partition_size)
|
||||
continue;
|
||||
|
||||
unsigned int shared = countShared(groups, id, other, adjacency);
|
||||
float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices)));
|
||||
|
||||
// normalize shared count by the expected boundary of each group (+ keeps scoring symmetric)
|
||||
float score = float(int(shared)) * (group_rsqrt + other_rsqrt);
|
||||
|
||||
// incorporate spatial score to favor merging nearby groups
|
||||
if (cluster_bounds)
|
||||
score *= 1.f + 0.4f * boundsScore(&cluster_bounds[id * 4], &cluster_bounds[other * 4]);
|
||||
|
||||
if (score > best_score)
|
||||
{
|
||||
best_group = other;
|
||||
best_score = score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return best_group;
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
assert(target_partition_size > 0);
|
||||
|
||||
size_t max_partition_size = target_partition_size + target_partition_size * 3 / 8;
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
|
||||
memset(used, 0, vertex_count);
|
||||
|
||||
unsigned int* cluster_newindices = allocator.allocate<unsigned int>(total_index_count);
|
||||
unsigned int* cluster_offsets = allocator.allocate<unsigned int>(cluster_count + 1);
|
||||
|
||||
// make new cluster index list that filters out duplicate indices
|
||||
filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count);
|
||||
cluster_indices = cluster_newindices;
|
||||
|
||||
// compute bounding sphere for each cluster if positions are provided
|
||||
float* cluster_bounds = NULL;
|
||||
|
||||
if (vertex_positions)
|
||||
{
|
||||
cluster_bounds = allocator.allocate<float>(cluster_count * 4);
|
||||
computeClusterBounds(cluster_bounds, cluster_indices, cluster_offsets, cluster_count, vertex_positions, vertex_positions_stride);
|
||||
}
|
||||
|
||||
// build cluster adjacency along with edge weights (shared vertex count)
|
||||
ClusterAdjacency adjacency = {};
|
||||
buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator);
|
||||
|
||||
ClusterGroup* groups = allocator.allocate<ClusterGroup>(cluster_count);
|
||||
|
||||
GroupOrder* order = allocator.allocate<GroupOrder>(cluster_count);
|
||||
size_t pending = 0;
|
||||
|
||||
// create a singleton group for each cluster and order them by priority
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
groups[i].group = int(i);
|
||||
groups[i].next = -1;
|
||||
groups[i].size = 1;
|
||||
groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i];
|
||||
assert(groups[i].vertices > 0);
|
||||
|
||||
GroupOrder item = {};
|
||||
item.id = unsigned(i);
|
||||
item.order = groups[i].vertices;
|
||||
|
||||
heapPush(order, pending++, item);
|
||||
}
|
||||
|
||||
// iteratively merge the smallest group with the best group
|
||||
while (pending)
|
||||
{
|
||||
GroupOrder top = heapPop(order, pending--);
|
||||
|
||||
// this group was merged into another group earlier
|
||||
if (groups[top.id].size == 0)
|
||||
continue;
|
||||
|
||||
// disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted
|
||||
for (int i = top.id; i >= 0; i = groups[i].next)
|
||||
{
|
||||
assert(groups[i].group == int(top.id));
|
||||
groups[i].group = -1;
|
||||
}
|
||||
|
||||
// the group is large enough, emit as is
|
||||
if (groups[top.id].size >= target_partition_size)
|
||||
continue;
|
||||
|
||||
int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, cluster_bounds);
|
||||
|
||||
// we can't grow the group any more, emit as is
|
||||
if (best_group == -1)
|
||||
continue;
|
||||
|
||||
// compute shared vertices to adjust the total vertices estimate after merging
|
||||
unsigned int shared = countShared(groups, top.id, best_group, adjacency);
|
||||
|
||||
// combine groups by linking them together
|
||||
assert(groups[best_group].size > 0);
|
||||
|
||||
for (int i = top.id; i >= 0; i = groups[i].next)
|
||||
if (groups[i].next < 0)
|
||||
{
|
||||
groups[i].next = best_group;
|
||||
break;
|
||||
}
|
||||
|
||||
// update group sizes; note, the vertex update is a O(1) approximation which avoids recomputing the true size
|
||||
groups[top.id].size += groups[best_group].size;
|
||||
groups[top.id].vertices += groups[best_group].vertices;
|
||||
groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1;
|
||||
|
||||
groups[best_group].size = 0;
|
||||
groups[best_group].vertices = 0;
|
||||
|
||||
// merge bounding spheres if bounds are available
|
||||
if (cluster_bounds)
|
||||
{
|
||||
mergeBounds(&cluster_bounds[top.id * 4], &cluster_bounds[best_group * 4]);
|
||||
memset(&cluster_bounds[best_group * 4], 0, 4 * sizeof(float));
|
||||
}
|
||||
|
||||
// re-associate all clusters back to the merged group
|
||||
for (int i = top.id; i >= 0; i = groups[i].next)
|
||||
groups[i].group = int(top.id);
|
||||
|
||||
top.order = groups[top.id].vertices;
|
||||
heapPush(order, pending++, top);
|
||||
}
|
||||
|
||||
size_t next_group = 0;
|
||||
|
||||
for (size_t i = 0; i < cluster_count; ++i)
|
||||
{
|
||||
if (groups[i].size == 0)
|
||||
continue;
|
||||
|
||||
for (int j = int(i); j >= 0; j = groups[j].next)
|
||||
destination[j] = unsigned(next_group);
|
||||
|
||||
next_group++;
|
||||
}
|
||||
|
||||
assert(next_group <= cluster_count);
|
||||
return next_group;
|
||||
}
|
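As a reference for the partitioner added above, a minimal usage sketch of meshopt_partitionClusters; the meshlet buffers and the target size of 8 are assumed inputs chosen for illustration, only the call shape mirrors the signature in this file.

// Hypothetical caller: group meshlets into partitions of roughly 8 clusters each.
#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> partition_meshlets(const std::vector<unsigned int>& cluster_indices,
    const std::vector<unsigned int>& cluster_index_counts,
    const std::vector<float>& positions) // packed float3 per vertex
{
	std::vector<unsigned int> cluster_to_partition(cluster_index_counts.size());

	// returns the number of partitions; each cluster receives the id of the partition it was assigned to
	size_t partition_count = meshopt_partitionClusters(cluster_to_partition.data(),
	    cluster_indices.data(), cluster_indices.size(),
	    cluster_index_counts.data(), cluster_index_counts.size(),
	    positions.data(), positions.size() / 3, sizeof(float) * 3,
	    /* target_partition_size= */ 8);

	(void)partition_count;
	return cluster_to_partition;
}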
76
thirdparty/meshoptimizer/quantization.cpp
vendored
Normal file
76
thirdparty/meshoptimizer/quantization.cpp
vendored
Normal file
@@ -0,0 +1,76 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>

union FloatBits
{
	float f;
	unsigned int ui;
};

unsigned short meshopt_quantizeHalf(float v)
{
	FloatBits u = {v};
	unsigned int ui = u.ui;

	int s = (ui >> 16) & 0x8000;
	int em = ui & 0x7fffffff;

	// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
	int h = (em - (112 << 23) + (1 << 12)) >> 13;

	// underflow: flush to zero; 113 encodes exponent -14
	h = (em < (113 << 23)) ? 0 : h;

	// overflow: infinity; 143 encodes exponent 16
	h = (em >= (143 << 23)) ? 0x7c00 : h;

	// NaN; note that we convert all types of NaN to qNaN
	h = (em > (255 << 23)) ? 0x7e00 : h;

	return (unsigned short)(s | h);
}

float meshopt_quantizeFloat(float v, int N)
{
	assert(N >= 0 && N <= 23);

	FloatBits u = {v};
	unsigned int ui = u.ui;

	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
	ui = e == 0x7f800000 ? ui : rui;

	// flush denormals to zero
	ui = e == 0 ? 0 : ui;

	u.ui = ui;
	return u.f;
}

float meshopt_dequantizeHalf(unsigned short h)
{
	unsigned int s = unsigned(h & 0x8000) << 16;
	int em = h & 0x7fff;

	// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
	int r = (em + (112 << 10)) << 13;

	// denormal: flush to zero
	r = (em < (1 << 10)) ? 0 : r;

	// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
	// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
	r += (em >= (31 << 10)) ? (112 << 23) : 0;

	FloatBits u;
	u.ui = s | r;
	return u.f;
}
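A minimal sketch of how the quantization helpers above are used; the input value is an arbitrary example and the comments state expected precision, not exact results.

// Hypothetical example: pack a vertex normal component into half precision and restore it.
#include "meshoptimizer.h"
#include <stdio.h>

int main()
{
	float n = 0.70710678f;

	unsigned short h = meshopt_quantizeHalf(n);   // 16-bit half-float bit pattern
	float back = meshopt_dequantizeHalf(h);       // ~0.7070, within half-float precision

	float trimmed = meshopt_quantizeFloat(n, 10); // keep 10 explicit mantissa bits, still stored as a float

	printf("%f %f %f\n", n, back, trimmed);
	return 0;
}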
289
thirdparty/meshoptimizer/rasterizer.cpp
vendored
Normal file
289
thirdparty/meshoptimizer/rasterizer.cpp
vendored
Normal file
@@ -0,0 +1,289 @@
|
||||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
// This work is based on:
|
||||
// Nicolas Capens. Advanced Rasterization. 2004
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
const int kViewport = 256;
|
||||
|
||||
struct OverdrawBuffer
|
||||
{
|
||||
float z[kViewport][kViewport][2];
|
||||
unsigned int overdraw[kViewport][kViewport][2];
|
||||
};
|
||||
|
||||
static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
|
||||
{
|
||||
// z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
|
||||
// z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
|
||||
// (x2-x1 y2-y1)(dzdx) = (z2-z1)
|
||||
// (x3-x1 y3-y1)(dzdy) (z3-z1)
|
||||
// we'll solve it with Cramer's rule
|
||||
float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
|
||||
float invdet = (det == 0) ? 0 : 1 / det;
|
||||
|
||||
dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
|
||||
dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;
|
||||
|
||||
return det;
|
||||
}
|
||||
|
||||
// half-space fixed point triangle rasterizer
|
||||
static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
|
||||
{
|
||||
// compute depth gradients
|
||||
float DZx, DZy;
|
||||
float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
|
||||
int sign = det > 0;
|
||||
|
||||
// flip backfacing triangles to simplify rasterization logic
|
||||
if (sign)
|
||||
{
|
||||
// flipping v2 & v3 preserves depth gradients since they're based on v1; only v1z is used below
|
||||
float t;
|
||||
t = v2x, v2x = v3x, v3x = t;
|
||||
t = v2y, v2y = v3y, v3y = t;
|
||||
|
||||
// flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
|
||||
v1z = kViewport - v1z;
|
||||
DZx = -DZx;
|
||||
DZy = -DZy;
|
||||
}
|
||||
|
||||
// coordinates, 28.4 fixed point
|
||||
int X1 = int(16.0f * v1x + 0.5f);
|
||||
int X2 = int(16.0f * v2x + 0.5f);
|
||||
int X3 = int(16.0f * v3x + 0.5f);
|
||||
|
||||
int Y1 = int(16.0f * v1y + 0.5f);
|
||||
int Y2 = int(16.0f * v2y + 0.5f);
|
||||
int Y3 = int(16.0f * v3y + 0.5f);
|
||||
|
||||
// bounding rectangle, clipped against viewport
|
||||
// since we rasterize pixels with covered centers, min >0.5 should round up
|
||||
// as for max, due to top-left filling convention we will never rasterize right/bottom edges
|
||||
// so max >= 0.5 should round down for inclusive bounds, and up for exclusive (in our case)
|
||||
int minx = X1 < X2 ? X1 : X2;
|
||||
minx = minx < X3 ? minx : X3;
|
||||
minx = (minx + 7) >> 4;
|
||||
minx = minx < 0 ? 0 : minx;
|
||||
|
||||
int miny = Y1 < Y2 ? Y1 : Y2;
|
||||
miny = miny < Y3 ? miny : Y3;
|
||||
miny = (miny + 7) >> 4;
|
||||
miny = miny < 0 ? 0 : miny;
|
||||
|
||||
int maxx = X1 > X2 ? X1 : X2;
|
||||
maxx = maxx > X3 ? maxx : X3;
|
||||
maxx = (maxx + 7) >> 4;
|
||||
maxx = maxx > kViewport ? kViewport : maxx;
|
||||
|
||||
int maxy = Y1 > Y2 ? Y1 : Y2;
|
||||
maxy = maxy > Y3 ? maxy : Y3;
|
||||
maxy = (maxy + 7) >> 4;
|
||||
maxy = maxy > kViewport ? kViewport : maxy;
|
||||
|
||||
// deltas, 28.4 fixed point
|
||||
int DX12 = X1 - X2;
|
||||
int DX23 = X2 - X3;
|
||||
int DX31 = X3 - X1;
|
||||
|
||||
int DY12 = Y1 - Y2;
|
||||
int DY23 = Y2 - Y3;
|
||||
int DY31 = Y3 - Y1;
|
||||
|
||||
// fill convention correction
|
||||
int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
|
||||
int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
|
||||
int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);
|
||||
|
||||
// half edge equations, 24.8 fixed point
|
||||
// note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
|
||||
int FX = (minx << 4) + 8;
|
||||
int FY = (miny << 4) + 8;
|
||||
int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
|
||||
int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
|
||||
int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
|
||||
float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);
|
||||
|
||||
for (int y = miny; y < maxy; y++)
|
||||
{
|
||||
int CX1 = CY1;
|
||||
int CX2 = CY2;
|
||||
int CX3 = CY3;
|
||||
float ZX = ZY;
|
||||
|
||||
for (int x = minx; x < maxx; x++)
|
||||
{
|
||||
// check if all CXn are non-negative
|
||||
if ((CX1 | CX2 | CX3) >= 0)
|
||||
{
|
||||
if (ZX >= buffer->z[y][x][sign])
|
||||
{
|
||||
buffer->z[y][x][sign] = ZX;
|
||||
buffer->overdraw[y][x][sign]++;
|
||||
}
|
||||
}
|
||||
|
||||
// signed left shift is UB for negative numbers so use unsigned-signed casts
|
||||
CX1 -= int(unsigned(DY12) << 4);
|
||||
CX2 -= int(unsigned(DY23) << 4);
|
||||
CX3 -= int(unsigned(DY31) << 4);
|
||||
ZX += DZx;
|
||||
}
|
||||
|
||||
// signed left shift is UB for negative numbers so use unsigned-signed casts
|
||||
CY1 += int(unsigned(DX12) << 4);
|
||||
CY2 += int(unsigned(DX23) << 4);
|
||||
CY3 += int(unsigned(DX31) << 4);
|
||||
ZY += DZy;
|
||||
}
|
||||
}
|
||||
|
||||
static float transformTriangles(float* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
|
||||
|
||||
float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
|
||||
float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
const float* v = vertex_positions + i * vertex_stride_float;
|
||||
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
float vj = v[j];
|
||||
|
||||
minv[j] = minv[j] > vj ? vj : minv[j];
|
||||
maxv[j] = maxv[j] < vj ? vj : maxv[j];
|
||||
}
|
||||
}
|
||||
|
||||
float extent = 0.f;
|
||||
|
||||
extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
|
||||
extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
|
||||
extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
|
||||
|
||||
float scale = kViewport / extent;
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
{
|
||||
unsigned int index = indices[i];
|
||||
assert(index < vertex_count);
|
||||
|
||||
const float* v = vertex_positions + index * vertex_stride_float;
|
||||
|
||||
triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
|
||||
triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
|
||||
triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
|
||||
}
|
||||
|
||||
return extent;
|
||||
}
|
||||
|
||||
static void rasterizeTriangles(OverdrawBuffer* buffer, const float* triangles, size_t index_count, int axis)
|
||||
{
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
const float* vn0 = &triangles[3 * (i + 0)];
|
||||
const float* vn1 = &triangles[3 * (i + 1)];
|
||||
const float* vn2 = &triangles[3 * (i + 2)];
|
||||
|
||||
switch (axis)
|
||||
{
|
||||
case 0:
|
||||
rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
|
||||
break;
|
||||
case 1:
|
||||
rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
|
||||
break;
|
||||
case 2:
|
||||
rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
meshopt_OverdrawStatistics result = {};
|
||||
|
||||
float* triangles = allocator.allocate<float>(index_count * 3);
|
||||
transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
|
||||
|
||||
OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
|
||||
|
||||
for (int axis = 0; axis < 3; ++axis)
|
||||
{
|
||||
memset(buffer, 0, sizeof(OverdrawBuffer));
|
||||
rasterizeTriangles(buffer, triangles, index_count, axis);
|
||||
|
||||
for (int y = 0; y < kViewport; ++y)
|
||||
for (int x = 0; x < kViewport; ++x)
|
||||
for (int s = 0; s < 2; ++s)
|
||||
{
|
||||
unsigned int overdraw = buffer->overdraw[y][x][s];
|
||||
|
||||
result.pixels_covered += overdraw > 0;
|
||||
result.pixels_shaded += overdraw;
|
||||
}
|
||||
}
|
||||
|
||||
result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
meshopt_CoverageStatistics result = {};
|
||||
|
||||
float* triangles = allocator.allocate<float>(index_count * 3);
|
||||
float extent = transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
|
||||
|
||||
OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
|
||||
|
||||
for (int axis = 0; axis < 3; ++axis)
|
||||
{
|
||||
memset(buffer, 0, sizeof(OverdrawBuffer));
|
||||
rasterizeTriangles(buffer, triangles, index_count, axis);
|
||||
|
||||
unsigned int covered = 0;
|
||||
|
||||
for (int y = 0; y < kViewport; ++y)
|
||||
for (int x = 0; x < kViewport; ++x)
|
||||
covered += (buffer->overdraw[y][x][0] | buffer->overdraw[y][x][1]) > 0;
|
||||
|
||||
result.coverage[axis] = float(covered) / float(kViewport * kViewport);
|
||||
}
|
||||
|
||||
result.extent = extent;
|
||||
|
||||
return result;
|
||||
}
|
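A minimal sketch showing how the two analysis entry points above can be used to validate an overdraw optimization pass; the mesh buffers are assumed inputs and the interpretation comment is based on the statistics fields visible in this file.

// Hypothetical example: measure overdraw and per-axis coverage for a mesh.
#include "meshoptimizer.h"
#include <stdio.h>
#include <vector>

void report_overdraw(const std::vector<unsigned int>& indices, const std::vector<float>& positions)
{
	meshopt_OverdrawStatistics os = meshopt_analyzeOverdraw(indices.data(), indices.size(),
	    positions.data(), positions.size() / 3, sizeof(float) * 3);

	meshopt_CoverageStatistics cs = meshopt_analyzeCoverage(indices.data(), indices.size(),
	    positions.data(), positions.size() / 3, sizeof(float) * 3);

	// overdraw is shaded pixels divided by covered pixels (1.0 means no overdraw),
	// accumulated over three axis-aligned views; coverage is reported per axis
	printf("overdraw: %.3f, coverage: %.3f %.3f %.3f\n",
	    os.overdraw, cs.coverage[0], cs.coverage[1], cs.coverage[2]);
}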
2401
thirdparty/meshoptimizer/simplifier.cpp
vendored
Normal file
2401
thirdparty/meshoptimizer/simplifier.cpp
vendored
Normal file
File diff suppressed because it is too large
340
thirdparty/meshoptimizer/spatialorder.cpp
vendored
Normal file
340
thirdparty/meshoptimizer/spatialorder.cpp
vendored
Normal file
@@ -0,0 +1,340 @@
|
||||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
// This work is based on:
|
||||
// Fabian Giesen. Decoding Morton codes. 2009
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
// "Insert" two 0 bits after each of the 20 low bits of x
|
||||
inline unsigned long long part1By2(unsigned long long x)
|
||||
{
|
||||
x &= 0x000fffffull; // x = ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- jihg fedc ba98 7654 3210
|
||||
x = (x ^ (x << 32)) & 0x000f00000000ffffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- ---- ---- ---- ---- fedc ba98 7654 3210
|
||||
x = (x ^ (x << 16)) & 0x000f0000ff0000ffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- fedc ba98 ---- ---- ---- ---- 7654 3210
|
||||
x = (x ^ (x << 8)) & 0x000f00f00f00f00full; // x = ---- ---- ---- jihg ---- ---- fedc ---- ---- ba98 ---- ---- 7654 ---- ---- 3210
|
||||
x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull; // x = ---- ---- ji-- --hg ---- fe-- --dc ---- ba-- --98 ---- 76-- --54 ---- 32-- --10
|
||||
x = (x ^ (x << 2)) & 0x0249249249249249ull; // x = ---- --j- -i-- h--g --f- -e-- d--c --b- -a-- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
|
||||
return x;
|
||||
}
|
||||
|
||||
static void computeOrder(unsigned long long* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, bool morton)
|
||||
{
|
||||
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
|
||||
|
||||
float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
|
||||
float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
const float* v = vertex_positions_data + i * vertex_stride_float;
|
||||
|
||||
for (int j = 0; j < 3; ++j)
|
||||
{
|
||||
float vj = v[j];
|
||||
|
||||
minv[j] = minv[j] > vj ? vj : minv[j];
|
||||
maxv[j] = maxv[j] < vj ? vj : maxv[j];
|
||||
}
|
||||
}
|
||||
|
||||
float extent = 0.f;
|
||||
|
||||
extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
|
||||
extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
|
||||
extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
|
||||
|
||||
// rescale each axis to 16 bits to get 48-bit Morton codes
|
||||
float scale = extent == 0 ? 0.f : 65535.f / extent;
|
||||
|
||||
// generate Morton order based on the position inside a unit cube
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
const float* v = vertex_positions_data + i * vertex_stride_float;
|
||||
|
||||
int x = int((v[0] - minv[0]) * scale + 0.5f);
|
||||
int y = int((v[1] - minv[1]) * scale + 0.5f);
|
||||
int z = int((v[2] - minv[2]) * scale + 0.5f);
|
||||
|
||||
if (morton)
|
||||
result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
|
||||
else
|
||||
result[i] = ((unsigned long long)x << 0) | ((unsigned long long)y << 20) | ((unsigned long long)z << 40);
|
||||
}
|
||||
}
|
||||
|
||||
static void radixSort10(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count)
|
||||
{
|
||||
unsigned int hist[1024];
|
||||
memset(hist, 0, sizeof(hist));
|
||||
|
||||
// compute histogram (assume keys are 10-bit)
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
hist[keys[i]]++;
|
||||
|
||||
unsigned int sum = 0;
|
||||
|
||||
// replace histogram data with prefix histogram sums in-place
|
||||
for (int i = 0; i < 1024; ++i)
|
||||
{
|
||||
unsigned int h = hist[i];
|
||||
hist[i] = sum;
|
||||
sum += h;
|
||||
}
|
||||
|
||||
assert(sum == count);
|
||||
|
||||
// reorder values
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned int id = keys[source[i]];
|
||||
|
||||
destination[hist[id]++] = source[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void computeHistogram(unsigned int (&hist)[256][2], const unsigned short* data, size_t count)
|
||||
{
|
||||
memset(hist, 0, sizeof(hist));
|
||||
|
||||
// compute 2 8-bit histograms in parallel
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned long long id = data[i];
|
||||
|
||||
hist[(id >> 0) & 255][0]++;
|
||||
hist[(id >> 8) & 255][1]++;
|
||||
}
|
||||
|
||||
unsigned int sum0 = 0, sum1 = 0;
|
||||
|
||||
// replace histogram data with prefix histogram sums in-place
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
unsigned int h0 = hist[i][0], h1 = hist[i][1];
|
||||
|
||||
hist[i][0] = sum0;
|
||||
hist[i][1] = sum1;
|
||||
|
||||
sum0 += h0;
|
||||
sum1 += h1;
|
||||
}
|
||||
|
||||
assert(sum0 == count && sum1 == count);
|
||||
}
|
||||
|
||||
static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count, unsigned int (&hist)[256][2], int pass)
|
||||
{
|
||||
int bitoff = pass * 8;
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned int id = unsigned(keys[source[i]] >> bitoff) & 255;
|
||||
|
||||
destination[hist[id][pass]++] = source[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void partitionPoints(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count)
|
||||
{
|
||||
size_t l = 0, r = split;
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned char side = sides[order[i]];
|
||||
target[side ? r : l] = order[i];
|
||||
l += 1;
|
||||
l -= side;
|
||||
r += side;
|
||||
}
|
||||
|
||||
assert(l == split && r == count);
|
||||
}
|
||||
|
||||
static void splitPoints(unsigned int* destination, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, const unsigned long long* keys, size_t count, void* scratch, size_t cluster_size)
|
||||
{
|
||||
if (count <= cluster_size)
|
||||
{
|
||||
memcpy(destination, orderx, count * sizeof(unsigned int));
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int* axes[3] = {orderx, ordery, orderz};
|
||||
|
||||
int bestk = -1;
|
||||
unsigned int bestdim = 0;
|
||||
|
||||
for (int k = 0; k < 3; ++k)
|
||||
{
|
||||
const unsigned int mask = (1 << 20) - 1;
|
||||
unsigned int dim = (unsigned(keys[axes[k][count - 1]] >> (k * 20)) & mask) - (unsigned(keys[axes[k][0]] >> (k * 20)) & mask);
|
||||
|
||||
if (dim >= bestdim)
|
||||
{
|
||||
bestk = k;
|
||||
bestdim = dim;
|
||||
}
|
||||
}
|
||||
|
||||
assert(bestk >= 0);
|
||||
|
||||
// split roughly in half, with the left split always being aligned to cluster size
|
||||
size_t split = ((count / 2) + cluster_size - 1) / cluster_size * cluster_size;
|
||||
assert(split > 0 && split < count);
|
||||
|
||||
// mark sides of split for partitioning
|
||||
unsigned char* sides = static_cast<unsigned char*>(scratch) + count * sizeof(unsigned int);
|
||||
|
||||
for (size_t i = 0; i < split; ++i)
|
||||
sides[axes[bestk][i]] = 0;
|
||||
|
||||
for (size_t i = split; i < count; ++i)
|
||||
sides[axes[bestk][i]] = 1;
|
||||
|
||||
// partition all axes into two sides, maintaining order
|
||||
unsigned int* temp = static_cast<unsigned int*>(scratch);
|
||||
|
||||
for (int k = 0; k < 3; ++k)
|
||||
{
|
||||
if (k == bestk)
|
||||
continue;
|
||||
|
||||
unsigned int* axis = axes[k];
|
||||
memcpy(temp, axis, sizeof(unsigned int) * count);
|
||||
partitionPoints(axis, temp, sides, split, count);
|
||||
}
|
||||
|
||||
splitPoints(destination, orderx, ordery, orderz, keys, split, scratch, cluster_size);
|
||||
splitPoints(destination + split, orderx + split, ordery + split, orderz + split, keys, count - split, scratch, cluster_size);
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
|
||||
computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ true);
|
||||
|
||||
unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 2b for keys
|
||||
unsigned short* keyk = (unsigned short*)(scratch + vertex_count);
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
destination[i] = unsigned(i);
|
||||
|
||||
unsigned int* order[] = {scratch, destination};
|
||||
|
||||
// 5-pass radix sort computes the resulting order into scratch
|
||||
for (int k = 0; k < 5; ++k)
|
||||
{
|
||||
// copy 10-bit key segments into keyk to reduce cache pressure during radix pass
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
			keyk[i] = (unsigned short)((keys[i] >> (k * 10)) & 1023);

		radixSort10(order[k % 2], order[(k + 1) % 2], keyk, vertex_count);
	}

	// since our remap table is mapping old=>new, we need to reverse it
	for (size_t i = 0; i < vertex_count; ++i)
		destination[scratch[i]] = unsigned(i);
}

void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	(void)vertex_count;

	size_t face_count = index_count / 3;
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	meshopt_Allocator allocator;

	float* centroids = allocator.allocate<float>(face_count * 3);

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		const float* va = vertex_positions + a * vertex_stride_float;
		const float* vb = vertex_positions + b * vertex_stride_float;
		const float* vc = vertex_positions + c * vertex_stride_float;

		centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
		centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
		centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
	}

	unsigned int* remap = allocator.allocate<unsigned int>(face_count);

	meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);

	// support in-order remap
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
		unsigned int r = remap[i];

		destination[r * 3 + 0] = a;
		destination[r * 3 + 1] = b;
		destination[r * 3 + 2] = c;
	}
}

void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size)
{
	using namespace meshopt;

	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);
	assert(cluster_size > 0);

	meshopt_Allocator allocator;

	unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ false);

	unsigned int* order = allocator.allocate<unsigned int>(vertex_count * 3);
	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 1b for side or 2b for keys
	unsigned short* keyk = reinterpret_cast<unsigned short*>(scratch + vertex_count);

	for (int k = 0; k < 3; ++k)
	{
		// copy 16-bit key segments into keyk to reduce cache pressure during radix pass
		for (size_t i = 0; i < vertex_count; ++i)
			keyk[i] = (unsigned short)(keys[i] >> (k * 20));

		unsigned int hist[256][2];
		computeHistogram(hist, keyk, vertex_count);

		for (size_t i = 0; i < vertex_count; ++i)
			order[k * vertex_count + i] = unsigned(i);

		radixPass(scratch, order + k * vertex_count, keyk, vertex_count, hist, 0);
		radixPass(order + k * vertex_count, scratch, keyk, vertex_count, hist, 1);
	}

	splitPoints(destination, order, order + vertex_count, order + 2 * vertex_count, keys, vertex_count, scratch, cluster_size);
}
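The two entry points above can be called directly from application code. A minimal usage sketch follows (illustrative only, not part of the vendored file): the vector names, the cluster size of 256, and the assumption of tightly packed float3 positions are all hypothetical, and only the signatures shown in this diff are used.

#include "meshoptimizer.h"

#include <vector>

void reorder_for_locality(std::vector<unsigned int>& indices, const std::vector<float>& positions)
{
	size_t vertex_count = positions.size() / 3;

	// reorder triangles so that spatially close faces end up close in the index buffer;
	// in-order operation (destination == indices) is supported, as the code above shows
	meshopt_spatialSortTriangles(indices.data(), indices.data(), indices.size(), positions.data(), vertex_count, sizeof(float) * 3);

	// independently, arrange a point cloud so that each consecutive run of ~256 point
	// indices in point_order forms a spatially coherent cluster
	std::vector<unsigned int> point_order(vertex_count);
	meshopt_spatialClusterPoints(point_order.data(), positions.data(), vertex_count, sizeof(float) * 3, 256);
}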
296
thirdparty/meshoptimizer/stripifier.cpp
vendored
Normal file
@@ -0,0 +1,296 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <limits.h>
#include <string.h>

// This work is based on:
// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
namespace meshopt
{

static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned char* valence)
{
	unsigned int index = 0;
	unsigned int iv = ~0u;

	for (size_t i = 0; i < buffer_size; ++i)
	{
		unsigned char va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]];
		unsigned int v = (va < vb && va < vc) ? va : (vb < vc ? vb : vc);

		if (v < iv)
		{
			index = unsigned(i);
			iv = v;
		}
	}

	return index;
}

static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
{
	for (size_t i = 0; i < buffer_size; ++i)
	{
		unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];

		if (e0 == a && e1 == b)
			return (int(i) << 2) | 2;
		else if (e0 == b && e1 == c)
			return (int(i) << 2) | 0;
		else if (e0 == c && e1 == a)
			return (int(i) << 2) | 1;
	}

	return -1;
}

} // namespace meshopt

size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
{
	assert(destination != indices);
	assert(index_count % 3 == 0);

	using namespace meshopt;

	meshopt_Allocator allocator;

	const size_t buffer_capacity = 8;

	unsigned int buffer[buffer_capacity][3] = {};
	unsigned int buffer_size = 0;

	size_t index_offset = 0;

	unsigned int strip[2] = {};
	unsigned int parity = 0;

	size_t strip_size = 0;

	// compute vertex valence; this is used to prioritize starting triangle for strips
	// note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic
	unsigned char* valence = allocator.allocate<unsigned char>(vertex_count);
	memset(valence, 0, vertex_count);

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		valence[index]++;
	}

	int next = -1;

	while (buffer_size > 0 || index_offset < index_count)
	{
		assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));

		// fill triangle buffer
		while (buffer_size < buffer_capacity && index_offset < index_count)
		{
			buffer[buffer_size][0] = indices[index_offset + 0];
			buffer[buffer_size][1] = indices[index_offset + 1];
			buffer[buffer_size][2] = indices[index_offset + 2];

			buffer_size++;
			index_offset += 3;
		}

		assert(buffer_size > 0);

		if (next >= 0)
		{
			unsigned int i = next >> 2;
			unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
			unsigned int v = buffer[i][next & 3];

			// ordered removal from the buffer
			memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
			buffer_size--;

			// update vertex valences for strip start heuristic
			valence[a]--;
			valence[b]--;
			valence[c]--;

			// find next triangle (note that edge order flips on every iteration)
			// in some cases we need to perform a swap to pick a different outgoing triangle edge
			// for [a b c], the default strip edge is [b c], but we might want to use [a c]
			int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
			int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;

			if (cont < 0 && swap >= 0)
			{
				// [a b c] => [a b a c]
				destination[strip_size++] = strip[0];
				destination[strip_size++] = v;

				// next strip has same winding
				// ? a b => b a v
				strip[1] = v;

				next = swap;
			}
			else
			{
				// emit the next vertex in the strip
				destination[strip_size++] = v;

				// next strip has flipped winding
				strip[0] = strip[1];
				strip[1] = v;
				parity ^= 1;

				next = cont;
			}
		}
		else
		{
			// if we didn't find anything, we need to find the next new triangle
			// we use a heuristic to maximize the strip length
			unsigned int i = findStripFirst(buffer, buffer_size, valence);
			unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];

			// ordered removal from the buffer
			memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
			buffer_size--;

			// update vertex valences for strip start heuristic
			valence[a]--;
			valence[b]--;
			valence[c]--;

			// we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
			int ea = findStripNext(buffer, buffer_size, c, b);
			int eb = findStripNext(buffer, buffer_size, a, c);
			int ec = findStripNext(buffer, buffer_size, b, a);

			// in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
			// triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
			// reasons - slightly improves the stripification efficiency
			int mine = INT_MAX;
			mine = (ea >= 0 && mine > ea) ? ea : mine;
			mine = (eb >= 0 && mine > eb) ? eb : mine;
			mine = (ec >= 0 && mine > ec) ? ec : mine;

			if (ea == mine)
			{
				// keep abc
				next = ea;
			}
			else if (eb == mine)
			{
				// abc -> bca
				unsigned int t = a;
				a = b, b = c, c = t;

				next = eb;
			}
			else if (ec == mine)
			{
				// abc -> cab
				unsigned int t = c;
				c = b, b = a, a = t;

				next = ec;
			}

			if (restart_index)
			{
				if (strip_size)
					destination[strip_size++] = restart_index;

				destination[strip_size++] = a;
				destination[strip_size++] = b;
				destination[strip_size++] = c;

				// new strip always starts with the same edge winding
				strip[0] = b;
				strip[1] = c;
				parity = 1;
			}
			else
			{
				if (strip_size)
				{
					// connect last strip using degenerate triangles
					destination[strip_size++] = strip[1];
					destination[strip_size++] = a;
				}

				// note that we may need to flip the emitted triangle based on parity
				// we always end up with outgoing edge "cb" in the end
				unsigned int e0 = parity ? c : b;
				unsigned int e1 = parity ? b : c;

				destination[strip_size++] = a;
				destination[strip_size++] = e0;
				destination[strip_size++] = e1;

				strip[0] = e0;
				strip[1] = e1;
				parity ^= 1;
			}
		}
	}

	return strip_size;
}

size_t meshopt_stripifyBound(size_t index_count)
{
	assert(index_count % 3 == 0);

	// worst case without restarts is 2 degenerate indices and 3 indices per triangle
	// worst case with restarts is 1 restart index and 3 indices per triangle
	return (index_count / 3) * 5;
}

size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
{
	assert(destination != indices);

	size_t offset = 0;
	size_t start = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		if (restart_index && indices[i] == restart_index)
		{
			start = i + 1;
		}
		else if (i - start >= 2)
		{
			unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];

			// flip winding for odd triangles
			if ((i - start) & 1)
			{
				unsigned int t = a;
				a = b, b = t;
			}

			// although we use restart indices, strip swaps still produce degenerate triangles, so skip them
			if (a != b && a != c && b != c)
			{
				destination[offset + 0] = a;
				destination[offset + 1] = b;
				destination[offset + 2] = c;
				offset += 3;
			}
		}
	}

	return offset;
}

size_t meshopt_unstripifyBound(size_t index_count)
{
	assert(index_count == 0 || index_count >= 3);

	return (index_count == 0) ? 0 : (index_count - 2) * 3;
}
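A minimal usage sketch for the stripifier API above (illustrative only, not part of the vendored file; the buffer names are assumptions): it converts a triangle list to a strip and back, sizing the outputs with the *Bound helpers shown in this diff. Per the code above, a restart_index of 0 joins strips with degenerate triangles, while a nonzero value (commonly ~0u) emits primitive-restart markers.

#include "meshoptimizer.h"

#include <vector>

std::vector<unsigned int> build_strip(const std::vector<unsigned int>& indices, size_t vertex_count)
{
	// worst-case output size, per meshopt_stripifyBound: 5 indices per input triangle
	std::vector<unsigned int> strip(meshopt_stripifyBound(indices.size()));

	unsigned int restart_index = ~0u;
	size_t strip_size = meshopt_stripify(strip.data(), indices.data(), indices.size(), vertex_count, restart_index);
	strip.resize(strip_size);
	return strip;
}

std::vector<unsigned int> back_to_list(const std::vector<unsigned int>& strip)
{
	// worst-case output size, per meshopt_unstripifyBound: one triangle per strip index beyond the first two
	std::vector<unsigned int> indices(meshopt_unstripifyBound(strip.size()));

	size_t index_count = meshopt_unstripify(indices.data(), strip.data(), strip.size(), ~0u);
	indices.resize(index_count);
	return indices;
}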
467
thirdparty/meshoptimizer/vcacheoptimizer.cpp
vendored
Normal file
@@ -0,0 +1,467 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

// This work is based on:
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{

const size_t kCacheSizeMax = 16;
const size_t kValenceMax = 8;

struct VertexScoreTable
{
	float cache[1 + kCacheSizeMax];
	float live[1 + kValenceMax];
};

// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
static const VertexScoreTable kVertexScoreTable = {
	{0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
	{0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
};

// Tuned to minimize the encoded index buffer size
static const VertexScoreTable kVertexScoreTableStrip = {
	{0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
	{0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
};

struct TriangleAdjacency
{
	unsigned int* counts;
	unsigned int* offsets;
	unsigned int* data;
};

static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	size_t face_count = index_count / 3;

	// allocate arrays
	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
	adjacency.data = allocator.allocate<unsigned int>(index_count);

	// fill triangle counts
	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		assert(indices[i] < vertex_count);

		adjacency.counts[indices[i]]++;
	}

	// fill offset table
	unsigned int offset = 0;

	for (size_t i = 0; i < vertex_count; ++i)
	{
		adjacency.offsets[i] = offset;
		offset += adjacency.counts[i];
	}

	assert(offset == index_count);

	// fill triangle data
	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];

		adjacency.data[adjacency.offsets[a]++] = unsigned(i);
		adjacency.data[adjacency.offsets[b]++] = unsigned(i);
		adjacency.data[adjacency.offsets[c]++] = unsigned(i);
	}

	// fix offsets that have been disturbed by the previous pass
	for (size_t i = 0; i < vertex_count; ++i)
	{
		assert(adjacency.offsets[i] >= adjacency.counts[i]);

		adjacency.offsets[i] -= adjacency.counts[i];
	}
}

static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
{
	// check dead-end stack
	while (dead_end_top)
	{
		unsigned int vertex = dead_end[--dead_end_top];

		if (live_triangles[vertex] > 0)
			return vertex;
	}

	// input order
	while (input_cursor < vertex_count)
	{
		if (live_triangles[input_cursor] > 0)
			return input_cursor;

		++input_cursor;
	}

	return ~0u;
}

static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
	unsigned int best_candidate = ~0u;
	int best_priority = -1;

	for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
	{
		unsigned int vertex = *next_candidate;

		// otherwise we don't need to process it
		if (live_triangles[vertex] > 0)
		{
			int priority = 0;

			// will it be in cache after fanning?
			if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
			{
				priority = timestamp - cache_timestamps[vertex]; // position in cache
			}

			if (priority > best_priority)
			{
				best_candidate = vertex;
				best_priority = priority;
			}
		}
	}

	return best_candidate;
}

static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
{
	assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));

	unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;

	return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
}

static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
{
	// input order
	while (input_cursor < face_count)
	{
		if (!emitted_flags[input_cursor])
			return input_cursor;

		++input_cursor;
	}

	return ~0u;
}

} // namespace meshopt

void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	unsigned int cache_size = 16;
	assert(cache_size <= kCacheSizeMax);

	size_t face_count = index_count / 3;

	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match
	unsigned int* live_triangles = adjacency.counts;

	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	// compute initial vertex scores
	float* vertex_scores = allocator.allocate<float>(vertex_count);

	for (size_t i = 0; i < vertex_count; ++i)
		vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);

	// compute triangle scores
	float* triangle_scores = allocator.allocate<float>(face_count);

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0];
		unsigned int b = indices[i * 3 + 1];
		unsigned int c = indices[i * 3 + 2];

		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
	}

	unsigned int cache_holder[2 * (kCacheSizeMax + 4)];
	unsigned int* cache = cache_holder;
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;
	size_t cache_count = 0;

	unsigned int current_triangle = 0;
	unsigned int input_cursor = 1;

	unsigned int output_triangle = 0;

	while (current_triangle != ~0u)
	{
		assert(output_triangle < face_count);

		unsigned int a = indices[current_triangle * 3 + 0];
		unsigned int b = indices[current_triangle * 3 + 1];
		unsigned int c = indices[current_triangle * 3 + 2];

		// output indices
		destination[output_triangle * 3 + 0] = a;
		destination[output_triangle * 3 + 1] = b;
		destination[output_triangle * 3 + 2] = c;
		output_triangle++;

		// update emitted flags
		emitted_flags[current_triangle] = true;
		triangle_scores[current_triangle] = 0;

		// new triangle
		size_t cache_write = 0;
		cache_new[cache_write++] = a;
		cache_new[cache_write++] = b;
		cache_new[cache_write++] = c;

		// old triangles
		for (size_t i = 0; i < cache_count; ++i)
		{
			unsigned int index = cache[i];

			cache_new[cache_write] = index;
			cache_write += (index != a) & (index != b) & (index != c);
		}

		unsigned int* cache_temp = cache;
		cache = cache_new, cache_new = cache_temp;
		cache_count = cache_write > cache_size ? cache_size : cache_write;

		// remove emitted triangle from adjacency data
		// this makes sure that we spend less time traversing these lists on subsequent iterations
		// live triangle counts are updated as a byproduct of these adjustments
		for (size_t k = 0; k < 3; ++k)
		{
			unsigned int index = indices[current_triangle * 3 + k];

			unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbors_size = adjacency.counts[index];

			for (size_t i = 0; i < neighbors_size; ++i)
			{
				unsigned int tri = neighbors[i];

				if (tri == current_triangle)
				{
					neighbors[i] = neighbors[neighbors_size - 1];
					adjacency.counts[index]--;
					break;
				}
			}
		}

		unsigned int best_triangle = ~0u;
		float best_score = 0;

		// update cache positions, vertex scores and triangle scores, and find next best triangle
		for (size_t i = 0; i < cache_write; ++i)
		{
			unsigned int index = cache[i];

			// no need to update scores if we are never going to use this vertex
			if (adjacency.counts[index] == 0)
				continue;

			int cache_position = i >= cache_size ? -1 : int(i);

			// update vertex score
			float score = vertexScore(table, cache_position, live_triangles[index]);
			float score_diff = score - vertex_scores[index];

			vertex_scores[index] = score;

			// update scores of vertex triangles
			const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];
			const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];

			for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
			{
				unsigned int tri = *it;
				assert(!emitted_flags[tri]);

				float tri_score = triangle_scores[tri] + score_diff;
				assert(tri_score > 0);

				best_triangle = best_score < tri_score ? tri : best_triangle;
				best_score = best_score < tri_score ? tri_score : best_score;

				triangle_scores[tri] = tri_score;
			}
		}

		// step through input triangles in order if we hit a dead-end
		current_triangle = best_triangle;

		if (current_triangle == ~0u)
		{
			current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
		}
	}

	assert(input_cursor == face_count);
	assert(output_triangle == face_count);
}

void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
}

void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
}

void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(cache_size >= 3);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	size_t face_count = index_count / 3;

	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// live triangle counts
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));

	// cache time stamps
	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	// dead-end stack
	unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
	unsigned int dead_end_top = 0;

	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	unsigned int current_vertex = 0;

	unsigned int timestamp = cache_size + 1;
	unsigned int input_cursor = 1; // vertex to restart from in case of dead-end

	unsigned int output_triangle = 0;

	while (current_vertex != ~0u)
	{
		const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;

		// emit all vertex neighbors
		const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
		const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];

		for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
		{
			unsigned int triangle = *it;

			if (!emitted_flags[triangle])
			{
				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];

				// output indices
				destination[output_triangle * 3 + 0] = a;
				destination[output_triangle * 3 + 1] = b;
				destination[output_triangle * 3 + 2] = c;
				output_triangle++;

				// update dead-end stack
				dead_end[dead_end_top + 0] = a;
				dead_end[dead_end_top + 1] = b;
				dead_end[dead_end_top + 2] = c;
				dead_end_top += 3;

				// update live triangle counts
				live_triangles[a]--;
				live_triangles[b]--;
				live_triangles[c]--;

				// update cache info
				// if vertex is not in cache, put it in cache
				if (timestamp - cache_timestamps[a] > cache_size)
					cache_timestamps[a] = timestamp++;

				if (timestamp - cache_timestamps[b] > cache_size)
					cache_timestamps[b] = timestamp++;

				if (timestamp - cache_timestamps[c] > cache_size)
					cache_timestamps[c] = timestamp++;

				// update emitted flags
				emitted_flags[triangle] = true;
			}
		}

		// next candidates are the ones we pushed to dead-end stack just now
		const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;

		// get next vertex
		current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);

		if (current_vertex == ~0u)
		{
			current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
		}
	}

	assert(output_triangle == face_count);
}
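A minimal usage sketch for the entry points above (illustrative only, not part of the vendored file; the container names are assumptions): it reorders an index buffer for the post-transform vertex cache, using only the signatures shown in this diff. The commented-out variants show the FIFO version with an explicit cache size and the strip-oriented score table.

#include "meshoptimizer.h"

#include <vector>

void optimize_for_vertex_cache(std::vector<unsigned int>& indices, size_t vertex_count)
{
	// score-based reordering tuned for a ~16-entry post-transform cache;
	// in-place operation (destination == indices) is supported, as the code above shows
	meshopt_optimizeVertexCache(indices.data(), indices.data(), indices.size(), vertex_count);

	// alternatively, target an explicit FIFO cache size (e.g. 16):
	// meshopt_optimizeVertexCacheFifo(indices.data(), indices.data(), indices.size(), vertex_count, 16);

	// or use the strip-oriented table when the result will be fed into meshopt_stripify:
	// meshopt_optimizeVertexCacheStrip(indices.data(), indices.data(), indices.size(), vertex_count);
}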
1910
thirdparty/meshoptimizer/vertexcodec.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
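Since the vertexcodec.cpp diff is suppressed above, a hedged usage sketch of its public API follows (not taken from this diff; the meshopt_encodeVertexBuffer/meshopt_decodeVertexBuffer signatures are recalled from the library's public header, and the Vertex layout is hypothetical):

#include "meshoptimizer.h"

#include <assert.h>
#include <vector>

struct Vertex { float px, py, pz; float nx, ny, nz; float u, v; };

std::vector<unsigned char> encode_vertices(const std::vector<Vertex>& vertices)
{
	// allocate the worst-case buffer, encode, then shrink to the actual encoded size
	std::vector<unsigned char> buffer(meshopt_encodeVertexBufferBound(vertices.size(), sizeof(Vertex)));
	buffer.resize(meshopt_encodeVertexBuffer(buffer.data(), buffer.size(), vertices.data(), vertices.size(), sizeof(Vertex)));
	return buffer;
}

std::vector<Vertex> decode_vertices(const std::vector<unsigned char>& buffer, size_t vertex_count)
{
	std::vector<Vertex> vertices(vertex_count);
	int rc = meshopt_decodeVertexBuffer(vertices.data(), vertex_count, sizeof(Vertex), buffer.data(), buffer.size());
	assert(rc == 0); // 0 indicates successful decode
	(void)rc;
	return vertices;
}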
1047
thirdparty/meshoptimizer/vertexfilter.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
74
thirdparty/meshoptimizer/vfetchoptimizer.cpp
vendored
Normal file
@@ -0,0 +1,74 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	assert(index_count % 3 == 0);

	memset(destination, -1, vertex_count * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			destination[index] = next_vertex++;
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}

size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// support in-place optimization
	if (destination == vertices)
	{
		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
		vertices = vertices_copy;
	}

	// build vertex remap table
	unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
	memset(vertex_remap, -1, vertex_count * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		unsigned int& remap = vertex_remap[index];

		if (remap == ~0u) // vertex was not added to destination VB
		{
			// add vertex
			memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size);

			remap = next_vertex++;
		}

		// modify indices in place
		indices[i] = remap;
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
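A minimal usage sketch for meshopt_optimizeVertexFetch above (illustrative only, not part of the vendored file; the Vertex layout and container names are assumptions): it reorders the vertex buffer to match the index order, rewrites the indices in place, and trims unreferenced vertices.

#include "meshoptimizer.h"

#include <vector>

struct Vertex { float px, py, pz; float nx, ny, nz; float u, v; };

void optimize_for_vertex_fetch(std::vector<Vertex>& vertices, std::vector<unsigned int>& indices)
{
	// reorders vertices to follow the (ideally already cache-optimized) index order;
	// in-place operation (destination == vertices) is supported, as the code above shows;
	// the return value is the number of vertices actually referenced by the indices
	size_t unique = meshopt_optimizeVertexFetch(vertices.data(), indices.data(), indices.size(), vertices.data(), vertices.size(), sizeof(Vertex));
	vertices.resize(unique);
}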