initial commit, 4.5 stable

2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

thirdparty/meshoptimizer/LICENSE.md vendored Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016-2025 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

thirdparty/meshoptimizer/allocator.cpp vendored Normal file

@@ -0,0 +1,8 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*))
{
meshopt_Allocator::Storage::allocate = allocate;
meshopt_Allocator::Storage::deallocate = deallocate;
}
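
A minimal usage sketch (hypothetical consumer code, not part of the vendored sources): routing meshoptimizer's temporary allocations through custom functions via meshopt_setAllocator.

// Hypothetical usage sketch: install a custom allocator backend.
#include "meshoptimizer.h"
#include <stdlib.h>

static void* my_alloc(size_t size) { return malloc(size); } // assumed malloc-backed functions
static void my_free(void* ptr) { free(ptr); }

void install_custom_allocator()
{
	meshopt_setAllocator(my_alloc, my_free);
}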

thirdparty/meshoptimizer/clusterizer.cpp vendored Normal file

File diff suppressed because it is too large (1696 additions)

thirdparty/meshoptimizer/vcacheanalyzer.cpp vendored Normal file

@@ -0,0 +1,126 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
{
assert(index_count % 3 == 0);
assert(cache_size >= 3);
assert(warp_size == 0 || warp_size >= 3);
meshopt_Allocator allocator;
meshopt_VertexCacheStatistics result = {};
unsigned int warp_offset = 0;
unsigned int primgroup_offset = 0;
unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
unsigned int timestamp = cache_size + 1;
for (size_t i = 0; i < index_count; i += 3)
{
unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
bool ac = (timestamp - cache_timestamps[a]) > cache_size;
bool bc = (timestamp - cache_timestamps[b]) > cache_size;
bool cc = (timestamp - cache_timestamps[c]) > cache_size;
// flush cache if triangle doesn't fit into warp or into the primitive buffer
if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
{
result.warps_executed += warp_offset > 0;
warp_offset = 0;
primgroup_offset = 0;
// reset cache
timestamp += cache_size + 1;
}
// update cache and add vertices to warp
for (int j = 0; j < 3; ++j)
{
unsigned int index = indices[i + j];
if (timestamp - cache_timestamps[index] > cache_size)
{
cache_timestamps[index] = timestamp++;
result.vertices_transformed++;
warp_offset++;
}
}
primgroup_offset++;
}
size_t unique_vertex_count = 0;
for (size_t i = 0; i < vertex_count; ++i)
unique_vertex_count += cache_timestamps[i] > 0;
result.warps_executed += warp_offset > 0;
result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);
return result;
}
meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
assert(index_count % 3 == 0);
assert(vertex_size > 0 && vertex_size <= 256);
meshopt_Allocator allocator;
meshopt_VertexFetchStatistics result = {};
unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
memset(vertex_visited, 0, vertex_count);
const size_t kCacheLine = 64;
const size_t kCacheSize = 128 * 1024;
// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
size_t cache[kCacheSize / kCacheLine] = {};
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
vertex_visited[index] = 1;
size_t start_address = index * vertex_size;
size_t end_address = start_address + vertex_size;
size_t start_tag = start_address / kCacheLine;
size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
assert(start_tag < end_tag);
for (size_t tag = start_tag; tag < end_tag; ++tag)
{
size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
// we store +1 since cache is filled with 0 by default
result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
cache[line] = tag + 1;
}
}
size_t unique_vertex_count = 0;
for (size_t i = 0; i < vertex_count; ++i)
unique_vertex_count += vertex_visited[i];
result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
return result;
}
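
A minimal analysis sketch (hypothetical consumer code, not part of the vendored sources): estimating transform cost for a triangle list. The 16/0/0 arguments model a generic 16-entry FIFO cache with no warp or primgroup limits; treat the results as relative measures, not a hardware match.

// Hypothetical usage sketch: report cache efficiency of an index buffer.
#include "meshoptimizer.h"
#include <stdio.h>

void report_cache_efficiency(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_VertexCacheStatistics vcs = meshopt_analyzeVertexCache(indices, index_count, vertex_count, 16, 0, 0);
	// ACMR is transformed vertices per triangle (3.0 worst case, ~0.5 near-optimal for regular grids);
	// ATVR is transformed vertices per unique vertex (1.0 is the lower bound).
	printf("ACMR: %f ATVR: %f\n", vcs.acmr, vcs.atvr);
}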

thirdparty/meshoptimizer/indexcodec.cpp vendored Normal file

@@ -0,0 +1,688 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
namespace meshopt
{
const unsigned char kIndexHeader = 0xe0;
const unsigned char kSequenceHeader = 0xd0;
static int gEncodeIndexVersion = 1;
const int kDecodeIndexVersion = 1;
typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][2];
static const unsigned int kTriangleIndexOrder[3][3] = {
{0, 1, 2},
{1, 2, 0},
{2, 0, 1},
};
static const unsigned char kCodeAuxEncodingTable[16] = {
0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
0, 0, // last two entries aren't used for encoding
};
static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
{
(void)a;
return (b == next) ? 1 : (c == next ? 2 : 0);
}
static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
{
for (int i = 0; i < 16; ++i)
{
size_t index = (offset - 1 - i) & 15;
unsigned int e0 = fifo[index][0];
unsigned int e1 = fifo[index][1];
if (e0 == a && e1 == b)
return (i << 2) | 0;
if (e0 == b && e1 == c)
return (i << 2) | 1;
if (e0 == c && e1 == a)
return (i << 2) | 2;
}
return -1;
}
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
{
fifo[offset][0] = a;
fifo[offset][1] = b;
offset = (offset + 1) & 15;
}
static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
{
for (int i = 0; i < 16; ++i)
{
size_t index = (offset - 1 - i) & 15;
if (fifo[index] == v)
return i;
}
return -1;
}
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
{
fifo[offset] = v;
offset = (offset + cond) & 15;
}
static void encodeVByte(unsigned char*& data, unsigned int v)
{
// encode 32-bit value in up to 5 7-bit groups
do
{
*data++ = (v & 127) | (v > 127 ? 128 : 0);
v >>= 7;
} while (v);
}
static unsigned int decodeVByte(const unsigned char*& data)
{
unsigned char lead = *data++;
// fast path: single byte
if (lead < 128)
return lead;
// slow path: up to 4 extra bytes
// note that this loop always terminates, which is important for malformed data
unsigned int result = lead & 127;
unsigned int shift = 7;
for (int i = 0; i < 4; ++i)
{
unsigned char group = *data++;
result |= unsigned(group & 127) << shift;
shift += 7;
if (group < 128)
break;
}
return result;
}
static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
{
unsigned int d = index - last;
unsigned int v = (d << 1) ^ (int(d) >> 31);
encodeVByte(data, v);
}
static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
{
unsigned int v = decodeVByte(data);
unsigned int d = (v >> 1) ^ -int(v & 1);
return last + d;
}
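// Worked example (annotation, not part of the upstream file): encodeIndex zigzag-maps the
// signed delta d = index - last so small magnitudes stay small regardless of sign:
//   d = +1 -> v = 2, d = -1 -> v = 1, d = +2 -> v = 4, d = -2 -> v = 3.
// decodeIndex inverts the mapping via d = (v >> 1) ^ -int(v & 1), and the varint layer
// spends one byte per 7 bits, so any delta with |d| <= 63 costs a single byte.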
static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
{
for (int i = 0; i < 16; ++i)
if (table[i] == v)
return i;
return -1;
}
static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
{
if (index_size == 2)
{
static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
}
else
{
static_cast<unsigned int*>(destination)[offset + 0] = a;
static_cast<unsigned int*>(destination)[offset + 1] = b;
static_cast<unsigned int*>(destination)[offset + 2] = c;
}
}
} // namespace meshopt
size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
using namespace meshopt;
assert(index_count % 3 == 0);
// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
if (buffer_size < 1 + index_count / 3 + 16)
return 0;
int version = gEncodeIndexVersion;
buffer[0] = (unsigned char)(kIndexHeader | version);
EdgeFifo edgefifo;
memset(edgefifo, -1, sizeof(edgefifo));
VertexFifo vertexfifo;
memset(vertexfifo, -1, sizeof(vertexfifo));
size_t edgefifooffset = 0;
size_t vertexfifooffset = 0;
unsigned int next = 0;
unsigned int last = 0;
unsigned char* code = buffer + 1;
unsigned char* data = code + index_count / 3;
unsigned char* data_safe_end = buffer + buffer_size - 16;
int fecmax = version >= 1 ? 13 : 15;
// use static encoding table; it's possible to pack the result and then build an optimal table and repack
// for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
const unsigned char* codeaux_table = kCodeAuxEncodingTable;
for (size_t i = 0; i < index_count; i += 3)
{
// make sure we have enough space to write a triangle
// each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
// after this we can be sure we can write without extra bounds checks
if (data > data_safe_end)
return 0;
int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
if (fer >= 0 && (fer >> 2) < 15)
{
// note: getEdgeFifo implicitly rotates triangles by matching a/b to existing edge
const unsigned int* order = kTriangleIndexOrder[fer & 3];
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
// encode edge index and vertex fifo index, next or free index
int fe = fer >> 2;
int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next ? (next++, 0) : 15);
if (fec == 15 && version >= 1)
{
// encode last-1 and last+1 to optimize strip-like sequences
if (c + 1 == last)
fec = 13, last = c;
if (c == last + 1)
fec = 14, last = c;
}
*code++ = (unsigned char)((fe << 4) | fec);
// note that we need to update the last index since free indices are delta-encoded
if (fec == 15)
encodeIndex(data, c, last), last = c;
// we only need to push third vertex since first two are likely already in the vertex fifo
if (fec == 0 || fec >= fecmax)
pushVertexFifo(vertexfifo, c, vertexfifooffset);
// we only need to push two new edges to edge fifo since the third one is already there
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
}
else
{
int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
const unsigned int* order = kTriangleIndexOrder[rotation];
unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
// if a/b/c are 0/1/2, we emit a reset code
bool reset = false;
if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
{
reset = true;
next = 0;
// reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
// this makes sure next continues to get incremented instead of being stuck
memset(vertexfifo, -1, sizeof(vertexfifo));
}
int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
// after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
// note: decoder implicitly assumes that if feb=fec=0, then fea=0 (reset code); this is enforced by rotation
int fea = (a == next) ? (next++, 0) : 15;
int feb = (fb >= 0 && fb < 14) ? fb + 1 : (b == next ? (next++, 0) : 15);
int fec = (fc >= 0 && fc < 14) ? fc + 1 : (c == next ? (next++, 0) : 15);
// we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
unsigned char codeaux = (unsigned char)((feb << 4) | fec);
int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
// <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
{
*code++ = (unsigned char)((15 << 4) | codeauxindex);
}
else
{
*code++ = (unsigned char)((15 << 4) | 14 | fea);
*data++ = codeaux;
}
// note that we need to update the last index since free indices are delta-encoded
if (fea == 15)
encodeIndex(data, a, last), last = a;
if (feb == 15)
encodeIndex(data, b, last), last = b;
if (fec == 15)
encodeIndex(data, c, last), last = c;
// only push vertices that weren't already in fifo
if (fea == 0 || fea == 15)
pushVertexFifo(vertexfifo, a, vertexfifooffset);
if (feb == 0 || feb == 15)
pushVertexFifo(vertexfifo, b, vertexfifooffset);
if (fec == 0 || fec == 15)
pushVertexFifo(vertexfifo, c, vertexfifooffset);
// none of the three edges are in the fifo; pushing all of them is important so that we can match them for later triangles
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
}
}
// make sure we have enough space to write codeaux table
if (data > data_safe_end)
return 0;
// add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
// we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
// this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
for (size_t i = 0; i < 16; ++i)
{
// decoder assumes that table entries never refer to separately encoded indices
assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
*data++ = codeaux_table[i];
}
// since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
assert(codeaux_table[0] == 0);
assert(data >= buffer + index_count / 3 + 16);
assert(data <= buffer + buffer_size);
return data - buffer;
}
size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
{
assert(index_count % 3 == 0);
// compute number of bits required for each index
unsigned int vertex_bits = 1;
while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
vertex_bits++;
// worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;
return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
}
void meshopt_encodeIndexVersion(int version)
{
assert(unsigned(version) <= unsigned(meshopt::kDecodeIndexVersion));
meshopt::gEncodeIndexVersion = version;
}
int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size)
{
if (buffer_size < 1)
return -1;
unsigned char header = buffer[0];
if ((header & 0xf0) != meshopt::kIndexHeader && (header & 0xf0) != meshopt::kSequenceHeader)
return -1;
int version = header & 0x0f;
if (version > meshopt::kDecodeIndexVersion)
return -1;
return version;
}
int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(index_size == 2 || index_size == 4);
// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
if (buffer_size < 1 + index_count / 3 + 16)
return -2;
if ((buffer[0] & 0xf0) != kIndexHeader)
return -1;
int version = buffer[0] & 0x0f;
if (version > kDecodeIndexVersion)
return -1;
EdgeFifo edgefifo;
memset(edgefifo, -1, sizeof(edgefifo));
VertexFifo vertexfifo;
memset(vertexfifo, -1, sizeof(vertexfifo));
size_t edgefifooffset = 0;
size_t vertexfifooffset = 0;
unsigned int next = 0;
unsigned int last = 0;
int fecmax = version >= 1 ? 13 : 15;
// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
const unsigned char* code = buffer + 1;
const unsigned char* data = code + index_count / 3;
const unsigned char* data_safe_end = buffer + buffer_size - 16;
const unsigned char* codeaux_table = data_safe_end;
for (size_t i = 0; i < index_count; i += 3)
{
// make sure we have enough data to read for a triangle
// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
// after this we can be sure we can read without extra bounds checks
if (data > data_safe_end)
return -2;
unsigned char codetri = *code++;
if (codetri < 0xf0)
{
int fe = codetri >> 4;
// fifo reads are wrapped around 16 entry buffer
unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
unsigned int c = 0;
int fec = codetri & 15;
// note: this is the most common path in the entire decoder
// inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
if (fec < fecmax)
{
// fifo reads are wrapped around 16 entry buffer
unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
c = (fec == 0) ? next : cf;
int fec0 = fec == 0;
next += fec0;
// push vertex fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
}
else
{
// fec - (fec ^ 3) decodes 13, 14 into -1, 1
// note that we need to update the last index since free indices are delta-encoded
last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
pushVertexFifo(vertexfifo, c, vertexfifooffset);
}
// push edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
// output triangle
writeTriangle(destination, i, index_size, a, b, c);
}
else
{
// fast path: read codeaux from the table
if (codetri < 0xfe)
{
unsigned char codeaux = codeaux_table[codetri & 15];
// note: table can't contain feb/fec=15
int feb = codeaux >> 4;
int fec = codeaux & 15;
// fifo reads are wrapped around 16 entry buffer
// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
unsigned int a = next++;
unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
unsigned int b = (feb == 0) ? next : bf;
int feb0 = feb == 0;
next += feb0;
unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
unsigned int c = (fec == 0) ? next : cf;
int fec0 = fec == 0;
next += fec0;
// output triangle
writeTriangle(destination, i, index_size, a, b, c);
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
pushVertexFifo(vertexfifo, a, vertexfifooffset);
pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
}
else
{
// slow path: read a full byte for codeaux instead of using a table lookup
unsigned char codeaux = *data++;
int fea = codetri == 0xfe ? 0 : 15;
int feb = codeaux >> 4;
int fec = codeaux & 15;
// reset: codeaux is 0 but encoded as not-a-table
if (codeaux == 0)
next = 0;
// fifo reads are wrapped around 16 entry buffer
// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
unsigned int a = (fea == 0) ? next++ : 0;
unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
// note that we need to update the last index since free indices are delta-encoded
if (fea == 15)
last = a = decodeIndex(data, last);
if (feb == 15)
last = b = decodeIndex(data, last);
if (fec == 15)
last = c = decodeIndex(data, last);
// output triangle
writeTriangle(destination, i, index_size, a, b, c);
// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
pushVertexFifo(vertexfifo, a, vertexfifooffset);
pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
pushEdgeFifo(edgefifo, b, a, edgefifooffset);
pushEdgeFifo(edgefifo, c, b, edgefifooffset);
pushEdgeFifo(edgefifo, a, c, edgefifooffset);
}
}
}
// we should've read all data bytes and stopped at the boundary between data and codeaux table
if (data != data_safe_end)
return -3;
return 0;
}
size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
using namespace meshopt;
// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
if (buffer_size < 1 + index_count + 4)
return 0;
int version = gEncodeIndexVersion;
buffer[0] = (unsigned char)(kSequenceHeader | version);
unsigned int last[2] = {};
unsigned int current = 0;
unsigned char* data = buffer + 1;
unsigned char* data_safe_end = buffer + buffer_size - 4;
for (size_t i = 0; i < index_count; ++i)
{
// make sure we have enough data to write
// each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
// after this we can be sure we can write without extra bounds checks
if (data >= data_safe_end)
return 0;
unsigned int index = indices[i];
// this is a heuristic that switches between baselines when the delta grows too large
// we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
// for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
int cd = int(index - last[current]);
current ^= ((cd < 0 ? -cd : cd) >= 30);
// encode delta from the last index
unsigned int d = index - last[current];
unsigned int v = (d << 1) ^ (int(d) >> 31);
// note: low bit encodes the index of the last baseline which will be used for reconstruction
encodeVByte(data, (v << 1) | current);
// update last for the next iteration that uses it
last[current] = index;
}
// make sure we have enough space to write tail
if (data > data_safe_end)
return 0;
for (int k = 0; k < 4; ++k)
*data++ = 0;
return data - buffer;
}
size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
{
// compute number of bits required for each index
unsigned int vertex_bits = 1;
while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
vertex_bits++;
// worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;
return 1 + index_count * vertex_groups + 4;
}
int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
{
using namespace meshopt;
// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
if (buffer_size < 1 + index_count + 4)
return -2;
if ((buffer[0] & 0xf0) != kSequenceHeader)
return -1;
int version = buffer[0] & 0x0f;
if (version > kDecodeIndexVersion)
return -1;
const unsigned char* data = buffer + 1;
const unsigned char* data_safe_end = buffer + buffer_size - 4;
unsigned int last[2] = {};
for (size_t i = 0; i < index_count; ++i)
{
// make sure we have enough data to read
// each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
// after this we can be sure we can read without extra bounds checks
if (data >= data_safe_end)
return -2;
unsigned int v = decodeVByte(data);
// decode the index of the last baseline
unsigned int current = v & 1;
v >>= 1;
// reconstruct index as a delta
unsigned int d = (v >> 1) ^ -int(v & 1);
unsigned int index = last[current] + d;
// update last for the next iteration that uses it
last[current] = index;
if (index_size == 2)
{
static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
}
else
{
static_cast<unsigned int*>(destination)[i] = index;
}
}
// we should've read all data bytes and stopped at the boundary between data and tail
if (data != data_safe_end)
return -3;
return 0;
}
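
A roundtrip sketch (hypothetical consumer code, not part of the vendored sources): compress a triangle index buffer using the worst-case bound helper to size the destination, then decode it back.

// Hypothetical usage sketch: encode/decode roundtrip for an index buffer.
#include "meshoptimizer.h"
#include <assert.h>
#include <stdlib.h>

void roundtrip_indices(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	size_t bound = meshopt_encodeIndexBufferBound(index_count, vertex_count);
	unsigned char* encoded = (unsigned char*)malloc(bound);
	size_t encoded_size = meshopt_encodeIndexBuffer(encoded, bound, indices, index_count);
	assert(encoded_size != 0); // 0 means the destination buffer was too small

	unsigned int* decoded = (unsigned int*)malloc(index_count * sizeof(unsigned int));
	int res = meshopt_decodeIndexBuffer(decoded, index_count, sizeof(unsigned int), encoded, encoded_size);
	assert(res == 0); // negative results indicate malformed or truncated input

	free(decoded);
	free(encoded);
}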

thirdparty/meshoptimizer/indexgenerator.cpp vendored Normal file

@@ -0,0 +1,679 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
// John Hable. Variable Rate Shading with Visibility Buffer Rendering. 2024
namespace meshopt
{
static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
{
// MurmurHash2
const unsigned int m = 0x5bd1e995;
const int r = 24;
while (len >= 4)
{
unsigned int k = *reinterpret_cast<const unsigned int*>(key);
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
key += 4;
len -= 4;
}
return h;
}
struct VertexHasher
{
const unsigned char* vertices;
size_t vertex_size;
size_t vertex_stride;
size_t hash(unsigned int index) const
{
return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
}
bool equal(unsigned int lhs, unsigned int rhs) const
{
return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
}
};
struct VertexStreamHasher
{
const meshopt_Stream* streams;
size_t stream_count;
size_t hash(unsigned int index) const
{
unsigned int h = 0;
for (size_t i = 0; i < stream_count; ++i)
{
const meshopt_Stream& s = streams[i];
const unsigned char* data = static_cast<const unsigned char*>(s.data);
h = hashUpdate4(h, data + index * s.stride, s.size);
}
return h;
}
bool equal(unsigned int lhs, unsigned int rhs) const
{
for (size_t i = 0; i < stream_count; ++i)
{
const meshopt_Stream& s = streams[i];
const unsigned char* data = static_cast<const unsigned char*>(s.data);
if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
return false;
}
return true;
}
};
struct VertexCustomHasher
{
const float* vertex_positions;
size_t vertex_stride_float;
int (*callback)(void*, unsigned int, unsigned int);
void* context;
size_t hash(unsigned int index) const
{
const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float);
unsigned int x = key[0], y = key[1], z = key[2];
// replace negative zero with zero
x = (x == 0x80000000) ? 0 : x;
y = (y == 0x80000000) ? 0 : y;
z = (z == 0x80000000) ? 0 : z;
// scramble bits to make sure that integer coordinates have entropy in lower bits
x ^= x >> 17;
y ^= y >> 17;
z ^= z >> 17;
// Optimized Spatial Hashing for Collision Detection of Deformable Objects
return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
}
bool equal(unsigned int lhs, unsigned int rhs) const
{
const float* lp = vertex_positions + lhs * vertex_stride_float;
const float* rp = vertex_positions + rhs * vertex_stride_float;
if (lp[0] != rp[0] || lp[1] != rp[1] || lp[2] != rp[2])
return false;
return callback ? callback(context, lhs, rhs) : true;
}
};
struct EdgeHasher
{
const unsigned int* remap;
size_t hash(unsigned long long edge) const
{
unsigned int e0 = unsigned(edge >> 32);
unsigned int e1 = unsigned(edge);
unsigned int h1 = remap[e0];
unsigned int h2 = remap[e1];
const unsigned int m = 0x5bd1e995;
// MurmurHash64B finalizer
h1 ^= h2 >> 18;
h1 *= m;
h2 ^= h1 >> 22;
h2 *= m;
h1 ^= h2 >> 17;
h1 *= m;
h2 ^= h1 >> 19;
h2 *= m;
return h2;
}
bool equal(unsigned long long lhs, unsigned long long rhs) const
{
unsigned int l0 = unsigned(lhs >> 32);
unsigned int l1 = unsigned(lhs);
unsigned int r0 = unsigned(rhs >> 32);
unsigned int r1 = unsigned(rhs);
return remap[l0] == remap[r0] && remap[l1] == remap[r1];
}
};
static size_t hashBuckets(size_t count)
{
size_t buckets = 1;
while (buckets < count + count / 4)
buckets *= 2;
return buckets;
}
template <typename T, typename Hash>
static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
{
assert(buckets > 0);
assert((buckets & (buckets - 1)) == 0);
size_t hashmod = buckets - 1;
size_t bucket = hash.hash(key) & hashmod;
for (size_t probe = 0; probe <= hashmod; ++probe)
{
T& item = table[bucket];
if (item == empty)
return &item;
if (hash.equal(item, key))
return &item;
// hash collision, quadratic probing
bucket = (bucket + probe + 1) & hashmod;
}
assert(false && "Hash table is full"); // unreachable
return NULL;
}
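// Sizing note (annotation, not part of the upstream file): hashBuckets returns the next
// power of two above count * 1.25, so the table load factor stays at or below ~0.8 and
// the power-of-two masking in hashLookup remains valid; the triangular probe sequence
// visits every bucket of a power-of-two table, so lookup can only fail when it is full.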
static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
{
VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
size_t vertex_table_size = hashBuckets(vertex_count);
unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
for (size_t i = 0; i < vertex_count; ++i)
{
unsigned int index = unsigned(i);
unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
if (*entry == ~0u)
*entry = index;
remap[index] = *entry;
}
allocator.deallocate(vertex_table);
}
template <typename Hash>
static size_t generateVertexRemap(unsigned int* remap, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
{
memset(remap, -1, vertex_count * sizeof(unsigned int));
size_t table_size = hashBuckets(vertex_count);
unsigned int* table = allocator.allocate<unsigned int>(table_size);
memset(table, -1, table_size * sizeof(unsigned int));
unsigned int next_vertex = 0;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices ? indices[i] : unsigned(i);
assert(index < vertex_count);
if (remap[index] != ~0u)
continue;
unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
if (*entry == ~0u)
{
*entry = index;
remap[index] = next_vertex++;
}
else
{
assert(remap[*entry] != ~0u);
remap[index] = remap[*entry];
}
}
assert(next_vertex <= vertex_count);
return next_vertex;
}
template <size_t BlockSize>
static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
{
size_t block_size = BlockSize == 0 ? vertex_size : BlockSize;
assert(block_size == vertex_size);
for (size_t i = 0; i < vertex_count; ++i)
if (remap[i] != ~0u)
{
assert(remap[i] < vertex_count);
memcpy(static_cast<unsigned char*>(destination) + remap[i] * block_size, static_cast<const unsigned char*>(vertices) + i * block_size, block_size);
}
}
template <typename Hash>
static void generateShadowBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator)
{
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
memset(remap, -1, vertex_count * sizeof(unsigned int));
size_t table_size = hashBuckets(vertex_count);
unsigned int* table = allocator.allocate<unsigned int>(table_size);
memset(table, -1, table_size * sizeof(unsigned int));
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
if (remap[index] == ~0u)
{
unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u);
if (*entry == ~0u)
*entry = index;
remap[index] = *entry;
}
destination[i] = remap[index];
}
}
} // namespace meshopt
size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
using namespace meshopt;
assert(indices || index_count == vertex_count);
assert(!indices || index_count % 3 == 0);
assert(vertex_size > 0 && vertex_size <= 256);
meshopt_Allocator allocator;
VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
}
size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
using namespace meshopt;
assert(indices || index_count == vertex_count);
assert(index_count % 3 == 0);
assert(stream_count > 0 && stream_count <= 16);
for (size_t i = 0; i < stream_count; ++i)
{
assert(streams[i].size > 0 && streams[i].size <= 256);
assert(streams[i].size <= streams[i].stride);
}
meshopt_Allocator allocator;
VertexStreamHasher hasher = {streams, stream_count};
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
}
size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context)
{
using namespace meshopt;
assert(indices || index_count == vertex_count);
assert(!indices || index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), callback, context};
return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator);
}
void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
{
using namespace meshopt;
assert(vertex_size > 0 && vertex_size <= 256);
meshopt_Allocator allocator;
// support in-place remap
if (destination == vertices)
{
unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
memcpy(vertices_copy, vertices, vertex_count * vertex_size);
vertices = vertices_copy;
}
// specialize the loop for common vertex sizes to ensure memcpy is compiled as an inlined intrinsic
switch (vertex_size)
{
case 4:
return remapVertices<4>(destination, vertices, vertex_count, vertex_size, remap);
case 8:
return remapVertices<8>(destination, vertices, vertex_count, vertex_size, remap);
case 12:
return remapVertices<12>(destination, vertices, vertex_count, vertex_size, remap);
case 16:
return remapVertices<16>(destination, vertices, vertex_count, vertex_size, remap);
default:
return remapVertices<0>(destination, vertices, vertex_count, vertex_size, remap);
}
}
void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
{
assert(index_count % 3 == 0);
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices ? indices[i] : unsigned(i);
assert(remap[index] != ~0u);
destination[i] = remap[index];
}
}
void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
{
using namespace meshopt;
assert(indices);
assert(index_count % 3 == 0);
assert(vertex_size > 0 && vertex_size <= 256);
assert(vertex_size <= vertex_stride);
meshopt_Allocator allocator;
VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
}
void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
using namespace meshopt;
assert(indices);
assert(index_count % 3 == 0);
assert(stream_count > 0 && stream_count <= 16);
for (size_t i = 0; i < stream_count; ++i)
{
assert(streams[i].size > 0 && streams[i].size <= 256);
assert(streams[i].size <= streams[i].stride);
}
meshopt_Allocator allocator;
VertexStreamHasher hasher = {streams, stream_count};
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
}
void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
static const int next[4] = {1, 2, 0, 1};
// build position remap: for each vertex, which other (canonical) vertex does it map to?
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
// build edge set; this stores all triangle edges but we can look these up by any other wedge
EdgeHasher edge_hasher = {remap};
size_t edge_table_size = hashBuckets(index_count);
unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);
memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
for (size_t i = 0; i < index_count; i += 3)
{
for (int e = 0; e < 3; ++e)
{
unsigned int i0 = indices[i + e];
unsigned int i1 = indices[i + next[e]];
unsigned int i2 = indices[i + next[e + 1]];
assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
if (*entry == ~0ull)
{
*entry = edge;
// store vertex opposite to the edge
edge_vertex_table[entry - edge_table] = i2;
}
}
}
// build resulting index buffer: 6 indices for each input triangle
for (size_t i = 0; i < index_count; i += 3)
{
unsigned int patch[6];
for (int e = 0; e < 3; ++e)
{
unsigned int i0 = indices[i + e];
unsigned int i1 = indices[i + next[e]];
assert(i0 < vertex_count && i1 < vertex_count);
// note: this refers to the opposite edge!
unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
patch[e * 2 + 0] = i0;
patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
}
memcpy(destination + i * 2, patch, sizeof(patch));
}
}
void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
static const int next[3] = {1, 2, 0};
// build position remap: for each vertex, which other (canonical) vertex does it map to?
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
// build edge set; this stores all triangle edges but we can look these up by any other wedge
EdgeHasher edge_hasher = {remap};
size_t edge_table_size = hashBuckets(index_count);
unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
for (size_t i = 0; i < index_count; i += 3)
{
for (int e = 0; e < 3; ++e)
{
unsigned int i0 = indices[i + e];
unsigned int i1 = indices[i + next[e]];
assert(i0 < vertex_count && i1 < vertex_count);
unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
if (*entry == ~0ull)
*entry = edge;
}
}
// build resulting index buffer: 12 indices for each input triangle
for (size_t i = 0; i < index_count; i += 3)
{
unsigned int patch[12];
for (int e = 0; e < 3; ++e)
{
unsigned int i0 = indices[i + e];
unsigned int i1 = indices[i + next[e]];
assert(i0 < vertex_count && i1 < vertex_count);
// note: this refers to the opposite edge!
unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
// use the same edge if opposite edge doesn't exist (border)
oppe = (oppe == ~0ull) ? edge : oppe;
// triangle index (0, 1, 2)
patch[e] = i0;
// opposite edge (3, 4; 5, 6; 7, 8)
patch[3 + e * 2 + 0] = unsigned(oppe);
patch[3 + e * 2 + 1] = unsigned(oppe >> 32);
// dominant vertex (9, 10, 11)
patch[9 + e] = remap[i0];
}
memcpy(destination + i * 4, patch, sizeof(patch));
}
}
size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
assert(index_count % 3 == 0);
meshopt_Allocator allocator;
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
memset(remap, -1, vertex_count * sizeof(unsigned int));
// compute vertex valence; this is used to prioritize least used corner
// note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic
unsigned char* valence = allocator.allocate<unsigned char>(vertex_count);
memset(valence, 0, vertex_count);
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
valence[index]++;
}
unsigned int reorder_offset = 0;
// assign provoking vertices; leave the rest for the next pass
for (size_t i = 0; i < index_count; i += 3)
{
unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
// try to rotate triangle such that provoking vertex hasn't been seen before
// if multiple vertices are new, prioritize the one with least valence
// this reduces the risk that a future triangle will have all three vertices seen
unsigned int va = remap[a] == ~0u ? valence[a] : ~0u;
unsigned int vb = remap[b] == ~0u ? valence[b] : ~0u;
unsigned int vc = remap[c] == ~0u ? valence[c] : ~0u;
if (vb != ~0u && vb <= va && vb <= vc)
{
// abc -> bca
unsigned int t = a;
a = b, b = c, c = t;
}
else if (vc != ~0u && vc <= va && vc <= vb)
{
// abc -> cab
unsigned int t = c;
c = b, b = a, a = t;
}
unsigned int newidx = reorder_offset;
// now remap[a] = ~0u or all three vertices are old
// recording remap[a] makes it possible to remap future references to the same index, conserving space
if (remap[a] == ~0u)
remap[a] = newidx;
// we need to clone the provoking vertex to get a unique index
// if all three are used the choice is arbitrary since no future triangle will be able to reuse any of these
reorder[reorder_offset++] = a;
// note: first vertex is final, the other two will be fixed up in next pass
destination[i + 0] = newidx;
destination[i + 1] = b;
destination[i + 2] = c;
// update vertex valences for corner heuristic
valence[a]--;
valence[b]--;
valence[c]--;
}
// remap or clone non-provoking vertices (iterating to skip provoking vertices)
int step = 1;
for (size_t i = 1; i < index_count; i += step, step ^= 3)
{
unsigned int index = destination[i];
if (remap[index] == ~0u)
{
// we haven't seen the vertex before as a provoking vertex
// to maintain the reference to the original vertex we need to clone it
unsigned int newidx = reorder_offset;
remap[index] = newidx;
reorder[reorder_offset++] = index;
}
destination[i] = remap[index];
}
assert(reorder_offset <= vertex_count + index_count / 3);
return reorder_offset;
}
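
A reindexing sketch (hypothetical consumer code, not part of the vendored sources): the typical pipeline pairing meshopt_generateVertexRemap with the two remap functions to turn an unindexed vertex stream into an indexed mesh; Vertex is an assumed POD layout.

// Hypothetical usage sketch: build an indexed mesh from unindexed triangle data.
#include "meshoptimizer.h"
#include <vector>

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; }; // assumed layout

void reindex(const std::vector<Vertex>& unindexed, std::vector<Vertex>& vertices, std::vector<unsigned int>& indices)
{
	std::vector<unsigned int> remap(unindexed.size());
	// NULL indices: the input is treated as index_count == vertex_count unindexed triangles
	size_t unique = meshopt_generateVertexRemap(remap.data(), NULL, unindexed.size(), unindexed.data(), unindexed.size(), sizeof(Vertex));

	vertices.resize(unique);
	meshopt_remapVertexBuffer(vertices.data(), unindexed.data(), unindexed.size(), sizeof(Vertex), remap.data());

	indices.resize(unindexed.size());
	meshopt_remapIndexBuffer(indices.data(), NULL, unindexed.size(), remap.data());
}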

thirdparty/meshoptimizer/meshoptimizer.h vendored Normal file

File diff suppressed because it is too large (1349 additions)

thirdparty/meshoptimizer/overdrawoptimizer.cpp vendored Normal file

@@ -0,0 +1,333 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <math.h>
#include <string.h>
// This work is based on:
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{
static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
float mesh_centroid[3] = {};
for (size_t i = 0; i < index_count; ++i)
{
const float* p = vertex_positions + vertex_stride_float * indices[i];
mesh_centroid[0] += p[0];
mesh_centroid[1] += p[1];
mesh_centroid[2] += p[2];
}
mesh_centroid[0] /= index_count;
mesh_centroid[1] /= index_count;
mesh_centroid[2] /= index_count;
for (size_t cluster = 0; cluster < cluster_count; ++cluster)
{
size_t cluster_begin = clusters[cluster] * 3;
size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
assert(cluster_begin < cluster_end);
float cluster_area = 0;
float cluster_centroid[3] = {};
float cluster_normal[3] = {};
for (size_t i = cluster_begin; i < cluster_end; i += 3)
{
const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];
float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
float normalx = p10[1] * p20[2] - p10[2] * p20[1];
float normaly = p10[2] * p20[0] - p10[0] * p20[2];
float normalz = p10[0] * p20[1] - p10[1] * p20[0];
float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
cluster_normal[0] += normalx;
cluster_normal[1] += normaly;
cluster_normal[2] += normalz;
cluster_area += area;
}
float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;
cluster_centroid[0] *= inv_cluster_area;
cluster_centroid[1] *= inv_cluster_area;
cluster_centroid[2] *= inv_cluster_area;
float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;
cluster_normal[0] *= inv_cluster_normal_length;
cluster_normal[1] *= inv_cluster_normal_length;
cluster_normal[2] *= inv_cluster_normal_length;
float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};
sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
}
}
static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
{
// compute sort data bounds and renormalize, using fixed point snorm
float sort_data_max = 1e-3f;
for (size_t i = 0; i < cluster_count; ++i)
{
float dpa = fabsf(sort_data[i]);
sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
}
const int sort_bits = 11;
for (size_t i = 0; i < cluster_count; ++i)
{
// note that we flip distribution since high dot product should come first
float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max);
sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1);
}
// fill histogram for counting sort
unsigned int histogram[1 << sort_bits];
memset(histogram, 0, sizeof(histogram));
for (size_t i = 0; i < cluster_count; ++i)
{
histogram[sort_keys[i]]++;
}
// compute offsets based on histogram data
size_t histogram_sum = 0;
for (size_t i = 0; i < 1 << sort_bits; ++i)
{
size_t count = histogram[i];
histogram[i] = unsigned(histogram_sum);
histogram_sum += count;
}
assert(histogram_sum == cluster_count);
// compute sort order based on offsets
for (size_t i = 0; i < cluster_count; ++i)
{
sort_order[histogram[sort_keys[i]]++] = unsigned(i);
}
}
static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
unsigned int cache_misses = 0;
// if vertex is not in cache, put it in cache
if (timestamp - cache_timestamps[a] > cache_size)
{
cache_timestamps[a] = timestamp++;
cache_misses++;
}
if (timestamp - cache_timestamps[b] > cache_size)
{
cache_timestamps[b] = timestamp++;
cache_misses++;
}
if (timestamp - cache_timestamps[c] > cache_size)
{
cache_timestamps[c] = timestamp++;
cache_misses++;
}
return cache_misses;
}
static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
{
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
unsigned int timestamp = cache_size + 1;
size_t face_count = index_count / 3;
size_t result = 0;
for (size_t i = 0; i < face_count; ++i)
{
unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
// when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
// that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
// suggests an inefficiency in the vertex cache optimization algorithm
// usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
if (i == 0 || m == 3)
{
destination[result++] = unsigned(i);
}
}
assert(result <= index_count / 3);
return result;
}
static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
{
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
unsigned int timestamp = 0;
size_t result = 0;
for (size_t it = 0; it < cluster_count; ++it)
{
size_t start = clusters[it];
size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
assert(start < end);
// reset cache
timestamp += cache_size + 1;
// measure cluster ACMR
unsigned int cluster_misses = 0;
for (size_t i = start; i < end; ++i)
{
unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
cluster_misses += m;
}
float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));
// first cluster always starts from the hard cluster boundary
destination[result++] = unsigned(start);
// reset cache
timestamp += cache_size + 1;
unsigned int running_misses = 0;
unsigned int running_faces = 0;
for (size_t i = start; i < end; ++i)
{
unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
running_misses += m;
running_faces += 1;
if (float(running_misses) / float(running_faces) <= cluster_threshold)
{
// we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
// note that this may mean that we add 'end' to destination for the last triangle, which will imply that the last
// cluster is empty; however, the 'pop_back' after the loop will clean it up
destination[result++] = unsigned(i + 1);
// reset cache
timestamp += cache_size + 1;
running_misses = 0;
running_faces = 0;
}
}
// each time we reach the target ACMR we flush the cluster
// this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
// in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
// thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
// there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
// to the cluster boundary array which we need to remove anyway - this code will do that automatically
if (destination[result - 1] != start)
{
result--;
}
}
assert(result >= cluster_count);
assert(result <= index_count / 3);
return result;
}
} // namespace meshopt
void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
// guard for empty meshes
if (index_count == 0 || vertex_count == 0)
return;
// support in-place optimization
if (destination == indices)
{
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
indices = indices_copy;
}
unsigned int cache_size = 16;
unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
// generate hard boundaries from full-triangle cache misses
unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);
// generate soft boundaries
unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);
const unsigned int* clusters = soft_clusters;
size_t cluster_count = soft_cluster_count;
// fill sort data
float* sort_data = allocator.allocate<float>(cluster_count);
calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);
// sort clusters using sort data
unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);
// fill output buffer
size_t offset = 0;
for (size_t it = 0; it < cluster_count; ++it)
{
unsigned int cluster = sort_order[it];
assert(cluster < cluster_count);
size_t cluster_begin = clusters[cluster] * 3;
size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
assert(cluster_begin < cluster_end);
memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int));
offset += cluster_end - cluster_begin;
}
assert(offset == index_count);
}
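
A usage sketch (hypothetical consumer code, not part of the vendored sources): overdraw optimization is normally run after vertex cache optimization on the same buffer (meshopt_optimizeVertexCache is declared elsewhere in this library); a threshold of 1.05 lets ACMR degrade by up to 5% in exchange for better front-to-back cluster ordering.

// Hypothetical usage sketch: in-place cache + overdraw optimization.
#include "meshoptimizer.h"

void optimize_draw_order(unsigned int* indices, size_t index_count, const float* positions /* tightly packed xyz */, size_t vertex_count)
{
	meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count);
	meshopt_optimizeOverdraw(indices, indices, index_count, positions, vertex_count, 3 * sizeof(float), 1.05f);
}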

thirdparty/meshoptimizer/partition.cpp vendored Normal file

@@ -0,0 +1,499 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <math.h>
#include <string.h>
// This work is based on:
// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991
namespace meshopt
{
struct ClusterAdjacency
{
unsigned int* offsets;
unsigned int* clusters;
unsigned int* shared;
};
static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count)
{
(void)vertex_count;
(void)total_index_count;
size_t cluster_start = 0;
size_t cluster_write = 0;
for (size_t i = 0; i < cluster_count; ++i)
{
offsets[i] = unsigned(cluster_write);
// copy cluster indices, skipping duplicates
for (size_t j = 0; j < cluster_index_counts[i]; ++j)
{
unsigned int v = cluster_indices[cluster_start + j];
assert(v < vertex_count);
data[cluster_write] = v;
cluster_write += 1 - used[v];
used[v] = 1;
}
// reset used flags for the next cluster
for (size_t j = offsets[i]; j < cluster_write; ++j)
used[data[j]] = 0;
cluster_start += cluster_index_counts[i];
}
assert(cluster_start == total_index_count);
assert(cluster_write <= total_index_count);
offsets[cluster_count] = unsigned(cluster_write);
}
static void computeClusterBounds(float* cluster_bounds, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, const float* vertex_positions, size_t vertex_positions_stride)
{
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
for (size_t i = 0; i < cluster_count; ++i)
{
float center[3] = {0, 0, 0};
// approximate center of the cluster by averaging all vertex positions
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
{
const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
center[0] += p[0];
center[1] += p[1];
center[2] += p[2];
}
// note: technically clusters can't be empty per meshopt_partitionClusters but we check for a division by zero in case that changes
if (size_t cluster_size = cluster_offsets[i + 1] - cluster_offsets[i])
{
center[0] /= float(cluster_size);
center[1] /= float(cluster_size);
center[2] /= float(cluster_size);
}
// compute radius of the bounding sphere for each cluster
float radiussq = 0;
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
{
const float* p = vertex_positions + cluster_indices[j] * vertex_stride_float;
float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
radiussq = radiussq < d2 ? d2 : radiussq;
}
cluster_bounds[i * 4 + 0] = center[0];
cluster_bounds[i * 4 + 1] = center[1];
cluster_bounds[i * 4 + 2] = center[2];
cluster_bounds[i * 4 + 3] = sqrtf(radiussq);
}
}
static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator)
{
unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);
// compute number of clusters referenced by each vertex
memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));
for (size_t i = 0; i < cluster_count; ++i)
{
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
ref_offsets[cluster_indices[j]]++;
}
// compute (worst-case) number of adjacent clusters for each cluster
size_t total_adjacency = 0;
for (size_t i = 0; i < cluster_count; ++i)
{
size_t count = 0;
// worst case is every vertex has a disjoint cluster list
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
count += ref_offsets[cluster_indices[j]] - 1;
// ... but only every other cluster can be adjacent in the end
total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1;
}
// we can now allocate adjacency buffers
adjacency.offsets = allocator.allocate<unsigned int>(cluster_count + 1);
adjacency.clusters = allocator.allocate<unsigned int>(total_adjacency);
adjacency.shared = allocator.allocate<unsigned int>(total_adjacency);
// convert ref counts to offsets
size_t total_refs = 0;
for (size_t i = 0; i < vertex_count; ++i)
{
size_t count = ref_offsets[i];
ref_offsets[i] = unsigned(total_refs);
total_refs += count;
}
unsigned int* ref_data = allocator.allocate<unsigned int>(total_refs);
// fill cluster refs for each vertex
for (size_t i = 0; i < cluster_count; ++i)
{
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i);
}
// after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start
memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int));
ref_offsets[0] = 0;
// fill cluster adjacency for each cluster...
adjacency.offsets[0] = 0;
for (size_t i = 0; i < cluster_count; ++i)
{
unsigned int* adj = adjacency.clusters + adjacency.offsets[i];
unsigned int* shd = adjacency.shared + adjacency.offsets[i];
size_t count = 0;
for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
{
unsigned int v = cluster_indices[j];
// merge the entire cluster list of each vertex into current list
for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k)
{
unsigned int c = ref_data[k];
assert(c < cluster_count);
if (c == unsigned(i))
continue;
// if the cluster is already in the list, increment the shared count
bool found = false;
for (size_t l = 0; l < count; ++l)
if (adj[l] == c)
{
found = true;
shd[l]++;
break;
}
// .. or append a new cluster
if (!found)
{
adj[count] = c;
shd[count] = 1;
count++;
}
}
}
// mark the end of the adjacency list; the next cluster will start there as well
adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count);
}
assert(adjacency.offsets[cluster_count] <= total_adjacency);
// ref_offsets can't be deallocated as it was allocated before adjacency
allocator.deallocate(ref_data);
}
struct ClusterGroup
{
int group;
int next;
unsigned int size; // 0 unless root
unsigned int vertices;
};
struct GroupOrder
{
unsigned int id;
int order;
};
static void heapPush(GroupOrder* heap, size_t size, GroupOrder item)
{
// insert a new element at the end (breaks heap invariant)
heap[size++] = item;
// bubble up the new element to its correct position
size_t i = size - 1;
while (i > 0 && heap[i].order < heap[(i - 1) / 2].order)
{
size_t p = (i - 1) / 2;
GroupOrder temp = heap[i];
heap[i] = heap[p];
heap[p] = temp;
i = p;
}
}
static GroupOrder heapPop(GroupOrder* heap, size_t size)
{
assert(size > 0);
GroupOrder top = heap[0];
// move the last element to the top (breaks heap invariant)
heap[0] = heap[--size];
// bubble down the new top element to its correct position
size_t i = 0;
while (i * 2 + 1 < size)
{
// find the smallest child
size_t j = i * 2 + 1;
j += (j + 1 < size && heap[j + 1].order < heap[j].order);
// if the parent is already smaller than both children, we're done
if (heap[j].order >= heap[i].order)
break;
// otherwise, swap the parent and child and continue
GroupOrder temp = heap[i];
heap[i] = heap[j];
heap[j] = temp;
i = j;
}
return top;
}
static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency)
{
unsigned int total = 0;
for (int i1 = group1; i1 >= 0; i1 = groups[i1].next)
for (int i2 = group2; i2 >= 0; i2 = groups[i2].next)
{
for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj)
if (adjacency.clusters[adj] == unsigned(i2))
{
total += adjacency.shared[adj];
break;
}
}
return total;
}
static void mergeBounds(float* target, const float* source)
{
float r1 = target[3], r2 = source[3];
float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
float d = sqrtf(dx * dx + dy * dy + dz * dz);
if (d + r1 < r2)
{
memcpy(target, source, 4 * sizeof(float));
return;
}
if (d + r2 > r1)
{
float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f;
target[0] += dx * k;
target[1] += dy * k;
target[2] += dz * k;
target[3] = (d + r2 + r1) / 2;
}
}
static float boundsScore(const float* target, const float* source)
{
float r1 = target[3], r2 = source[3];
float dx = source[0] - target[0], dy = source[1] - target[1], dz = source[2] - target[2];
float d = sqrtf(dx * dx + dy * dy + dz * dz);
float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2);
return mr > 0 ? r1 / mr : 0.f;
}
static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, const float* cluster_bounds)
{
assert(groups[id].size > 0);
float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices)));
int best_group = -1;
float best_score = 0;
for (int ci = id; ci >= 0; ci = groups[ci].next)
{
for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj)
{
int other = groups[adjacency.clusters[adj]].group;
if (other < 0)
continue;
assert(groups[other].size > 0);
if (groups[id].size + groups[other].size > max_partition_size)
continue;
unsigned int shared = countShared(groups, id, other, adjacency);
float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices)));
// normalize shared count by the expected boundary of each group (+ keeps scoring symmetric)
float score = float(int(shared)) * (group_rsqrt + other_rsqrt);
// incorporate spatial score to favor merging nearby groups
if (cluster_bounds)
score *= 1.f + 0.4f * boundsScore(&cluster_bounds[id * 4], &cluster_bounds[other * 4]);
if (score > best_score)
{
best_group = other;
best_score = score;
}
}
}
return best_group;
}
} // namespace meshopt
size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
{
using namespace meshopt;
assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
assert(target_partition_size > 0);
size_t max_partition_size = target_partition_size + target_partition_size * 3 / 8;
meshopt_Allocator allocator;
unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
memset(used, 0, vertex_count);
unsigned int* cluster_newindices = allocator.allocate<unsigned int>(total_index_count);
unsigned int* cluster_offsets = allocator.allocate<unsigned int>(cluster_count + 1);
// make new cluster index list that filters out duplicate indices
filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count);
cluster_indices = cluster_newindices;
// compute bounding sphere for each cluster if positions are provided
float* cluster_bounds = NULL;
if (vertex_positions)
{
cluster_bounds = allocator.allocate<float>(cluster_count * 4);
computeClusterBounds(cluster_bounds, cluster_indices, cluster_offsets, cluster_count, vertex_positions, vertex_positions_stride);
}
// build cluster adjacency along with edge weights (shared vertex count)
ClusterAdjacency adjacency = {};
buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator);
ClusterGroup* groups = allocator.allocate<ClusterGroup>(cluster_count);
GroupOrder* order = allocator.allocate<GroupOrder>(cluster_count);
size_t pending = 0;
// create a singleton group for each cluster and order them by priority
for (size_t i = 0; i < cluster_count; ++i)
{
groups[i].group = int(i);
groups[i].next = -1;
groups[i].size = 1;
groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i];
assert(groups[i].vertices > 0);
GroupOrder item = {};
item.id = unsigned(i);
item.order = groups[i].vertices;
heapPush(order, pending++, item);
}
// iteratively merge the smallest group with the best group
while (pending)
{
GroupOrder top = heapPop(order, pending--);
// this group was merged into another group earlier
if (groups[top.id].size == 0)
continue;
// disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted
for (int i = top.id; i >= 0; i = groups[i].next)
{
assert(groups[i].group == int(top.id));
groups[i].group = -1;
}
// the group is large enough, emit as is
if (groups[top.id].size >= target_partition_size)
continue;
int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, cluster_bounds);
// we can't grow the group any more, emit as is
if (best_group == -1)
continue;
// compute shared vertices to adjust the total vertices estimate after merging
unsigned int shared = countShared(groups, top.id, best_group, adjacency);
// combine groups by linking them together
assert(groups[best_group].size > 0);
for (int i = top.id; i >= 0; i = groups[i].next)
if (groups[i].next < 0)
{
groups[i].next = best_group;
break;
}
// update group sizes; note, the vertex update is an O(1) approximation which avoids recomputing the true size
groups[top.id].size += groups[best_group].size;
groups[top.id].vertices += groups[best_group].vertices;
groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1;
groups[best_group].size = 0;
groups[best_group].vertices = 0;
// merge bounding spheres if bounds are available
if (cluster_bounds)
{
mergeBounds(&cluster_bounds[top.id * 4], &cluster_bounds[best_group * 4]);
memset(&cluster_bounds[best_group * 4], 0, 4 * sizeof(float));
}
// re-associate all clusters back to the merged group
for (int i = top.id; i >= 0; i = groups[i].next)
groups[i].group = int(top.id);
top.order = groups[top.id].vertices;
heapPush(order, pending++, top);
}
size_t next_group = 0;
for (size_t i = 0; i < cluster_count; ++i)
{
if (groups[i].size == 0)
continue;
for (int j = int(i); j >= 0; j = groups[j].next)
destination[j] = unsigned(next_group);
next_group++;
}
assert(next_group <= cluster_count);
return next_group;
}
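// Example (editor's note, illustrative sketch only; input arrays are hypothetical):
// partition meshlets into groups of roughly 8 clusters each; destination[i] receives
// the partition id of cluster i, and the return value is the number of partitions.
//
//   std::vector<unsigned int> partition_ids(cluster_count);
//   size_t partition_count = meshopt_partitionClusters(partition_ids.data(),
//       cluster_indices.data(), total_index_count, cluster_index_counts.data(), cluster_count,
//       positions.data(), vertex_count, sizeof(float) * 3, 8);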

76
thirdparty/meshoptimizer/quantization.cpp vendored Normal file
View File

@@ -0,0 +1,76 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
union FloatBits
{
float f;
unsigned int ui;
};
unsigned short meshopt_quantizeHalf(float v)
{
FloatBits u = {v};
unsigned int ui = u.ui;
int s = (ui >> 16) & 0x8000;
int em = ui & 0x7fffffff;
// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
int h = (em - (112 << 23) + (1 << 12)) >> 13;
// underflow: flush to zero; 113 encodes exponent -14
h = (em < (113 << 23)) ? 0 : h;
// overflow: infinity; 143 encodes exponent 16
h = (em >= (143 << 23)) ? 0x7c00 : h;
// NaN; note that we convert all types of NaN to qNaN
h = (em > (255 << 23)) ? 0x7e00 : h;
return (unsigned short)(s | h);
}
float meshopt_quantizeFloat(float v, int N)
{
assert(N >= 0 && N <= 23);
FloatBits u = {v};
unsigned int ui = u.ui;
const int mask = (1 << (23 - N)) - 1;
const int round = (1 << (23 - N)) >> 1;
int e = ui & 0x7f800000;
unsigned int rui = (ui + round) & ~mask;
// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
ui = e == 0x7f800000 ? ui : rui;
// flush denormals to zero
ui = e == 0 ? 0 : ui;
u.ui = ui;
return u.f;
}
float meshopt_dequantizeHalf(unsigned short h)
{
unsigned int s = unsigned(h & 0x8000) << 16;
int em = h & 0x7fff;
// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
int r = (em + (112 << 10)) << 13;
// denormal: flush to zero
r = (em < (1 << 10)) ? 0 : r;
// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
r += (em >= (31 << 10)) ? (112 << 23) : 0;
FloatBits u;
u.ui = s | r;
return u.f;
}
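// Example (editor's note, illustrative sketch only): round-tripping a value through
// half precision, and rounding a float to 10 mantissa bits.
//
//   unsigned short h = meshopt_quantizeHalf(0.1f);  // nearest half to 0.1
//   float r = meshopt_dequantizeHalf(h);            // ~0.0999756f
//   float q = meshopt_quantizeFloat(0.1f, 10);      // keeps 10 mantissa bits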

289
thirdparty/meshoptimizer/rasterizer.cpp vendored Normal file
View File

@@ -0,0 +1,289 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <string.h>
// This work is based on:
// Nicolas Capens. Advanced Rasterization. 2004
namespace meshopt
{
const int kViewport = 256;
struct OverdrawBuffer
{
float z[kViewport][kViewport][2];
unsigned int overdraw[kViewport][kViewport][2];
};
static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
{
// z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
// z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
// (x2-x1 y2-y1)(dzdx) = (z2-z1)
// (x3-x1 y3-y1)(dzdy) (z3-z1)
// we'll solve it with Cramer's rule
float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
float invdet = (det == 0) ? 0 : 1 / det;
dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;
return det;
}
// half-space fixed point triangle rasterizer
static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
{
// compute depth gradients
float DZx, DZy;
float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
int sign = det > 0;
// flip backfacing triangles to simplify rasterization logic
if (sign)
{
// flipping v2 & v3 preserves depth gradients since they're based on v1; only v1z is used below
float t;
t = v2x, v2x = v3x, v3x = t;
t = v2y, v2y = v3y, v3y = t;
// flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
v1z = kViewport - v1z;
DZx = -DZx;
DZy = -DZy;
}
// coordinates, 28.4 fixed point
int X1 = int(16.0f * v1x + 0.5f);
int X2 = int(16.0f * v2x + 0.5f);
int X3 = int(16.0f * v3x + 0.5f);
int Y1 = int(16.0f * v1y + 0.5f);
int Y2 = int(16.0f * v2y + 0.5f);
int Y3 = int(16.0f * v3y + 0.5f);
// bounding rectangle, clipped against viewport
// since we rasterize pixels with covered centers, min >0.5 should round up
// as for max, due to top-left filling convention we will never rasterize right/bottom edges
// so max >= 0.5 should round down for inclusive bounds, and up for exclusive (in our case)
int minx = X1 < X2 ? X1 : X2;
minx = minx < X3 ? minx : X3;
minx = (minx + 7) >> 4;
minx = minx < 0 ? 0 : minx;
int miny = Y1 < Y2 ? Y1 : Y2;
miny = miny < Y3 ? miny : Y3;
miny = (miny + 7) >> 4;
miny = miny < 0 ? 0 : miny;
int maxx = X1 > X2 ? X1 : X2;
maxx = maxx > X3 ? maxx : X3;
maxx = (maxx + 7) >> 4;
maxx = maxx > kViewport ? kViewport : maxx;
int maxy = Y1 > Y2 ? Y1 : Y2;
maxy = maxy > Y3 ? maxy : Y3;
maxy = (maxy + 7) >> 4;
maxy = maxy > kViewport ? kViewport : maxy;
// deltas, 28.4 fixed point
int DX12 = X1 - X2;
int DX23 = X2 - X3;
int DX31 = X3 - X1;
int DY12 = Y1 - Y2;
int DY23 = Y2 - Y3;
int DY31 = Y3 - Y1;
// fill convention correction
int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);
// half edge equations, 24.8 fixed point
// note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
int FX = (minx << 4) + 8;
int FY = (miny << 4) + 8;
int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);
for (int y = miny; y < maxy; y++)
{
int CX1 = CY1;
int CX2 = CY2;
int CX3 = CY3;
float ZX = ZY;
for (int x = minx; x < maxx; x++)
{
// check if all CXn are non-negative
if ((CX1 | CX2 | CX3) >= 0)
{
if (ZX >= buffer->z[y][x][sign])
{
buffer->z[y][x][sign] = ZX;
buffer->overdraw[y][x][sign]++;
}
}
// signed left shift is UB for negative numbers so use unsigned-signed casts
CX1 -= int(unsigned(DY12) << 4);
CX2 -= int(unsigned(DY23) << 4);
CX3 -= int(unsigned(DY31) << 4);
ZX += DZx;
}
// signed left shift is UB for negative numbers so use unsigned-signed casts
CY1 += int(unsigned(DX12) << 4);
CY2 += int(unsigned(DX23) << 4);
CY3 += int(unsigned(DX31) << 4);
ZY += DZy;
}
}
static float transformTriangles(float* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
for (size_t i = 0; i < vertex_count; ++i)
{
const float* v = vertex_positions + i * vertex_stride_float;
for (int j = 0; j < 3; ++j)
{
float vj = v[j];
minv[j] = minv[j] > vj ? vj : minv[j];
maxv[j] = maxv[j] < vj ? vj : maxv[j];
}
}
float extent = 0.f;
extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
float scale = kViewport / extent;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
const float* v = vertex_positions + index * vertex_stride_float;
triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
}
return extent;
}
static void rasterizeTriangles(OverdrawBuffer* buffer, const float* triangles, size_t index_count, int axis)
{
for (size_t i = 0; i < index_count; i += 3)
{
const float* vn0 = &triangles[3 * (i + 0)];
const float* vn1 = &triangles[3 * (i + 1)];
const float* vn2 = &triangles[3 * (i + 2)];
switch (axis)
{
case 0:
rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
break;
case 1:
rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
break;
case 2:
rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
break;
}
}
}
} // namespace meshopt
meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
meshopt_OverdrawStatistics result = {};
float* triangles = allocator.allocate<float>(index_count * 3);
transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
for (int axis = 0; axis < 3; ++axis)
{
memset(buffer, 0, sizeof(OverdrawBuffer));
rasterizeTriangles(buffer, triangles, index_count, axis);
for (int y = 0; y < kViewport; ++y)
for (int x = 0; x < kViewport; ++x)
for (int s = 0; s < 2; ++s)
{
unsigned int overdraw = buffer->overdraw[y][x][s];
result.pixels_covered += overdraw > 0;
result.pixels_shaded += overdraw;
}
}
result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;
return result;
}
meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
meshopt_CoverageStatistics result = {};
float* triangles = allocator.allocate<float>(index_count * 3);
float extent = transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
for (int axis = 0; axis < 3; ++axis)
{
memset(buffer, 0, sizeof(OverdrawBuffer));
rasterizeTriangles(buffer, triangles, index_count, axis);
unsigned int covered = 0;
for (int y = 0; y < kViewport; ++y)
for (int x = 0; x < kViewport; ++x)
covered += (buffer->overdraw[y][x][0] | buffer->overdraw[y][x][1]) > 0;
result.coverage[axis] = float(covered) / float(kViewport * kViewport);
}
result.extent = extent;
return result;
}
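// Example (editor's note, illustrative sketch only): an overdraw value of 1.0 means
// every covered pixel is shaded exactly once; higher values indicate redundant shading.
//
//   meshopt_OverdrawStatistics os = meshopt_analyzeOverdraw(indices, index_count,
//       positions, vertex_count, sizeof(float) * 3);
//   printf("overdraw: %.2f\n", os.overdraw);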

2401
thirdparty/meshoptimizer/simplifier.cpp vendored Normal file

File diff suppressed because it is too large

340
thirdparty/meshoptimizer/spatialorder.cpp vendored Normal file
View File

@@ -0,0 +1,340 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <string.h>
// This work is based on:
// Fabian Giesen. Decoding Morton codes. 2009
namespace meshopt
{
// "Insert" two 0 bits after each of the 20 low bits of x
inline unsigned long long part1By2(unsigned long long x)
{
x &= 0x000fffffull; // x = ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- jihg fedc ba98 7654 3210
x = (x ^ (x << 32)) & 0x000f00000000ffffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- ---- ---- ---- ---- fedc ba98 7654 3210
x = (x ^ (x << 16)) & 0x000f0000ff0000ffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- fedc ba98 ---- ---- ---- ---- 7654 3210
x = (x ^ (x << 8)) & 0x000f00f00f00f00full; // x = ---- ---- ---- jihg ---- ---- fedc ---- ---- ba98 ---- ---- 7654 ---- ---- 3210
x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull; // x = ---- ---- ji-- --hg ---- fe-- --dc ---- ba-- --98 ---- 76-- --54 ---- 32-- --10
x = (x ^ (x << 2)) & 0x0249249249249249ull; // x = ---- --j- -i-- h--g --f- -e-- d--c --b- -a-- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
return x;
}
static void computeOrder(unsigned long long* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, bool morton)
{
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
for (size_t i = 0; i < vertex_count; ++i)
{
const float* v = vertex_positions_data + i * vertex_stride_float;
for (int j = 0; j < 3; ++j)
{
float vj = v[j];
minv[j] = minv[j] > vj ? vj : minv[j];
maxv[j] = maxv[j] < vj ? vj : maxv[j];
}
}
float extent = 0.f;
extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
// rescale each axis to 16 bits to get 48-bit Morton codes
float scale = extent == 0 ? 0.f : 65535.f / extent;
// generate Morton order based on the position inside a unit cube
for (size_t i = 0; i < vertex_count; ++i)
{
const float* v = vertex_positions_data + i * vertex_stride_float;
int x = int((v[0] - minv[0]) * scale + 0.5f);
int y = int((v[1] - minv[1]) * scale + 0.5f);
int z = int((v[2] - minv[2]) * scale + 0.5f);
if (morton)
result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
else
result[i] = ((unsigned long long)x << 0) | ((unsigned long long)y << 20) | ((unsigned long long)z << 40);
}
}
static void radixSort10(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count)
{
unsigned int hist[1024];
memset(hist, 0, sizeof(hist));
// compute histogram (assume keys are 10-bit)
for (size_t i = 0; i < count; ++i)
hist[keys[i]]++;
unsigned int sum = 0;
// replace histogram data with prefix histogram sums in-place
for (int i = 0; i < 1024; ++i)
{
unsigned int h = hist[i];
hist[i] = sum;
sum += h;
}
assert(sum == count);
// reorder values
for (size_t i = 0; i < count; ++i)
{
unsigned int id = keys[source[i]];
destination[hist[id]++] = source[i];
}
}
static void computeHistogram(unsigned int (&hist)[256][2], const unsigned short* data, size_t count)
{
memset(hist, 0, sizeof(hist));
// compute 2 8-bit histograms in parallel
for (size_t i = 0; i < count; ++i)
{
unsigned long long id = data[i];
hist[(id >> 0) & 255][0]++;
hist[(id >> 8) & 255][1]++;
}
unsigned int sum0 = 0, sum1 = 0;
// replace histogram data with prefix histogram sums in-place
for (int i = 0; i < 256; ++i)
{
unsigned int h0 = hist[i][0], h1 = hist[i][1];
hist[i][0] = sum0;
hist[i][1] = sum1;
sum0 += h0;
sum1 += h1;
}
assert(sum0 == count && sum1 == count);
}
static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count, unsigned int (&hist)[256][2], int pass)
{
int bitoff = pass * 8;
for (size_t i = 0; i < count; ++i)
{
unsigned int id = unsigned(keys[source[i]] >> bitoff) & 255;
destination[hist[id][pass]++] = source[i];
}
}
static void partitionPoints(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count)
{
size_t l = 0, r = split;
for (size_t i = 0; i < count; ++i)
{
unsigned char side = sides[order[i]];
target[side ? r : l] = order[i];
l += 1;
l -= side;
r += side;
}
assert(l == split && r == count);
}
static void splitPoints(unsigned int* destination, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, const unsigned long long* keys, size_t count, void* scratch, size_t cluster_size)
{
if (count <= cluster_size)
{
memcpy(destination, orderx, count * sizeof(unsigned int));
return;
}
unsigned int* axes[3] = {orderx, ordery, orderz};
int bestk = -1;
unsigned int bestdim = 0;
for (int k = 0; k < 3; ++k)
{
const unsigned int mask = (1 << 20) - 1;
unsigned int dim = (unsigned(keys[axes[k][count - 1]] >> (k * 20)) & mask) - (unsigned(keys[axes[k][0]] >> (k * 20)) & mask);
if (dim >= bestdim)
{
bestk = k;
bestdim = dim;
}
}
assert(bestk >= 0);
// split roughly in half, with the left split always being aligned to cluster size
size_t split = ((count / 2) + cluster_size - 1) / cluster_size * cluster_size;
assert(split > 0 && split < count);
// mark sides of split for partitioning
unsigned char* sides = static_cast<unsigned char*>(scratch) + count * sizeof(unsigned int);
for (size_t i = 0; i < split; ++i)
sides[axes[bestk][i]] = 0;
for (size_t i = split; i < count; ++i)
sides[axes[bestk][i]] = 1;
// partition all axes into two sides, maintaining order
unsigned int* temp = static_cast<unsigned int*>(scratch);
for (int k = 0; k < 3; ++k)
{
if (k == bestk)
continue;
unsigned int* axis = axes[k];
memcpy(temp, axis, sizeof(unsigned int) * count);
partitionPoints(axis, temp, sides, split, count);
}
splitPoints(destination, orderx, ordery, orderz, keys, split, scratch, cluster_size);
splitPoints(destination + split, orderx + split, ordery + split, orderz + split, keys, count - split, scratch, cluster_size);
}
} // namespace meshopt
void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
meshopt_Allocator allocator;
unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ true);
unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 2b for keys
unsigned short* keyk = (unsigned short*)(scratch + vertex_count);
for (size_t i = 0; i < vertex_count; ++i)
destination[i] = unsigned(i);
unsigned int* order[] = {scratch, destination};
// 5-pass radix sort computes the resulting order into scratch
for (int k = 0; k < 5; ++k)
{
// copy 10-bit key segments into keyk to reduce cache pressure during radix pass
for (size_t i = 0; i < vertex_count; ++i)
keyk[i] = (unsigned short)((keys[i] >> (k * 10)) & 1023);
radixSort10(order[k % 2], order[(k + 1) % 2], keyk, vertex_count);
}
// since our remap table is mapping old=>new, we need to reverse it
for (size_t i = 0; i < vertex_count; ++i)
destination[scratch[i]] = unsigned(i);
}
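// Example (editor's note, illustrative sketch only): the resulting table maps old
// vertex indices to new ones; it can be applied with the remap helpers declared
// elsewhere in meshoptimizer.h.
//
//   std::vector<unsigned int> remap(vertex_count);
//   meshopt_spatialSortRemap(remap.data(), positions, vertex_count, sizeof(float) * 3);
//   meshopt_remapVertexBuffer(vertices, vertices, vertex_count, vertex_size, remap.data());
//   meshopt_remapIndexBuffer(indices, indices, index_count, remap.data());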
void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
(void)vertex_count;
size_t face_count = index_count / 3;
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
meshopt_Allocator allocator;
float* centroids = allocator.allocate<float>(face_count * 3);
for (size_t i = 0; i < face_count; ++i)
{
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
const float* va = vertex_positions + a * vertex_stride_float;
const float* vb = vertex_positions + b * vertex_stride_float;
const float* vc = vertex_positions + c * vertex_stride_float;
centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
}
unsigned int* remap = allocator.allocate<unsigned int>(face_count);
meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);
// support in-order remap
if (destination == indices)
{
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
indices = indices_copy;
}
for (size_t i = 0; i < face_count; ++i)
{
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
unsigned int r = remap[i];
destination[r * 3 + 0] = a;
destination[r * 3 + 1] = b;
destination[r * 3 + 2] = c;
}
}
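// Example (editor's note, illustrative sketch only): reorder triangles in-place so
// that triangles close in space end up close in the index buffer.
//
//   meshopt_spatialSortTriangles(indices, indices, index_count, positions, vertex_count, sizeof(float) * 3);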
void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size)
{
using namespace meshopt;
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
assert(cluster_size > 0);
meshopt_Allocator allocator;
unsigned long long* keys = allocator.allocate<unsigned long long>(vertex_count);
computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ false);
unsigned int* order = allocator.allocate<unsigned int>(vertex_count * 3);
unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count * 2); // 4b for order + 1b for side or 2b for keys
unsigned short* keyk = reinterpret_cast<unsigned short*>(scratch + vertex_count);
for (int k = 0; k < 3; ++k)
{
// copy 16-bit key segments into keyk to reduce cache pressure during radix pass
for (size_t i = 0; i < vertex_count; ++i)
keyk[i] = (unsigned short)(keys[i] >> (k * 20));
unsigned int hist[256][2];
computeHistogram(hist, keyk, vertex_count);
for (size_t i = 0; i < vertex_count; ++i)
order[k * vertex_count + i] = unsigned(i);
radixPass(scratch, order + k * vertex_count, keyk, vertex_count, hist, 0);
radixPass(order + k * vertex_count, scratch, keyk, vertex_count, hist, 1);
}
splitPoints(destination, order, order + vertex_count, order + 2 * vertex_count, keys, vertex_count, scratch, cluster_size);
}
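// Example (editor's note, illustrative sketch only): order a point cloud so that each
// consecutive run of 256 indices in the output forms a spatially coherent cluster.
//
//   std::vector<unsigned int> order(point_count);
//   meshopt_spatialClusterPoints(order.data(), positions, point_count, sizeof(float) * 3, 256);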

296
thirdparty/meshoptimizer/stripifier.cpp vendored Normal file
View File

@@ -0,0 +1,296 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <limits.h>
#include <string.h>
// This work is based on:
// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
namespace meshopt
{
static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned char* valence)
{
unsigned int index = 0;
unsigned int iv = ~0u;
for (size_t i = 0; i < buffer_size; ++i)
{
unsigned char va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]];
unsigned int v = (va < vb && va < vc) ? va : (vb < vc ? vb : vc);
if (v < iv)
{
index = unsigned(i);
iv = v;
}
}
return index;
}
static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
{
for (size_t i = 0; i < buffer_size; ++i)
{
unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
if (e0 == a && e1 == b)
return (int(i) << 2) | 2;
else if (e0 == b && e1 == c)
return (int(i) << 2) | 0;
else if (e0 == c && e1 == a)
return (int(i) << 2) | 1;
}
return -1;
}
} // namespace meshopt
size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
{
assert(destination != indices);
assert(index_count % 3 == 0);
using namespace meshopt;
meshopt_Allocator allocator;
const size_t buffer_capacity = 8;
unsigned int buffer[buffer_capacity][3] = {};
unsigned int buffer_size = 0;
size_t index_offset = 0;
unsigned int strip[2] = {};
unsigned int parity = 0;
size_t strip_size = 0;
// compute vertex valence; this is used to prioritize starting triangle for strips
// note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic
unsigned char* valence = allocator.allocate<unsigned char>(vertex_count);
memset(valence, 0, vertex_count);
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
valence[index]++;
}
int next = -1;
while (buffer_size > 0 || index_offset < index_count)
{
assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));
// fill triangle buffer
while (buffer_size < buffer_capacity && index_offset < index_count)
{
buffer[buffer_size][0] = indices[index_offset + 0];
buffer[buffer_size][1] = indices[index_offset + 1];
buffer[buffer_size][2] = indices[index_offset + 2];
buffer_size++;
index_offset += 3;
}
assert(buffer_size > 0);
if (next >= 0)
{
unsigned int i = next >> 2;
unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
unsigned int v = buffer[i][next & 3];
// ordered removal from the buffer
memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
buffer_size--;
// update vertex valences for strip start heuristic
valence[a]--;
valence[b]--;
valence[c]--;
// find next triangle (note that edge order flips on every iteration)
// in some cases we need to perform a swap to pick a different outgoing triangle edge
// for [a b c], the default strip edge is [b c], but we might want to use [a c]
int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;
if (cont < 0 && swap >= 0)
{
// [a b c] => [a b a c]
destination[strip_size++] = strip[0];
destination[strip_size++] = v;
// next strip has same winding
// ? a b => b a v
strip[1] = v;
next = swap;
}
else
{
// emit the next vertex in the strip
destination[strip_size++] = v;
// next strip has flipped winding
strip[0] = strip[1];
strip[1] = v;
parity ^= 1;
next = cont;
}
}
else
{
// if we didn't find anything, we need to find the next new triangle
// we use a heuristic to maximize the strip length
unsigned int i = findStripFirst(buffer, buffer_size, valence);
unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
// ordered removal from the buffer
memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
buffer_size--;
// update vertex valences for strip start heuristic
valence[a]--;
valence[b]--;
valence[c]--;
// we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
int ea = findStripNext(buffer, buffer_size, c, b);
int eb = findStripNext(buffer, buffer_size, a, c);
int ec = findStripNext(buffer, buffer_size, b, a);
// in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
// triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
// reasons - slightly improves the stripification efficiency
int mine = INT_MAX;
mine = (ea >= 0 && mine > ea) ? ea : mine;
mine = (eb >= 0 && mine > eb) ? eb : mine;
mine = (ec >= 0 && mine > ec) ? ec : mine;
if (ea == mine)
{
// keep abc
next = ea;
}
else if (eb == mine)
{
// abc -> bca
unsigned int t = a;
a = b, b = c, c = t;
next = eb;
}
else if (ec == mine)
{
// abc -> cab
unsigned int t = c;
c = b, b = a, a = t;
next = ec;
}
if (restart_index)
{
if (strip_size)
destination[strip_size++] = restart_index;
destination[strip_size++] = a;
destination[strip_size++] = b;
destination[strip_size++] = c;
// new strip always starts with the same edge winding
strip[0] = b;
strip[1] = c;
parity = 1;
}
else
{
if (strip_size)
{
// connect last strip using degenerate triangles
destination[strip_size++] = strip[1];
destination[strip_size++] = a;
}
// note that we may need to flip the emitted triangle based on parity
// we always end up with outgoing edge "cb" in the end
unsigned int e0 = parity ? c : b;
unsigned int e1 = parity ? b : c;
destination[strip_size++] = a;
destination[strip_size++] = e0;
destination[strip_size++] = e1;
strip[0] = e0;
strip[1] = e1;
parity ^= 1;
}
}
}
return strip_size;
}
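// Example (editor's note, illustrative sketch only): build a strip with a restart
// index, sizing the output with the worst-case bound below.
//
//   std::vector<unsigned int> strip(meshopt_stripifyBound(index_count));
//   unsigned int restart = ~0u;
//   size_t strip_size = meshopt_stripify(strip.data(), indices, index_count, vertex_count, restart);
//   strip.resize(strip_size);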
size_t meshopt_stripifyBound(size_t index_count)
{
assert(index_count % 3 == 0);
// worst case without restarts is 2 degenerate indices and 3 indices per triangle
// worst case with restarts is 1 restart index and 3 indices per triangle
return (index_count / 3) * 5;
}
size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
{
assert(destination != indices);
size_t offset = 0;
size_t start = 0;
for (size_t i = 0; i < index_count; ++i)
{
if (restart_index && indices[i] == restart_index)
{
start = i + 1;
}
else if (i - start >= 2)
{
unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];
// flip winding for odd triangles
if ((i - start) & 1)
{
unsigned int t = a;
a = b, b = t;
}
// although we use restart indices, strip swaps still produce degenerate triangles, so skip them
if (a != b && a != c && b != c)
{
destination[offset + 0] = a;
destination[offset + 1] = b;
destination[offset + 2] = c;
offset += 3;
}
}
}
return offset;
}
size_t meshopt_unstripifyBound(size_t index_count)
{
assert(index_count == 0 || index_count >= 3);
return (index_count == 0) ? 0 : (index_count - 2) * 3;
}
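// Example (editor's note, illustrative sketch only): convert a strip back into a
// triangle list; degenerate triangles produced by strip swaps are skipped.
//
//   std::vector<unsigned int> tris(meshopt_unstripifyBound(strip_size));
//   size_t tri_index_count = meshopt_unstripify(tris.data(), strip.data(), strip_size, restart);
//   tris.resize(tri_index_count);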

467
thirdparty/meshoptimizer/vcacheoptimizer.cpp vendored Normal file
View File

@@ -0,0 +1,467 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{
const size_t kCacheSizeMax = 16;
const size_t kValenceMax = 8;
struct VertexScoreTable
{
float cache[1 + kCacheSizeMax];
float live[1 + kValenceMax];
};
// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
static const VertexScoreTable kVertexScoreTable = {
{0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
{0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
};
// Tuned to minimize the encoded index buffer size
static const VertexScoreTable kVertexScoreTableStrip = {
{0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
{0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
};
struct TriangleAdjacency
{
unsigned int* counts;
unsigned int* offsets;
unsigned int* data;
};
static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
size_t face_count = index_count / 3;
// allocate arrays
adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
adjacency.data = allocator.allocate<unsigned int>(index_count);
// fill triangle counts
memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
for (size_t i = 0; i < index_count; ++i)
{
assert(indices[i] < vertex_count);
adjacency.counts[indices[i]]++;
}
// fill offset table
unsigned int offset = 0;
for (size_t i = 0; i < vertex_count; ++i)
{
adjacency.offsets[i] = offset;
offset += adjacency.counts[i];
}
assert(offset == index_count);
// fill triangle data
for (size_t i = 0; i < face_count; ++i)
{
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
adjacency.data[adjacency.offsets[a]++] = unsigned(i);
adjacency.data[adjacency.offsets[b]++] = unsigned(i);
adjacency.data[adjacency.offsets[c]++] = unsigned(i);
}
// fix offsets that have been disturbed by the previous pass
for (size_t i = 0; i < vertex_count; ++i)
{
assert(adjacency.offsets[i] >= adjacency.counts[i]);
adjacency.offsets[i] -= adjacency.counts[i];
}
}
static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
{
// check dead-end stack
while (dead_end_top)
{
unsigned int vertex = dead_end[--dead_end_top];
if (live_triangles[vertex] > 0)
return vertex;
}
// input order
while (input_cursor < vertex_count)
{
if (live_triangles[input_cursor] > 0)
return input_cursor;
++input_cursor;
}
return ~0u;
}
static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
unsigned int best_candidate = ~0u;
int best_priority = -1;
for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
{
unsigned int vertex = *next_candidate;
// otherwise we don't need to process it
if (live_triangles[vertex] > 0)
{
int priority = 0;
// will it be in cache after fanning?
if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
{
priority = timestamp - cache_timestamps[vertex]; // position in cache
}
if (priority > best_priority)
{
best_candidate = vertex;
best_priority = priority;
}
}
}
return best_candidate;
}
static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
{
assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
}
static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
{
// input order
while (input_cursor < face_count)
{
if (!emitted_flags[input_cursor])
return input_cursor;
++input_cursor;
}
return ~0u;
}
} // namespace meshopt
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
{
using namespace meshopt;
assert(index_count % 3 == 0);
meshopt_Allocator allocator;
// guard for empty meshes
if (index_count == 0 || vertex_count == 0)
return;
// support in-place optimization
if (destination == indices)
{
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
indices = indices_copy;
}
unsigned int cache_size = 16;
assert(cache_size <= kCacheSizeMax);
size_t face_count = index_count / 3;
// build adjacency information
TriangleAdjacency adjacency = {};
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
// live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match
unsigned int* live_triangles = adjacency.counts;
// emitted flags
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
memset(emitted_flags, 0, face_count);
// compute initial vertex scores
float* vertex_scores = allocator.allocate<float>(vertex_count);
for (size_t i = 0; i < vertex_count; ++i)
vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
// compute triangle scores
float* triangle_scores = allocator.allocate<float>(face_count);
for (size_t i = 0; i < face_count; ++i)
{
unsigned int a = indices[i * 3 + 0];
unsigned int b = indices[i * 3 + 1];
unsigned int c = indices[i * 3 + 2];
triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
}
unsigned int cache_holder[2 * (kCacheSizeMax + 4)];
unsigned int* cache = cache_holder;
unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;
size_t cache_count = 0;
unsigned int current_triangle = 0;
unsigned int input_cursor = 1;
unsigned int output_triangle = 0;
while (current_triangle != ~0u)
{
assert(output_triangle < face_count);
unsigned int a = indices[current_triangle * 3 + 0];
unsigned int b = indices[current_triangle * 3 + 1];
unsigned int c = indices[current_triangle * 3 + 2];
// output indices
destination[output_triangle * 3 + 0] = a;
destination[output_triangle * 3 + 1] = b;
destination[output_triangle * 3 + 2] = c;
output_triangle++;
// update emitted flags
emitted_flags[current_triangle] = true;
triangle_scores[current_triangle] = 0;
// new triangle
size_t cache_write = 0;
cache_new[cache_write++] = a;
cache_new[cache_write++] = b;
cache_new[cache_write++] = c;
// old triangles
for (size_t i = 0; i < cache_count; ++i)
{
unsigned int index = cache[i];
cache_new[cache_write] = index;
cache_write += (index != a) & (index != b) & (index != c);
}
unsigned int* cache_temp = cache;
cache = cache_new, cache_new = cache_temp;
cache_count = cache_write > cache_size ? cache_size : cache_write;
// remove emitted triangle from adjacency data
// this makes sure that we spend less time traversing these lists on subsequent iterations
// live triangle counts are updated as a byproduct of these adjustments
for (size_t k = 0; k < 3; ++k)
{
unsigned int index = indices[current_triangle * 3 + k];
unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
size_t neighbors_size = adjacency.counts[index];
for (size_t i = 0; i < neighbors_size; ++i)
{
unsigned int tri = neighbors[i];
if (tri == current_triangle)
{
neighbors[i] = neighbors[neighbors_size - 1];
adjacency.counts[index]--;
break;
}
}
}
unsigned int best_triangle = ~0u;
float best_score = 0;
// update cache positions, vertex scores and triangle scores, and find next best triangle
for (size_t i = 0; i < cache_write; ++i)
{
unsigned int index = cache[i];
// no need to update scores if we are never going to use this vertex
if (adjacency.counts[index] == 0)
continue;
int cache_position = i >= cache_size ? -1 : int(i);
// update vertex score
float score = vertexScore(table, cache_position, live_triangles[index]);
float score_diff = score - vertex_scores[index];
vertex_scores[index] = score;
// update scores of vertex triangles
const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];
const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];
for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
{
unsigned int tri = *it;
assert(!emitted_flags[tri]);
float tri_score = triangle_scores[tri] + score_diff;
assert(tri_score > 0);
best_triangle = best_score < tri_score ? tri : best_triangle;
best_score = best_score < tri_score ? tri_score : best_score;
triangle_scores[tri] = tri_score;
}
}
// step through input triangles in order if we hit a dead-end
current_triangle = best_triangle;
if (current_triangle == ~0u)
{
current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
}
}
assert(input_cursor == face_count);
assert(output_triangle == face_count);
}
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
}
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
}
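// Example (editor's note, illustrative sketch only): the two entry points are
// alternatives - the default table targets triangle-list rendering (lowest ACMR),
// while the strip table is a better pre-pass before meshopt_stripify.
//
//   meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count);
// or, when the mesh will be stripified afterwards:
//   meshopt_optimizeVertexCacheStrip(indices, indices, index_count, vertex_count);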
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(cache_size >= 3);
meshopt_Allocator allocator;
// guard for empty meshes
if (index_count == 0 || vertex_count == 0)
return;
// support in-place optimization
if (destination == indices)
{
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
indices = indices_copy;
}
size_t face_count = index_count / 3;
// build adjacency information
TriangleAdjacency adjacency = {};
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
// live triangle counts
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
// cache time stamps
unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
// dead-end stack
unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
unsigned int dead_end_top = 0;
// emitted flags
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
memset(emitted_flags, 0, face_count);
unsigned int current_vertex = 0;
unsigned int timestamp = cache_size + 1;
unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
unsigned int output_triangle = 0;
while (current_vertex != ~0u)
{
const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
// emit all vertex neighbors
const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];
for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
{
unsigned int triangle = *it;
if (!emitted_flags[triangle])
{
unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
// output indices
destination[output_triangle * 3 + 0] = a;
destination[output_triangle * 3 + 1] = b;
destination[output_triangle * 3 + 2] = c;
output_triangle++;
// update dead-end stack
dead_end[dead_end_top + 0] = a;
dead_end[dead_end_top + 1] = b;
dead_end[dead_end_top + 2] = c;
dead_end_top += 3;
// update live triangle counts
live_triangles[a]--;
live_triangles[b]--;
live_triangles[c]--;
// update cache info
// if vertex is not in cache, put it in cache
if (timestamp - cache_timestamps[a] > cache_size)
cache_timestamps[a] = timestamp++;
if (timestamp - cache_timestamps[b] > cache_size)
cache_timestamps[b] = timestamp++;
if (timestamp - cache_timestamps[c] > cache_size)
cache_timestamps[c] = timestamp++;
// update emitted flags
emitted_flags[triangle] = true;
}
}
// next candidates are the ones we pushed to dead-end stack just now
const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
// get next vertex
current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
if (current_vertex == ~0u)
{
current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
}
}
assert(output_triangle == face_count);
}
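// Example (editor's note, illustrative sketch only): the FIFO variant models a
// fixed-size FIFO cache and requires an explicit cache size; it runs faster but
// generally produces worse results than the table-based optimizer above.
//
//   meshopt_optimizeVertexCacheFifo(indices, indices, index_count, vertex_count, 16);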

1910
thirdparty/meshoptimizer/vertexcodec.cpp vendored Normal file

File diff suppressed because it is too large

1047
thirdparty/meshoptimizer/vertexfilter.cpp vendored Normal file

File diff suppressed because it is too large

74
thirdparty/meshoptimizer/vfetchoptimizer.cpp vendored Normal file
View File

@@ -0,0 +1,74 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
assert(index_count % 3 == 0);
memset(destination, -1, vertex_count * sizeof(unsigned int));
unsigned int next_vertex = 0;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
if (destination[index] == ~0u)
{
destination[index] = next_vertex++;
}
}
assert(next_vertex <= vertex_count);
return next_vertex;
}
size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
assert(index_count % 3 == 0);
assert(vertex_size > 0 && vertex_size <= 256);
meshopt_Allocator allocator;
// support in-place optimization
if (destination == vertices)
{
unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
memcpy(vertices_copy, vertices, vertex_count * vertex_size);
vertices = vertices_copy;
}
// build vertex remap table
unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
memset(vertex_remap, -1, vertex_count * sizeof(unsigned int));
unsigned int next_vertex = 0;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
assert(index < vertex_count);
unsigned int& remap = vertex_remap[index];
if (remap == ~0u) // vertex was not added to destination VB
{
// add vertex
memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size);
remap = next_vertex++;
}
// modify indices in place
indices[i] = remap;
}
assert(next_vertex <= vertex_count);
return next_vertex;
}
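// Example (editor's note, illustrative sketch only; 'Vertex' is a hypothetical struct):
// vertex fetch optimization is the final reordering step, run after cache/overdraw
// optimization since it rewrites both buffers to match the final index order.
//
//   size_t unique = meshopt_optimizeVertexFetch(vertices, indices, index_count,
//       vertices, vertex_count, sizeof(Vertex));
//   // 'unique' is the number of vertices actually referenced by the index buffer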