initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

5
thirdparty/etcpak/AUTHORS.txt vendored Normal file
View File

@@ -0,0 +1,5 @@
Bartosz Taudul <wolf@nereid.pl>
Daniel Jungmann <el.3d.source@gmail.com>
Florian Penzkofer <fp@nullptr.de>
Jae-Ho Nah <nahjaeho@gmail.com>
Marcin Ławicki <marcin.lawicki@gmail.com>

797
thirdparty/etcpak/DecodeRGB.cpp vendored Normal file
View File

@@ -0,0 +1,797 @@
#include "DecodeRGB.hpp"
#include "Tables.hpp"
#include "Math.hpp"
#include <string.h>
#ifdef __ARM_NEON
# include <arm_neon.h>
#endif
// Pull in the compiler intrinsics header when x86 SIMD is enabled or when
// building with MSVC.
#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
# ifdef _MSC_VER
# include <intrin.h>
# include <Windows.h>
// MSVC: map the byte-swap helpers used in this file onto the CRT intrinsics.
# define _bswap(x) _byteswap_ulong(x)
# define _bswap64(x) _byteswap_uint64(x)
# else
# include <x86intrin.h>
# endif
#endif
// Fallback used whenever no _bswap macro was defined above: use the
// __builtin_bswap32 / __builtin_bswap64 compiler builtins.
#ifndef _bswap
# define _bswap(x) __builtin_bswap32(x)
# define _bswap64(x) __builtin_bswap64(x)
#endif
// Colour distance table used by the T-mode and H-mode decoders below.
// It is indexed by the 3-bit distance codeword extracted from the block and the
// selected value is added to / subtracted from a base colour channel.
// The table is never written to, so it is declared const to keep it read-only.
static const uint8_t table59T58H[8] = { 3,6,11,16,23,32,41,64 };
namespace
{
// Widens a 6-bit channel value to 8 bits: the value is moved up by two bits and
// its two most significant bits are copied into the freed low bits.
// Callers in this file pass values already masked to 6 bits.
static etcpak_force_inline int32_t expand6(uint32_t value)
{
    const uint32_t shiftedUp = value << 2;
    const uint32_t topBits = value >> 4;
    return shiftedUp | topBits;
}
// Widens a 7-bit channel value to 8 bits: the value is moved up by one bit and
// its most significant bit is copied into the freed low bit.
// Callers in this file pass values already masked to 7 bits.
static etcpak_force_inline int32_t expand7(uint32_t value)
{
    const uint32_t shiftedUp = value << 1;
    const uint32_t topBit = value >> 6;
    return shiftedUp | topBit;
}
// Decodes one 4x4 block stored in T mode into opaque 32-bit pixels.
//   block - block word as produced by ConvertByteOrder() in the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i] with alpha 0xFF.
static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t w )
{
    // First base colour. Its red nibble is stored as two 2-bit halves with a
    // gap bit in between, hence the 0x1B mask.
    const auto redSplit = ( block >> 24 ) & 0x1B;
    const auto redHi = ( redSplit >> 3 ) & 0x3;
    const auto redLo = redSplit & 0x3;
    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;

    // Second base colour, three plain nibbles.
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Nibble replication turns the 4-bit channels into 8-bit channels.
    const auto baseR0 = ( ( redHi << 6 ) | ( redLo << 4 ) | ( redHi << 2 ) | redLo );
    const auto baseG0 = ( green0 << 4 ) | green0;
    const auto baseB0 = ( blue0 << 4 ) | blue0;
    const auto baseR1 = ( red1 << 4 ) | red1;
    const auto baseG1 = ( green1 << 4 ) | green1;
    const auto baseB1 = ( blue1 << 4 ) | blue1;

    // The 3-bit distance selector is split: two bits at [3:2], one bit at [0].
    const auto selectorHi = ( block >> 2 ) & 0x3;
    const auto selectorLo = block & 0x1;
    const uint8_t distance = table59T58H[( selectorHi << 1 ) | selectorLo];

    // The second base colour moved up and down by the selected distance.
    const auto upR = clampu8( baseR1 + distance );
    const auto upG = clampu8( baseG1 + distance );
    const auto upB = clampu8( baseB1 + distance );
    const auto downR = clampu8( baseR1 - distance );
    const auto downG = clampu8( baseG1 - distance );
    const auto downB = clampu8( baseB1 - distance );

    const uint32_t palette[4] = {
        uint32_t( baseR0 | ( baseG0 << 8 ) | ( baseB0 << 16 ) | 0xFF000000 ),
        uint32_t( upR | ( upG << 8 ) | ( upB << 16 ) | 0xFF000000 ),
        uint32_t( baseR1 | ( baseG1 << 8 ) | ( baseB1 << 16 ) | 0xFF000000 ),
        uint32_t( downR | ( downG << 8 ) | ( downB << 16 ) | 0xFF000000 )
    };

    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Each 2-bit palette index is split across the two 16-bit halves of
            // `indexes`: low bit in the lower half, high bit in the upper half.
            const int bit = row + col * 4;
            const uint32_t high = ( indexes >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( indexes >> bit ) & 0x1;
            dst[row * w + col] = palette[( high << 1 ) | low];
        }
    }
}
// Decodes one 4x4 block stored in T mode and combines it with a separately
// coded alpha block.
//   block - colour block word as produced by ConvertByteOrder() in the caller
//   alpha - alpha block word, already byte-swapped by the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i].
static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // First base colour. Its red nibble is stored as two 2-bit halves with a
    // gap bit in between, hence the 0x1B mask.
    const auto redSplit = ( block >> 24 ) & 0x1B;
    const auto redHi = ( redSplit >> 3 ) & 0x3;
    const auto redLo = redSplit & 0x3;
    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;

    // Second base colour, three plain nibbles.
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Nibble replication turns the 4-bit channels into 8-bit channels.
    const auto baseR0 = ( ( redHi << 6 ) | ( redLo << 4 ) | ( redHi << 2 ) | redLo );
    const auto baseG0 = ( green0 << 4 ) | green0;
    const auto baseB0 = ( blue0 << 4 ) | blue0;
    const auto baseR1 = ( red1 << 4 ) | red1;
    const auto baseG1 = ( green1 << 4 ) | green1;
    const auto baseB1 = ( blue1 << 4 ) | blue1;

    // The 3-bit distance selector is split: two bits at [3:2], one bit at [0].
    const auto selectorHi = ( block >> 2 ) & 0x3;
    const auto selectorLo = block & 0x1;
    const uint8_t distance = table59T58H[( selectorHi << 1 ) | selectorLo];

    // Alpha header: base value, multiplier and modifier-table selector.
    const int32_t alphaBase = alpha >> 56;
    const int32_t alphaMul = ( alpha >> 52 ) & 0xF;
    const auto alphaTable = g_alpha[( alpha >> 48 ) & 0xF];

    // The second base colour moved up and down by the selected distance.
    const auto upR = clampu8( baseR1 + distance );
    const auto upG = clampu8( baseG1 + distance );
    const auto upB = clampu8( baseB1 + distance );
    const auto downR = clampu8( baseR1 - distance );
    const auto downG = clampu8( baseG1 - distance );
    const auto downB = clampu8( baseB1 - distance );

    // Colour-only palette; the alpha byte is merged in per pixel below.
    const uint32_t palette[4] = {
        uint32_t( baseR0 | ( baseG0 << 8 ) | ( baseB0 << 16 ) ),
        uint32_t( upR | ( upG << 8 ) | ( upB << 16 ) ),
        uint32_t( baseR1 | ( baseG1 << 8 ) | ( baseB1 << 16 ) ),
        uint32_t( downR | ( downG << 8 ) | ( downB << 16 ) )
    };

    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Each 2-bit palette index is split across the two 16-bit halves of
            // `indexes`: low bit in the lower half, high bit in the upper half.
            const int bit = row + col * 4;
            const uint32_t high = ( indexes >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( indexes >> bit ) & 0x1;

            // 3-bit alpha selectors are packed from bit 45 downwards, column by column.
            const auto amod = alphaTable[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + amod * alphaMul );

            dst[row * w + col] = palette[( high << 1 ) | low] | ( a << 24 );
        }
    }
}
// Decodes one 4x4 block stored in H mode into opaque 32-bit pixels.
//   block - block word as produced by ConvertByteOrder() in the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i] with alpha 0xFF.
static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t w )
{
    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;

    // Two base colours with 4 bits per channel; green and blue of the first
    // colour are stored in two pieces each.
    const auto red0 = ( block >> 27 ) & 0xF;
    const auto green0 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
    const auto blue0 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );
    const auto red1 = ( block >> 11 ) & 0xF;
    const auto green1 = ( block >> 7 ) & 0xF;
    const auto blue1 = ( block >> 3 ) & 0xF;

    // Nibble replication turns the 4-bit channels into 8-bit channels.
    const auto r0 = ( red0 << 4 ) | red0;
    const auto g0 = ( green0 << 4 ) | green0;
    const auto b0 = ( blue0 << 4 ) | blue0;
    const auto r1 = ( red1 << 4 ) | red1;
    const auto g1 = ( green1 << 4 ) | green1;
    const auto b1 = ( blue1 << 4 ) | blue1;

    // Distance selector: two stored bits plus one bit implied by the ordering
    // of the two packed 12-bit base colours.
    const auto storedBits = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) );
    const auto packed0 = ( red0 << 8 ) | ( green0 << 4 ) | ( blue0 << 0 );
    const auto packed1 = ( block >> 3 ) & 0xFFF;
    const auto orderBit = ( packed0 >= packed1 ) ? 1 : 0;
    const uint8_t distance = table59T58H[storedBits | orderBit];

    // Clamps each channel and packs an opaque pixel.
    const auto paint = []( uint64_t r, uint64_t g, uint64_t b ) -> uint32_t
    {
        return uint32_t( clampu8( r ) | ( clampu8( g ) << 8 ) | ( clampu8( b ) << 16 ) ) | 0xFF000000;
    };

    // Each base colour moved up and down by the selected distance.
    const uint32_t palette[] = {
        paint( r0 + distance, g0 + distance, b0 + distance ),
        paint( r0 - distance, g0 - distance, b0 - distance ),
        paint( r1 + distance, g1 + distance, b1 + distance ),
        paint( r1 - distance, g1 - distance, b1 - distance )
    };

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Each 2-bit palette index is split across the two 16-bit halves of
            // `indexes`: low bit in the lower half, high bit in the upper half.
            const int bit = row + col * 4;
            const uint32_t high = ( indexes >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( indexes >> bit ) & 0x1;
            dst[row * w + col] = palette[( high << 1 ) | low];
        }
    }
}
// Decodes one 4x4 block stored in H mode and combines it with a separately
// coded alpha block.
//   block - colour block word as produced by ConvertByteOrder() in the caller
//   alpha - alpha block word, already byte-swapped by the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i].
static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF;

    // Two base colours with 4 bits per channel; green and blue of the first
    // colour are stored in two pieces each.
    const auto red0 = ( block >> 27 ) & 0xF;
    const auto green0 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 );
    const auto blue0 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 );
    const auto red1 = ( block >> 11 ) & 0xF;
    const auto green1 = ( block >> 7 ) & 0xF;
    const auto blue1 = ( block >> 3 ) & 0xF;

    // Nibble replication turns the 4-bit channels into 8-bit channels.
    const auto r0 = ( red0 << 4 ) | red0;
    const auto g0 = ( green0 << 4 ) | green0;
    const auto b0 = ( blue0 << 4 ) | blue0;
    const auto r1 = ( red1 << 4 ) | red1;
    const auto g1 = ( green1 << 4 ) | green1;
    const auto b1 = ( blue1 << 4 ) | blue1;

    // Distance selector: two stored bits plus one bit implied by the ordering
    // of the two packed 12-bit base colours.
    const auto storedBits = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) );
    const auto packed0 = ( red0 << 8 ) | ( green0 << 4 ) | ( blue0 << 0 );
    const auto packed1 = ( block >> 3 ) & 0xFFF;
    const auto orderBit = ( packed0 >= packed1 ) ? 1 : 0;
    const uint8_t distance = table59T58H[storedBits | orderBit];

    // Alpha header: base value, multiplier and modifier-table selector.
    const int32_t alphaBase = alpha >> 56;
    const int32_t alphaMul = ( alpha >> 52 ) & 0xF;
    const auto alphaTable = g_alpha[( alpha >> 48 ) & 0xF];

    // Clamps each channel and packs a colour-only pixel (alpha is merged later).
    const auto paint = []( uint64_t r, uint64_t g, uint64_t b ) -> uint32_t
    {
        return uint32_t( clampu8( r ) | ( clampu8( g ) << 8 ) | ( clampu8( b ) << 16 ) );
    };

    // Each base colour moved up and down by the selected distance.
    const uint32_t palette[] = {
        paint( r0 + distance, g0 + distance, b0 + distance ),
        paint( r0 - distance, g0 - distance, b0 - distance ),
        paint( r1 + distance, g1 + distance, b1 + distance ),
        paint( r1 - distance, g1 - distance, b1 - distance )
    };

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Each 2-bit palette index is split across the two 16-bit halves of
            // `indexes`: low bit in the lower half, high bit in the upper half.
            const int bit = row + col * 4;
            const uint32_t high = ( indexes >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( indexes >> bit ) & 0x1;

            // 3-bit alpha selectors are packed from bit 45 downwards, column by column.
            const auto amod = alphaTable[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + amod * alphaMul );

            dst[row * w + col] = palette[( high << 1 ) | low] | ( a << 24 );
        }
    }
}
// Decodes one 4x4 block stored in planar mode into opaque 32-bit pixels.
//   block - block word as produced by ConvertByteOrder() in the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// Three colours are unpacked: "o" (suffix o), "h" (suffix h) and "v" (suffix v).
// The pixel in column i, row j is
//   ( i * (h - o) + j * (v - o) + 4 * o + 2 ) >> 2
// per channel, clamped to 8 bits, and is written to dst[j * w + i].
// All four code paths below compute this same expression.
static etcpak_force_inline void DecodePlanar( uint64_t block, uint32_t* dst, uint32_t w )
{
// Red and blue are stored with 6 bits, green with 7 bits; expand6/expand7 widen them to 8 bits.
const auto bv = expand6((block >> ( 0 + 32)) & 0x3F);
const auto gv = expand7((block >> ( 6 + 32)) & 0x7F);
const auto rv = expand6((block >> (13 + 32)) & 0x3F);
const auto bh = expand6((block >> (19 + 32)) & 0x3F);
const auto gh = expand7((block >> (25 + 32)) & 0x7F);
// The remaining fields are stored in several pieces that are stitched together first.
const auto rh0 = (block >> (32 - 32)) & 0x01;
const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1;
const auto rh = expand6(rh0 | rh1);
const auto bo0 = (block >> (39 - 32)) & 0x07;
const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3;
const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5;
const auto bo = expand6(bo0 | bo1 | bo2);
const auto go0 = (block >> (49 - 32)) & 0x3F;
const auto go1 = ((block >> (56 - 32)) & 0x01) << 6;
const auto go = expand7(go0 | go1);
const auto ro = expand6((block >> (57 - 32)) & 0x3F);
#ifdef __ARM_NEON
// NEON: one running 16-bit accumulator per channel.
// chco is the per-column step (h - o).
uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 );
int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
// cvco is applied once per row: it removes the four column steps taken in the
// inner loop and adds one row step (v - o).
init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 );
int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
// Start value 4*o+2 per channel; the fourth lane holds 0xFFF, which the
// saturating narrowing shift below turns into the 0xFF alpha byte.
init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 ) | ( uint64_t(0xFFF) << 48 );
int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
uint8x8_t c = vqshrun_n_s16( col, 2 );
vst1_lane_u32( dst+j*w+i, vreinterpret_u32_u8( c ), 0 );
col = vaddq_s16( col, chco );
}
col = vaddq_s16( col, cvco );
}
#elif defined __AVX2__
// AVX2: all four pixels of a row are held in one register, so only the
// per-row step (v - o) has to be added between rows.
const auto R0 = 4*ro+2;
const auto G0 = 4*go+2;
const auto B0 = 4*bo+2;
const auto RHO = rh-ro;
const auto GHO = gh-go;
const auto BHO = bh-bo;
__m256i cvco = _mm256_setr_epi16( rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0 );
// Row 0: columns 0..3 with the column steps pre-applied; the 0xFFF lanes become alpha.
__m256i col = _mm256_setr_epi16( R0, G0, B0, 0xFFF, R0+RHO, G0+GHO, B0+BHO, 0xFFF, R0+2*RHO, G0+2*GHO, B0+2*BHO, 0xFFF, R0+3*RHO, G0+3*GHO, B0+3*BHO, 0xFFF );
for( int j=0; j<4; j++ )
{
__m256i c = _mm256_srai_epi16( col, 2 );
// Unsigned-saturating pack clamps every channel to 0..255.
__m128i s = _mm_packus_epi16( _mm256_castsi256_si128( c ), _mm256_extracti128_si256( c, 1 ) );
_mm_storeu_si128( (__m128i*)(dst+j*w), s );
col = _mm256_add_epi16( col, cvco );
}
#elif defined __SSE4_1__
// SSE4.1: same running-accumulator scheme as the NEON path, one pixel at a time.
__m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 );
__m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 );
__m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0xFFF, 0, 0, 0, 0 );
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
__m128i c = _mm_srai_epi16( col, 2 );
__m128i s = _mm_packus_epi16( c, c );
dst[j*w+i] = _mm_cvtsi128_si32( s );
col = _mm_add_epi16( col, chco );
}
col = _mm_add_epi16( col, cvco );
}
#else
// Portable path: evaluate the expression directly for every pixel.
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2;
const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2;
const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2;
// Fast path: all three channels already fit in 8 bits, no clamping needed.
if( ( ( r | g | b ) & ~0xFF ) == 0 )
{
dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
}
else
{
const auto rc = clampu8( r );
const auto gc = clampu8( g );
const auto bc = clampu8( b );
dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
}
}
}
#endif
}
// Decodes one 4x4 block stored in planar mode and combines it with a
// separately coded alpha block.
//   block - colour block word as produced by ConvertByteOrder() in the caller
//   alpha - alpha block word, already byte-swapped by the caller
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// Colour of the pixel in column i, row j is
//   ( i * (h - o) + j * (v - o) + 4 * o + 2 ) >> 2
// per channel, clamped to 8 bits; alpha is clampu8( base + modifier * mul ).
// The pixel is written to dst[j * w + i].
static etcpak_force_inline void DecodePlanarAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
// Red and blue are stored with 6 bits, green with 7 bits; expand6/expand7 widen them to 8 bits.
const auto bv = expand6((block >> ( 0 + 32)) & 0x3F);
const auto gv = expand7((block >> ( 6 + 32)) & 0x7F);
const auto rv = expand6((block >> (13 + 32)) & 0x3F);
const auto bh = expand6((block >> (19 + 32)) & 0x3F);
const auto gh = expand7((block >> (25 + 32)) & 0x7F);
// The remaining fields are stored in several pieces that are stitched together first.
const auto rh0 = (block >> (32 - 32)) & 0x01;
const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1;
const auto rh = expand6(rh0 | rh1);
const auto bo0 = (block >> (39 - 32)) & 0x07;
const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3;
const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5;
const auto bo = expand6(bo0 | bo1 | bo2);
const auto go0 = (block >> (49 - 32)) & 0x3F;
const auto go1 = ((block >> (56 - 32)) & 0x01) << 6;
const auto go = expand7(go0 | go1);
const auto ro = expand6((block >> (57 - 32)) & 0x3F);
// Alpha header: base value, multiplier and modifier-table selector.
const int32_t base = alpha >> 56;
const int32_t mul = ( alpha >> 52 ) & 0xF;
const auto tbl = g_alpha[( alpha >> 48 ) & 0xF];
#ifdef __ARM_NEON
// NEON: one running 16-bit accumulator per channel.
// chco is the per-column step (h - o).
uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 );
int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
// cvco is applied once per row: it removes the four column steps taken in the
// inner loop and adds one row step (v - o).
init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 );
int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
// Start value 4*o+2 per channel; the fourth lane stays zero because alpha is OR-ed in separately.
init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 );
int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) );
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
// 3-bit alpha selectors are packed from bit 45 downwards, column by column.
const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
const uint32_t a = clampu8( base + amod * mul );
uint8x8_t c = vqshrun_n_s16( col, 2 );
dst[j*w+i] = vget_lane_u32( vreinterpret_u32_u8( c ), 0 ) | ( a << 24 );
col = vaddq_s16( col, chco );
}
col = vaddq_s16( col, cvco );
}
#elif defined __SSE4_1__
// SSE4.1: same running-accumulator scheme as the NEON path, one pixel at a time.
__m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 );
__m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 );
__m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0, 0, 0, 0, 0 );
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
// 3-bit alpha selectors are packed from bit 45 downwards, column by column.
const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
const uint32_t a = clampu8( base + amod * mul );
__m128i c = _mm_srai_epi16( col, 2 );
// Unsigned-saturating pack clamps every colour channel to 0..255.
__m128i s = _mm_packus_epi16( c, c );
dst[j*w+i] = _mm_cvtsi128_si32( s ) | ( a << 24 );
col = _mm_add_epi16( col, chco );
}
col = _mm_add_epi16( col, cvco );
}
#else
// Portable path: evaluate the expression directly for every pixel.
for (auto j = 0; j < 4; j++)
{
for (auto i = 0; i < 4; i++)
{
const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2;
const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2;
const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2;
// 3-bit alpha selectors are packed from bit 45 downwards, column by column.
const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
const uint32_t a = clampu8( base + amod * mul );
// Fast path: all three channels already fit in 8 bits, no clamping needed.
if( ( ( r | g | b ) & ~0xFF ) == 0 )
{
dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
}
else
{
const auto rc = clampu8( r );
const auto gc = clampu8( g );
const auto bc = clampu8( b );
dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
}
}
}
#endif
}
}
// Byte-swaps each 32-bit half of `d` independently; the two halves keep their
// positions relative to each other. memcpy is used for the type punning.
static etcpak_force_inline uint64_t ConvertByteOrder( uint64_t d )
{
    uint32_t halves[2];
    memcpy( halves, &d, sizeof( halves ) );
    for( auto& half : halves )
    {
        half = _bswap( half );
    }
    uint64_t swapped;
    memcpy( &swapped, halves, sizeof( swapped ) );
    return swapped;
}
// Decodes one 64-bit RGB colour block into a 4x4 tile of opaque pixels.
//   d   - raw block word as loaded from memory (byte order is fixed up here)
//   dst - top-left output pixel
//   w   - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i] with alpha 0xFF.
//
// Bit 1 of the converted word selects differential coding. In that case a base
// colour plus signed 3-bit deltas is stored; a red, green or blue sum outside
// 0..31 instead signals T, H or planar mode respectively, which are handled by
// the dedicated decoders. Otherwise two independent 4-bit-per-channel base
// colours are stored.
static etcpak_force_inline void DecodeRGBPart( uint64_t d, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );

    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        int32_t dr, dg, db;

        uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Sign-extend the 3-bit deltas. The left shift is done on an unsigned
        // value because left-shifting a negative signed integer is undefined
        // behaviour before C++20; the arithmetic right shift then extends the sign.
        dr = int32_t( uint32_t( d ) << 5 ) >> 29;
        dg = int32_t( uint32_t( d ) << 13 ) >> 29;
        db = int32_t( uint32_t( d ) << 21 ) >> 29;

        int32_t r1 = int32_t(r0) + dr;
        int32_t g1 = int32_t(g0) + dg;
        int32_t b1 = int32_t(b0) + db;

        // T mode
        if ( (r1 < 0) || (r1 > 31) )
        {
            DecodeT( d, dst, w );
            return;
        }

        // H mode
        if ((g1 < 0) || (g1 > 31))
        {
            DecodeH( d, dst, w );
            return;
        }

        // P mode
        if( (b1 < 0) || (b1 > 31) )
        {
            DecodePlanar( d, dst, w );
            return;
        }

        // Expand the 5-bit channels to 8 bits by replicating the top bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Two independent base colours, 4 bits per channel, nibble-replicated to 8 bits.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 );
    }

    // Modifier-table selectors for the two sub-blocks.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The low and high bits of the sixteen 2-bit pixel indices are stored in two
    // separate 16-bit fields. Spread each field to every other bit and interleave
    // them so that `idx` holds one complete 2-bit index per pixel.
    uint32_t b1 = ( d >> 32 ) & 0xFFFF;
    uint32_t b2 = ( d >> 48 );

    b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF;
    b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F;
    b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333;
    b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555;

    b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF;
    b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F;
    b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333;
    b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555;

    uint32_t idx = b1 | ( b2 << 1 );

    if( d & 0x1 )
    {
        // Flip bit set: the sub-block is selected by the row (j / 2).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                // Fast path: all three channels already fit in 8 bits.
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Flip bit clear: the sub-block is selected by the column (i / 2).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                // Fast path: all three channels already fit in 8 bits.
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
}
// Decodes one 64-bit RGB colour block together with one 64-bit alpha block
// into a 4x4 tile of RGBA pixels.
//   d     - raw colour block word as loaded from memory
//   alpha - raw alpha block word as loaded from memory
//   dst   - top-left output pixel
//   w     - output row pitch, in pixels
// The pixel in column i, row j is written to dst[j * w + i].
//
// Bit 1 of the converted colour word selects differential coding. In that case
// a base colour plus signed 3-bit deltas is stored; a red, green or blue sum
// outside 0..31 instead signals T, H or planar mode respectively, which are
// handled by the dedicated decoders. Otherwise two independent
// 4-bit-per-channel base colours are stored.
static etcpak_force_inline void DecodeRGBAPart( uint64_t d, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );
    alpha = _bswap64( alpha );

    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        int32_t dr, dg, db;

        uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Sign-extend the 3-bit deltas. The left shift is done on an unsigned
        // value because left-shifting a negative signed integer is undefined
        // behaviour before C++20; the arithmetic right shift then extends the sign.
        dr = int32_t( uint32_t( d ) << 5 ) >> 29;
        dg = int32_t( uint32_t( d ) << 13 ) >> 29;
        db = int32_t( uint32_t( d ) << 21 ) >> 29;

        int32_t r1 = int32_t(r0) + dr;
        int32_t g1 = int32_t(g0) + dg;
        int32_t b1 = int32_t(b0) + db;

        // T mode
        if ( (r1 < 0) || (r1 > 31) )
        {
            DecodeTAlpha( d, alpha, dst, w );
            return;
        }

        // H mode
        if ( (g1 < 0) || (g1 > 31) )
        {
            DecodeHAlpha( d, alpha, dst, w );
            return;
        }

        // P mode
        if ( (b1 < 0) || (b1 > 31) )
        {
            DecodePlanarAlpha( d, alpha, dst, w );
            return;
        }

        // Expand the 5-bit channels to 8 bits by replicating the top bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Two independent base colours, 4 bits per channel, nibble-replicated to 8 bits.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 );
    }

    // Modifier-table selectors for the two sub-blocks.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The low and high bits of the sixteen 2-bit pixel indices are stored in two
    // separate 16-bit fields. Spread each field to every other bit and interleave
    // them so that `idx` holds one complete 2-bit index per pixel.
    uint32_t b1 = ( d >> 32 ) & 0xFFFF;
    uint32_t b2 = ( d >> 48 );

    b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF;
    b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F;
    b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333;
    b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555;

    b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF;
    b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F;
    b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333;
    b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555;

    uint32_t idx = b1 | ( b2 << 1 );

    // Alpha header: base value, multiplier and modifier-table selector.
    const int32_t base = alpha >> 56;
    const int32_t mul = ( alpha >> 52 ) & 0xF;
    const auto atbl = g_alpha[( alpha >> 48 ) & 0xF];

    if( d & 0x1 )
    {
        // Flip bit set: the sub-block is selected by the row (j / 2).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                // 3-bit alpha selectors are packed from bit 45 downwards, column by column.
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                // Fast path: all three colour channels already fit in 8 bits.
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Flip bit clear: the sub-block is selected by the column (i / 2).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                // 3-bit alpha selectors are packed from bit 45 downwards, column by column.
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                // Fast path: all three colour channels already fit in 8 bits.
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
}
// Decodes one 64-bit single-channel block into a 4x4 tile.
//   r   - raw block word as loaded from memory (byte-swapped here)
//   dst - top-left output pixel
//   w   - output row pitch, in pixels
// The decoded value goes into the lowest byte of each pixel, the top byte is
// set to 0xFF and the two bytes in between are zero. The pixel in column i,
// row j is written to dst[j * w + i].
static etcpak_force_inline void DecodeRPart( uint64_t r, uint32_t* dst, uint32_t w )
{
    r = _bswap64( r );

    // Header: base value (scaled by 8 with a +4 offset), multiplier index, table index.
    const int32_t base = ( r >> 56 )*8+4;
    const int32_t mul = ( r >> 52 ) & 0xF;
    const auto modifiers = g_alpha[( r >> 48 ) & 0xF];
    const auto scale = g_alpha11Mul[mul];

    // 3-bit selectors are stored from bit 45 downwards in column-major order,
    // so walking columns first lets the shift simply drop by 3 per pixel.
    int shift = 45;
    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            const auto amod = modifiers[( r >> shift ) & 0x7];
            const uint32_t value = clampu8( ( base + amod * scale )/8 );
            dst[row*w+col] = value | 0xFF000000;
            shift -= 3;
        }
    }
}
// Decodes two 64-bit single-channel blocks into a 4x4 two-channel tile.
//   r, g - raw block words as loaded from memory (byte-swapped here)
//   dst  - top-left output pixel
//   w    - output row pitch, in pixels
// The value decoded from `r` goes into the lowest byte of each pixel, the value
// from `g` into the next byte, the third byte is zero and the top byte is 0xFF.
// The pixel in column i, row j is written to dst[j * w + i].
static etcpak_force_inline void DecodeRGPart( uint64_t r, uint64_t g, uint32_t* dst, uint32_t w )
{
    r = _bswap64( r );
    g = _bswap64( g );

    // Per-channel header: base value (scaled by 8 with a +4 offset),
    // multiplier index and modifier-table index.
    const int32_t rbase = ( r >> 56 )*8+4;
    const int32_t rmul = ( r >> 52 ) & 0xF;
    const auto rtbl = g_alpha[( r >> 48 ) & 0xF];
    const auto rscale = g_alpha11Mul[rmul];

    const int32_t gbase = ( g >> 56 )*8+4;
    const int32_t gmul = ( g >> 52 ) & 0xF;
    const auto gtbl = g_alpha[( g >> 48 ) & 0xF];
    const auto gscale = g_alpha11Mul[gmul];

    // 3-bit selectors are stored from bit 45 downwards in column-major order,
    // so walking columns first lets the shift simply drop by 3 per pixel.
    int shift = 45;
    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            const auto rmod = rtbl[( r >> shift ) & 0x7];
            const uint32_t rc = clampu8( ( rbase + rmod * rscale )/8 );
            const auto gmod = gtbl[( g >> shift ) & 0x7];
            const uint32_t gc = clampu8( ( gbase + gmod * gscale )/8 );
            dst[row*w+col] = rc | (gc << 8) | 0xFF000000;
            shift -= 3;
        }
    }
}
// Decodes one 8-byte single-channel block.
//   src   - pointer to the 8 bytes of the compressed block
//   dst   - top-left pixel of the 4x4 output tile (32-bit pixels)
//   width - output row pitch, in pixels
void DecodeRBlock( const void* src, void* dst, size_t width )
{
    // Load with memcpy instead of dereferencing a casted pointer: src is an
    // untyped byte buffer, so it need not be 8-byte aligned and must not be
    // accessed through a uint64_t lvalue.
    uint64_t r;
    memcpy( &r, src, sizeof( r ) );
    DecodeRPart( r, (uint32_t*)dst, width );
}
// Decodes one 16-byte two-channel block: the first 8 bytes hold the first
// channel, the following 8 bytes the second channel.
//   src   - pointer to the 16 bytes of the compressed block
//   dst   - top-left pixel of the 4x4 output tile (32-bit pixels)
//   width - output row pitch, in pixels
void DecodeRGBlock( const void* src, void* dst, size_t width )
{
    // Load with memcpy instead of dereferencing a casted pointer: src is an
    // untyped byte buffer, so it need not be 8-byte aligned and must not be
    // accessed through a uint64_t lvalue.
    uint64_t r;
    uint64_t g;
    memcpy( &r, src, sizeof( r ) );
    memcpy( &g, (const uint8_t*)src + sizeof( r ), sizeof( g ) );
    DecodeRGPart( r, g, (uint32_t*)dst, width );
}
// Decodes one 8-byte RGB colour block into opaque pixels.
//   src   - pointer to the 8 bytes of the compressed block
//   dst   - top-left pixel of the 4x4 output tile (32-bit pixels)
//   width - output row pitch, in pixels
void DecodeRGBBlock( const void* src, void* dst, size_t width )
{
    // Load with memcpy instead of dereferencing a casted pointer: src is an
    // untyped byte buffer, so it need not be 8-byte aligned and must not be
    // accessed through a uint64_t lvalue.
    uint64_t d;
    memcpy( &d, src, sizeof( d ) );
    DecodeRGBPart( d, (uint32_t*)dst, width );
}
// Decodes one 16-byte RGBA block: the first 8 bytes hold the alpha block, the
// following 8 bytes the colour block.
//   src   - pointer to the 16 bytes of the compressed block
//   dst   - top-left pixel of the 4x4 output tile (32-bit pixels)
//   width - output row pitch, in pixels
void DecodeRGBABlock( const void* src, void* dst, size_t width )
{
    // Load with memcpy instead of dereferencing a casted pointer: src is an
    // untyped byte buffer, so it need not be 8-byte aligned and must not be
    // accessed through a uint64_t lvalue.
    uint64_t a;
    uint64_t d;
    memcpy( &a, src, sizeof( a ) );
    memcpy( &d, (const uint8_t*)src + sizeof( a ), sizeof( d ) );
    DecodeRGBAPart( d, a, (uint32_t*)dst, width );
}

12
thirdparty/etcpak/DecodeRGB.hpp vendored Normal file
View File

@@ -0,0 +1,12 @@
#ifndef __DECODERGB_HPP__
#define __DECODERGB_HPP__
#include <stddef.h>
#include <stdint.h>
// Block decoders. Each call decodes a single compressed block from `src` into a
// 4x4 tile of 32-bit pixels starting at `dst`; `width` is the distance between
// consecutive output rows, measured in pixels.

// Reads 8 bytes: one single-channel block. The value lands in the lowest byte
// of each pixel, the top byte is 0xFF.
void DecodeRBlock( const void* src, void* dst, size_t width );
// Reads 16 bytes: two single-channel blocks. Their values land in the two
// lowest bytes of each pixel, the top byte is 0xFF.
void DecodeRGBlock( const void* src, void* dst, size_t width );
// Reads 8 bytes: one RGB colour block. The top byte of each pixel is 0xFF.
void DecodeRGBBlock( const void* src, void* dst, size_t width );
// Reads 16 bytes: an alpha block followed by an RGB colour block.
void DecodeRGBABlock( const void* src, void* dst, size_t width );
#endif

120
thirdparty/etcpak/Dither.cpp vendored Normal file
View File

@@ -0,0 +1,120 @@
#include <algorithm>
#include <string.h>
#include "Dither.hpp"
#include "Math.hpp"
#ifdef __SSE4_1__
# ifdef _MSC_VER
# include <intrin.h>
# include <Windows.h>
# else
# include <x86intrin.h>
# endif
#endif
#ifdef __AVX2__
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
{
static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };
const __m256i BayerAdd0 = _mm256_setr_epi8(
a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
);
const __m256i BayerAdd1 = _mm256_setr_epi8(
a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
);
const __m256i BayerSub0 = _mm256_setr_epi8(
s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
);
const __m256i BayerSub1 = _mm256_setr_epi8(
s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
);
__m256i l0 = _mm256_inserti128_si256( _mm256_castsi128_si256( px0 ), px1, 1 );
__m256i l1 = _mm256_inserti128_si256( _mm256_castsi128_si256( px2 ), px3, 1 );
__m256i a0 = _mm256_adds_epu8( l0, BayerAdd0 );
__m256i a1 = _mm256_adds_epu8( l1, BayerAdd1 );
__m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
__m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
_mm256_storeu_si256( (__m256i*)(data ), s0 );
_mm256_storeu_si256( (__m256i*)(data+32), s1 );
}
#endif
// Applies ordered-dither offsets in place to sixteen consecutive 4-byte pixels
// (64 bytes) at `data`.
// Bytes 0 and 2 of every pixel get the "31" offsets, byte 1 gets the "63"
// offsets, and byte 3 is left unchanged. Results saturate to 0..255.
// The AVX2 tables are the scalar tables split into a positive part (a31/a63,
// added) and a negative part (s31/s63, subtracted).
void Dither( uint8_t* data )
{
#ifdef __AVX2__
    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };

    const __m256i BayerAdd0 = _mm256_setr_epi8(
        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
    );
    const __m256i BayerAdd1 = _mm256_setr_epi8(
        a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
    );
    const __m256i BayerSub0 = _mm256_setr_epi8(
        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
    );
    const __m256i BayerSub1 = _mm256_setr_epi8(
        s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
    );

    __m256i px0 = _mm256_loadu_si256( (__m256i*)(data ) );
    __m256i px1 = _mm256_loadu_si256( (__m256i*)(data+32) );

    // Saturating add of the positive offsets, then saturating subtract of the
    // negative ones; the fourth byte of each pixel gets +0 / -0.
    __m256i a0 = _mm256_adds_epu8( px0, BayerAdd0 );
    __m256i a1 = _mm256_adds_epu8( px1, BayerAdd1 );
    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );

    _mm256_storeu_si256( (__m256i*)(data ), s0 );
    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
#else
    static constexpr int8_t Bayer31[16] = {
        ( 0-8)*2/3, ( 8-8)*2/3, ( 2-8)*2/3, (10-8)*2/3,
        (12-8)*2/3, ( 4-8)*2/3, (14-8)*2/3, ( 6-8)*2/3,
        ( 3-8)*2/3, (11-8)*2/3, ( 1-8)*2/3, ( 9-8)*2/3,
        (15-8)*2/3, ( 7-8)*2/3, (13-8)*2/3, ( 5-8)*2/3
    };
    static constexpr int8_t Bayer63[16] = {
        ( 0-8)*2/6, ( 8-8)*2/6, ( 2-8)*2/6, (10-8)*2/6,
        (12-8)*2/6, ( 4-8)*2/6, (14-8)*2/6, ( 6-8)*2/6,
        ( 3-8)*2/6, (11-8)*2/6, ( 1-8)*2/6, ( 9-8)*2/6,
        (15-8)*2/6, ( 7-8)*2/6, (13-8)*2/6, ( 5-8)*2/6
    };

    for( int i=0; i<16; i++ )
    {
        uint32_t col;
        memcpy( &col, data, 4 );
        uint8_t r = col & 0xFF;
        uint8_t g = ( col >> 8 ) & 0xFF;
        uint8_t b = ( col >> 16 ) & 0xFF;
        r = clampu8( r + Bayer31[i] );
        g = clampu8( g + Bayer63[i] );
        b = clampu8( b + Bayer31[i] );
        // Keep the top byte of the pixel untouched so that this path produces
        // the same output as the AVX2 path, which never modifies it.
        col = ( col & 0xFF000000 ) | r | ( g << 8 ) | ( b << 16 );
        memcpy( data, &col, 4 );
        data += 4;
    }
#endif
}

21
thirdparty/etcpak/Dither.hpp vendored Normal file
View File

@@ -0,0 +1,21 @@
#ifndef __DITHER_HPP__
#define __DITHER_HPP__
#include <stddef.h>
#include <stdint.h>
// The AVX2 entry point below takes __m128i arguments, so the intrinsics header
// is needed whenever AVX2 is enabled.
#ifdef __AVX2__
# ifdef _MSC_VER
# include <intrin.h>
# else
# include <x86intrin.h>
# endif
#endif
// Dithers sixteen consecutive 4-byte pixels (64 bytes) at `data` in place.
void Dither( uint8_t* data );
#ifdef __AVX2__
// Dithers sixteen 4-byte pixels supplied in four registers and writes the
// 64 resulting bytes to `data`.
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 );
#endif
#endif

20
thirdparty/etcpak/ForceInline.hpp vendored Normal file
View File

@@ -0,0 +1,20 @@
#ifndef __FORCEINLINE_HPP__
#define __FORCEINLINE_HPP__
// etcpak_force_inline: function specifier that asks the compiler to always
// inline. Uses the GCC-style attribute when __GNUC__ is defined, __forceinline
// on MSVC, and plain `inline` on any other compiler.
#if defined(__GNUC__)
# define etcpak_force_inline __attribute__((always_inline)) inline
#elif defined(_MSC_VER)
# define etcpak_force_inline __forceinline
#else
# define etcpak_force_inline inline
#endif
// etcpak_no_inline: function specifier that forbids inlining. Expands to
// nothing on compilers other than the two handled here.
#if defined(__GNUC__)
# define etcpak_no_inline __attribute__((noinline))
#elif defined(_MSC_VER)
# define etcpak_no_inline __declspec(noinline)
#else
# define etcpak_no_inline
#endif
#endif

26
thirdparty/etcpak/LICENSE.txt vendored Normal file
View File

@@ -0,0 +1,26 @@
etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
Copyright (c) 2013-2022, Bartosz Taudul <wolf@nereid.pl>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

92
thirdparty/etcpak/Math.hpp vendored Normal file
View File

@@ -0,0 +1,92 @@
#ifndef __DARKRL__MATH_HPP__
#define __DARKRL__MATH_HPP__
#include <algorithm>
#include <cmath>
#include <stdint.h>
#include "ForceInline.hpp"
// Rounds val up to a power of two: a value that already is a power of two is
// returned unchanged and 0 maps to 1. Works by smearing the highest set bit of
// (val - 1) into every lower position and then adding one.
// NOTE: written for integer T; the final +1 wraps if val exceeds the largest
// power of two T can hold.
template<typename T>
static etcpak_force_inline T AlignPOT( T val )
{
    if( val == 0 ) return 1;

    const unsigned int bitCount = static_cast<unsigned int>( sizeof( T ) * 8 );
    T smeared = T( val - 1 );
    unsigned int shift = 1;
    while( shift < bitCount )
    {
        // Doubling the shift each round covers the whole word in log2(bits) steps.
        smeared = T( smeared | ( smeared >> shift ) );
        shift *= 2;
    }
    return smeared + 1;
}
// Returns the number of 1 bits in val (0..32) using bit-parallel partial sums:
// 2-bit counts, then 4-bit, then per-byte, then the bytes are folded together.
static etcpak_force_inline int CountSetBits( uint32_t val )
{
    const uint32_t pairs = val - ( ( val >> 1 ) & 0x55555555 );
    const uint32_t nibbles = ( pairs & 0x33333333 ) + ( ( pairs >> 2 ) & 0x33333333 );
    const uint32_t bytes = ( nibbles + ( nibbles >> 4 ) ) & 0x0f0f0f0f;
    const uint32_t halves = bytes + ( bytes >> 8 );
    const uint32_t total = halves + ( halves >> 16 );
    // Only the low six bits carry the count; the upper bits hold partial sums.
    return total & 0x0000003f;
}
// Returns the number of zero bits above the highest set bit of val
// (32 when val is 0). The highest set bit is first smeared into all lower
// positions, so the population count equals 32 minus the leading zeros.
static etcpak_force_inline int CountLeadingZeros( uint32_t val )
{
    for( unsigned int shift = 1; shift <= 16; shift <<= 1 )
    {
        val |= val >> shift;
    }
    return 32 - CountSetBits( val );
}
// Maps an sRGB-encoded component to linear light with the piecewise curve
// coded below: v / 12.92 at or below 0.04045, otherwise
// ((v + 0.055) / 1.055) ^ 2.4.
// NOTE(review): presumably v is normalized to [0,1] - confirm with callers.
static etcpak_force_inline float sRGB2linear( float v )
{
    if( v <= 0.04045f ) return v / 12.92f;

    const float offset = 0.055f;
    return pow( ( v + offset ) / ( 1 + offset ), 2.4f );
}
// Maps a linear-light component to its sRGB encoding with the piecewise curve
// coded below: 12.92 * v at or below 0.0031308, otherwise
// 1.055 * v ^ (1 / 2.4) - 0.055.
// NOTE(review): presumably v is normalized to [0,1] - confirm with callers.
static etcpak_force_inline float linear2sRGB( float v )
{
    if( v <= 0.0031308f ) return 12.92f * v;

    const float offset = 0.055f;
    return ( 1 + offset ) * pow( v, 1/2.4f ) - offset;
}
// Evaluates the cubic x^2 * (3 - 2x). The input is not clamped here.
template<class T>
static etcpak_force_inline T SmoothStep( T x )
{
    const auto squared = x * x;
    return squared * ( 3 - 2 * x );
}
// Saturates a signed 32-bit value to the 0..255 range.
static etcpak_force_inline uint8_t clampu8( int32_t val )
{
    // No bits outside the low byte means val is already within 0..255.
    const bool fitsInByte = ( val & ~0xFF ) == 0;
    // Otherwise the sign of ~val picks the bound: a negative val gives 0 and a
    // too-large val gives 0xFF (this relies on >> of a negative int being an
    // arithmetic shift).
    return fitsInByte ? val : ( ( ~val ) >> 31 ) & 0xFF;
}
// Returns val multiplied by itself.
template<class T>
static etcpak_force_inline T sq( T val )
{
    const auto product = val * val;
    return product;
}
// Computes ( t + ( t >> 8 ) ) >> 8 with t = a * b + 128, a division-free
// stand-in for a * b / 255 with rounding.
// NOTE(review): presumably a and b are 8-bit quantities (0..255) - confirm.
static etcpak_force_inline int mul8bit( int a, int b )
{
    const int biased = a * b + 128;
    const int folded = biased + ( biased >> 8 );
    return folded >> 8;
}
#endif

50
thirdparty/etcpak/ProcessCommon.hpp vendored Normal file
View File

@@ -0,0 +1,50 @@
#ifndef __PROCESSCOMMON_HPP__
#define __PROCESSCOMMON_HPP__
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
// Returns the index of the smallest element among err[0..num). When several
// elements tie for the minimum the lowest index wins; num == 0 (and num == 1)
// yield 0 without reading past err[0].
template<class T>
static size_t GetLeastError( const T* err, size_t num )
{
    size_t best = 0;
    size_t candidate = 1;
    while( candidate < num )
    {
        // Strict comparison keeps the earliest index on ties.
        if( err[candidate] < err[best] ) best = candidate;
        ++candidate;
    }
    return best;
}
// Reverses the byte order of the upper 32 bits of d and leaves the lower
// 32 bits untouched.
static uint64_t FixByteOrder( uint64_t d )
{
    const uint64_t low = d & 0xFFFFFFFFull;
    const uint32_t high = static_cast<uint32_t>( d >> 32 );

    // Plain 32-bit byte swap of the high word.
    const uint32_t swapped =
        ( high >> 24 ) |
        ( ( high >> 8 ) & 0x0000FF00u ) |
        ( ( high << 8 ) & 0x00FF0000u ) |
        ( high << 24 );

    return ( static_cast<uint64_t>( swapped ) << 32 ) | low;
}
// Finishes a 64-bit block word by ORing table indices and per-pixel selectors
// into d:
//   - the index of the smallest terr[0] entry goes to bits 26..28 and that of
//     terr[1] to bits 29..31;
//   - for each of the 16 entries i, the selector is taken from tsel[i] at the
//     winning index of half ( id[i] % 2 ); its bit 0 goes to bit 32 + i and
//     its bit 1 to bit 48 + i.
// Returns the updated word; bits already set in d are preserved.
template<class T, class S>
static uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id )
{
    const size_t best[2] = { GetLeastError( terr[0], 8 ), GetLeastError( terr[1], 8 ) };

    d |= best[0] << 26;
    d |= best[1] << 29;

    for( int px = 0; px < 16; ++px )
    {
        const uint64_t selector = tsel[px][best[id[px] % 2]];
        const uint64_t lowBit = selector & 0x1;
        // Kept at value 2 (bit 1), hence the shift by px + 47 to reach bit px + 48.
        const uint64_t highBit = selector & 0x2;
        d |= ( lowBit << ( px + 32 ) ) | ( highBit << ( px + 47 ) );
    }
    return d;
}
#endif

1086
thirdparty/etcpak/ProcessDxtc.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

14
thirdparty/etcpak/ProcessDxtc.hpp vendored Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __PROCESSDXT1_HPP__
#define __PROCESSDXT1_HPP__
#include <stddef.h>
#include <stdint.h>
// Block compressors for the BC1, BC1 (dithered), BC3, BC4 and BC5 formats.
// Only the declarations live here; all five share one signature:
//   src    - input pixels, read as uint32_t
//   dst    - output buffer, written as uint64_t words
//   blocks - number of blocks to encode
//   width  - NOTE(review): presumably the source image width in pixels, used
//            as the row stride of src - confirm against ProcessDxtc.cpp
// NOTE(review): presumably blocks must be non-zero - confirm against
// ProcessDxtc.cpp.
void CompressBc1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc3( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
#endif

4210
thirdparty/etcpak/ProcessRGB.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

14
thirdparty/etcpak/ProcessRGB.hpp vendored Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __PROCESSRGB_HPP__
#define __PROCESSRGB_HPP__
// size_t comes from <stddef.h>; <stdint.h> alone is not guaranteed to declare
// it, so include both to keep this header self-contained.
#include <stddef.h>
#include <stdint.h>

// Block compressors for ETC1, ETC2 (RGB / RGBA) and EAC (R / RG).
// Only the declarations live here. Common parameters:
//   src    - input pixels, read as uint32_t
//   dst    - output buffer, written as uint64_t words
//   blocks - number of blocks to encode
//   width  - NOTE(review): presumably the source image width in pixels, used
//            as the row stride of src - confirm against ProcessRGB.cpp
//   useHeuristics (ETC2 only) - NOTE(review): presumably trades quality for
//            speed by skipping some mode searches - confirm against ProcessRGB.cpp
void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
void CompressEacR( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressEacRg( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
#endif

223
thirdparty/etcpak/Tables.cpp vendored Normal file
View File

@@ -0,0 +1,223 @@
#include "Tables.hpp"
// Eight rows of four signed offsets; every row has the shape { a, b, -a, -b }
// with a < b, and both magnitudes grow with the row index.
// NOTE(review): the values match the ETC1 intensity modifier table - confirm
// against the Khronos specification.
const int32_t g_table[8][4] = {
{ 2, 8, -2, -8 },
{ 5, 17, -5, -17 },
{ 9, 29, -9, -29 },
{ 13, 42, -13, -42 },
{ 18, 60, -18, -60 },
{ 24, 80, -24, -80 },
{ 33, 106, -33, -106 },
{ 47, 183, -47, -183 }
};
// Same layout as g_table with every entry multiplied by 256, stored as
// 64-bit integers.
const int64_t g_table256[8][4] = {
{ 2*256, 8*256, -2*256, -8*256 },
{ 5*256, 17*256, -5*256, -17*256 },
{ 9*256, 29*256, -9*256, -29*256 },
{ 13*256, 42*256, -13*256, -42*256 },
{ 18*256, 60*256, -18*256, -60*256 },
{ 24*256, 80*256, -24*256, -80*256 },
{ 33*256, 106*256, -33*256, -106*256 },
{ 47*256, 183*256, -47*256, -183*256 }
};
// Four maps that assign each of 16 positions to one of two ids: row 0 uses
// ids 0/1, row 1 ids 2/3, row 2 ids 4/5 and row 3 ids 6/7. Rows 0 and 2 split
// the positions into the first and last eight; rows 1 and 3 alternate in
// groups of two.
// NOTE(review): presumably the 16 positions are the pixels of a 4x4 block and
// the rows describe the possible sub-block partitions - confirm with users.
const uint32_t g_id[4][16] = {
{ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
{ 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
{ 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
};
// Entry i equals i * 0x11, i.e. the 4-bit value i replicated into both
// nibbles of a byte (0x0 -> 0x00, 0xF -> 0xFF).
const uint32_t g_avg2[16] = {
0x00,
0x11,
0x22,
0x33,
0x44,
0x55,
0x66,
0x77,
0x88,
0x99,
0xAA,
0xBB,
0xCC,
0xDD,
0xEE,
0xFF
};
// 64-entry lookup of 32-bit flag words. The entries form four groups of 16
// that differ only in their top bits (0x80800000, 0x80000000, 0x00800000,
// 0x00000000); inside every group the low half is 0x0402 or 0xE002 in the
// same pattern.
// NOTE(review): the meaning of the index and of the individual flag bits is
// defined by the code that reads this table, which is not visible here.
const uint32_t g_flags[64] = {
0x80800402, 0x80800402, 0x80800402, 0x80800402,
0x80800402, 0x80800402, 0x80800402, 0x8080E002,
0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
0x80000402, 0x80000402, 0x80000402, 0x80000402,
0x80000402, 0x80000402, 0x80000402, 0x8000E002,
0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
0x00800402, 0x00800402, 0x00800402, 0x00800402,
0x00800402, 0x00800402, 0x00800402, 0x0080E002,
0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
0x00000402, 0x00000402, 0x00000402, 0x00000402,
0x00000402, 0x00000402, 0x00000402, 0x0000E002,
0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
};
// Sixteen rows of eight signed modifiers. In every row the last four entries
// mirror the first four as -( value ) - 1 (e.g. -3 -> 2, -15 -> 14), so
// column 3 holds the most negative and column 7 the most positive modifier.
// NOTE(review): presumably the EAC / ETC2 alpha modifier tables - confirm
// against the Khronos specification.
const int32_t g_alpha[16][8] = {
{ -3, -6, -9, -15, 2, 5, 8, 14 },
{ -3, -7, -10, -13, 2, 6, 9, 12 },
{ -2, -5, -8, -13, 1, 4, 7, 12 },
{ -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 },
{ -3, -7, -9, -11, 2, 6, 8, 10 },
{ -4, -7, -8, -11, 3, 6, 7, 10 },
{ -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 },
{ -2, -5, -8, -10, 1, 4, 7, 9 },
{ -2, -4, -8, -10, 1, 3, 7, 9 },
{ -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 },
{ -1, -2, -3, -10, 0, 1, 2, 9 },
{ -4, -6, -8, -9, 3, 5, 7, 8 },
{ -3, -5, -7, -9, 2, 4, 6, 8 }
};
// Entry i is 8 * i, except entry 0 which is 1 instead of 0.
// NOTE(review): presumably the multiplier lookup for 11-bit EAC encoding - confirm.
const int32_t g_alpha11Mul[16] = { 1, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120 };
// Entry i is 0x100FF (65791) integer-divided by the span of row i of g_alpha,
// where the span is 1 + ( largest modifier, column 7 ) - ( smallest modifier,
// column 3 ). With the current g_alpha values the divisors are 30, 26, 24, 22,
// 20 and 18.
// The initializers read g_alpha, so g_alpha must stay defined above this
// table in the same translation unit.
const int32_t g_alphaRange[16] = {
0x100FF / ( 1 + g_alpha[0][7] - g_alpha[0][3] ),
0x100FF / ( 1 + g_alpha[1][7] - g_alpha[1][3] ),
0x100FF / ( 1 + g_alpha[2][7] - g_alpha[2][3] ),
0x100FF / ( 1 + g_alpha[3][7] - g_alpha[3][3] ),
0x100FF / ( 1 + g_alpha[4][7] - g_alpha[4][3] ),
0x100FF / ( 1 + g_alpha[5][7] - g_alpha[5][3] ),
0x100FF / ( 1 + g_alpha[6][7] - g_alpha[6][3] ),
0x100FF / ( 1 + g_alpha[7][7] - g_alpha[7][3] ),
0x100FF / ( 1 + g_alpha[8][7] - g_alpha[8][3] ),
0x100FF / ( 1 + g_alpha[9][7] - g_alpha[9][3] ),
0x100FF / ( 1 + g_alpha[10][7] - g_alpha[10][3] ),
0x100FF / ( 1 + g_alpha[11][7] - g_alpha[11][3] ),
0x100FF / ( 1 + g_alpha[12][7] - g_alpha[12][3] ),
0x100FF / ( 1 + g_alpha[13][7] - g_alpha[13][3] ),
0x100FF / ( 1 + g_alpha[14][7] - g_alpha[14][3] ),
0x100FF / ( 1 + g_alpha[15][7] - g_alpha[15][3] ),
};
#ifdef __SSE4_1__
// g_table repacked by column as eight 16-bit lanes: element 0 holds column 0
// (the small offsets) of rows 0..7, element 1 holds column 1 (the large
// offsets). The negated columns 2 and 3 are not stored.
const __m128i g_table_SIMD[2] =
{
_mm_setr_epi16( 2, 5, 9, 13, 18, 24, 33, 47),
_mm_setr_epi16( 8, 17, 29, 42, 60, 80, 106, 183)
};
// Same column layout as g_table_SIMD with every lane multiplied by 128.
// The largest lane, 183 * 128 = 23424, still fits a signed 16-bit lane.
const __m128i g_table128_SIMD[2] =
{
_mm_setr_epi16( 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128),
_mm_setr_epi16( 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128)
};
// Columns 0 and 1 of g_table multiplied by 256, as four 32-bit lanes each:
//   [0] column 0, rows 0..3    [1] column 1, rows 0..3
//   [2] column 0, rows 4..7    [3] column 1, rows 4..7
const __m128i g_table256_SIMD[4] =
{
_mm_setr_epi32( 2*256, 5*256, 9*256, 13*256),
_mm_setr_epi32( 8*256, 17*256, 29*256, 42*256),
_mm_setr_epi32( 18*256, 24*256, 33*256, 47*256),
_mm_setr_epi32( 60*256, 80*256, 106*256, 183*256)
};
// Row-major copy of g_alpha: element i packs the eight modifiers of row i as
// 16-bit lanes, in column order 0..7. The initializers read g_alpha, so this
// table must stay below it in the same translation unit.
const __m128i g_alpha_SIMD[16] = {
_mm_setr_epi16( g_alpha[ 0][0], g_alpha[ 0][1], g_alpha[ 0][2], g_alpha[ 0][3], g_alpha[ 0][4], g_alpha[ 0][5], g_alpha[ 0][6], g_alpha[ 0][7] ),
_mm_setr_epi16( g_alpha[ 1][0], g_alpha[ 1][1], g_alpha[ 1][2], g_alpha[ 1][3], g_alpha[ 1][4], g_alpha[ 1][5], g_alpha[ 1][6], g_alpha[ 1][7] ),
_mm_setr_epi16( g_alpha[ 2][0], g_alpha[ 2][1], g_alpha[ 2][2], g_alpha[ 2][3], g_alpha[ 2][4], g_alpha[ 2][5], g_alpha[ 2][6], g_alpha[ 2][7] ),
_mm_setr_epi16( g_alpha[ 3][0], g_alpha[ 3][1], g_alpha[ 3][2], g_alpha[ 3][3], g_alpha[ 3][4], g_alpha[ 3][5], g_alpha[ 3][6], g_alpha[ 3][7] ),
_mm_setr_epi16( g_alpha[ 4][0], g_alpha[ 4][1], g_alpha[ 4][2], g_alpha[ 4][3], g_alpha[ 4][4], g_alpha[ 4][5], g_alpha[ 4][6], g_alpha[ 4][7] ),
_mm_setr_epi16( g_alpha[ 5][0], g_alpha[ 5][1], g_alpha[ 5][2], g_alpha[ 5][3], g_alpha[ 5][4], g_alpha[ 5][5], g_alpha[ 5][6], g_alpha[ 5][7] ),
_mm_setr_epi16( g_alpha[ 6][0], g_alpha[ 6][1], g_alpha[ 6][2], g_alpha[ 6][3], g_alpha[ 6][4], g_alpha[ 6][5], g_alpha[ 6][6], g_alpha[ 6][7] ),
_mm_setr_epi16( g_alpha[ 7][0], g_alpha[ 7][1], g_alpha[ 7][2], g_alpha[ 7][3], g_alpha[ 7][4], g_alpha[ 7][5], g_alpha[ 7][6], g_alpha[ 7][7] ),
_mm_setr_epi16( g_alpha[ 8][0], g_alpha[ 8][1], g_alpha[ 8][2], g_alpha[ 8][3], g_alpha[ 8][4], g_alpha[ 8][5], g_alpha[ 8][6], g_alpha[ 8][7] ),
_mm_setr_epi16( g_alpha[ 9][0], g_alpha[ 9][1], g_alpha[ 9][2], g_alpha[ 9][3], g_alpha[ 9][4], g_alpha[ 9][5], g_alpha[ 9][6], g_alpha[ 9][7] ),
_mm_setr_epi16( g_alpha[10][0], g_alpha[10][1], g_alpha[10][2], g_alpha[10][3], g_alpha[10][4], g_alpha[10][5], g_alpha[10][6], g_alpha[10][7] ),
_mm_setr_epi16( g_alpha[11][0], g_alpha[11][1], g_alpha[11][2], g_alpha[11][3], g_alpha[11][4], g_alpha[11][5], g_alpha[11][6], g_alpha[11][7] ),
_mm_setr_epi16( g_alpha[12][0], g_alpha[12][1], g_alpha[12][2], g_alpha[12][3], g_alpha[12][4], g_alpha[12][5], g_alpha[12][6], g_alpha[12][7] ),
_mm_setr_epi16( g_alpha[13][0], g_alpha[13][1], g_alpha[13][2], g_alpha[13][3], g_alpha[13][4], g_alpha[13][5], g_alpha[13][6], g_alpha[13][7] ),
_mm_setr_epi16( g_alpha[14][0], g_alpha[14][1], g_alpha[14][2], g_alpha[14][3], g_alpha[14][4], g_alpha[14][5], g_alpha[14][6], g_alpha[14][7] ),
_mm_setr_epi16( g_alpha[15][0], g_alpha[15][1], g_alpha[15][2], g_alpha[15][3], g_alpha[15][4], g_alpha[15][5], g_alpha[15][6], g_alpha[15][7] ),
};
// Six g_alphaRange entries packed as 16-bit lanes, followed by two zero lanes.
// Indices 0, 1, 4, 5, 8 and 14 are the first rows for each distinct divisor
// (30, 26, 24, 22, 20, 18) in g_alphaRange, so together they cover every
// distinct value of that table exactly once.
const __m128i g_alphaRange_SIMD = _mm_setr_epi16(
g_alphaRange[0],
g_alphaRange[1],
g_alphaRange[4],
g_alphaRange[5],
g_alphaRange[8],
g_alphaRange[14],
0,
0 );
#endif
#ifdef __AVX2__
// Transposed copy of g_alpha: element j packs column j of all sixteen rows as
// 16-bit lanes (lane r holds g_alpha[r][j]). The initializers read g_alpha,
// so this table must stay below it in the same translation unit.
const __m256i g_alpha_AVX[8] = {
_mm256_setr_epi16( g_alpha[ 0][0], g_alpha[ 1][0], g_alpha[ 2][0], g_alpha[ 3][0], g_alpha[ 4][0], g_alpha[ 5][0], g_alpha[ 6][0], g_alpha[ 7][0], g_alpha[ 8][0], g_alpha[ 9][0], g_alpha[10][0], g_alpha[11][0], g_alpha[12][0], g_alpha[13][0], g_alpha[14][0], g_alpha[15][0] ),
_mm256_setr_epi16( g_alpha[ 0][1], g_alpha[ 1][1], g_alpha[ 2][1], g_alpha[ 3][1], g_alpha[ 4][1], g_alpha[ 5][1], g_alpha[ 6][1], g_alpha[ 7][1], g_alpha[ 8][1], g_alpha[ 9][1], g_alpha[10][1], g_alpha[11][1], g_alpha[12][1], g_alpha[13][1], g_alpha[14][1], g_alpha[15][1] ),
_mm256_setr_epi16( g_alpha[ 0][2], g_alpha[ 1][2], g_alpha[ 2][2], g_alpha[ 3][2], g_alpha[ 4][2], g_alpha[ 5][2], g_alpha[ 6][2], g_alpha[ 7][2], g_alpha[ 8][2], g_alpha[ 9][2], g_alpha[10][2], g_alpha[11][2], g_alpha[12][2], g_alpha[13][2], g_alpha[14][2], g_alpha[15][2] ),
_mm256_setr_epi16( g_alpha[ 0][3], g_alpha[ 1][3], g_alpha[ 2][3], g_alpha[ 3][3], g_alpha[ 4][3], g_alpha[ 5][3], g_alpha[ 6][3], g_alpha[ 7][3], g_alpha[ 8][3], g_alpha[ 9][3], g_alpha[10][3], g_alpha[11][3], g_alpha[12][3], g_alpha[13][3], g_alpha[14][3], g_alpha[15][3] ),
_mm256_setr_epi16( g_alpha[ 0][4], g_alpha[ 1][4], g_alpha[ 2][4], g_alpha[ 3][4], g_alpha[ 4][4], g_alpha[ 5][4], g_alpha[ 6][4], g_alpha[ 7][4], g_alpha[ 8][4], g_alpha[ 9][4], g_alpha[10][4], g_alpha[11][4], g_alpha[12][4], g_alpha[13][4], g_alpha[14][4], g_alpha[15][4] ),
_mm256_setr_epi16( g_alpha[ 0][5], g_alpha[ 1][5], g_alpha[ 2][5], g_alpha[ 3][5], g_alpha[ 4][5], g_alpha[ 5][5], g_alpha[ 6][5], g_alpha[ 7][5], g_alpha[ 8][5], g_alpha[ 9][5], g_alpha[10][5], g_alpha[11][5], g_alpha[12][5], g_alpha[13][5], g_alpha[14][5], g_alpha[15][5] ),
_mm256_setr_epi16( g_alpha[ 0][6], g_alpha[ 1][6], g_alpha[ 2][6], g_alpha[ 3][6], g_alpha[ 4][6], g_alpha[ 5][6], g_alpha[ 6][6], g_alpha[ 7][6], g_alpha[ 8][6], g_alpha[ 9][6], g_alpha[10][6], g_alpha[11][6], g_alpha[12][6], g_alpha[13][6], g_alpha[14][6], g_alpha[15][6] ),
_mm256_setr_epi16( g_alpha[ 0][7], g_alpha[ 1][7], g_alpha[ 2][7], g_alpha[ 3][7], g_alpha[ 4][7], g_alpha[ 5][7], g_alpha[ 6][7], g_alpha[ 7][7], g_alpha[ 8][7], g_alpha[ 9][7], g_alpha[10][7], g_alpha[11][7], g_alpha[12][7], g_alpha[13][7], g_alpha[14][7], g_alpha[15][7] ),
};
// All sixteen g_alphaRange entries packed in index order as 16-bit lanes
// (unlike g_alphaRange_SIMD, which keeps only the six distinct values).
const __m256i g_alphaRange_AVX = _mm256_setr_epi16(
g_alphaRange[ 0], g_alphaRange[ 1], g_alphaRange[ 2], g_alphaRange[ 3], g_alphaRange[ 4], g_alphaRange[ 5], g_alphaRange[ 6], g_alphaRange[ 7],
g_alphaRange[ 8], g_alphaRange[ 9], g_alphaRange[10], g_alphaRange[11], g_alphaRange[12], g_alphaRange[13], g_alphaRange[14], g_alphaRange[15]
);
#endif
#ifdef __ARM_NEON
// NEON counterpart of g_table128_SIMD: columns 0 and 1 of g_table for rows
// 0..7, each lane multiplied by 128.
const int16x8_t g_table128_NEON[2] =
{
{ 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128 },
{ 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128 }
};
// NEON counterpart of g_table256_SIMD: columns 0 and 1 of g_table times 256,
// four 32-bit lanes each:
//   [0] column 0, rows 0..3    [1] column 1, rows 0..3
//   [2] column 0, rows 4..7    [3] column 1, rows 4..7
const int32x4_t g_table256_NEON[4] =
{
{ 2*256, 5*256, 9*256, 13*256 },
{ 8*256, 17*256, 29*256, 42*256 },
{ 18*256, 24*256, 33*256, 47*256 },
{ 60*256, 80*256, 106*256, 183*256 }
};
// NEON copy of g_alpha with 16-bit lanes, one vector per row. The values are
// repeated as literals here rather than read from g_alpha, so the two tables
// must be edited together.
const int16x8_t g_alpha_NEON[16] =
{
{ -3, -6, -9, -15, 2, 5, 8, 14 },
{ -3, -7, -10, -13, 2, 6, 9, 12 },
{ -2, -5, -8, -13, 1, 4, 7, 12 },
{ -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 },
{ -3, -7, -9, -11, 2, 6, 8, 10 },
{ -4, -7, -8, -11, 3, 6, 7, 10 },
{ -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 },
{ -2, -5, -8, -10, 1, 4, 7, 9 },
{ -2, -4, -8, -10, 1, 3, 7, 9 },
{ -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 },
{ -1, -2, -3, -10, 0, 1, 2, 9 },
{ -4, -6, -8, -9, 3, 5, 7, 8 },
{ -3, -5, -7, -9, 2, 4, 6, 8 }
};
// NEON counterpart of g_alphaRange_SIMD: g_alphaRange entries 0, 1, 4, 5, 8
// and 14 (one per distinct value of that table) narrowed to 16 bits, followed
// by two zero lanes.
const int16x8_t g_alphaRange_NEON =
{
(int16_t)g_alphaRange[0],
(int16_t)g_alphaRange[1],
(int16_t)g_alphaRange[4],
(int16_t)g_alphaRange[5],
(int16_t)g_alphaRange[8],
(int16_t)g_alphaRange[14],
0,
0
};
#endif

50
thirdparty/etcpak/Tables.hpp vendored Normal file
View File

@@ -0,0 +1,50 @@
#ifndef __TABLES_HPP__
#define __TABLES_HPP__
#include <stdint.h>
#ifdef __AVX2__
# include <immintrin.h>
#endif
#ifdef __SSE4_1__
# include <smmintrin.h>
#endif
#ifdef __ARM_NEON
# include <arm_neon.h>
#endif
// Scalar lookup tables; the definitions and their values live in Tables.cpp.
extern const int32_t g_table[8][4];
extern const int64_t g_table256[8][4];
extern const uint32_t g_id[4][16];
extern const uint32_t g_avg2[16];
extern const uint32_t g_flags[64];
extern const int32_t g_alpha[16][8];
extern const int32_t g_alpha11Mul[16];
extern const int32_t g_alphaRange[16];
// SSE4.1 repackings of the tables above (only declared when __SSE4_1__ is set).
#ifdef __SSE4_1__
extern const __m128i g_table_SIMD[2];
extern const __m128i g_table128_SIMD[2];
extern const __m128i g_table256_SIMD[4];
extern const __m128i g_alpha_SIMD[16];
extern const __m128i g_alphaRange_SIMD;
#endif
// AVX2 repackings (only declared when __AVX2__ is set).
#ifdef __AVX2__
extern const __m256i g_alpha_AVX[8];
extern const __m256i g_alphaRange_AVX;
#endif
// ARM NEON repackings (only declared when __ARM_NEON is set).
#ifdef __ARM_NEON
extern const int16x8_t g_table128_NEON[2];
extern const int32x4_t g_table256_NEON[4];
extern const int16x8_t g_alpha_NEON[16];
extern const int16x8_t g_alphaRange_NEON;
#endif
#endif

222
thirdparty/etcpak/Vector.hpp vendored Normal file
View File

@@ -0,0 +1,222 @@
#ifndef __DARKRL__VECTOR_HPP__
#define __DARKRL__VECTOR_HPP__
#include <assert.h>
#include <algorithm>
#include <math.h>
#include <stdint.h>
#include "Math.hpp"
// Minimal two-component vector with component-wise compound arithmetic.
template<class T>
struct Vector2
{
    T x, y;

    // Zero vector.
    Vector2() : x( 0 ), y( 0 ) {}
    // Broadcasts one value to both components (intentionally implicit).
    Vector2( T v ) : x( v ), y( v ) {}
    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}

    // Exact component-wise comparison.
    bool operator==( const Vector2<T>& other ) const
    {
        return x == other.x && y == other.y;
    }

    bool operator!=( const Vector2<T>& other ) const
    {
        const bool same = ( *this == other );
        return !same;
    }

    // Compound operators work per component and return *this for chaining.
    Vector2<T>& operator+=( const Vector2<T>& other )
    {
        x += other.x;
        y += other.y;
        return *this;
    }

    Vector2<T>& operator-=( const Vector2<T>& other )
    {
        x -= other.x;
        y -= other.y;
        return *this;
    }

    Vector2<T>& operator*=( const Vector2<T>& other )
    {
        x *= other.x;
        y *= other.y;
        return *this;
    }
};
// Component-wise sum of two Vector2 values.
template<class T>
Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
    const T sumX = lhs.x + rhs.x;
    const T sumY = lhs.y + rhs.y;
    return Vector2<T>( sumX, sumY );
}
// Component-wise difference of two Vector2 values.
template<class T>
Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
    const T diffX = lhs.x - rhs.x;
    const T diffY = lhs.y - rhs.y;
    return Vector2<T>( diffX, diffY );
}
// Scales both components by a float factor. The products are computed in
// floating point and then converted back to T.
template<class T>
Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
{
    const T scaledX = lhs.x * rhs;
    const T scaledY = lhs.y * rhs;
    return Vector2<T>( scaledX, scaledY );
}
// Divides both components by a scalar of the component type.
template<class T>
Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
{
    const T quotX = lhs.x / rhs;
    const T quotY = lhs.y / rhs;
    return Vector2<T>( quotX, quotY );
}
// Short names for the Vector2 instantiations used by the library.
using v2i = Vector2<int32_t>;
using v2f = Vector2<float>;
// Three-component vector (used for colours, see Luminance) with a fourth,
// unused padding element.
template<class T>
struct Vector3
{
    // Zero vector.
    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
    // Broadcasts one value to all three components (intentionally implicit).
    Vector3( T v ) : x( v ), y( v ), z( v ) {}
    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
    // Converts from a vector of another component type with a plain T( ... )
    // cast per component; no clamping or scaling is applied.
    template<class Y>
    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}

    // Weighted sum 0.3 * x + 0.59 * y + 0.11 * z, computed in float and
    // converted back to T.
    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }

    // Clamps every component to the range [0, 1] in place.
    void Clamp()
    {
        x = std::min( T(1), std::max( T(0), x ) );
        y = std::min( T(1), std::max( T(0), y ) );
        z = std::min( T(1), std::max( T(0), z ) );
    }

    // Exact component-wise comparison; padding is ignored.
    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
    // Takes a Vector3: the previous Vector2 parameter made `a != b` on two
    // Vector3 values fail to compile.
    bool operator!=( const Vector3<T>& rhs ) const { return !( *this == rhs ); }

    // Indexed component access (0 = x, 1 = y, 2 = z). Relies on x, y and z
    // being laid out contiguously at the start of the object.
    T& operator[]( unsigned int idx ) { assert( idx < 3 ); return reinterpret_cast<T*>( this )[idx]; }
    const T& operator[]( unsigned int idx ) const { assert( idx < 3 ); return reinterpret_cast<const T*>( this )[idx]; }

    // Component-wise compound operators. They modify *this and, unlike the
    // Vector2 equivalents, return a copy rather than a reference.
    Vector3<T> operator+=( const Vector3<T>& rhs )
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        return *this;
    }

    Vector3<T> operator*=( const Vector3<T>& rhs )
    {
        x *= rhs.x;
        y *= rhs.y;
        z *= rhs.z;
        return *this;
    }

    Vector3<T> operator*=( const float& rhs )
    {
        x *= rhs;
        y *= rhs;
        z *= rhs;
        return *this;
    }

    T x, y, z;
    // Never written by the constructors and never read by the operators.
    // NOTE(review): presumably rounds the struct up to four elements for
    // alignment - confirm before removing.
    T padding;
};
// Component-wise sum of two Vector3 values.
template<class T>
Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T sumX = lhs.x + rhs.x;
    const T sumY = lhs.y + rhs.y;
    const T sumZ = lhs.z + rhs.z;
    return Vector3<T>( sumX, sumY, sumZ );
}
// Component-wise difference of two Vector3 values.
template<class T>
Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T diffX = lhs.x - rhs.x;
    const T diffY = lhs.y - rhs.y;
    const T diffZ = lhs.z - rhs.z;
    return Vector3<T>( diffX, diffY, diffZ );
}
// Component-wise product of two Vector3 values.
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T prodX = lhs.x * rhs.x;
    const T prodY = lhs.y * rhs.y;
    const T prodZ = lhs.z * rhs.z;
    return Vector3<T>( prodX, prodY, prodZ );
}
// Scales all three components by a float factor. Each product is computed in
// floating point and explicitly converted back to T.
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
{
    const T scaledX = T( lhs.x * rhs );
    const T scaledY = T( lhs.y * rhs );
    const T scaledZ = T( lhs.z * rhs );
    return Vector3<T>( scaledX, scaledY, scaledZ );
}
// Divides all three components by a scalar of the component type.
template<class T>
Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
{
    const T quotX = lhs.x / rhs;
    const T quotY = lhs.y / rhs;
    const T quotZ = lhs.z / rhs;
    return Vector3<T>( quotX, quotY, quotZ );
}
// Orders vectors by Luminance(): lhs sorts first when it is darker than rhs.
// Two different colours with equal luminance compare as equivalent.
template<class T>
bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
    const T left = lhs.Luminance();
    const T right = rhs.Luminance();
    return left < right;
}
// Short names for the Vector3 instantiations used by the library.
using v3i = Vector3<int32_t>;
using v3f = Vector3<float>;
using v3b = Vector3<uint8_t>;
// Converts a float colour to 8 bits per channel: each component is clamped to
// [0, 1], multiplied by 255 and truncated.
// The lower clamp matters: converting a float below -1 to uint8_t is undefined
// behaviour, and the previous code only clamped the upper end. Results for
// inputs that were already inside [0, 1] are unchanged.
static inline v3b v3f_to_v3b( const v3f& v )
{
    const float r = std::max( 0.f, std::min( 1.f, v.x ) );
    const float g = std::max( 0.f, std::min( 1.f, v.y ) );
    const float b = std::max( 0.f, std::min( 1.f, v.z ) );
    return v3b( uint8_t( r * 255 ), uint8_t( g * 255 ), uint8_t( b * 255 ) );
}
// Linear interpolation from v1 to v2: amount 0 yields v1 and amount 1 yields
// v1 + ( v2 - v1 ). The amount is not clamped.
template<class T>
Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
{
    const Vector3<T> delta = v2 - v1;
    return v1 + delta * amount;
}
// 8-bit specialisation of Mix: interpolates in float so that v2 - v1 cannot
// wrap around in uint8_t, then converts the result back per component.
template<>
inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
{
    const v3f from( v1 );
    const v3f to( v2 );
    return v3b( from + ( to - from ) * amount );
}
// Returns a grey vector whose three components all equal v.Luminance().
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v )
{
    const T grey = v.Luminance();
    return Vector3<T>( grey, grey, grey );
}
// Returns a grey vector whose three components all equal v.Luminance() * mul,
// converted back to T.
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v, float mul )
{
    const T grey = T( v.Luminance() * mul );
    return Vector3<T>( grey, grey, grey );
}
// Raises every component of base to the given exponent using the scalar pow
// overloads, converting each result back to T.
template<class T>
Vector3<T> pow( const Vector3<T>& base, float exponent )
{
    const T px = pow( base.x, exponent );
    const T py = pow( base.y, exponent );
    const T pz = pow( base.z, exponent );
    return Vector3<T>( px, py, pz );
}
// Applies the scalar sRGB2linear() to each component independently.
template<class T>
Vector3<T> sRGB2linear( const Vector3<T>& v )
{
    const T lx = sRGB2linear( v.x );
    const T ly = sRGB2linear( v.y );
    const T lz = sRGB2linear( v.z );
    return Vector3<T>( lx, ly, lz );
}
// Applies the scalar linear2sRGB() to each component independently.
template<class T>
Vector3<T> linear2sRGB( const Vector3<T>& v )
{
    const T sx = linear2sRGB( v.x );
    const T sy = linear2sRGB( v.y );
    const T sz = linear2sRGB( v.z );
    return Vector3<T>( sx, sy, sz );
}
#endif

View File

@@ -0,0 +1,52 @@
diff --git a/thirdparty/etcpak/ProcessDxtc.cpp b/thirdparty/etcpak/ProcessDxtc.cpp
index 5373b75cdc..e1bc6a5cb6 100644
--- a/thirdparty/etcpak/ProcessDxtc.cpp
+++ b/thirdparty/etcpak/ProcessDxtc.cpp
@@ -1,4 +1,3 @@
-#include "bc7enc.h"
#include "Dither.hpp"
#include "ForceInline.hpp"
#include "ProcessDxtc.hpp"
@@ -1085,29 +1084,3 @@ void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t wi
#endif
} while( --blocks );
}
-
-void CompressBc7( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, const bc7enc_compress_block_params* params )
-{
- int i = 0;
- auto ptr = dst;
- do
- {
- uint32_t rgba[4*4];
-
- auto tmp = (char*)rgba;
- memcpy( tmp, src + width * 0, 4*4 );
- memcpy( tmp + 4*4, src + width * 1, 4*4 );
- memcpy( tmp + 8*4, src + width * 2, 4*4 );
- memcpy( tmp + 12*4, src + width * 3, 4*4 );
- src += 4;
- if( ++i == width/4 )
- {
- src += width * 3;
- i = 0;
- }
-
- bc7enc_compress_block( ptr, rgba, params );
- ptr += 2;
- }
- while( --blocks );
-}
diff --git a/thirdparty/etcpak/ProcessDxtc.hpp b/thirdparty/etcpak/ProcessDxtc.hpp
index 7655bb33be..8145493872 100644
--- a/thirdparty/etcpak/ProcessDxtc.hpp
+++ b/thirdparty/etcpak/ProcessDxtc.hpp
@@ -11,8 +11,4 @@ void CompressBc3( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t wi
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
-struct bc7enc_compress_block_params;
-
-void CompressBc7( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, const bc7enc_compress_block_params* params );
-
#endif