initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
5
thirdparty/etcpak/AUTHORS.txt
vendored
Normal file
5
thirdparty/etcpak/AUTHORS.txt
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
Bartosz Taudul <wolf@nereid.pl>
|
||||
Daniel Jungmann <el.3d.source@gmail.com>
|
||||
Florian Penzkofer <fp@nullptr.de>
|
||||
Jae-Ho Nah <nahjaeho@gmail.com>
|
||||
Marcin Ławicki <marcin.lawicki@gmail.com>
|
797
thirdparty/etcpak/DecodeRGB.cpp
vendored
Normal file
797
thirdparty/etcpak/DecodeRGB.cpp
vendored
Normal file
@@ -0,0 +1,797 @@
|
||||
#include "DecodeRGB.hpp"
|
||||
#include "Tables.hpp"
|
||||
#include "Math.hpp"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER
|
||||
# ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
# include <Windows.h>
|
||||
# define _bswap(x) _byteswap_ulong(x)
|
||||
# define _bswap64(x) _byteswap_uint64(x)
|
||||
# else
|
||||
# include <x86intrin.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef _bswap
|
||||
# define _bswap(x) __builtin_bswap32(x)
|
||||
# define _bswap64(x) __builtin_bswap64(x)
|
||||
#endif
|
||||
|
||||
// Distance values indexed by the 3-bit selector of the T- and H-mode decoders
// below. The table is only ever read, so it is declared const.
static const uint8_t table59T58H[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// Stretches a 6-bit value to 8 bits: the value is moved up by two bits and its
// two most significant bits are copied into the freed low bits.
// Callers in this file always pass a value masked with 0x3F.
static etcpak_force_inline int32_t expand6(uint32_t value)
{
    const uint32_t upper = value << 2;
    const uint32_t lower = value >> 4;
    return upper | lower;
}
|
||||
|
||||
// Stretches a 7-bit value to 8 bits: the value is moved up by one bit and its
// most significant bit is copied into the freed low bit.
// Callers in this file always pass a value masked with 0x7F.
static etcpak_force_inline int32_t expand7(uint32_t value)
{
    const uint32_t upper = value << 1;
    const uint32_t lower = value >> 6;
    return upper | lower;
}
|
||||
|
||||
// Decodes a T-mode colour block (chosen by the mode dispatch in DecodeRGBPart)
// into a 4x4 tile of fully opaque pixels.
//   block - 64-bit block word, already run through ConvertByteOrder by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t w )
{
    // The 4-bit red of the first base colour is stored as two 2-bit pieces;
    // mask 0x1B skips the bit that sits between them.
    const auto splitRed = ( block >> 24 ) & 0x1B;
    const auto redTop = ( splitRed >> 3 ) & 0x3;
    const auto redBottom = splitRed & 0x3;

    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Widen every 4-bit channel to 8 bits by repeating the nibble.
    const auto baseR0 = ( redTop << 6 ) | ( redBottom << 4 ) | ( redTop << 2 ) | redBottom;
    const auto baseG0 = ( green0 << 4 ) | green0;
    const auto baseB0 = ( blue0 << 4 ) | blue0;
    const auto baseR1 = ( red1 << 4 ) | red1;
    const auto baseG1 = ( green1 << 4 ) | green1;
    const auto baseB1 = ( blue1 << 4 ) | blue1;

    // 3-bit distance selector: block bits 3..2 are its upper two bits, bit 0 its lowest.
    const auto selector = ( ( ( block >> 2 ) & 0x3 ) << 1 ) | ( block & 0x1 );
    const uint64_t distance = table59T58H[selector];

    // Second base colour pushed up and down by the selected distance.
    const auto plusR = clampu8( baseR1 + distance );
    const auto plusG = clampu8( baseG1 + distance );
    const auto plusB = clampu8( baseB1 + distance );
    const auto minusR = clampu8( baseR1 - distance );
    const auto minusG = clampu8( baseG1 - distance );
    const auto minusB = clampu8( baseB1 - distance );

    const uint32_t opaque = 0xFF000000;
    const uint32_t palette[4] = {
        uint32_t( baseR0 | ( baseG0 << 8 ) | ( baseB0 << 16 ) | opaque ),
        uint32_t( plusR | ( plusG << 8 ) | ( plusB << 16 ) | opaque ),
        uint32_t( baseR1 | ( baseG1 << 8 ) | ( baseB1 << 16 ) | opaque ),
        uint32_t( minusR | ( minusG << 8 ) | ( minusB << 16 ) | opaque )
    };

    // Each pixel has a 2-bit palette index whose low bit lives in the lower
    // 16 bits of the selector word and whose high bit lives in the upper 16.
    const uint32_t selectors = uint32_t( block >> 32 );
    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            const int bit = col * 4 + row;
            const uint32_t high = ( selectors >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( selectors >> bit ) & 0x1;
            dst[row * w + col] = palette[( high << 1 ) | low];
        }
    }
}
|
||||
|
||||
// Decodes a T-mode colour block together with a separate 64-bit alpha word
// into a 4x4 tile of pixels (colour in the low 24 bits, alpha in the top byte).
//   block - 64-bit colour word, already run through ConvertByteOrder by the caller
//   alpha - 64-bit alpha word, already byte-swapped by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // The 4-bit red of the first base colour is stored as two 2-bit pieces;
    // mask 0x1B skips the bit that sits between them.
    const auto splitRed = ( block >> 24 ) & 0x1B;
    const auto redTop = ( splitRed >> 3 ) & 0x3;
    const auto redBottom = splitRed & 0x3;

    const auto green0 = ( block >> 20 ) & 0xF;
    const auto blue0 = ( block >> 16 ) & 0xF;
    const auto red1 = ( block >> 12 ) & 0xF;
    const auto green1 = ( block >> 8 ) & 0xF;
    const auto blue1 = ( block >> 4 ) & 0xF;

    // Widen every 4-bit channel to 8 bits by repeating the nibble.
    const auto baseR0 = ( redTop << 6 ) | ( redBottom << 4 ) | ( redTop << 2 ) | redBottom;
    const auto baseG0 = ( green0 << 4 ) | green0;
    const auto baseB0 = ( blue0 << 4 ) | blue0;
    const auto baseR1 = ( red1 << 4 ) | red1;
    const auto baseG1 = ( green1 << 4 ) | green1;
    const auto baseB1 = ( blue1 << 4 ) | blue1;

    // 3-bit distance selector: block bits 3..2 are its upper two bits, bit 0 its lowest.
    const auto selector = ( ( ( block >> 2 ) & 0x3 ) << 1 ) | ( block & 0x1 );
    const uint64_t distance = table59T58H[selector];

    // Alpha header: base value, multiplier and modifier-table number.
    const int32_t alphaBase = alpha >> 56;
    const int32_t alphaMul = ( alpha >> 52 ) & 0xF;
    const auto alphaTable = g_alpha[( alpha >> 48 ) & 0xF];

    // Second base colour pushed up and down by the selected distance.
    const auto plusR = clampu8( baseR1 + distance );
    const auto plusG = clampu8( baseG1 + distance );
    const auto plusB = clampu8( baseB1 + distance );
    const auto minusR = clampu8( baseR1 - distance );
    const auto minusG = clampu8( baseG1 - distance );
    const auto minusB = clampu8( baseB1 - distance );

    // Palette without alpha; the per-pixel alpha is merged in while storing.
    const uint32_t palette[4] = {
        uint32_t( baseR0 | ( baseG0 << 8 ) | ( baseB0 << 16 ) ),
        uint32_t( plusR | ( plusG << 8 ) | ( plusB << 16 ) ),
        uint32_t( baseR1 | ( baseG1 << 8 ) | ( baseB1 << 16 ) ),
        uint32_t( minusR | ( minusG << 8 ) | ( minusB << 16 ) )
    };

    const uint32_t selectors = uint32_t( block >> 32 );
    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Pixels are numbered column by column in both bit streams.
            const int pixel = col * 4 + row;

            // 2-bit colour index: low bit in the lower 16 bits, high bit in the upper 16.
            const uint32_t high = ( selectors >> ( pixel + 16 ) ) & 0x1;
            const uint32_t low = ( selectors >> pixel ) & 0x1;

            // 3-bit alpha selector; pixel 0 occupies bits 47..45.
            const auto modifier = alphaTable[( alpha >> ( 45 - pixel * 3 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + modifier * alphaMul );

            dst[row * w + col] = palette[( high << 1 ) | low] | ( a << 24 );
        }
    }
}
|
||||
|
||||
// Decodes an H-mode colour block (chosen by the mode dispatch in DecodeRGBPart)
// into a 4x4 tile of fully opaque pixels.
//   block - 64-bit block word, already run through ConvertByteOrder by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t w )
{
    const uint32_t selectors = uint32_t( block >> 32 );

    // First base colour, 4 bits per channel; green and blue are assembled
    // from two separate bit fields each.
    const auto red0 = ( block >> 27 ) & 0xF;
    const auto green0 = ( ( ( block >> 24 ) & 0x7 ) << 1 ) | ( ( block >> 20 ) & 0x1 );
    const auto blue0 = ( ( ( block >> 19 ) & 0x1 ) << 3 ) | ( ( block >> 15 ) & 0x7 );

    // Second base colour, stored contiguously in bits 14..3.
    const auto red1 = ( block >> 11 ) & 0xF;
    const auto green1 = ( block >> 7 ) & 0xF;
    const auto blue1 = ( block >> 3 ) & 0xF;

    // Widen every 4-bit channel to 8 bits by repeating the nibble.
    const auto wideR0 = ( red0 << 4 ) | red0;
    const auto wideG0 = ( green0 << 4 ) | green0;
    const auto wideB0 = ( blue0 << 4 ) | blue0;
    const auto wideR1 = ( red1 << 4 ) | red1;
    const auto wideG1 = ( green1 << 4 ) | green1;
    const auto wideB1 = ( blue1 << 4 ) | blue1;

    // Distance selector: bit 2 comes from block bit 2, bit 1 from block bit 0,
    // and bit 0 is set when the first packed 12-bit colour is >= the second.
    const auto packed0 = ( red0 << 8 ) | ( green0 << 4 ) | ( blue0 << 0 );
    const auto packed1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 );
    const auto orderBit = ( packed0 >= packed1 ) ? 1 : 0;
    const auto selector = ( ( block & 0x1 ) << 1 ) | ( block & 0x4 ) | orderBit;
    const uint64_t distance = table59T58H[selector];

    // Both base colours pushed up and down by the selected distance.
    const auto r0Up = clampu8( wideR0 + distance );
    const auto g0Up = clampu8( wideG0 + distance );
    const auto b0Up = clampu8( wideB0 + distance );
    const auto r0Down = clampu8( wideR0 - distance );
    const auto g0Down = clampu8( wideG0 - distance );
    const auto b0Down = clampu8( wideB0 - distance );
    const auto r1Up = clampu8( wideR1 + distance );
    const auto g1Up = clampu8( wideG1 + distance );
    const auto b1Up = clampu8( wideB1 + distance );
    const auto r1Down = clampu8( wideR1 - distance );
    const auto g1Down = clampu8( wideG1 - distance );
    const auto b1Down = clampu8( wideB1 - distance );

    const uint32_t palette[] = {
        uint32_t( r0Up | ( g0Up << 8 ) | ( b0Up << 16 ) ),
        uint32_t( r0Down | ( g0Down << 8 ) | ( b0Down << 16 ) ),
        uint32_t( r1Up | ( g1Up << 8 ) | ( b1Up << 16 ) ),
        uint32_t( r1Down | ( g1Down << 8 ) | ( b1Down << 16 ) )
    };

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // 2-bit palette index: low bit in the lower 16 selector bits,
            // high bit in the upper 16; pixels are numbered column by column.
            const int bit = col * 4 + row;
            const uint32_t high = ( selectors >> ( bit + 16 ) ) & 0x1;
            const uint32_t low = ( selectors >> bit ) & 0x1;
            dst[row * w + col] = palette[( high << 1 ) | low] | 0xFF000000;
        }
    }
}
|
||||
|
||||
// Decodes an H-mode colour block together with a separate 64-bit alpha word
// into a 4x4 tile of pixels (colour in the low 24 bits, alpha in the top byte).
//   block - 64-bit colour word, already run through ConvertByteOrder by the caller
//   alpha - 64-bit alpha word, already byte-swapped by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    const uint32_t selectors = uint32_t( block >> 32 );

    // First base colour, 4 bits per channel; green and blue are assembled
    // from two separate bit fields each.
    const auto red0 = ( block >> 27 ) & 0xF;
    const auto green0 = ( ( ( block >> 24 ) & 0x7 ) << 1 ) | ( ( block >> 20 ) & 0x1 );
    const auto blue0 = ( ( ( block >> 19 ) & 0x1 ) << 3 ) | ( ( block >> 15 ) & 0x7 );

    // Second base colour, stored contiguously in bits 14..3.
    const auto red1 = ( block >> 11 ) & 0xF;
    const auto green1 = ( block >> 7 ) & 0xF;
    const auto blue1 = ( block >> 3 ) & 0xF;

    // Widen every 4-bit channel to 8 bits by repeating the nibble.
    const auto wideR0 = ( red0 << 4 ) | red0;
    const auto wideG0 = ( green0 << 4 ) | green0;
    const auto wideB0 = ( blue0 << 4 ) | blue0;
    const auto wideR1 = ( red1 << 4 ) | red1;
    const auto wideG1 = ( green1 << 4 ) | green1;
    const auto wideB1 = ( blue1 << 4 ) | blue1;

    // Distance selector: bit 2 comes from block bit 2, bit 1 from block bit 0,
    // and bit 0 is set when the first packed 12-bit colour is >= the second.
    const auto packed0 = ( red0 << 8 ) | ( green0 << 4 ) | ( blue0 << 0 );
    const auto packed1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 );
    const auto orderBit = ( packed0 >= packed1 ) ? 1 : 0;
    const auto selector = ( ( block & 0x1 ) << 1 ) | ( block & 0x4 ) | orderBit;
    const uint64_t distance = table59T58H[selector];

    // Alpha header: base value, multiplier and modifier-table number.
    const int32_t alphaBase = alpha >> 56;
    const int32_t alphaMul = ( alpha >> 52 ) & 0xF;
    const auto alphaTable = g_alpha[( alpha >> 48 ) & 0xF];

    // Both base colours pushed up and down by the selected distance.
    const auto r0Up = clampu8( wideR0 + distance );
    const auto g0Up = clampu8( wideG0 + distance );
    const auto b0Up = clampu8( wideB0 + distance );
    const auto r0Down = clampu8( wideR0 - distance );
    const auto g0Down = clampu8( wideG0 - distance );
    const auto b0Down = clampu8( wideB0 - distance );
    const auto r1Up = clampu8( wideR1 + distance );
    const auto g1Up = clampu8( wideG1 + distance );
    const auto b1Up = clampu8( wideB1 + distance );
    const auto r1Down = clampu8( wideR1 - distance );
    const auto g1Down = clampu8( wideG1 - distance );
    const auto b1Down = clampu8( wideB1 - distance );

    const uint32_t palette[] = {
        uint32_t( r0Up | ( g0Up << 8 ) | ( b0Up << 16 ) ),
        uint32_t( r0Down | ( g0Down << 8 ) | ( b0Down << 16 ) ),
        uint32_t( r1Up | ( g1Up << 8 ) | ( b1Up << 16 ) ),
        uint32_t( r1Down | ( g1Down << 8 ) | ( b1Down << 16 ) )
    };

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // Pixels are numbered column by column in both bit streams.
            const int pixel = col * 4 + row;

            // 2-bit colour index: low bit in the lower 16 bits, high bit in the upper 16.
            const uint32_t high = ( selectors >> ( pixel + 16 ) ) & 0x1;
            const uint32_t low = ( selectors >> pixel ) & 0x1;

            // 3-bit alpha selector; pixel 0 occupies bits 47..45.
            const auto modifier = alphaTable[( alpha >> ( 45 - pixel * 3 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + modifier * alphaMul );

            dst[row * w + col] = palette[( high << 1 ) | low] | ( a << 24 );
        }
    }
}
|
||||
|
||||
// Decodes a planar-mode colour block (the "P mode" branch of DecodeRGBPart)
// into a 4x4 tile of fully opaque pixels. Three colours are read from the
// block - an origin colour, a "horizontal" colour and a "vertical" colour -
// and every pixel (col,row) gets
//     ( col*(horizontal-origin) + row*(vertical-origin) + 4*origin + 2 ) >> 2
// per channel, clamped to 0..255.
//   block - 64-bit block word, already run through ConvertByteOrder by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodePlanar( uint64_t block, uint32_t* dst, uint32_t w )
{
    // "Vertical" colour and the blue/green parts of the "horizontal" colour
    // sit in the upper 32 bits of the word.
    const auto vertB = expand6( ( block >> 32 ) & 0x3F );
    const auto vertG = expand7( ( block >> 38 ) & 0x7F );
    const auto vertR = expand6( ( block >> 45 ) & 0x3F );
    const auto horizB = expand6( ( block >> 51 ) & 0x3F );
    const auto horizG = expand7( ( block >> 57 ) & 0x7F );

    // Horizontal red: bit 0 plus five bits starting at bit 2.
    const auto horizRLow = block & 0x01;
    const auto horizRHigh = ( ( block >> 2 ) & 0x1F ) << 1;
    const auto horizR = expand6( horizRLow | horizRHigh );

    // Origin blue is scattered over three fields, origin green over two.
    const auto origBLow = ( block >> 7 ) & 0x07;
    const auto origBMid = ( ( block >> 11 ) & 0x3 ) << 3;
    const auto origBTop = ( ( block >> 16 ) & 0x1 ) << 5;
    const auto origB = expand6( origBLow | origBMid | origBTop );
    const auto origGLow = ( block >> 17 ) & 0x3F;
    const auto origGTop = ( ( block >> 24 ) & 0x01 ) << 6;
    const auto origG = expand7( origGLow | origGTop );
    const auto origR = expand6( ( block >> 25 ) & 0x3F );

    // Per-column and per-row colour increments, and the rounded start value
    // (all still scaled by 4; the final >> 2 removes the scale).
    const int32_t stepXr = horizR - origR;
    const int32_t stepXg = horizG - origG;
    const int32_t stepXb = horizB - origB;
    const int32_t stepYr = vertR - origR;
    const int32_t stepYg = vertG - origG;
    const int32_t stepYb = vertB - origB;
    const int32_t startR = 4 * origR + 2;
    const int32_t startG = 4 * origG + 2;
    const int32_t startB = 4 * origB + 2;

#ifdef __ARM_NEON
    // One pixel lives in four 16-bit lanes (R, G, B, A); the alpha lane holds
    // 0xFFF so the saturating narrow turns it into 0xFF.
    const uint64_t colStepBits = uint64_t( uint16_t( stepXr ) ) | ( uint64_t( uint16_t( stepXg ) ) << 16 ) | ( uint64_t( uint16_t( stepXb ) ) << 32 );
    // After four column steps the accumulator is 4*stepX past the row start,
    // so the row step undoes that and adds stepY.
    const uint64_t rowStepBits = uint64_t( uint16_t( stepYr - 4 * stepXr ) ) | ( uint64_t( uint16_t( stepYg - 4 * stepXg ) ) << 16 ) | ( uint64_t( uint16_t( stepYb - 4 * stepXb ) ) << 32 );
    const uint64_t startBits = uint64_t( startR ) | ( uint64_t( startG ) << 16 ) | ( uint64_t( startB ) << 32 ) | ( uint64_t( 0xFFF ) << 48 );

    const int16x8_t colStep = vreinterpretq_s16_u64( vdupq_n_u64( colStepBits ) );
    const int16x8_t rowStep = vreinterpretq_s16_u64( vdupq_n_u64( rowStepBits ) );
    int16x8_t accum = vreinterpretq_s16_u64( vdupq_n_u64( startBits ) );

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            const uint8x8_t bytes = vqshrun_n_s16( accum, 2 );
            vst1_lane_u32( dst + row * w + col, vreinterpret_u32_u8( bytes ), 0 );
            accum = vaddq_s16( accum, colStep );
        }
        accum = vaddq_s16( accum, rowStep );
    }
#elif defined __AVX2__
    // A whole row (four pixels x four 16-bit lanes) is kept in one register,
    // so only the per-row step has to be added inside the loop.
    const __m256i rowStep = _mm256_setr_epi16(
        stepYr, stepYg, stepYb, 0,
        stepYr, stepYg, stepYb, 0,
        stepYr, stepYg, stepYb, 0,
        stepYr, stepYg, stepYb, 0 );
    __m256i accum = _mm256_setr_epi16(
        startR, startG, startB, 0xFFF,
        startR + stepXr, startG + stepXg, startB + stepXb, 0xFFF,
        startR + 2 * stepXr, startG + 2 * stepXg, startB + 2 * stepXb, 0xFFF,
        startR + 3 * stepXr, startG + 3 * stepXg, startB + 3 * stepXb, 0xFFF );

    for( int row = 0; row < 4; row++ )
    {
        const __m256i scaled = _mm256_srai_epi16( accum, 2 );
        // Unsigned-saturating pack clamps every channel to 0..255.
        const __m128i packed = _mm_packus_epi16( _mm256_castsi256_si128( scaled ), _mm256_extracti128_si256( scaled, 1 ) );
        _mm_storeu_si128( (__m128i*)( dst + row * w ), packed );
        accum = _mm256_add_epi16( accum, rowStep );
    }
#elif defined __SSE4_1__
    // One pixel lives in the low four 16-bit lanes (R, G, B, A).
    const __m128i colStep = _mm_setr_epi16( stepXr, stepXg, stepXb, 0, 0, 0, 0, 0 );
    // After four column steps the accumulator is 4*stepX past the row start,
    // so the row step undoes that and adds stepY.
    const __m128i rowStep = _mm_setr_epi16( stepYr - 4 * stepXr, stepYg - 4 * stepXg, stepYb - 4 * stepXb, 0, 0, 0, 0, 0 );
    __m128i accum = _mm_setr_epi16( startR, startG, startB, 0xFFF, 0, 0, 0, 0 );

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            const __m128i scaled = _mm_srai_epi16( accum, 2 );
            // Unsigned-saturating pack clamps every channel to 0..255.
            const __m128i packed = _mm_packus_epi16( scaled, scaled );
            dst[row * w + col] = _mm_cvtsi128_si32( packed );
            accum = _mm_add_epi16( accum, colStep );
        }
        accum = _mm_add_epi16( accum, rowStep );
    }
#else
    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            const uint32_t r = ( col * stepXr + row * stepYr + startR ) >> 2;
            const uint32_t g = ( col * stepXg + row * stepYg + startG ) >> 2;
            const uint32_t b = ( col * stepXb + row * stepYb + startB ) >> 2;

            if( ( ( r | g | b ) & ~0xFF ) != 0 )
            {
                // At least one channel left 0..255: clamp each one.
                const auto rc = clampu8( r );
                const auto gc = clampu8( g );
                const auto bc = clampu8( b );
                dst[row * w + col] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                continue;
            }
            dst[row * w + col] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
        }
    }
#endif
}
|
||||
|
||||
// Decodes a planar-mode colour block together with a separate 64-bit alpha
// word into a 4x4 tile of pixels. Colour follows the same interpolation as
// DecodePlanar:
//     ( col*(horizontal-origin) + row*(vertical-origin) + 4*origin + 2 ) >> 2
// per channel, clamped to 0..255; alpha is base + modifier*multiplier, clamped.
//   block - 64-bit colour word, already run through ConvertByteOrder by the caller
//   alpha - 64-bit alpha word, already byte-swapped by the caller
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodePlanarAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    // "Vertical" colour and the blue/green parts of the "horizontal" colour
    // sit in the upper 32 bits of the word.
    const auto vertB = expand6( ( block >> 32 ) & 0x3F );
    const auto vertG = expand7( ( block >> 38 ) & 0x7F );
    const auto vertR = expand6( ( block >> 45 ) & 0x3F );
    const auto horizB = expand6( ( block >> 51 ) & 0x3F );
    const auto horizG = expand7( ( block >> 57 ) & 0x7F );

    // Horizontal red: bit 0 plus five bits starting at bit 2.
    const auto horizRLow = block & 0x01;
    const auto horizRHigh = ( ( block >> 2 ) & 0x1F ) << 1;
    const auto horizR = expand6( horizRLow | horizRHigh );

    // Origin blue is scattered over three fields, origin green over two.
    const auto origBLow = ( block >> 7 ) & 0x07;
    const auto origBMid = ( ( block >> 11 ) & 0x3 ) << 3;
    const auto origBTop = ( ( block >> 16 ) & 0x1 ) << 5;
    const auto origB = expand6( origBLow | origBMid | origBTop );
    const auto origGLow = ( block >> 17 ) & 0x3F;
    const auto origGTop = ( ( block >> 24 ) & 0x01 ) << 6;
    const auto origG = expand7( origGLow | origGTop );
    const auto origR = expand6( ( block >> 25 ) & 0x3F );

    // Alpha header: base value, multiplier and modifier-table number.
    const int32_t alphaBase = alpha >> 56;
    const int32_t alphaMul = ( alpha >> 52 ) & 0xF;
    const auto alphaTable = g_alpha[( alpha >> 48 ) & 0xF];

    // Per-column and per-row colour increments, and the rounded start value
    // (all still scaled by 4; the final >> 2 removes the scale).
    const int32_t stepXr = horizR - origR;
    const int32_t stepXg = horizG - origG;
    const int32_t stepXb = horizB - origB;
    const int32_t stepYr = vertR - origR;
    const int32_t stepYg = vertG - origG;
    const int32_t stepYb = vertB - origB;
    const int32_t startR = 4 * origR + 2;
    const int32_t startG = 4 * origG + 2;
    const int32_t startB = 4 * origB + 2;

#ifdef __ARM_NEON
    // One pixel lives in four 16-bit lanes; the fourth lane stays zero so the
    // decoded alpha can be OR-ed into the top byte afterwards.
    const uint64_t colStepBits = uint64_t( uint16_t( stepXr ) ) | ( uint64_t( uint16_t( stepXg ) ) << 16 ) | ( uint64_t( uint16_t( stepXb ) ) << 32 );
    // After four column steps the accumulator is 4*stepX past the row start,
    // so the row step undoes that and adds stepY.
    const uint64_t rowStepBits = uint64_t( uint16_t( stepYr - 4 * stepXr ) ) | ( uint64_t( uint16_t( stepYg - 4 * stepXg ) ) << 16 ) | ( uint64_t( uint16_t( stepYb - 4 * stepXb ) ) << 32 );
    const uint64_t startBits = uint64_t( startR ) | ( uint64_t( startG ) << 16 ) | ( uint64_t( startB ) << 32 );

    const int16x8_t colStep = vreinterpretq_s16_u64( vdupq_n_u64( colStepBits ) );
    const int16x8_t rowStep = vreinterpretq_s16_u64( vdupq_n_u64( rowStepBits ) );
    int16x8_t accum = vreinterpretq_s16_u64( vdupq_n_u64( startBits ) );

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // 3-bit alpha selector; pixels are numbered column by column and
            // pixel 0 occupies bits 47..45.
            const auto modifier = alphaTable[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + modifier * alphaMul );
            const uint8x8_t bytes = vqshrun_n_s16( accum, 2 );
            dst[row * w + col] = vget_lane_u32( vreinterpret_u32_u8( bytes ), 0 ) | ( a << 24 );
            accum = vaddq_s16( accum, colStep );
        }
        accum = vaddq_s16( accum, rowStep );
    }
#elif defined __SSE4_1__
    // One pixel lives in the low four 16-bit lanes; the fourth lane stays
    // zero so the decoded alpha can be OR-ed into the top byte afterwards.
    const __m128i colStep = _mm_setr_epi16( stepXr, stepXg, stepXb, 0, 0, 0, 0, 0 );
    // After four column steps the accumulator is 4*stepX past the row start,
    // so the row step undoes that and adds stepY.
    const __m128i rowStep = _mm_setr_epi16( stepYr - 4 * stepXr, stepYg - 4 * stepXg, stepYb - 4 * stepXb, 0, 0, 0, 0, 0 );
    __m128i accum = _mm_setr_epi16( startR, startG, startB, 0, 0, 0, 0, 0 );

    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            // 3-bit alpha selector; pixels are numbered column by column and
            // pixel 0 occupies bits 47..45.
            const auto modifier = alphaTable[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + modifier * alphaMul );
            const __m128i scaled = _mm_srai_epi16( accum, 2 );
            // Unsigned-saturating pack clamps every channel to 0..255.
            const __m128i packed = _mm_packus_epi16( scaled, scaled );
            dst[row * w + col] = _mm_cvtsi128_si32( packed ) | ( a << 24 );
            accum = _mm_add_epi16( accum, colStep );
        }
        accum = _mm_add_epi16( accum, rowStep );
    }
#else
    for( int row = 0; row < 4; row++ )
    {
        for( int col = 0; col < 4; col++ )
        {
            const uint32_t r = ( col * stepXr + row * stepYr + startR ) >> 2;
            const uint32_t g = ( col * stepXg + row * stepYg + startG ) >> 2;
            const uint32_t b = ( col * stepXb + row * stepYb + startB ) >> 2;

            // 3-bit alpha selector; pixels are numbered column by column and
            // pixel 0 occupies bits 47..45.
            const auto modifier = alphaTable[( alpha >> ( 45 - row * 3 - col * 12 ) ) & 0x7];
            const uint32_t a = clampu8( alphaBase + modifier * alphaMul );

            if( ( ( r | g | b ) & ~0xFF ) != 0 )
            {
                // At least one channel left 0..255: clamp each one.
                const auto rc = clampu8( r );
                const auto gc = clampu8( g );
                const auto bc = clampu8( b );
                dst[row * w + col] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                continue;
            }
            dst[row * w + col] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
        }
    }
#endif
}
|
||||
}
|
||||
|
||||
// Byte-swaps each 32-bit half of a 64-bit word independently. The two halves
// keep their positions in memory; only the byte order inside each is reversed.
static etcpak_force_inline uint64_t ConvertByteOrder( uint64_t d )
{
    uint32_t halves[2];
    memcpy( halves, &d, sizeof( halves ) );

    for( int k = 0; k < 2; k++ )
    {
        halves[k] = _bswap( halves[k] );
    }

    uint64_t swapped;
    memcpy( &swapped, halves, sizeof( swapped ) );
    return swapped;
}
|
||||
|
||||
// Decodes one 64-bit RGB colour block into a 4x4 tile of fully opaque pixels.
// The block is first passed through ConvertByteOrder. If bit 1 is set the
// block is differential: a 5-bit base colour plus a signed 3-bit delta per
// channel. A delta that pushes red, green or blue outside 0..31 selects the
// T, H or planar decoder respectively. Otherwise (or when bit 1 is clear, in
// which case two independent 4-bit colours are stored) the tile is split into
// two halves, each with its own base colour and modifier table, and every
// pixel adds a table modifier chosen by its 2-bit index to its base colour.
//   d   - raw 64-bit block as loaded from memory
//   dst - destination pixel for row 0, column 0 of the tile
//   w   - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeRGBPart( uint64_t d, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );

    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        const uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        const uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        const uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Signed 3-bit deltas in bits 26..24, 18..16 and 10..8. They are
        // sign-extended with (v ^ 4) - 4 instead of shifting a signed integer
        // left, which is undefined for negative values before C++20.
        const int32_t dr = int32_t( ( ( d >> 24 ) & 0x7 ) ^ 0x4 ) - 4;
        const int32_t dg = int32_t( ( ( d >> 16 ) & 0x7 ) ^ 0x4 ) - 4;
        const int32_t db = int32_t( ( ( d >> 8 ) & 0x7 ) ^ 0x4 ) - 4;

        const int32_t r1 = int32_t( r0 ) + dr;
        const int32_t g1 = int32_t( g0 ) + dg;
        const int32_t b1 = int32_t( b0 ) + db;

        // T mode
        if( ( r1 < 0 ) || ( r1 > 31 ) )
        {
            DecodeT( d, dst, w );
            return;
        }

        // H mode
        if( ( g1 < 0 ) || ( g1 > 31 ) )
        {
            DecodeH( d, dst, w );
            return;
        }

        // P mode
        if( ( b1 < 0 ) || ( b1 > 31 ) )
        {
            DecodePlanar( d, dst, w );
            return;
        }

        // Widen 5-bit channels to 8 bits by repeating the top three bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Two independent 4-bit colours, widened to 8 bits by repeating the nibble.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 );
    }

    // Modifier-table number for each half of the tile.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The low and high bits of the 16 pixel indices are stored as two separate
    // 16-bit planes. Spread each plane so its bits land on even positions,
    // then interleave them into sixteen 2-bit indices.
    uint32_t lowPlane = ( d >> 32 ) & 0xFFFF;
    uint32_t highPlane = ( d >> 48 );

    lowPlane = ( lowPlane | ( lowPlane << 8 ) ) & 0x00FF00FF;
    lowPlane = ( lowPlane | ( lowPlane << 4 ) ) & 0x0F0F0F0F;
    lowPlane = ( lowPlane | ( lowPlane << 2 ) ) & 0x33333333;
    lowPlane = ( lowPlane | ( lowPlane << 1 ) ) & 0x55555555;

    highPlane = ( highPlane | ( highPlane << 8 ) ) & 0x00FF00FF;
    highPlane = ( highPlane | ( highPlane << 4 ) ) & 0x0F0F0F0F;
    highPlane = ( highPlane | ( highPlane << 2 ) ) & 0x33333333;
    highPlane = ( highPlane | ( highPlane << 1 ) ) & 0x55555555;

    uint32_t idx = lowPlane | ( highPlane << 1 );

    if( d & 0x1 )
    {
        // Bit 0 set: the two halves are the top and bottom pairs of rows,
        // so the half is selected by the row (j).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Bit 0 clear: the two halves are the left and right pairs of columns,
        // so the half is selected by the column (i).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000;
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000;
                }
                idx >>= 2;
            }
        }
    }
}
|
||||
|
||||
// Decodes one 64-bit RGB colour block plus one 64-bit alpha block into a 4x4
// tile of pixels (colour in the low 24 bits, alpha in the top byte).
// The colour word is passed through ConvertByteOrder and the alpha word is
// byte-swapped as a whole. Colour handling matches DecodeRGBPart: bit 1
// selects differential encoding, out-of-range deltas dispatch to the T, H or
// planar decoders (alpha variants), and otherwise every pixel adds a table
// modifier to the base colour of its half of the tile. Alpha for each pixel is
// base + modifier * multiplier, clamped to 0..255.
//   d     - raw 64-bit colour block as loaded from memory
//   alpha - raw 64-bit alpha block as loaded from memory
//   dst   - destination pixel for row 0, column 0 of the tile
//   w     - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeRGBAPart( uint64_t d, uint64_t alpha, uint32_t* dst, uint32_t w )
{
    d = ConvertByteOrder( d );
    alpha = _bswap64( alpha );

    uint32_t br[2], bg[2], bb[2];

    if( d & 0x2 )
    {
        const uint32_t r0 = ( d & 0xF8000000 ) >> 27;
        const uint32_t g0 = ( d & 0x00F80000 ) >> 19;
        const uint32_t b0 = ( d & 0x0000F800 ) >> 11;

        // Signed 3-bit deltas in bits 26..24, 18..16 and 10..8. They are
        // sign-extended with (v ^ 4) - 4 instead of shifting a signed integer
        // left, which is undefined for negative values before C++20.
        const int32_t dr = int32_t( ( ( d >> 24 ) & 0x7 ) ^ 0x4 ) - 4;
        const int32_t dg = int32_t( ( ( d >> 16 ) & 0x7 ) ^ 0x4 ) - 4;
        const int32_t db = int32_t( ( ( d >> 8 ) & 0x7 ) ^ 0x4 ) - 4;

        const int32_t r1 = int32_t( r0 ) + dr;
        const int32_t g1 = int32_t( g0 ) + dg;
        const int32_t b1 = int32_t( b0 ) + db;

        // T mode
        if( ( r1 < 0 ) || ( r1 > 31 ) )
        {
            DecodeTAlpha( d, alpha, dst, w );
            return;
        }

        // H mode
        if( ( g1 < 0 ) || ( g1 > 31 ) )
        {
            DecodeHAlpha( d, alpha, dst, w );
            return;
        }

        // P mode
        if( ( b1 < 0 ) || ( b1 > 31 ) )
        {
            DecodePlanarAlpha( d, alpha, dst, w );
            return;
        }

        // Widen 5-bit channels to 8 bits by repeating the top three bits.
        br[0] = ( r0 << 3 ) | ( r0 >> 2 );
        br[1] = ( r1 << 3 ) | ( r1 >> 2 );
        bg[0] = ( g0 << 3 ) | ( g0 >> 2 );
        bg[1] = ( g1 << 3 ) | ( g1 >> 2 );
        bb[0] = ( b0 << 3 ) | ( b0 >> 2 );
        bb[1] = ( b1 << 3 ) | ( b1 >> 2 );
    }
    else
    {
        // Two independent 4-bit colours, widened to 8 bits by repeating the nibble.
        br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 );
        br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 );
        bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 );
        bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 );
        bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 );
        bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 );
    }

    // Modifier-table number for each half of the tile.
    unsigned int tcw[2];
    tcw[0] = ( d & 0xE0 ) >> 5;
    tcw[1] = ( d & 0x1C ) >> 2;

    // The low and high bits of the 16 pixel indices are stored as two separate
    // 16-bit planes. Spread each plane so its bits land on even positions,
    // then interleave them into sixteen 2-bit indices.
    uint32_t lowPlane = ( d >> 32 ) & 0xFFFF;
    uint32_t highPlane = ( d >> 48 );

    lowPlane = ( lowPlane | ( lowPlane << 8 ) ) & 0x00FF00FF;
    lowPlane = ( lowPlane | ( lowPlane << 4 ) ) & 0x0F0F0F0F;
    lowPlane = ( lowPlane | ( lowPlane << 2 ) ) & 0x33333333;
    lowPlane = ( lowPlane | ( lowPlane << 1 ) ) & 0x55555555;

    highPlane = ( highPlane | ( highPlane << 8 ) ) & 0x00FF00FF;
    highPlane = ( highPlane | ( highPlane << 4 ) ) & 0x0F0F0F0F;
    highPlane = ( highPlane | ( highPlane << 2 ) ) & 0x33333333;
    highPlane = ( highPlane | ( highPlane << 1 ) ) & 0x55555555;

    uint32_t idx = lowPlane | ( highPlane << 1 );

    // Alpha header: base value, multiplier and modifier-table number.
    const int32_t base = alpha >> 56;
    const int32_t mul = ( alpha >> 52 ) & 0xF;
    const auto atbl = g_alpha[( alpha >> 48 ) & 0xF];

    if( d & 0x1 )
    {
        // Bit 0 set: the two halves are the top and bottom pairs of rows,
        // so the half is selected by the row (j).
        for( int i=0; i<4; i++ )
        {
            for( int j=0; j<4; j++ )
            {
                const auto mod = g_table[tcw[j/2]][idx & 0x3];
                const auto r = br[j/2] + mod;
                const auto g = bg[j/2] + mod;
                const auto b = bb[j/2] + mod;
                // 3-bit alpha selector; pixel 0 occupies bits 47..45.
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
    else
    {
        // Bit 0 clear: the two halves are the left and right pairs of columns,
        // so the half is selected by the column (i).
        for( int i=0; i<4; i++ )
        {
            const auto tbl = g_table[tcw[i/2]];
            const auto cr = br[i/2];
            const auto cg = bg[i/2];
            const auto cb = bb[i/2];

            for( int j=0; j<4; j++ )
            {
                const auto mod = tbl[idx & 0x3];
                const auto r = cr + mod;
                const auto g = cg + mod;
                const auto b = cb + mod;
                // 3-bit alpha selector; pixel 0 occupies bits 47..45.
                const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7];
                const uint32_t a = clampu8( base + amod * mul );
                if( ( ( r | g | b ) & ~0xFF ) == 0 )
                {
                    dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 );
                }
                else
                {
                    const auto rc = clampu8( r );
                    const auto gc = clampu8( g );
                    const auto bc = clampu8( b );
                    dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 );
                }
                idx >>= 2;
            }
        }
    }
}
|
||||
|
||||
// Decodes one 64-bit single-channel block into a 4x4 tile of pixels. The
// decoded value goes into the lowest byte of each pixel, the top byte is set
// to 0xFF and the two bytes in between are zero.
//   r   - raw 64-bit block as loaded from memory (byte-swapped here)
//   dst - destination pixel for row 0, column 0 of the tile
//   w   - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeRPart( uint64_t r, uint32_t* dst, uint32_t w )
{
    r = _bswap64( r );

    // Header: base value (scaled by 8, plus 4), multiplier index and
    // modifier-table number.
    const int32_t scaledBase = ( r >> 56 )*8+4;
    const int32_t mulIndex = ( r >> 52 ) & 0xF;
    const auto modifiers = g_alpha[( r >> 48 ) & 0xF];
    const auto scale = g_alpha11Mul[mulIndex];

    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            // 3-bit selector; pixels are numbered column by column and
            // pixel 0 occupies bits 47..45.
            const int pixel = col * 4 + row;
            const auto modifier = modifiers[( r >> ( 45 - pixel * 3 ) ) & 0x7];
            const uint32_t value = clampu8( ( scaledBase + modifier * scale )/8 );
            dst[row*w+col] = value | 0xFF000000;
        }
    }
}
|
||||
|
||||
// Decodes two 64-bit single-channel blocks into a 4x4 tile of pixels. The
// first block's value goes into the lowest byte of each pixel, the second
// block's value into the next byte, the third byte is zero and the top byte
// is set to 0xFF.
//   r   - raw 64-bit block for the first channel (byte-swapped here)
//   g   - raw 64-bit block for the second channel (byte-swapped here)
//   dst - destination pixel for row 0, column 0 of the tile
//   w   - number of uint32_t pixels between the starts of consecutive dst rows
static etcpak_force_inline void DecodeRGPart( uint64_t r, uint64_t g, uint32_t* dst, uint32_t w )
{
    r = _bswap64( r );
    g = _bswap64( g );

    // Per-channel header: base value (scaled by 8, plus 4), multiplier index
    // and modifier-table number.
    const int32_t redBase = ( r >> 56 )*8+4;
    const int32_t redMulIndex = ( r >> 52 ) & 0xF;
    const auto redModifiers = g_alpha[( r >> 48 ) & 0xF];
    const auto redScale = g_alpha11Mul[redMulIndex];

    const int32_t greenBase = ( g >> 56 )*8+4;
    const int32_t greenMulIndex = ( g >> 52 ) & 0xF;
    const auto greenModifiers = g_alpha[( g >> 48 ) & 0xF];
    const auto greenScale = g_alpha11Mul[greenMulIndex];

    for( int col = 0; col < 4; col++ )
    {
        for( int row = 0; row < 4; row++ )
        {
            // 3-bit selector; pixels are numbered column by column and
            // pixel 0 occupies bits 47..45.
            const int shift = 45 - ( col * 4 + row ) * 3;

            const auto redModifier = redModifiers[( r >> shift ) & 0x7];
            const uint32_t red = clampu8( ( redBase + redModifier * redScale )/8 );

            const auto greenModifier = greenModifiers[( g >> shift ) & 0x7];
            const uint32_t green = clampu8( ( greenBase + greenModifier * greenScale )/8 );

            dst[row*w+col] = red | (green << 8) | 0xFF000000;
        }
    }
}
|
||||
|
||||
void DecodeRBlock( const void* src, void* dst, size_t width )
|
||||
{
|
||||
uint64_t* srcPtr = (uint64_t*)src;
|
||||
uint64_t r = *srcPtr++;
|
||||
DecodeRPart( r, (uint32_t*)dst, width );
|
||||
}
|
||||
|
||||
void DecodeRGBlock( const void* src, void* dst, size_t width )
|
||||
{
|
||||
uint64_t* srcPtr = (uint64_t*)src;
|
||||
uint64_t r = *srcPtr++;
|
||||
uint64_t g = *srcPtr++;
|
||||
DecodeRGPart( r, g, (uint32_t*)dst, width );
|
||||
}
|
||||
|
||||
void DecodeRGBBlock( const void* src, void* dst, size_t width )
|
||||
{
|
||||
uint64_t* srcPtr = (uint64_t*)src;
|
||||
uint64_t d = *srcPtr++;
|
||||
DecodeRGBPart( d, (uint32_t*)dst, width );
|
||||
}
|
||||
|
||||
void DecodeRGBABlock( const void* src, void* dst, size_t width )
|
||||
{
|
||||
uint64_t* srcPtr = (uint64_t*)src;
|
||||
uint64_t a = *srcPtr++;
|
||||
uint64_t d = *srcPtr++;
|
||||
DecodeRGBAPart( d, a, (uint32_t*)dst, width );
|
||||
}
|
12
thirdparty/etcpak/DecodeRGB.hpp
vendored
Normal file
12
thirdparty/etcpak/DecodeRGB.hpp
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
// Include guard renamed from __DECODERGB_HPP__: identifiers containing a
// double underscore are reserved for the implementation.
#ifndef DECODERGB_HPP
#define DECODERGB_HPP

#include <stddef.h>
#include <stdint.h>

// Block decoders. Each call reads one compressed block from src and writes a
// 4x4 tile of 32-bit pixels starting at dst; width is the number of pixels
// between the starts of consecutive rows in dst.

// Single-channel block: reads 8 bytes.
void DecodeRBlock( const void* src, void* dst, size_t width );
// Two-channel block: reads 16 bytes (first channel, then second channel).
void DecodeRGBlock( const void* src, void* dst, size_t width );
// RGB colour block: reads 8 bytes; output pixels are fully opaque.
void DecodeRGBBlock( const void* src, void* dst, size_t width );
// RGBA block: reads 16 bytes (alpha block, then colour block).
void DecodeRGBABlock( const void* src, void* dst, size_t width );

#endif
|
120
thirdparty/etcpak/Dither.cpp
vendored
Normal file
120
thirdparty/etcpak/Dither.cpp
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
|
||||
#include "Dither.hpp"
|
||||
#include "Math.hpp"
|
||||
#ifdef __SSE4_1__
|
||||
# ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
# include <Windows.h>
|
||||
# else
|
||||
# include <x86intrin.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
// Applies a fixed per-pixel offset pattern to 16 four-byte pixels supplied in
// four 128-bit registers (px0..px3, four pixels each) and stores the 64-byte
// result at `data`.
//
// For pixel i, bytes 0 and 2 get +a31[i] then -s31[i], byte 1 gets +a63[i]
// then -s63[i]; both steps saturate to the unsigned 8-bit range. Byte 3 of
// every pixel has a zero add and a zero subtract, so it passes through.
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 )
{
    static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
    static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
    static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
    static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };

    // Pixels 0..7: amounts to add.
    const __m256i BayerAdd0 = _mm256_setr_epi8(
        a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
        a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
    );
    // Pixels 8..15: amounts to add.
    const __m256i BayerAdd1 = _mm256_setr_epi8(
        a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
        a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
    );
    // Pixels 0..7: amounts to subtract.
    const __m256i BayerSub0 = _mm256_setr_epi8(
        s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
        s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
    );
    // Pixels 8..15: amounts to subtract.
    const __m256i BayerSub1 = _mm256_setr_epi8(
        s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
        s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
    );

    // Pair the 128-bit inputs into two 256-bit lanes: (px0, px1) and (px2, px3).
    __m256i l0 = _mm256_inserti128_si256( _mm256_castsi128_si256( px0 ), px1, 1 );
    __m256i l1 = _mm256_inserti128_si256( _mm256_castsi128_si256( px2 ), px3, 1 );

    __m256i a0 = _mm256_adds_epu8( l0, BayerAdd0 );
    __m256i a1 = _mm256_adds_epu8( l1, BayerAdd1 );
    __m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
    __m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );

    _mm256_storeu_si256( (__m256i*)(data   ), s0 );
    _mm256_storeu_si256( (__m256i*)(data+32), s1 );
}
|
||||
#endif
|
||||
|
||||
void Dither( uint8_t* data )
|
||||
{
|
||||
#ifdef __AVX2__
|
||||
static constexpr uint8_t a31[] = { 0, 0, 0, 1, 2, 0, 4, 0, 0, 2, 0, 0, 4, 0, 3, 0 };
|
||||
static constexpr uint8_t a63[] = { 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 1, 0 };
|
||||
static constexpr uint8_t s31[] = { 5, 0, 4, 0, 0, 2, 0, 1, 3, 0, 4, 0, 0, 0, 0, 2 };
|
||||
static constexpr uint8_t s63[] = { 2, 0, 2, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1 };
|
||||
|
||||
const __m256i BayerAdd0 = _mm256_setr_epi8(
|
||||
a31[0], a63[0], a31[0], 0, a31[1], a63[1], a31[1], 0, a31[2], a63[2], a31[2], 0, a31[3], a63[3], a31[3], 0,
|
||||
a31[4], a63[4], a31[4], 0, a31[5], a63[5], a31[5], 0, a31[6], a63[6], a31[6], 0, a31[7], a63[7], a31[7], 0
|
||||
);
|
||||
const __m256i BayerAdd1 = _mm256_setr_epi8(
|
||||
a31[8], a63[8], a31[8], 0, a31[9], a63[9], a31[9], 0, a31[10], a63[10], a31[10], 0, a31[11], a63[11], a31[11], 0,
|
||||
a31[12], a63[12], a31[12], 0, a31[13], a63[13], a31[13], 0, a31[14], a63[14], a31[14], 0, a31[15], a63[15], a31[15], 0
|
||||
);
|
||||
const __m256i BayerSub0 = _mm256_setr_epi8(
|
||||
s31[0], s63[0], s31[0], 0, s31[1], s63[1], s31[1], 0, s31[2], s63[2], s31[2], 0, s31[3], s63[3], s31[3], 0,
|
||||
s31[4], s63[4], s31[4], 0, s31[5], s63[5], s31[5], 0, s31[6], s63[6], s31[6], 0, s31[7], s63[7], s31[7], 0
|
||||
);
|
||||
const __m256i BayerSub1 = _mm256_setr_epi8(
|
||||
s31[8], s63[8], s31[8], 0, s31[9], s63[9], s31[9], 0, s31[10], s63[10], s31[10], 0, s31[11], s63[11], s31[11], 0,
|
||||
s31[12], s63[12], s31[12], 0, s31[13], s63[13], s31[13], 0, s31[14], s63[14], s31[14], 0, s31[15], s63[15], s31[15], 0
|
||||
);
|
||||
|
||||
__m256i px0 = _mm256_loadu_si256( (__m256i*)(data ) );
|
||||
__m256i px1 = _mm256_loadu_si256( (__m256i*)(data+32) );
|
||||
|
||||
__m256i a0 = _mm256_adds_epu8( px0, BayerAdd0 );
|
||||
__m256i a1 = _mm256_adds_epu8( px1, BayerAdd1 );
|
||||
__m256i s0 = _mm256_subs_epu8( a0, BayerSub0 );
|
||||
__m256i s1 = _mm256_subs_epu8( a1, BayerSub1 );
|
||||
|
||||
_mm256_storeu_si256( (__m256i*)(data ), s0 );
|
||||
_mm256_storeu_si256( (__m256i*)(data+32), s1 );
|
||||
#else
|
||||
static constexpr int8_t Bayer31[16] = {
|
||||
( 0-8)*2/3, ( 8-8)*2/3, ( 2-8)*2/3, (10-8)*2/3,
|
||||
(12-8)*2/3, ( 4-8)*2/3, (14-8)*2/3, ( 6-8)*2/3,
|
||||
( 3-8)*2/3, (11-8)*2/3, ( 1-8)*2/3, ( 9-8)*2/3,
|
||||
(15-8)*2/3, ( 7-8)*2/3, (13-8)*2/3, ( 5-8)*2/3
|
||||
};
|
||||
static constexpr int8_t Bayer63[16] = {
|
||||
( 0-8)*2/6, ( 8-8)*2/6, ( 2-8)*2/6, (10-8)*2/6,
|
||||
(12-8)*2/6, ( 4-8)*2/6, (14-8)*2/6, ( 6-8)*2/6,
|
||||
( 3-8)*2/6, (11-8)*2/6, ( 1-8)*2/6, ( 9-8)*2/6,
|
||||
(15-8)*2/6, ( 7-8)*2/6, (13-8)*2/6, ( 5-8)*2/6
|
||||
};
|
||||
|
||||
for( int i=0; i<16; i++ )
|
||||
{
|
||||
uint32_t col;
|
||||
memcpy( &col, data, 4 );
|
||||
uint8_t r = col & 0xFF;
|
||||
uint8_t g = ( col >> 8 ) & 0xFF;
|
||||
uint8_t b = ( col >> 16 ) & 0xFF;
|
||||
|
||||
r = clampu8( r + Bayer31[i] );
|
||||
g = clampu8( g + Bayer63[i] );
|
||||
b = clampu8( b + Bayer31[i] );
|
||||
|
||||
col = r | ( g << 8 ) | ( b << 16 );
|
||||
memcpy( data, &col, 4 );
|
||||
data += 4;
|
||||
}
|
||||
#endif
|
||||
}
|
21
thirdparty/etcpak/Dither.hpp
vendored
Normal file
21
thirdparty/etcpak/Dither.hpp
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __DITHER_HPP__
|
||||
#define __DITHER_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __AVX2__
|
||||
# ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
# else
|
||||
# include <x86intrin.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
void Dither( uint8_t* data );
|
||||
|
||||
#ifdef __AVX2__
|
||||
void DitherAvx2( uint8_t* data, __m128i px0, __m128i px1, __m128i px2, __m128i px3 );
|
||||
#endif
|
||||
|
||||
#endif
|
20
thirdparty/etcpak/ForceInline.hpp
vendored
Normal file
20
thirdparty/etcpak/ForceInline.hpp
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef __FORCEINLINE_HPP__
|
||||
#define __FORCEINLINE_HPP__
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define etcpak_force_inline __attribute__((always_inline)) inline
|
||||
#elif defined(_MSC_VER)
|
||||
# define etcpak_force_inline __forceinline
|
||||
#else
|
||||
# define etcpak_force_inline inline
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define etcpak_no_inline __attribute__((noinline))
|
||||
#elif defined(_MSC_VER)
|
||||
# define etcpak_no_inline __declspec(noinline)
|
||||
#else
|
||||
# define etcpak_no_inline
|
||||
#endif
|
||||
|
||||
#endif
|
26
thirdparty/etcpak/LICENSE.txt
vendored
Normal file
26
thirdparty/etcpak/LICENSE.txt
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
etcpak, an extremely fast ETC compression utility (https://github.com/wolfpld/etcpak)
|
||||
|
||||
Copyright (c) 2013-2022, Bartosz Taudul <wolf@nereid.pl>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the <organization> nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
92
thirdparty/etcpak/Math.hpp
vendored
Normal file
92
thirdparty/etcpak/Math.hpp
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
#ifndef __DARKRL__MATH_HPP__
|
||||
#define __DARKRL__MATH_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "ForceInline.hpp"
|
||||
|
||||
template<typename T>
|
||||
static etcpak_force_inline T AlignPOT( T val )
|
||||
{
|
||||
if( val == 0 ) return 1;
|
||||
val--;
|
||||
for( unsigned int i=1; i<sizeof( T ) * 8; i <<= 1 )
|
||||
{
|
||||
val |= val >> i;
|
||||
}
|
||||
return val + 1;
|
||||
}
|
||||
|
||||
// Returns the number of set bits in `val` (0..32) using parallel partial sums.
static etcpak_force_inline int CountSetBits( uint32_t val )
{
    val -= ( val >> 1 ) & 0x55555555;                           // 2-bit sums
    val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 ); // 4-bit sums
    val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f;                  // 8-bit sums
    val += val >> 8;
    val += val >> 16;
    return val & 0x0000003f;
}
|
||||
|
||||
// Returns the number of zero bits above the highest set bit of `val`;
// yields 32 when `val` is 0.
static etcpak_force_inline int CountLeadingZeros( uint32_t val )
{
    // Set every bit below the highest set bit, then count what is set.
    val |= val >> 1;
    val |= val >> 2;
    val |= val >> 4;
    val |= val >> 8;
    val |= val >> 16;
    return 32 - CountSetBits( val );
}
|
||||
|
||||
// Converts one sRGB-encoded component to linear: a linear segment
// (v / 12.92) up to 0.04045, a 2.4-exponent power curve above it.
// No clamping is applied to the input.
static etcpak_force_inline float sRGB2linear( float v )
{
    const float a = 0.055f;
    if( v <= 0.04045f )
    {
        return v / 12.92f;
    }
    else
    {
        return pow( ( v + a ) / ( 1 + a ), 2.4f );
    }
}
|
||||
|
||||
// Converts one linear component to sRGB encoding: a linear segment
// (12.92 * v) up to 0.0031308, a 1/2.4-exponent power curve above it.
// No clamping is applied to the input.
static etcpak_force_inline float linear2sRGB( float v )
{
    const float a = 0.055f;
    if( v <= 0.0031308f )
    {
        return 12.92f * v;
    }
    else
    {
        return ( 1 + a ) * pow( v, 1/2.4f ) - a;
    }
}
|
||||
|
||||
template<class T>
|
||||
static etcpak_force_inline T SmoothStep( T x )
|
||||
{
|
||||
return x*x*(3-2*x);
|
||||
}
|
||||
|
||||
// Clamps `val` to the range 0..255.
static etcpak_force_inline uint8_t clampu8( int32_t val )
{
    // Fast path: no bits outside the low byte means the value is already in range.
    if( ( val & ~0xFF ) == 0 ) return uint8_t( val );
    // Out of range: negative values clamp to 0, everything else to 255.
    // Written as a comparison so it does not rely on how a negative value
    // behaves under right shift.
    return val < 0 ? 0 : 255;
}
|
||||
|
||||
template<class T>
|
||||
static etcpak_force_inline T sq( T val )
|
||||
{
|
||||
return val * val;
|
||||
}
|
||||
|
||||
// Integer-only scaled product: computes (t + (t >> 8)) >> 8 with
// t = a*b + 128. NOTE(review): for operands in 0..255 this is the usual
// rounded a*b/255 approximation; behaviour for other ranges is not
// established here.
static etcpak_force_inline int mul8bit( int a, int b )
{
    int t = a*b + 128;
    return ( t + ( t >> 8 ) ) >> 8;
}
|
||||
|
||||
#endif
|
50
thirdparty/etcpak/ProcessCommon.hpp
vendored
Normal file
50
thirdparty/etcpak/ProcessCommon.hpp
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#ifndef __PROCESSCOMMON_HPP__
|
||||
#define __PROCESSCOMMON_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Returns the index of the smallest element among err[0..num-1]. When several
// elements share the minimum, the lowest index wins. Returns 0 without
// reading `err` beyond index 0 comparisons when `num` is 0 or 1.
template<class T>
static size_t GetLeastError( const T* err, size_t num )
{
    size_t idx = 0;
    for( size_t i=1; i<num; i++ )
    {
        // Strict comparison keeps the first occurrence of the minimum.
        if( err[i] < err[idx] )
        {
            idx = i;
        }
    }
    return idx;
}
|
||||
|
||||
// Reverses the order of the four bytes in the upper 32 bits of `d`; the lower
// 32 bits are returned unchanged.
static uint64_t FixByteOrder( uint64_t d )
{
    return ( ( d & 0x00000000FFFFFFFF ) ) |
           ( ( d & 0xFF00000000000000 ) >> 24 ) |   // byte 7 -> byte 4
           ( ( d & 0x000000FF00000000 ) << 24 ) |   // byte 4 -> byte 7
           ( ( d & 0x00FF000000000000 ) >> 8 ) |    // byte 6 -> byte 5
           ( ( d & 0x0000FF0000000000 ) << 8 );     // byte 5 -> byte 6
}
|
||||
|
||||
// Picks, for each of the two error rows in `terr`, the table index with the
// smallest error and ORs the result into `d`:
//   - the index chosen for terr[0] at bits 26..28, the one for terr[1] at
//     bits 29..31;
//   - for each of the 16 entries i, the selector tsel[i][chosen index], where
//     the chosen index is the one belonging to row id[i] % 2: its bit 0 goes
//     to bit (i + 32) and its bit 1 to bit (i + 48).
// Returns the updated value.
template<class T, class S>
static uint64_t EncodeSelectors( uint64_t d, const T terr[2][8], const S tsel[16][8], const uint32_t* id )
{
    size_t tidx[2];
    tidx[0] = GetLeastError( terr[0], 8 );
    tidx[1] = GetLeastError( terr[1], 8 );

    // Widen before shifting so the arithmetic is done in 64 bits regardless
    // of the width of size_t.
    d |= uint64_t( tidx[0] ) << 26;
    d |= uint64_t( tidx[1] ) << 29;
    for( int i=0; i<16; i++ )
    {
        uint64_t t = tsel[i][tidx[id[i]%2]];
        d |= ( t & 0x1 ) << ( i + 32 );
        // (t & 0x2) already sits at bit 1, hence 47 rather than 48.
        d |= ( t & 0x2 ) << ( i + 47 );
    }

    return d;
}
|
||||
|
||||
#endif
|
1086
thirdparty/etcpak/ProcessDxtc.cpp
vendored
Normal file
1086
thirdparty/etcpak/ProcessDxtc.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
14
thirdparty/etcpak/ProcessDxtc.hpp
vendored
Normal file
14
thirdparty/etcpak/ProcessDxtc.hpp
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __PROCESSDXT1_HPP__
|
||||
#define __PROCESSDXT1_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void CompressBc1( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressBc1Dither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressBc3( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
|
||||
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
|
||||
#endif
|
4210
thirdparty/etcpak/ProcessRGB.cpp
vendored
Normal file
4210
thirdparty/etcpak/ProcessRGB.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
14
thirdparty/etcpak/ProcessRGB.hpp
vendored
Normal file
14
thirdparty/etcpak/ProcessRGB.hpp
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __PROCESSRGB_HPP__
|
||||
#define __PROCESSRGB_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void CompressEtc1Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressEtc1RgbDither( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressEtc2Rgb( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
|
||||
void CompressEtc2Rgba( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, bool useHeuristics );
|
||||
|
||||
void CompressEacR( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressEacRg( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
|
||||
#endif
|
223
thirdparty/etcpak/Tables.cpp
vendored
Normal file
223
thirdparty/etcpak/Tables.cpp
vendored
Normal file
@@ -0,0 +1,223 @@
|
||||
#include "Tables.hpp"
|
||||
|
||||
const int32_t g_table[8][4] = {
|
||||
{ 2, 8, -2, -8 },
|
||||
{ 5, 17, -5, -17 },
|
||||
{ 9, 29, -9, -29 },
|
||||
{ 13, 42, -13, -42 },
|
||||
{ 18, 60, -18, -60 },
|
||||
{ 24, 80, -24, -80 },
|
||||
{ 33, 106, -33, -106 },
|
||||
{ 47, 183, -47, -183 }
|
||||
};
|
||||
|
||||
const int64_t g_table256[8][4] = {
|
||||
{ 2*256, 8*256, -2*256, -8*256 },
|
||||
{ 5*256, 17*256, -5*256, -17*256 },
|
||||
{ 9*256, 29*256, -9*256, -29*256 },
|
||||
{ 13*256, 42*256, -13*256, -42*256 },
|
||||
{ 18*256, 60*256, -18*256, -60*256 },
|
||||
{ 24*256, 80*256, -24*256, -80*256 },
|
||||
{ 33*256, 106*256, -33*256, -106*256 },
|
||||
{ 47*256, 183*256, -47*256, -183*256 }
|
||||
};
|
||||
|
||||
const uint32_t g_id[4][16] = {
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
|
||||
{ 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
|
||||
{ 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
|
||||
};
|
||||
|
||||
const uint32_t g_avg2[16] = {
|
||||
0x00,
|
||||
0x11,
|
||||
0x22,
|
||||
0x33,
|
||||
0x44,
|
||||
0x55,
|
||||
0x66,
|
||||
0x77,
|
||||
0x88,
|
||||
0x99,
|
||||
0xAA,
|
||||
0xBB,
|
||||
0xCC,
|
||||
0xDD,
|
||||
0xEE,
|
||||
0xFF
|
||||
};
|
||||
|
||||
const uint32_t g_flags[64] = {
|
||||
0x80800402, 0x80800402, 0x80800402, 0x80800402,
|
||||
0x80800402, 0x80800402, 0x80800402, 0x8080E002,
|
||||
0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
|
||||
0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
|
||||
0x80000402, 0x80000402, 0x80000402, 0x80000402,
|
||||
0x80000402, 0x80000402, 0x80000402, 0x8000E002,
|
||||
0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
|
||||
0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
|
||||
0x00800402, 0x00800402, 0x00800402, 0x00800402,
|
||||
0x00800402, 0x00800402, 0x00800402, 0x0080E002,
|
||||
0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
|
||||
0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
|
||||
0x00000402, 0x00000402, 0x00000402, 0x00000402,
|
||||
0x00000402, 0x00000402, 0x00000402, 0x0000E002,
|
||||
0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
|
||||
0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
|
||||
};
|
||||
|
||||
const int32_t g_alpha[16][8] = {
|
||||
{ -3, -6, -9, -15, 2, 5, 8, 14 },
|
||||
{ -3, -7, -10, -13, 2, 6, 9, 12 },
|
||||
{ -2, -5, -8, -13, 1, 4, 7, 12 },
|
||||
{ -2, -4, -6, -13, 1, 3, 5, 12 },
|
||||
{ -3, -6, -8, -12, 2, 5, 7, 11 },
|
||||
{ -3, -7, -9, -11, 2, 6, 8, 10 },
|
||||
{ -4, -7, -8, -11, 3, 6, 7, 10 },
|
||||
{ -3, -5, -8, -11, 2, 4, 7, 10 },
|
||||
{ -2, -6, -8, -10, 1, 5, 7, 9 },
|
||||
{ -2, -5, -8, -10, 1, 4, 7, 9 },
|
||||
{ -2, -4, -8, -10, 1, 3, 7, 9 },
|
||||
{ -2, -5, -7, -10, 1, 4, 6, 9 },
|
||||
{ -3, -4, -7, -10, 2, 3, 6, 9 },
|
||||
{ -1, -2, -3, -10, 0, 1, 2, 9 },
|
||||
{ -4, -6, -8, -9, 3, 5, 7, 8 },
|
||||
{ -3, -5, -7, -9, 2, 4, 6, 8 }
|
||||
};
|
||||
|
||||
const int32_t g_alpha11Mul[16] = { 1, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120 };
|
||||
|
||||
const int32_t g_alphaRange[16] = {
|
||||
0x100FF / ( 1 + g_alpha[0][7] - g_alpha[0][3] ),
|
||||
0x100FF / ( 1 + g_alpha[1][7] - g_alpha[1][3] ),
|
||||
0x100FF / ( 1 + g_alpha[2][7] - g_alpha[2][3] ),
|
||||
0x100FF / ( 1 + g_alpha[3][7] - g_alpha[3][3] ),
|
||||
0x100FF / ( 1 + g_alpha[4][7] - g_alpha[4][3] ),
|
||||
0x100FF / ( 1 + g_alpha[5][7] - g_alpha[5][3] ),
|
||||
0x100FF / ( 1 + g_alpha[6][7] - g_alpha[6][3] ),
|
||||
0x100FF / ( 1 + g_alpha[7][7] - g_alpha[7][3] ),
|
||||
0x100FF / ( 1 + g_alpha[8][7] - g_alpha[8][3] ),
|
||||
0x100FF / ( 1 + g_alpha[9][7] - g_alpha[9][3] ),
|
||||
0x100FF / ( 1 + g_alpha[10][7] - g_alpha[10][3] ),
|
||||
0x100FF / ( 1 + g_alpha[11][7] - g_alpha[11][3] ),
|
||||
0x100FF / ( 1 + g_alpha[12][7] - g_alpha[12][3] ),
|
||||
0x100FF / ( 1 + g_alpha[13][7] - g_alpha[13][3] ),
|
||||
0x100FF / ( 1 + g_alpha[14][7] - g_alpha[14][3] ),
|
||||
0x100FF / ( 1 + g_alpha[15][7] - g_alpha[15][3] ),
|
||||
};
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
const __m128i g_table_SIMD[2] =
|
||||
{
|
||||
_mm_setr_epi16( 2, 5, 9, 13, 18, 24, 33, 47),
|
||||
_mm_setr_epi16( 8, 17, 29, 42, 60, 80, 106, 183)
|
||||
};
|
||||
const __m128i g_table128_SIMD[2] =
|
||||
{
|
||||
_mm_setr_epi16( 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128),
|
||||
_mm_setr_epi16( 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128)
|
||||
};
|
||||
const __m128i g_table256_SIMD[4] =
|
||||
{
|
||||
_mm_setr_epi32( 2*256, 5*256, 9*256, 13*256),
|
||||
_mm_setr_epi32( 8*256, 17*256, 29*256, 42*256),
|
||||
_mm_setr_epi32( 18*256, 24*256, 33*256, 47*256),
|
||||
_mm_setr_epi32( 60*256, 80*256, 106*256, 183*256)
|
||||
};
|
||||
|
||||
const __m128i g_alpha_SIMD[16] = {
|
||||
_mm_setr_epi16( g_alpha[ 0][0], g_alpha[ 0][1], g_alpha[ 0][2], g_alpha[ 0][3], g_alpha[ 0][4], g_alpha[ 0][5], g_alpha[ 0][6], g_alpha[ 0][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 1][0], g_alpha[ 1][1], g_alpha[ 1][2], g_alpha[ 1][3], g_alpha[ 1][4], g_alpha[ 1][5], g_alpha[ 1][6], g_alpha[ 1][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 2][0], g_alpha[ 2][1], g_alpha[ 2][2], g_alpha[ 2][3], g_alpha[ 2][4], g_alpha[ 2][5], g_alpha[ 2][6], g_alpha[ 2][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 3][0], g_alpha[ 3][1], g_alpha[ 3][2], g_alpha[ 3][3], g_alpha[ 3][4], g_alpha[ 3][5], g_alpha[ 3][6], g_alpha[ 3][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 4][0], g_alpha[ 4][1], g_alpha[ 4][2], g_alpha[ 4][3], g_alpha[ 4][4], g_alpha[ 4][5], g_alpha[ 4][6], g_alpha[ 4][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 5][0], g_alpha[ 5][1], g_alpha[ 5][2], g_alpha[ 5][3], g_alpha[ 5][4], g_alpha[ 5][5], g_alpha[ 5][6], g_alpha[ 5][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 6][0], g_alpha[ 6][1], g_alpha[ 6][2], g_alpha[ 6][3], g_alpha[ 6][4], g_alpha[ 6][5], g_alpha[ 6][6], g_alpha[ 6][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 7][0], g_alpha[ 7][1], g_alpha[ 7][2], g_alpha[ 7][3], g_alpha[ 7][4], g_alpha[ 7][5], g_alpha[ 7][6], g_alpha[ 7][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 8][0], g_alpha[ 8][1], g_alpha[ 8][2], g_alpha[ 8][3], g_alpha[ 8][4], g_alpha[ 8][5], g_alpha[ 8][6], g_alpha[ 8][7] ),
|
||||
_mm_setr_epi16( g_alpha[ 9][0], g_alpha[ 9][1], g_alpha[ 9][2], g_alpha[ 9][3], g_alpha[ 9][4], g_alpha[ 9][5], g_alpha[ 9][6], g_alpha[ 9][7] ),
|
||||
_mm_setr_epi16( g_alpha[10][0], g_alpha[10][1], g_alpha[10][2], g_alpha[10][3], g_alpha[10][4], g_alpha[10][5], g_alpha[10][6], g_alpha[10][7] ),
|
||||
_mm_setr_epi16( g_alpha[11][0], g_alpha[11][1], g_alpha[11][2], g_alpha[11][3], g_alpha[11][4], g_alpha[11][5], g_alpha[11][6], g_alpha[11][7] ),
|
||||
_mm_setr_epi16( g_alpha[12][0], g_alpha[12][1], g_alpha[12][2], g_alpha[12][3], g_alpha[12][4], g_alpha[12][5], g_alpha[12][6], g_alpha[12][7] ),
|
||||
_mm_setr_epi16( g_alpha[13][0], g_alpha[13][1], g_alpha[13][2], g_alpha[13][3], g_alpha[13][4], g_alpha[13][5], g_alpha[13][6], g_alpha[13][7] ),
|
||||
_mm_setr_epi16( g_alpha[14][0], g_alpha[14][1], g_alpha[14][2], g_alpha[14][3], g_alpha[14][4], g_alpha[14][5], g_alpha[14][6], g_alpha[14][7] ),
|
||||
_mm_setr_epi16( g_alpha[15][0], g_alpha[15][1], g_alpha[15][2], g_alpha[15][3], g_alpha[15][4], g_alpha[15][5], g_alpha[15][6], g_alpha[15][7] ),
|
||||
};
|
||||
|
||||
const __m128i g_alphaRange_SIMD = _mm_setr_epi16(
|
||||
g_alphaRange[0],
|
||||
g_alphaRange[1],
|
||||
g_alphaRange[4],
|
||||
g_alphaRange[5],
|
||||
g_alphaRange[8],
|
||||
g_alphaRange[14],
|
||||
0,
|
||||
0 );
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
const __m256i g_alpha_AVX[8] = {
|
||||
_mm256_setr_epi16( g_alpha[ 0][0], g_alpha[ 1][0], g_alpha[ 2][0], g_alpha[ 3][0], g_alpha[ 4][0], g_alpha[ 5][0], g_alpha[ 6][0], g_alpha[ 7][0], g_alpha[ 8][0], g_alpha[ 9][0], g_alpha[10][0], g_alpha[11][0], g_alpha[12][0], g_alpha[13][0], g_alpha[14][0], g_alpha[15][0] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][1], g_alpha[ 1][1], g_alpha[ 2][1], g_alpha[ 3][1], g_alpha[ 4][1], g_alpha[ 5][1], g_alpha[ 6][1], g_alpha[ 7][1], g_alpha[ 8][1], g_alpha[ 9][1], g_alpha[10][1], g_alpha[11][1], g_alpha[12][1], g_alpha[13][1], g_alpha[14][1], g_alpha[15][1] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][2], g_alpha[ 1][2], g_alpha[ 2][2], g_alpha[ 3][2], g_alpha[ 4][2], g_alpha[ 5][2], g_alpha[ 6][2], g_alpha[ 7][2], g_alpha[ 8][2], g_alpha[ 9][2], g_alpha[10][2], g_alpha[11][2], g_alpha[12][2], g_alpha[13][2], g_alpha[14][2], g_alpha[15][2] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][3], g_alpha[ 1][3], g_alpha[ 2][3], g_alpha[ 3][3], g_alpha[ 4][3], g_alpha[ 5][3], g_alpha[ 6][3], g_alpha[ 7][3], g_alpha[ 8][3], g_alpha[ 9][3], g_alpha[10][3], g_alpha[11][3], g_alpha[12][3], g_alpha[13][3], g_alpha[14][3], g_alpha[15][3] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][4], g_alpha[ 1][4], g_alpha[ 2][4], g_alpha[ 3][4], g_alpha[ 4][4], g_alpha[ 5][4], g_alpha[ 6][4], g_alpha[ 7][4], g_alpha[ 8][4], g_alpha[ 9][4], g_alpha[10][4], g_alpha[11][4], g_alpha[12][4], g_alpha[13][4], g_alpha[14][4], g_alpha[15][4] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][5], g_alpha[ 1][5], g_alpha[ 2][5], g_alpha[ 3][5], g_alpha[ 4][5], g_alpha[ 5][5], g_alpha[ 6][5], g_alpha[ 7][5], g_alpha[ 8][5], g_alpha[ 9][5], g_alpha[10][5], g_alpha[11][5], g_alpha[12][5], g_alpha[13][5], g_alpha[14][5], g_alpha[15][5] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][6], g_alpha[ 1][6], g_alpha[ 2][6], g_alpha[ 3][6], g_alpha[ 4][6], g_alpha[ 5][6], g_alpha[ 6][6], g_alpha[ 7][6], g_alpha[ 8][6], g_alpha[ 9][6], g_alpha[10][6], g_alpha[11][6], g_alpha[12][6], g_alpha[13][6], g_alpha[14][6], g_alpha[15][6] ),
|
||||
_mm256_setr_epi16( g_alpha[ 0][7], g_alpha[ 1][7], g_alpha[ 2][7], g_alpha[ 3][7], g_alpha[ 4][7], g_alpha[ 5][7], g_alpha[ 6][7], g_alpha[ 7][7], g_alpha[ 8][7], g_alpha[ 9][7], g_alpha[10][7], g_alpha[11][7], g_alpha[12][7], g_alpha[13][7], g_alpha[14][7], g_alpha[15][7] ),
|
||||
};
|
||||
|
||||
const __m256i g_alphaRange_AVX = _mm256_setr_epi16(
|
||||
g_alphaRange[ 0], g_alphaRange[ 1], g_alphaRange[ 2], g_alphaRange[ 3], g_alphaRange[ 4], g_alphaRange[ 5], g_alphaRange[ 6], g_alphaRange[ 7],
|
||||
g_alphaRange[ 8], g_alphaRange[ 9], g_alphaRange[10], g_alphaRange[11], g_alphaRange[12], g_alphaRange[13], g_alphaRange[14], g_alphaRange[15]
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
const int16x8_t g_table128_NEON[2] =
|
||||
{
|
||||
{ 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128 },
|
||||
{ 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128 }
|
||||
};
|
||||
|
||||
const int32x4_t g_table256_NEON[4] =
|
||||
{
|
||||
{ 2*256, 5*256, 9*256, 13*256 },
|
||||
{ 8*256, 17*256, 29*256, 42*256 },
|
||||
{ 18*256, 24*256, 33*256, 47*256 },
|
||||
{ 60*256, 80*256, 106*256, 183*256 }
|
||||
};
|
||||
|
||||
const int16x8_t g_alpha_NEON[16] =
|
||||
{
|
||||
{ -3, -6, -9, -15, 2, 5, 8, 14 },
|
||||
{ -3, -7, -10, -13, 2, 6, 9, 12 },
|
||||
{ -2, -5, -8, -13, 1, 4, 7, 12 },
|
||||
{ -2, -4, -6, -13, 1, 3, 5, 12 },
|
||||
{ -3, -6, -8, -12, 2, 5, 7, 11 },
|
||||
{ -3, -7, -9, -11, 2, 6, 8, 10 },
|
||||
{ -4, -7, -8, -11, 3, 6, 7, 10 },
|
||||
{ -3, -5, -8, -11, 2, 4, 7, 10 },
|
||||
{ -2, -6, -8, -10, 1, 5, 7, 9 },
|
||||
{ -2, -5, -8, -10, 1, 4, 7, 9 },
|
||||
{ -2, -4, -8, -10, 1, 3, 7, 9 },
|
||||
{ -2, -5, -7, -10, 1, 4, 6, 9 },
|
||||
{ -3, -4, -7, -10, 2, 3, 6, 9 },
|
||||
{ -1, -2, -3, -10, 0, 1, 2, 9 },
|
||||
{ -4, -6, -8, -9, 3, 5, 7, 8 },
|
||||
{ -3, -5, -7, -9, 2, 4, 6, 8 }
|
||||
};
|
||||
|
||||
const int16x8_t g_alphaRange_NEON =
|
||||
{
|
||||
(int16_t)g_alphaRange[0],
|
||||
(int16_t)g_alphaRange[1],
|
||||
(int16_t)g_alphaRange[4],
|
||||
(int16_t)g_alphaRange[5],
|
||||
(int16_t)g_alphaRange[8],
|
||||
(int16_t)g_alphaRange[14],
|
||||
0,
|
||||
0
|
||||
};
|
||||
#endif
|
50
thirdparty/etcpak/Tables.hpp
vendored
Normal file
50
thirdparty/etcpak/Tables.hpp
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#ifndef __TABLES_HPP__
|
||||
#define __TABLES_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __AVX2__
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
# include <smmintrin.h>
|
||||
#endif
|
||||
#ifdef __ARM_NEON
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
extern const int32_t g_table[8][4];
|
||||
extern const int64_t g_table256[8][4];
|
||||
|
||||
extern const uint32_t g_id[4][16];
|
||||
|
||||
extern const uint32_t g_avg2[16];
|
||||
|
||||
extern const uint32_t g_flags[64];
|
||||
|
||||
extern const int32_t g_alpha[16][8];
|
||||
extern const int32_t g_alpha11Mul[16];
|
||||
extern const int32_t g_alphaRange[16];
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
extern const __m128i g_table_SIMD[2];
|
||||
extern const __m128i g_table128_SIMD[2];
|
||||
extern const __m128i g_table256_SIMD[4];
|
||||
|
||||
extern const __m128i g_alpha_SIMD[16];
|
||||
extern const __m128i g_alphaRange_SIMD;
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
extern const __m256i g_alpha_AVX[8];
|
||||
extern const __m256i g_alphaRange_AVX;
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
extern const int16x8_t g_table128_NEON[2];
|
||||
extern const int32x4_t g_table256_NEON[4];
|
||||
extern const int16x8_t g_alpha_NEON[16];
|
||||
extern const int16x8_t g_alphaRange_NEON;
|
||||
#endif
|
||||
|
||||
#endif
|
222
thirdparty/etcpak/Vector.hpp
vendored
Normal file
222
thirdparty/etcpak/Vector.hpp
vendored
Normal file
@@ -0,0 +1,222 @@
|
||||
#ifndef __DARKRL__VECTOR_HPP__
|
||||
#define __DARKRL__VECTOR_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "Math.hpp"
|
||||
|
||||
// Two-component value type with component-wise compound arithmetic.
template<class T>
struct Vector2
{
    // Zero vector.
    Vector2() : x( 0 ), y( 0 ) {}
    // Both components set to `v`.
    Vector2( T v ) : x( v ), y( v ) {}
    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}

    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }

    // Component-wise compound operators; each returns *this to allow chaining.
    Vector2<T>& operator+=( const Vector2<T>& rhs )
    {
        x += rhs.x;
        y += rhs.y;
        return *this;
    }
    Vector2<T>& operator-=( const Vector2<T>& rhs )
    {
        x -= rhs.x;
        y -= rhs.y;
        return *this;
    }
    Vector2<T>& operator*=( const Vector2<T>& rhs )
    {
        x *= rhs.x;
        y *= rhs.y;
        return *this;
    }

    T x, y;
};
|
||||
|
||||
template<class T>
|
||||
Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
|
||||
{
|
||||
return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
|
||||
{
|
||||
return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
|
||||
{
|
||||
return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
|
||||
{
|
||||
return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
|
||||
}
|
||||
|
||||
|
||||
typedef Vector2<int32_t> v2i;
|
||||
typedef Vector2<float> v2f;
|
||||
|
||||
|
||||
// Three-component value type (x, y, z) with an extra trailing `padding`
// member of the same type.
template<class T>
struct Vector3
{
    // Zero vector. None of the constructors initializes `padding`.
    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
    // All three components set to `v`.
    Vector3( T v ) : x( v ), y( v ), z( v ) {}
    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
    // Converts from a Vector3 of another component type, casting each component to T.
    template<class Y>
    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}

    // Weighted sum 0.3*x + 0.59*y + 0.11*z, converted to T.
    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
    // Clamps every component to the range [0, 1].
    void Clamp()
    {
        x = std::min( T(1), std::max( T(0), x ) );
        y = std::min( T(1), std::max( T(0), y ) );
        z = std::min( T(1), std::max( T(0), z ) );
    }

    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
    // Compares against another Vector3 (the parameter used to be declared as
    // Vector2<T>, which made the operator unusable).
    bool operator!=( const Vector3<T>& rhs ) const { return !( *this == rhs ); }

    // Indexed component access: 0 -> x, 1 -> y, 2 -> z. Relies on the three
    // components being laid out contiguously at the start of the object.
    T& operator[]( unsigned int idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
    const T& operator[]( unsigned int idx ) const { assert( idx < 3 ); return ((const T*)this)[idx]; }

    // Component-wise compound operators. They return a reference to *this,
    // like the Vector2 counterparts, so chained updates act on this object
    // rather than on a temporary copy.
    Vector3<T>& operator+=( const Vector3<T>& rhs )
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        return *this;
    }

    Vector3<T>& operator*=( const Vector3<T>& rhs )
    {
        x *= rhs.x;
        y *= rhs.y;
        z *= rhs.z;
        return *this;
    }

    Vector3<T>& operator*=( const float& rhs )
    {
        x *= rhs;
        y *= rhs;
        z *= rhs;
        return *this;
    }

    T x, y, z;
    T padding;
};
|
||||
|
||||
template<class T>
|
||||
Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
|
||||
{
|
||||
return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
|
||||
{
|
||||
return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
|
||||
{
|
||||
return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
|
||||
{
|
||||
return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
|
||||
{
|
||||
return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
|
||||
{
|
||||
return lhs.Luminance() < rhs.Luminance();
|
||||
}
|
||||
|
||||
typedef Vector3<int32_t> v3i;
|
||||
typedef Vector3<float> v3f;
|
||||
typedef Vector3<uint8_t> v3b;
|
||||
|
||||
|
||||
// Converts a float vector to 8-bit components: each component is clamped to
// [0, 1], multiplied by 255 and truncated.
static inline v3b v3f_to_v3b( const v3f& v )
{
    // The lower bound matters: converting a negative float to uint8_t is
    // undefined, so negative components are pinned to 0 first.
    const float x = std::max( 0.f, std::min( 1.f, v.x ) );
    const float y = std::max( 0.f, std::min( 1.f, v.y ) );
    const float z = std::max( 0.f, std::min( 1.f, v.z ) );
    return v3b( uint8_t( x * 255 ), uint8_t( y * 255 ), uint8_t( z * 255 ) );
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
|
||||
{
|
||||
return v1 + ( v2 - v1 ) * amount;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
|
||||
{
|
||||
return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> Desaturate( const Vector3<T>& v )
|
||||
{
|
||||
T l = v.Luminance();
|
||||
return Vector3<T>( l, l, l );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> Desaturate( const Vector3<T>& v, float mul )
|
||||
{
|
||||
T l = T( v.Luminance() * mul );
|
||||
return Vector3<T>( l, l, l );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> pow( const Vector3<T>& base, float exponent )
|
||||
{
|
||||
return Vector3<T>(
|
||||
pow( base.x, exponent ),
|
||||
pow( base.y, exponent ),
|
||||
pow( base.z, exponent ) );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> sRGB2linear( const Vector3<T>& v )
|
||||
{
|
||||
return Vector3<T>(
|
||||
sRGB2linear( v.x ),
|
||||
sRGB2linear( v.y ),
|
||||
sRGB2linear( v.z ) );
|
||||
}
|
||||
|
||||
template<class T>
|
||||
Vector3<T> linear2sRGB( const Vector3<T>& v )
|
||||
{
|
||||
return Vector3<T>(
|
||||
linear2sRGB( v.x ),
|
||||
linear2sRGB( v.y ),
|
||||
linear2sRGB( v.z ) );
|
||||
}
|
||||
|
||||
#endif
|
52
thirdparty/etcpak/patches/0001-remove-bc7enc.patch
vendored
Normal file
52
thirdparty/etcpak/patches/0001-remove-bc7enc.patch
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
diff --git a/thirdparty/etcpak/ProcessDxtc.cpp b/thirdparty/etcpak/ProcessDxtc.cpp
|
||||
index 5373b75cdc..e1bc6a5cb6 100644
|
||||
--- a/thirdparty/etcpak/ProcessDxtc.cpp
|
||||
+++ b/thirdparty/etcpak/ProcessDxtc.cpp
|
||||
@@ -1,4 +1,3 @@
|
||||
-#include "bc7enc.h"
|
||||
#include "Dither.hpp"
|
||||
#include "ForceInline.hpp"
|
||||
#include "ProcessDxtc.hpp"
|
||||
@@ -1085,29 +1084,3 @@ void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t wi
|
||||
#endif
|
||||
} while( --blocks );
|
||||
}
|
||||
-
|
||||
-void CompressBc7( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, const bc7enc_compress_block_params* params )
|
||||
-{
|
||||
- int i = 0;
|
||||
- auto ptr = dst;
|
||||
- do
|
||||
- {
|
||||
- uint32_t rgba[4*4];
|
||||
-
|
||||
- auto tmp = (char*)rgba;
|
||||
- memcpy( tmp, src + width * 0, 4*4 );
|
||||
- memcpy( tmp + 4*4, src + width * 1, 4*4 );
|
||||
- memcpy( tmp + 8*4, src + width * 2, 4*4 );
|
||||
- memcpy( tmp + 12*4, src + width * 3, 4*4 );
|
||||
- src += 4;
|
||||
- if( ++i == width/4 )
|
||||
- {
|
||||
- src += width * 3;
|
||||
- i = 0;
|
||||
- }
|
||||
-
|
||||
- bc7enc_compress_block( ptr, rgba, params );
|
||||
- ptr += 2;
|
||||
- }
|
||||
- while( --blocks );
|
||||
-}
|
||||
diff --git a/thirdparty/etcpak/ProcessDxtc.hpp b/thirdparty/etcpak/ProcessDxtc.hpp
|
||||
index 7655bb33be..8145493872 100644
|
||||
--- a/thirdparty/etcpak/ProcessDxtc.hpp
|
||||
+++ b/thirdparty/etcpak/ProcessDxtc.hpp
|
||||
@@ -11,8 +11,4 @@ void CompressBc3( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t wi
|
||||
void CompressBc4( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
void CompressBc5( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width );
|
||||
|
||||
-struct bc7enc_compress_block_params;
|
||||
-
|
||||
-void CompressBc7( const uint32_t* src, uint64_t* dst, uint32_t blocks, size_t width, const bc7enc_compress_block_params* params );
|
||||
-
|
||||
#endif
|
Reference in New Issue
Block a user