initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
429
thirdparty/amd-fsr2/shaders/ffx_common_types.h
vendored
Normal file
429
thirdparty/amd-fsr2/shaders/ffx_common_types.h
vendored
Normal file
@@ -0,0 +1,429 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
#ifndef FFX_COMMON_TYPES_H
|
||||
#define FFX_COMMON_TYPES_H
|
||||
|
||||
// Parameter-direction qualifiers for function signatures shared between CPU and shader code.
// CPU builds expand them to nothing; HLSL and GLSL builds expand them to the shading
// language keywords in / out / inout. FFX_CPU takes precedence when several are defined.
#if defined(FFX_CPU)
#define FFX_PARAMETER_IN
#define FFX_PARAMETER_OUT
#define FFX_PARAMETER_INOUT
#elif defined(FFX_HLSL) || defined(FFX_GLSL)
#define FFX_PARAMETER_IN    in
#define FFX_PARAMETER_OUT   out
#define FFX_PARAMETER_INOUT inout
#endif // FFX_CPU / FFX_HLSL / FFX_GLSL
|
||||
|
||||
#if defined(FFX_CPU)
|
||||
/// Boolean type used by CPU-side code.
///
/// @ingroup CPU
typedef bool FfxBoolean;

/// An unsigned 8bit integer.
///
/// @ingroup CPU
typedef uint8_t FfxUInt8;

/// An unsigned 16bit integer.
///
/// @ingroup CPU
typedef uint16_t FfxUInt16;

/// An unsigned 32bit integer.
///
/// @ingroup CPU
typedef uint32_t FfxUInt32;

/// An unsigned 64bit integer.
///
/// @ingroup CPU
typedef uint64_t FfxUInt64;

/// A signed 8bit integer.
///
/// @ingroup CPU
typedef int8_t FfxInt8;

/// A signed 16bit integer.
///
/// @ingroup CPU
typedef int16_t FfxInt16;

/// A signed 32bit integer.
///
/// @ingroup CPU
typedef int32_t FfxInt32;

/// A signed 64bit integer.
///
/// @ingroup CPU
typedef int64_t FfxInt64;

/// A single precision floating point value.
///
/// @ingroup CPU
typedef float FfxFloat32;

/// A 2-component floating point vector, stored as a plain array of FfxFloat32.
///
/// @ingroup CPU
typedef FfxFloat32 FfxFloat32x2[2];

/// A 3-component floating point vector, stored as a plain array of FfxFloat32.
///
/// @ingroup CPU
typedef FfxFloat32 FfxFloat32x3[3];

/// A 4-component floating point vector, stored as a plain array of FfxFloat32.
///
/// @ingroup CPU
typedef FfxFloat32 FfxFloat32x4[4];

/// A 2-component unsigned 32bit integer vector, stored as a plain array of FfxUInt32.
///
/// @ingroup CPU
typedef FfxUInt32 FfxUInt32x2[2];

/// A 3-component unsigned 32bit integer vector, stored as a plain array of FfxUInt32.
///
/// @ingroup CPU
typedef FfxUInt32 FfxUInt32x3[3];

/// A 4-component unsigned 32bit integer vector, stored as a plain array of FfxUInt32.
///
/// @ingroup CPU
typedef FfxUInt32 FfxUInt32x4[4];
|
||||
#endif // #if defined(FFX_CPU)
|
||||
|
||||
#if defined(FFX_HLSL)
/// Boolean type for HLSL shader code.
///
/// @ingroup GPU
typedef bool FfxBoolean;

#if FFX_HLSL_6_2

// FFX_HLSL_6_2 path: every alias maps onto an explicitly sized HLSL type name.

/// 32bit floating point scalar and vectors.
///
/// @ingroup GPU
typedef float32_t  FfxFloat32;
typedef float32_t2 FfxFloat32x2;
typedef float32_t3 FfxFloat32x3;
typedef float32_t4 FfxFloat32x4;

/// Unsigned 32bit integer scalar and vectors.
///
/// @ingroup GPU
typedef uint32_t  FfxUInt32;
typedef uint32_t2 FfxUInt32x2;
typedef uint32_t3 FfxUInt32x3;
typedef uint32_t4 FfxUInt32x4;

/// Signed 32bit integer scalar and vectors.
///
/// @ingroup GPU
typedef int32_t  FfxInt32;
typedef int32_t2 FfxInt32x2;
typedef int32_t3 FfxInt32x3;
typedef int32_t4 FfxInt32x4;

#if FFX_HALF
/// 16bit floating point scalar and vectors.
///
/// @ingroup GPU
typedef float16_t  FfxFloat16;
typedef float16_t2 FfxFloat16x2;
typedef float16_t3 FfxFloat16x3;
typedef float16_t4 FfxFloat16x4;

/// Unsigned 16bit integer scalar and vectors.
///
/// @ingroup GPU
typedef uint16_t  FfxUInt16;
typedef uint16_t2 FfxUInt16x2;
typedef uint16_t3 FfxUInt16x3;
typedef uint16_t4 FfxUInt16x4;

/// Signed 16bit integer scalar and vectors.
///
/// @ingroup GPU
typedef int16_t  FfxInt16;
typedef int16_t2 FfxInt16x2;
typedef int16_t3 FfxInt16x3;
typedef int16_t4 FfxInt16x4;
#endif // FFX_HALF

#else // FFX_HLSL_6_2

// Path without FFX_HLSL_6_2: the 32bit float aliases are macros here, while the
// integer aliases remain typedefs of the built-in HLSL types.
#define FfxFloat32   float
#define FfxFloat32x2 float2
#define FfxFloat32x3 float3
#define FfxFloat32x4 float4

/// Unsigned 32bit integer scalar and vectors.
///
/// @ingroup GPU
typedef uint  FfxUInt32;
typedef uint2 FfxUInt32x2;
typedef uint3 FfxUInt32x3;
typedef uint4 FfxUInt32x4;

/// Signed 32bit integer scalar and vectors.
///
/// @ingroup GPU
typedef int  FfxInt32;
typedef int2 FfxInt32x2;
typedef int3 FfxInt32x3;
typedef int4 FfxInt32x4;

#if FFX_HALF
/// Half precision aliases built on the min16float types.
///
/// @ingroup GPU
typedef min16float  FfxFloat16;
typedef min16float2 FfxFloat16x2;
typedef min16float3 FfxFloat16x3;
typedef min16float4 FfxFloat16x4;

/// Unsigned 16bit aliases built on the min16uint types.
///
/// @ingroup GPU
typedef min16uint  FfxUInt16;
typedef min16uint2 FfxUInt16x2;
typedef min16uint3 FfxUInt16x3;
typedef min16uint4 FfxUInt16x4;

/// Signed 16bit aliases built on the min16int types.
///
/// @ingroup GPU
typedef min16int  FfxInt16;
typedef min16int2 FfxInt16x2;
typedef min16int3 FfxInt16x3;
typedef min16int4 FfxInt16x4;
#endif // FFX_HALF

#endif // FFX_HLSL_6_2
#endif // #if defined(FFX_HLSL)
|
||||
|
||||
#if defined(FFX_GLSL)
// GLSL has no typedef, so every Ffx* alias is a macro that expands to the
// corresponding built-in GLSL type name.

/// Boolean alias.
///
/// @ingroup GPU
#define FfxBoolean   bool

// 32bit floating point scalar and vectors.
#define FfxFloat32   float
#define FfxFloat32x2 vec2
#define FfxFloat32x3 vec3
#define FfxFloat32x4 vec4

// Unsigned 32bit integer scalar and vectors.
#define FfxUInt32    uint
#define FfxUInt32x2  uvec2
#define FfxUInt32x3  uvec3
#define FfxUInt32x4  uvec4

// Signed 32bit integer scalar and vectors.
#define FfxInt32     int
#define FfxInt32x2   ivec2
#define FfxInt32x3   ivec3
#define FfxInt32x4   ivec4

#if FFX_HALF
// 16bit floating point scalar and vectors.
#define FfxFloat16   float16_t
#define FfxFloat16x2 f16vec2
#define FfxFloat16x3 f16vec3
#define FfxFloat16x4 f16vec4

// Unsigned 16bit integer scalar and vectors.
#define FfxUInt16    uint16_t
#define FfxUInt16x2  u16vec2
#define FfxUInt16x3  u16vec3
#define FfxUInt16x4  u16vec4

// Signed 16bit integer scalar and vectors.
#define FfxInt16     int16_t
#define FfxInt16x2   i16vec2
#define FfxInt16x3   i16vec3
#define FfxInt16x4   i16vec4
#endif // FFX_HALF
#endif // #if defined(FFX_GLSL)
|
||||
|
||||
// Global toggles:
// #define FFX_HALF (1)
// #define FFX_HLSL_6_2 (1)

// Helper macros that declare a scalar, vector or matrix alias named Alias.
//   FFX_HALF && FFX_HLSL_6_2 : component type is Base##16_t (e.g. float16_t).
//   FFX_HALF only            : component type is min16##Base (e.g. min16float);
//                              the FFX_16BIT_* helpers forward to FFX_MIN16_*.
//   otherwise                : component type is Base itself.
// Every expansion already ends with ';'.
#if FFX_HALF

#if FFX_HLSL_6_2

#define FFX_MIN16_SCALAR( Alias, Base ) typedef Base##16_t Alias;
#define FFX_MIN16_VECTOR( Alias, Base, Cols ) typedef vector<Base##16_t, Cols> Alias;
#define FFX_MIN16_MATRIX( Alias, Base, Rows, Cols ) typedef matrix<Base##16_t, Rows, Cols> Alias;

#define FFX_16BIT_SCALAR( Alias, Base ) typedef Base##16_t Alias;
#define FFX_16BIT_VECTOR( Alias, Base, Cols ) typedef vector<Base##16_t, Cols> Alias;
#define FFX_16BIT_MATRIX( Alias, Base, Rows, Cols ) typedef matrix<Base##16_t, Rows, Cols> Alias;

#else // FFX_HLSL_6_2

#define FFX_MIN16_SCALAR( Alias, Base ) typedef min16##Base Alias;
#define FFX_MIN16_VECTOR( Alias, Base, Cols ) typedef vector<min16##Base, Cols> Alias;
#define FFX_MIN16_MATRIX( Alias, Base, Rows, Cols ) typedef matrix<min16##Base, Rows, Cols> Alias;

#define FFX_16BIT_SCALAR( Alias, Base ) FFX_MIN16_SCALAR( Alias, Base );
#define FFX_16BIT_VECTOR( Alias, Base, Cols ) FFX_MIN16_VECTOR( Alias, Base, Cols );
#define FFX_16BIT_MATRIX( Alias, Base, Rows, Cols ) FFX_MIN16_MATRIX( Alias, Base, Rows, Cols );

#endif // FFX_HLSL_6_2

#else // FFX_HALF

#define FFX_MIN16_SCALAR( Alias, Base ) typedef Base Alias;
#define FFX_MIN16_VECTOR( Alias, Base, Cols ) typedef vector<Base, Cols> Alias;
#define FFX_MIN16_MATRIX( Alias, Base, Rows, Cols ) typedef matrix<Base, Rows, Cols> Alias;

#define FFX_16BIT_SCALAR( Alias, Base ) typedef Base Alias;
#define FFX_16BIT_VECTOR( Alias, Base, Cols ) typedef vector<Base, Cols> Alias;
#define FFX_16BIT_MATRIX( Alias, Base, Rows, Cols ) typedef matrix<Base, Rows, Cols> Alias;

#endif // FFX_HALF
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
// Common typedefs:
|
||||
#if defined(FFX_HLSL)
|
||||
// FFX_MIN16_* aliases, declared through the FFX_MIN16_SCALAR / FFX_MIN16_VECTOR helpers.
FFX_MIN16_SCALAR(FFX_MIN16_F,  float);
FFX_MIN16_VECTOR(FFX_MIN16_F2, float, 2);
FFX_MIN16_VECTOR(FFX_MIN16_F3, float, 3);
FFX_MIN16_VECTOR(FFX_MIN16_F4, float, 4);

FFX_MIN16_SCALAR(FFX_MIN16_I,  int);
FFX_MIN16_VECTOR(FFX_MIN16_I2, int, 2);
FFX_MIN16_VECTOR(FFX_MIN16_I3, int, 3);
FFX_MIN16_VECTOR(FFX_MIN16_I4, int, 4);

FFX_MIN16_SCALAR(FFX_MIN16_U,  uint);
FFX_MIN16_VECTOR(FFX_MIN16_U2, uint, 2);
FFX_MIN16_VECTOR(FFX_MIN16_U3, uint, 3);
FFX_MIN16_VECTOR(FFX_MIN16_U4, uint, 4);

// FFX_*16_t aliases, declared through the FFX_16BIT_SCALAR / FFX_16BIT_VECTOR helpers.
FFX_16BIT_SCALAR(FFX_F16_t,  float);
FFX_16BIT_VECTOR(FFX_F16_t2, float, 2);
FFX_16BIT_VECTOR(FFX_F16_t3, float, 3);
FFX_16BIT_VECTOR(FFX_F16_t4, float, 4);

FFX_16BIT_SCALAR(FFX_I16_t,  int);
FFX_16BIT_VECTOR(FFX_I16_t2, int, 2);
FFX_16BIT_VECTOR(FFX_I16_t3, int, 3);
FFX_16BIT_VECTOR(FFX_I16_t4, int, 4);

FFX_16BIT_SCALAR(FFX_U16_t,  uint);
FFX_16BIT_VECTOR(FFX_U16_t2, uint, 2);
FFX_16BIT_VECTOR(FFX_U16_t3, uint, 3);
FFX_16BIT_VECTOR(FFX_U16_t4, uint, 4);
|
||||
|
||||
// Declares Prefix_F/_F2/_F3/_F4, Prefix_I/_I2/_I3/_I4 and Prefix_U/_U2/_U3/_U4
// as aliases of the matching FFX_MIN16_* types.
#define TYPEDEF_MIN16_TYPES(Prefix)      \
    typedef FFX_MIN16_F  Prefix##_F;     \
    typedef FFX_MIN16_F2 Prefix##_F2;    \
    typedef FFX_MIN16_F3 Prefix##_F3;    \
    typedef FFX_MIN16_F4 Prefix##_F4;    \
    typedef FFX_MIN16_I  Prefix##_I;     \
    typedef FFX_MIN16_I2 Prefix##_I2;    \
    typedef FFX_MIN16_I3 Prefix##_I3;    \
    typedef FFX_MIN16_I4 Prefix##_I4;    \
    typedef FFX_MIN16_U  Prefix##_U;     \
    typedef FFX_MIN16_U2 Prefix##_U2;    \
    typedef FFX_MIN16_U3 Prefix##_U3;    \
    typedef FFX_MIN16_U4 Prefix##_U4;
|
||||
|
||||
// Declares Prefix_F/_F2/_F3/_F4, Prefix_I/_I2/_I3/_I4 and Prefix_U/_U2/_U3/_U4
// as aliases of the 16bit types of the HLSL path.
//
// The HLSL path names those types FFX_F16_t*, FFX_I16_t* and FFX_U16_t* (they are
// declared with the FFX_16BIT_SCALAR / FFX_16BIT_VECTOR helpers). The FFX_16BIT_F*
// style names only exist as GLSL macros, so referring to them here made every
// expansion of this macro fail to compile.
#define TYPEDEF_16BIT_TYPES(Prefix)    \
    typedef FFX_F16_t  Prefix##_F;     \
    typedef FFX_F16_t2 Prefix##_F2;    \
    typedef FFX_F16_t3 Prefix##_F3;    \
    typedef FFX_F16_t4 Prefix##_F4;    \
    typedef FFX_I16_t  Prefix##_I;     \
    typedef FFX_I16_t2 Prefix##_I2;    \
    typedef FFX_I16_t3 Prefix##_I3;    \
    typedef FFX_I16_t4 Prefix##_I4;    \
    typedef FFX_U16_t  Prefix##_U;     \
    typedef FFX_U16_t2 Prefix##_U2;    \
    typedef FFX_U16_t3 Prefix##_U3;    \
    typedef FFX_U16_t4 Prefix##_U4;
|
||||
|
||||
// Declares Prefix_F/_F2/_F3/_F4, Prefix_I/_I2/_I3/_I4 and Prefix_U/_U2/_U3/_U4
// as aliases of the 32bit FfxFloat32*, FfxInt32* and FfxUInt32* types.
#define TYPEDEF_FULL_PRECISION_TYPES(Prefix)    \
    typedef FfxFloat32   Prefix##_F;            \
    typedef FfxFloat32x2 Prefix##_F2;           \
    typedef FfxFloat32x3 Prefix##_F3;           \
    typedef FfxFloat32x4 Prefix##_F4;           \
    typedef FfxInt32     Prefix##_I;            \
    typedef FfxInt32x2   Prefix##_I2;           \
    typedef FfxInt32x3   Prefix##_I3;           \
    typedef FfxInt32x4   Prefix##_I4;           \
    typedef FfxUInt32    Prefix##_U;            \
    typedef FfxUInt32x2  Prefix##_U2;           \
    typedef FfxUInt32x3  Prefix##_U3;           \
    typedef FfxUInt32x4  Prefix##_U4;
|
||||
#endif // #if defined(FFX_HLSL)
|
||||
|
||||
#if defined(FFX_GLSL)

// FFX_MIN16_* aliases: the 16bit GLSL types when FFX_HALF is enabled,
// the regular 32bit types otherwise.
#if FFX_HALF

#define FFX_MIN16_F  float16_t
#define FFX_MIN16_F2 f16vec2
#define FFX_MIN16_F3 f16vec3
#define FFX_MIN16_F4 f16vec4

#define FFX_MIN16_I  int16_t
#define FFX_MIN16_I2 i16vec2
#define FFX_MIN16_I3 i16vec3
#define FFX_MIN16_I4 i16vec4

#define FFX_MIN16_U  uint16_t
#define FFX_MIN16_U2 u16vec2
#define FFX_MIN16_U3 u16vec3
#define FFX_MIN16_U4 u16vec4

#else // FFX_HALF

#define FFX_MIN16_F  float
#define FFX_MIN16_F2 vec2
#define FFX_MIN16_F3 vec3
#define FFX_MIN16_F4 vec4

#define FFX_MIN16_I  int
#define FFX_MIN16_I2 ivec2
#define FFX_MIN16_I3 ivec3
#define FFX_MIN16_I4 ivec4

#define FFX_MIN16_U  uint
#define FFX_MIN16_U2 uvec2
#define FFX_MIN16_U3 uvec3
#define FFX_MIN16_U4 uvec4

#endif // FFX_HALF

// In both configurations each FFX_16BIT_* alias names the same type as its
// FFX_MIN16_* counterpart, so it simply forwards to it.
#define FFX_16BIT_F  FFX_MIN16_F
#define FFX_16BIT_F2 FFX_MIN16_F2
#define FFX_16BIT_F3 FFX_MIN16_F3
#define FFX_16BIT_F4 FFX_MIN16_F4

#define FFX_16BIT_I  FFX_MIN16_I
#define FFX_16BIT_I2 FFX_MIN16_I2
#define FFX_16BIT_I3 FFX_MIN16_I3
#define FFX_16BIT_I4 FFX_MIN16_I4

#define FFX_16BIT_U  FFX_MIN16_U
#define FFX_16BIT_U2 FFX_MIN16_U2
#define FFX_16BIT_U3 FFX_MIN16_U3
#define FFX_16BIT_U4 FFX_MIN16_U4

#endif // #if defined(FFX_GLSL)
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
#endif // #ifndef FFX_COMMON_TYPES_H
|
52
thirdparty/amd-fsr2/shaders/ffx_core.h
vendored
Normal file
52
thirdparty/amd-fsr2/shaders/ffx_core.h
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/// @defgroup Core
|
||||
/// @defgroup HLSL
|
||||
/// @defgroup GLSL
|
||||
/// @defgroup GPU
|
||||
/// @defgroup CPU
|
||||
/// @defgroup CAS
|
||||
/// @defgroup FSR1
|
||||
|
||||
#if !defined(FFX_CORE_H)
|
||||
#define FFX_CORE_H
|
||||
|
||||
#include "ffx_common_types.h"
|
||||
|
||||
#if defined(FFX_CPU)
|
||||
#include "ffx_core_cpu.h"
|
||||
#endif // #if defined(FFX_CPU)
|
||||
|
||||
#if defined(FFX_GLSL) && defined(FFX_GPU)
|
||||
#include "ffx_core_glsl.h"
|
||||
#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_HLSL) && defined(FFX_GPU)
|
||||
#include "ffx_core_hlsl.h"
|
||||
#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#include "ffx_core_gpu_common.h"
|
||||
#include "ffx_core_gpu_common_half.h"
|
||||
#include "ffx_core_portability.h"
|
||||
#endif // #if defined(FFX_GPU)
|
||||
#endif // #if !defined(FFX_CORE_H)
|
332
thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
vendored
Normal file
332
thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
vendored
Normal file
@@ -0,0 +1,332 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/// The value used for "true" in boolean expressions.
///
/// @ingroup CPU
#define FFX_TRUE (1)

/// The value used for "false" in boolean expressions.
///
/// @ingroup CPU
#define FFX_FALSE (0)

#ifndef FFX_STATIC
/// Storage-class prefix applied to the helper functions and variables in this header.
/// Defaults to <c>static</c>; define FFX_STATIC before inclusion to override it.
///
/// @ingroup CPU
#define FFX_STATIC static
#endif // FFX_STATIC

// Silence clang's unused-variable diagnostic.
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-variable"
#endif
|
||||
|
||||
/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer.
|
||||
///
|
||||
/// @param [in] x A 32bit floating value.
|
||||
///
|
||||
/// @returns
|
||||
/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
|
||||
{
|
||||
union
|
||||
{
|
||||
FfxFloat32 f;
|
||||
FfxUInt32 u;
|
||||
} bits;
|
||||
|
||||
bits.f = x;
|
||||
return bits.u;
|
||||
}
|
||||
|
||||
/// Compute the dot product of two 2-component vectors.
///
/// @param [in] a                   The first vector.
/// @param [in] b                   The second vector.
///
/// @returns
/// The sum of the component-wise products of <c><i>a</i></c> and <c><i>b</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
{
    return (a[0] * b[0]) + (a[1] * b[1]);
}
|
||||
|
||||
/// Compute the dot product of two 3-component vectors.
///
/// @param [in] a                   The first vector.
/// @param [in] b                   The second vector.
///
/// @returns
/// The sum of the component-wise products of <c><i>a</i></c> and <c><i>b</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
{
    return ((a[0] * b[0]) + (a[1] * b[1])) + (a[2] * b[2]);
}
|
||||
|
||||
/// Compute the dot product of two 4-component vectors.
///
/// @param [in] a                   The first vector.
/// @param [in] b                   The second vector.
///
/// @returns
/// The sum of the component-wise products of <c><i>a</i></c> and <c><i>b</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
{
    return (((a[0] * b[0]) + (a[1] * b[1])) + (a[2] * b[2])) + (a[3] * b[3]);
}
|
||||
|
||||
/// Compute the linear interopation between two values.
|
||||
///
|
||||
/// Implemented by calling the GLSL <c><i>mix</i></c> instrinsic function. Implements the
|
||||
/// following math:
|
||||
///
|
||||
/// (1 - t) * x + t * y
|
||||
///
|
||||
/// @param [in] x The first value to lerp between.
|
||||
/// @param [in] y The second value to lerp between.
|
||||
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
|
||||
///
|
||||
/// @returns
|
||||
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
|
||||
{
|
||||
return y * t + (-x * t + x);
|
||||
}
|
||||
|
||||
/// Compute the reciprocal of a value.
|
||||
///
|
||||
/// @param [in] x The value to compute the reciprocal for.
|
||||
///
|
||||
/// @returns
|
||||
/// The reciprocal value of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
|
||||
{
|
||||
return 1.0f / a;
|
||||
}
|
||||
|
||||
/// Compute the square root of a value.
|
||||
///
|
||||
/// @param [in] x The first value to compute the min of.
|
||||
///
|
||||
/// @returns
|
||||
/// The the square root of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
|
||||
{
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
/// Right-shift a value using signed arithmetic.
///
/// Both operands are converted to FfxInt32, the shift is performed on the signed
/// values, and the result is converted back to FfxUInt32.
/// NOTE(review): presumably intended as an arithmetic (sign-propagating) shift; the
/// language leaves the result for negative left operands implementation-defined - confirm.
///
/// @param [in] a                   The value to shift.
/// @param [in] b                   The number of bits to shift by.
///
/// @returns
/// <c><i>a</i></c> shifted right by <c><i>b</i></c> bits.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    return (FfxUInt32)((FfxInt32)a >> (FfxInt32)b);
}
|
||||
|
||||
/// Compute the factional part of a decimal value.
|
||||
///
|
||||
/// This function calculates <c><i>x - floor(x)</i></c>.
|
||||
///
|
||||
/// @param [in] x The value to compute the fractional part from.
|
||||
///
|
||||
/// @returns
|
||||
/// The fractional part of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
|
||||
{
|
||||
return a - floor(a);
|
||||
}
|
||||
|
||||
/// Compute the reciprocal square root of a value.
|
||||
///
|
||||
/// @param [in] x The value to compute the reciprocal for.
|
||||
///
|
||||
/// @returns
|
||||
/// The reciprocal square root value of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
|
||||
{
|
||||
return ffxReciprocal(ffxSqrt(a));
|
||||
}
|
||||
|
||||
/// Select the smaller of two floating point values.
///
/// <c><i>y</i></c> is returned whenever the comparison <c><i>x &lt; y</i></c> is false.
///
/// @param [in] x                   The first value.
/// @param [in] y                   The second value.
///
/// @returns
/// <c><i>x</i></c> if it compares less than <c><i>y</i></c>, otherwise <c><i>y</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
{
    if (x < y)
    {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Select the smaller of two unsigned 32bit integers.
///
/// @param [in] x                   The first value.
/// @param [in] y                   The second value.
///
/// @returns
/// <c><i>x</i></c> if it is less than <c><i>y</i></c>, otherwise <c><i>y</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
{
    if (x < y)
    {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Select the larger of two floating point values.
///
/// <c><i>y</i></c> is returned whenever the comparison <c><i>x &gt; y</i></c> is false.
///
/// @param [in] x                   The first value.
/// @param [in] y                   The second value.
///
/// @returns
/// <c><i>x</i></c> if it compares greater than <c><i>y</i></c>, otherwise <c><i>y</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
{
    if (x > y)
    {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Select the larger of two unsigned 32bit integers.
///
/// @param [in] x                   The first value.
/// @param [in] y                   The second value.
///
/// @returns
/// <c><i>x</i></c> if it is greater than <c><i>y</i></c>, otherwise <c><i>y</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
{
    if (x > y)
    {
        return x;
    }
    return y;
}
|
||||
|
||||
/// Clamp a value to a [0..1] range.
|
||||
///
|
||||
/// @param [in] x The value to clamp to [0..1] range.
|
||||
///
|
||||
/// @returns
|
||||
/// The clamped version of <c><i>x</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
|
||||
{
|
||||
return ffxMin(1.0f, ffxMax(0.0f, a));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Add a scalar to every component of a 3-component vector.
///
/// @param [out] d                  Receives <c><i>a[i] + b</i></c> for each of the three components.
/// @param [in] a                   The source vector.
/// @param [in] b                   The scalar added to each component.
///
/// @ingroup CPU
FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] + b;
    }
}
|
||||
|
||||
/// Copy a 3-component vector.
///
/// @param [out] d                  Receives the three components of <c><i>a</i></c>.
/// @param [in] a                   The source vector.
///
/// @ingroup CPU
FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i];
    }
}
|
||||
|
||||
/// Multiply two 3-component vectors component by component.
///
/// @param [out] d                  Receives <c><i>a[i] * b[i]</i></c> for each of the three components.
/// @param [in] a                   The first source vector.
/// @param [in] b                   The second source vector.
///
/// @ingroup CPU
FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] * b[i];
    }
}
|
||||
|
||||
/// Multiply every component of a 3-component vector by a scalar.
///
/// @param [out] d                  Receives <c><i>a[i] * b</i></c> for each of the three components.
/// @param [in] a                   The source vector.
/// @param [in] b                   The scalar each component is multiplied by.
///
/// @ingroup CPU
FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] * b;
    }
}
|
||||
|
||||
/// Compute the reciprocal of every component of a 3-component vector.
///
/// @param [out] d                  Receives <c><i>ffxReciprocal(a[i])</i></c> for each of the three components.
/// @param [in] a                   The source vector.
///
/// @ingroup CPU
FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    for (int i = 0; i < 3; ++i)
    {
        d[i] = ffxReciprocal(a[i]);
    }
}
|
||||
|
||||
/// Convert FfxFloat32 to half (in lower 16-bits of output).
|
||||
///
|
||||
/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
|
||||
///
|
||||
/// The function supports denormals.
|
||||
///
|
||||
/// Some conversion rules are to make computations possibly "safer" on the GPU,
|
||||
/// -INF & -NaN -> -65504
|
||||
/// +INF & +NaN -> +65504
|
||||
///
|
||||
/// @param [in] f The 32bit floating point value to convert.
|
||||
///
|
||||
/// @returns
|
||||
/// The closest 16bit floating point value to <c><i>f</i></c>.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
|
||||
{
|
||||
static FfxUInt16 base[512] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
|
||||
0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
|
||||
0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
|
||||
0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
|
||||
0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
|
||||
};
|
||||
|
||||
static FfxUInt8 shift[512] = {
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
FfxFloat32 f;
|
||||
FfxUInt32 u;
|
||||
} bits;
|
||||
|
||||
bits.f = f;
|
||||
FfxUInt32 u = bits.u;
|
||||
FfxUInt32 i = u >> 23;
|
||||
return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
|
||||
}
|
||||
|
||||
/// Pack 2x32-bit floating point values in a single 32bit value.
|
||||
///
|
||||
/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
|
||||
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
|
||||
/// 32bit unsigned integer respectively.
|
||||
///
|
||||
/// @param [in] value A 2-dimensional floating point value to convert and pack.
|
||||
///
|
||||
/// @returns
|
||||
/// A packed 32bit value containing 2 16bit floating point values.
|
||||
///
|
||||
/// @ingroup CPU
|
||||
FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
|
||||
{
|
||||
return f32tof16(a[0]) + (f32tof16(a[1]) << 16);
|
||||
}
|
1669
thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
vendored
Normal file
1669
thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2784
thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
vendored
Normal file
2784
thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2978
thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
vendored
Normal file
2978
thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1502
thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
vendored
Normal file
1502
thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
50
thirdparty/amd-fsr2/shaders/ffx_core_portability.h
vendored
Normal file
50
thirdparty/amd-fsr2/shaders/ffx_core_portability.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// Returns a + ffxBroadcast3(b).
// `d` is received by value, so the result only reaches the caller through the return value.
FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    return a + ffxBroadcast3(b);
}
|
||||
|
||||
// Returns a copy of a.
// `d` is received by value, so the result only reaches the caller through the return value.
FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    return a;
}
|
||||
|
||||
// Returns the product a * b.
// `d` is received by value, so the result only reaches the caller through the return value.
FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
    return a * b;
}
|
||||
|
||||
// Returns a * ffxBroadcast3(b).
// `d` is received by value, so the result only reaches the caller through the return value.
FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    return a * ffxBroadcast3(b);
}
|
||||
|
||||
// Returns rcp(a).
// `d` is received by value, so the result only reaches the caller through the return value.
FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    return rcp(a);
}
|
1250
thirdparty/amd-fsr2/shaders/ffx_fsr1.h
vendored
Normal file
1250
thirdparty/amd-fsr2/shaders/ffx_fsr1.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
295
thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
vendored
Normal file
295
thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
vendored
Normal file
@@ -0,0 +1,295 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_ACCUMULATE_H
|
||||
#define FFX_FSR2_ACCUMULATE_H
|
||||
|
||||
// Returns the length of the motion vector after it has been multiplied
// component-wise by DisplaySize().
FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
{
    const FfxFloat32x2 fScaledMotion = fMotionVector * DisplaySize();
    return length(fScaledMotion);
}
|
||||
#if FFX_HALF
// FFX_MIN16_F2 overload of GetPxHrVelocity, compiled only when FFX_HALF is non-zero.
// DisplaySize() is converted to FFX_MIN16_F2 before the component-wise multiply.
FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
{
    const FFX_MIN16_F2 fDisplaySize = FFX_MIN16_F2(DisplaySize());
    return length(fMotionVector * fDisplaySize);
}
#endif
|
||||
|
||||
// Blends the upsampled color into the history color and leaves the result, converted
// with YCoCgToRGB, in fHistoryColor.
//
// The blend factor is weight / max(FSR2_EPSILON, fAccumulation + weight), where weight is
// fUpsampledColorAndWeight.w. With FFX_FSR2_OPTION_HDR_COLOR_INPUT both colors are tonemapped
// before the blend and the result is inverse-tonemapped afterwards.
// fAccumulation and fUpsampledColorAndWeight are inputs only; `params` is not read here.
void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
{
    const FfxFloat32x3 fUpsampledWeight = fUpsampledColorAndWeight.www;

    // Avoid invalid values when accumulation and upsampled weight are both 0
    const FfxFloat32x3 fTotalWeight = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledWeight);

    FfxFloat32x3 fUpsampledColor = fUpsampledColorAndWeight.xyz;

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    // YCoCg -> RGB -> Tonemap -> YCoCg (the tonemapper runs on RGB to avoid color desaturation)
    fUpsampledColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColor)));
    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
#endif

    const FfxFloat32x3 fAlpha = fUpsampledWeight / fTotalWeight;
    fHistoryColor = YCoCgToRGB(ffxLerp(fHistoryColor, fUpsampledColor, fAlpha));

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fHistoryColor = InverseTonemap(fHistoryColor);
#endif
}
|
||||
|
||||
// Pulls fHistoryColor towards a clipping box built from clippingBox and reduces
// fAccumulation accordingly.
//
// The box is boxCenter +/- boxVec * fBoxScale, additionally limited to [aabbMin, aabbMax].
// fBoxScale is interpolated between a downscale-factor dependent value and 1.0 using the
// largest of the depth clip factor, the accumulation mask and a velocity factor.
// Nothing is modified when the history color already lies inside the box.
// fTemporalReactiveFactor is part of the signature but is not read by this function.
void RectifyHistory(
    const AccumulationPassCommonParams params,
    RectificationBox clippingBox,
    FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
    FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
    FfxFloat32 fLockContributionThisFrame,
    FfxFloat32 fTemporalReactiveFactor,
    FfxFloat32 fLumaInstabilityFactor)
{
    const FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));

    const FfxFloat32 fVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
    const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVelocityFactor));
    const FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);

    // Scaled box, never larger than the AABB carried by the clipping box.
    const FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
    const FfxFloat32x3 boxMin = ffxMax(clippingBox.aabbMin, clippingBox.boxCenter - fScaledBoxVec);
    const FfxFloat32x3 boxMax = ffxMin(clippingBox.aabbMax, clippingBox.boxCenter + fScaledBoxVec);

    if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) {

        const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);

        // How much of the unclamped history survives: the larger of the luma instability and
        // lock contributions, attenuated by the dilated reactive factor.
        const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(params.fDilatedReactiveFactor, 1.0f / 2.0f);
        const FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx * fReactiveContribution;
        const FfxFloat32x3 fBlend = ffxSaturate(fHistoryContribution);

        // Scale history color using rectification info
        fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, fBlend);

        // Scale accumulation using rectification info
        const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f));
        fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, fBlend);
    }
}
|
||||
|
||||
// Forwards the upscaled color for pixel iPxHrPos to StoreUpscaledOutput().
void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
|
||||
{
|
||||
StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
|
||||
}
|
||||
|
||||
// Updates the lock status of the current pixel and stores it with StoreLockStatus().
//
// The lock is killed when the UV estimated for the next frame falls outside the screen;
// otherwise the remaining lifetime is reduced by
// fUpsampledWeight / (JitterSequenceLength() * fAverageLanczosWeightPerFrame), never below 0.
// fLockStatus is received by value; the updated status leaves only through StoreLockStatus().
void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
{
    // we expect similar motion for next frame
    const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;

    if (IsUvInside(fEstimatedUvNextFrame)) {
        // Decrease lock lifetime
        const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
        const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax);
        const FfxFloat32 fLifetimeLeft = fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease;
        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLifetimeLeft);
    }
    else {
        // kill lock if that location is outside screen, avoid locks to be clamped to screen borders
        KillLock(fLockStatus);
    }

    StoreLockStatus(params.iPxHrPos, fLockStatus);
}
|
||||
|
||||
|
||||
// Computes the base accumulation weight for the current pixel, replicated to 3 components.
//
// Starts from fMaxAccumulationLanczosWeight (zeroed for non-existing samples and attenuated by
// the reactive and depth clip factors) and then limits it twice: first towards
// fUpsampledWeight * 10 when the pixel was in motion last frame or is moving now, then towards
// fUpsampledWeight as the velocity approaches 20. `lockState` is not read here.
FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
{
    // Always assume max accumulation was reached
    const FfxFloat32 fInitialAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);

    const FfxFloat32 fMotionT = ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)));
    const FfxFloat32 fMotionLimited = ffxMin(fInitialAccumulation, ffxLerp(fInitialAccumulation, fUpsampledWeight * 10.0f, fMotionT));

    const FfxFloat32 fVelocityT = ffxSaturate(params.fHrVelocity / FfxFloat32(20));
    const FfxFloat32 fVelocityLimited = ffxMin(fMotionLimited, ffxLerp(fMotionLimited, fUpsampledWeight, fVelocityT));

    return fVelocityLimited.xxx;
}
|
||||
|
||||
// Computes a luma instability factor from a 4-entry luma history and stores the updated
// history with StoreLumaHistory().
//
// The current luma is clippingBox.boxCenter.x (compressed with x / (1 + max(0, x)) when
// FFX_FSR2_OPTION_HDR_COLOR_INPUT is set), quantized to steps of 1/255. The history is sampled
// only when the depth clip factor, the accumulation mask and fLuminanceDiff are all below 0.1
// and the sample is not new; otherwise an all-zero history is used.
// The returned factor is forced to 0 while the oldest history entry (after the shift) is 0.
FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
{
    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;

    // Indices into the luma history vector, from most recent to oldest.
    const FfxInt32 N_MINUS_1 = 0;
    const FfxInt32 N_MINUS_2 = 1;
    const FfxInt32 N_MINUS_3 = 2;
    const FfxInt32 N_MINUS_4 = 3;

    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
#endif

    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;

    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
    FfxFloat32x4 fLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);

    const FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fLumaHistory[N_MINUS_1]);
    const FfxFloat32 fAbsDiffs0 = abs(fDiffs0);

    FfxFloat32 fMin = fAbsDiffs0;
    FfxFloat32 fLumaInstability = 0.0f;

    if (fMin >= fUnormThreshold)
    {
        // Look for an older entry that differs from the current luma in the same direction
        // but by a smaller amount.
        for (int iHistory = N_MINUS_2; iHistory <= N_MINUS_4; iHistory++) {
            const FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fLumaHistory[iHistory]);

            if (sign(fDiffs0) == sign(fDiffs1)) {

                // Scale difference to protect historically similar values
                const FfxFloat32 fMinBias = 1.0f;
                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
            }
        }

        const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);

        // Non-zero only when some older entry was closer than the most recent one.
        fLumaInstability = FfxFloat32(fMin != fAbsDiffs0) * fBoxSizeFactor;
        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);

        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
    }

    // shift history, oldest entry first so nothing is overwritten before it is copied
    fLumaHistory[N_MINUS_4] = fLumaHistory[N_MINUS_3];
    fLumaHistory[N_MINUS_3] = fLumaHistory[N_MINUS_2];
    fLumaHistory[N_MINUS_2] = fLumaHistory[N_MINUS_1];
    fLumaHistory[N_MINUS_1] = fCurrentFrameLuma;

    StoreLumaHistory(params.iPxHrPos, fLumaHistory);

    return fLumaInstability * FfxFloat32(fLumaHistory[N_MINUS_4] != 0);
}
|
||||
|
||||
// Derives the temporal reactive factor to store for the next frame.
//
// The incoming factor is capped at 0.99, raised towards 0.4 with velocity, squared, and then
// floored by max(fDepthClipFactor * 0.1, fDilatedReactiveFactor). New samples always get 1.0.
// When fHrVelocity * 10 saturates to 1 the result is returned negated (with a magnitude of at
// least FSR2_EPSILON).
FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
{
    const FfxFloat32 fCappedFactor = ffxMin(0.99f, fTemporalReactiveFactor);
    const FfxFloat32 fVelocityAdjusted = ffxMax(fCappedFactor, ffxLerp(fCappedFactor, 0.4f, ffxSaturate(params.fHrVelocity)));
    const FfxFloat32 fFactorFloor = ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor);

    FfxFloat32 fNewFactor = ffxMax(fVelocityAdjusted * fVelocityAdjusted, fFactorFloor);

    // Force reactive factor for new samples
    if (params.bIsNewSample) {
        fNewFactor = 1.0f;
    }

    if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) {
        fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f;
    }

    return fNewFactor;
}
|
||||
|
||||
// Builds the AccumulationPassCommonParams for the output pixel iPxHrPos.
//
// Fills in, in this order: pixel position and UV, the jittered and clamped UV used for
// sampling, motion vector and velocity, the reprojected UV / existing-sample flag (via
// ComputeReprojectedUVs), the depth clip factor, the dilated reactive masks, and finally the
// reset-frame and new-sample flags. A sample is new when it has no existing history sample
// or when FrameIndex() is 0.
AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos)
{
    AccumulationPassCommonParams params;

    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
    params.iPxHrPos = iPxHrPos;
    params.fHrUv = fHrUv;

    const FfxFloat32x2 fJitteredLrUv = fHrUv + Jitter() / RenderSize();
    params.fLrUv_HwSampler = ClampUv(fJitteredLrUv, RenderSize(), MaxRenderSize());

    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);

    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);

    params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));

    // x holds the dilated reactive factor, y the accumulation mask.
    const FfxFloat32x2 fReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
    params.fDilatedReactiveFactor = fReactiveMasks.x;
    params.fAccumulationMask = fReactiveMasks.y;

    params.bIsResetFrame = (FrameIndex() == 0);
    params.bIsNewSample = (!params.bIsExistingSample || params.bIsResetFrame);

    return params;
}
|
||||
|
||||
// Entry point of the accumulation pass for one output pixel.
//
// Reprojects history (when available), updates the lock status, upsamples the current frame,
// rectifies and blends the history, and stores the lock status, the internal color with the
// new temporal reactive factor, and the new-locks value. The final color is also written to
// the upscaled output when FFX_FSR2_OPTION_APPLY_SHARPENING is 0.
void Accumulate(FfxInt32x2 iPxHrPos)
{
    const AccumulationPassCommonParams params = InitParams(iPxHrPos);

    // Defaults used when there is no history to reproject.
    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
    FfxFloat32 fTemporalReactiveFactor = 0.0f;
    FfxBoolean bInMotionLastFrame = FFX_FALSE;
    LockState lockState = { FFX_FALSE , FFX_FALSE };

    FfxFloat32x2 fLockStatus;
    InitializeNewLockSample(fLockStatus);

    if (params.bIsExistingSample && !params.bIsResetFrame) {
        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
    }

    const FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);

    FfxFloat32 fLuminanceDiff = 0.0f;
    FfxFloat32 fLockContributionThisFrame = 0.0f;
    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);

    // Load upsampled input color
    RectificationBox clippingBox;
    const FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);

    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);

    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);

    if (!params.bIsNewSample) {
        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);

        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
    }
    else {
        // New samples take the upsampled color directly.
        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
    }

    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());

    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);

    // Get new temporal reactive factor
    fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);

    StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));

    // Output final color when RCAS is disabled
#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
    WriteUpscaledOutput(iPxHrPos, fHistoryColor);
#endif
    StoreNewLocks(iPxHrPos, 0);
}
|
||||
|
||||
#endif // FFX_FSR2_ACCUMULATE_H
|
91
thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
vendored
Normal file
91
thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
// Needed for rw_upscaled_output declaration
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
// Binding slot indices for the resources of the accumulate pass. They are defined before
// "ffx_fsr2_callbacks_glsl.h" is included; that header declares the constant buffer only when
// FSR2_BIND_CB_FSR2 is defined.
// NOTE(review): the other slots are presumably consumed the same way by the callbacks header - confirm.
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
|
||||
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
|
||||
// Slot 2 holds either the dilated or the input motion vectors, depending on the option below.
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2
|
||||
#else
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
|
||||
#endif
|
||||
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3
|
||||
#define FSR2_BIND_SRV_LOCK_STATUS 4
|
||||
//#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5
|
||||
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6
|
||||
#define FSR2_BIND_SRV_LUMA_INSTABILITY 7
|
||||
#define FSR2_BIND_SRV_LANCZOS_LUT 8
|
||||
#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9
|
||||
#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 10
|
||||
#define FSR2_BIND_SRV_AUTO_EXPOSURE 11
|
||||
#define FSR2_BIND_SRV_LUMA_HISTORY 12
|
||||
|
||||
#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13
|
||||
#define FSR2_BIND_UAV_LOCK_STATUS 14
|
||||
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15
|
||||
#define FSR2_BIND_UAV_NEW_LOCKS 16
|
||||
#define FSR2_BIND_UAV_LUMA_HISTORY 17
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 18
|
||||
|
||||
// Slot 5 is left unassigned above (the DEPTH_CLIP define is commented out); it is only
// assigned, to the input depth, when this option is enabled.
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 5
|
||||
#endif
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_upsample.h"
|
||||
#include "ffx_fsr2_postprocess_lock_status.h"
|
||||
#include "ffx_fsr2_reproject.h"
|
||||
#include "ffx_fsr2_accumulate.h"
|
||||
|
||||
// Default compute work-group size (8 x 8 x 1). Each value, and the FFX_FSR2_NUM_THREADS
// layout declaration built from them, can be overridden by defining the macro beforehand.
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
// Expands to the complete `layout (...) in;` declaration, including the trailing semicolon.
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
// Compute entry point of the accumulate pass.
// The work-group row index is mirrored vertically (row 0 becomes the last row needed to cover
// DisplaySize().y) before the per-invocation pixel position is derived and passed to Accumulate().
void main()
{
    // Number of work-group rows needed to cover the display height, rounded up.
    const uint uGroupRowCount = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT;

    const uvec2 uMirroredGroupId = uvec2(gl_WorkGroupID.x, uGroupRowCount - gl_WorkGroupID.y - 1);
    const uvec2 uGroupSize = uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    const uvec2 uDispatchThreadId = uMirroredGroupId * uGroupSize + gl_LocalInvocationID.xy;

    Accumulate(ivec2(uDispatchThreadId));
}
|
93
thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
vendored
Normal file
93
thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
// Binding slot indices for the resources of the auto-generate-reactive pass.
// FSR2_BIND_CB_REACTIVE is the binding of the cbGenerateReactive uniform block declared below
// in this file; FSR2_BIND_CB_FSR2 enables the constant buffer declared in "ffx_fsr2_callbacks_glsl.h".
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 1
|
||||
#define FSR2_BIND_UAV_AUTOREACTIVE 2
|
||||
#define FSR2_BIND_CB_REACTIVE 3
|
||||
#define FSR2_BIND_CB_FSR2 4
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha;
|
||||
// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha;
|
||||
// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask;
|
||||
|
||||
|
||||
// Default compute work-group size (8 x 8 x 1). Each value, and the FFX_FSR2_NUM_THREADS
// layout declaration built from them, can be overridden by defining the macro beforehand.
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
// Expands to the complete `layout (...) in;` declaration, including the trailing semicolon.
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
// Parameters of the auto-generate-reactive pass, read by main() in this file.
// Declared as a std140 uniform block at set 1, binding FSR2_BIND_CB_REACTIVE.
#if defined(FSR2_BIND_CB_REACTIVE)
|
||||
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
|
||||
{
|
||||
float scale; // multiplied onto the computed reactive value
|
||||
float threshold; // with the APPLY_THRESHOLD flag: values below this become 0
|
||||
float binaryValue; // with the APPLY_THRESHOLD flag: value written when not below threshold
|
||||
uint flags; // bit mask tested against the FFX_FSR2_AUTOREACTIVEFLAGS_* constants
|
||||
} cbGenerateReactive;
|
||||
#endif
|
||||
|
||||
FFX_FSR2_NUM_THREADS
// Compute entry point of the auto-generate-reactive pass.
// Writes, per pixel, a reactive value derived from the absolute difference between the
// opaque-only color and the input color. cbGenerateReactive.flags selects optional
// tonemapping / inverse tonemapping of both colors, the metric (largest component or vector
// length) and optional thresholding to either 0 or cbGenerateReactive.binaryValue.
void main()
{
    const FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy;
    const uint uFlags = cbGenerateReactive.flags;

    FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb;
    FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb;

    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0)
    {
        ColorPreAlpha = Tonemap(ColorPreAlpha);
        ColorPostAlpha = Tonemap(ColorPostAlpha);
    }

    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0)
    {
        ColorPreAlpha = InverseTonemap(ColorPreAlpha);
        ColorPostAlpha = InverseTonemap(ColorPostAlpha);
    }

    const FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha);

    // Difference metric: largest component or Euclidean length.
    FfxFloat32 out_reactive_value;
    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) != 0)
    {
        out_reactive_value = max(delta.x, max(delta.y, delta.z));
    }
    else
    {
        out_reactive_value = length(delta);
    }
    out_reactive_value *= cbGenerateReactive.scale;

    // Optional binarization of the scaled value.
    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) != 0)
    {
        if (out_reactive_value < cbGenerateReactive.threshold)
        {
            out_reactive_value = 0.0f;
        }
        else
        {
            out_reactive_value = cbGenerateReactive.binaryValue;
        }
    }

    imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value));
}
|
698
thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
vendored
Normal file
698
thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
vendored
Normal file
@@ -0,0 +1,698 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
#include "ffx_fsr2_resources.h"
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#include "ffx_core.h"
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifndef FFX_FSR2_PREFER_WAVE64
|
||||
#define FFX_FSR2_PREFER_WAVE64
|
||||
#endif // #ifndef FFX_FSR2_PREFER_WAVE64
|
||||
|
||||
#if defined(FSR2_BIND_CB_FSR2)
// Uniform block (descriptor set 1, std140) holding the constants that the
// accessor functions in this file return. Under std140 the byte offsets follow
// from the member order, so members must not be reordered or removed.
// NOTE(review): presumably mirrors a CPU-side constant structure — confirm
// against the code that fills this buffer before changing anything here.
layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t
{
    FfxInt32x2 iRenderSize;
    FfxInt32x2 iMaxRenderSize;
    FfxInt32x2 iDisplaySize;
    FfxInt32x2 iInputColorResourceDimensions;
    FfxInt32x2 iLumaMipDimensions;
    FfxInt32 iLumaMipLevelToUse;
    FfxInt32 iFrameIndex;

    FfxFloat32x4 fDeviceToViewDepth;
    FfxFloat32x2 fJitter;
    FfxFloat32x2 fMotionVectorScale;
    FfxFloat32x2 fDownscaleFactor;
    FfxFloat32x2 fMotionVectorJitterCancellation;
    FfxFloat32 fPreExposure;
    FfxFloat32 fPreviousFramePreExposure;
    FfxFloat32 fTanHalfFOV;
    FfxFloat32 fJitterSequenceLength;
    FfxFloat32 fDeltaTime;
    FfxFloat32 fDynamicResChangeFactor;
    FfxFloat32 fViewSpaceToMetersFactor;

    // Explicit padding scalar; no accessor in this file reads it.
    // NOTE(review): looks like it rounds the preceding scalars up to a 16-byte
    // boundary ahead of the matrix — confirm.
    FfxFloat32 fPad;
    // Passed to FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION in
    // LoadInputMotionVector().
    mat4 mReprojectionMatrix;
} cbFSR2;
#endif
|
||||
|
||||
// Accessor for the iRenderSize member of the cbFSR2 uniform block.
FfxInt32x2 RenderSize()
{
    FfxInt32x2 iValue = cbFSR2.iRenderSize;
    return iValue;
}
|
||||
|
||||
// Accessor for the iMaxRenderSize member of the cbFSR2 uniform block.
FfxInt32x2 MaxRenderSize()
{
    FfxInt32x2 iValue = cbFSR2.iMaxRenderSize;
    return iValue;
}
|
||||
|
||||
// Accessor for the iDisplaySize member of the cbFSR2 uniform block.
FfxInt32x2 DisplaySize()
{
    FfxInt32x2 iValue = cbFSR2.iDisplaySize;
    return iValue;
}
|
||||
|
||||
// Accessor for the iInputColorResourceDimensions member of the cbFSR2 uniform block.
FfxInt32x2 InputColorResourceDimensions()
{
    FfxInt32x2 iValue = cbFSR2.iInputColorResourceDimensions;
    return iValue;
}
|
||||
|
||||
// Accessor for the iLumaMipDimensions member of the cbFSR2 uniform block.
FfxInt32x2 LumaMipDimensions()
{
    FfxInt32x2 iValue = cbFSR2.iLumaMipDimensions;
    return iValue;
}
|
||||
|
||||
// Accessor for the iLumaMipLevelToUse member of the cbFSR2 uniform block.
FfxInt32 LumaMipLevelToUse()
{
    FfxInt32 iValue = cbFSR2.iLumaMipLevelToUse;
    return iValue;
}
|
||||
|
||||
// Accessor for the iFrameIndex member of the cbFSR2 uniform block.
FfxInt32 FrameIndex()
{
    FfxInt32 iValue = cbFSR2.iFrameIndex;
    return iValue;
}
|
||||
|
||||
// Accessor for the fDeviceToViewDepth member of the cbFSR2 uniform block.
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
{
    FfxFloat32x4 fValue = cbFSR2.fDeviceToViewDepth;
    return fValue;
}
|
||||
|
||||
// Accessor for the fJitter member of the cbFSR2 uniform block.
FfxFloat32x2 Jitter()
{
    FfxFloat32x2 fValue = cbFSR2.fJitter;
    return fValue;
}
|
||||
|
||||
// Accessor for the fMotionVectorScale member of the cbFSR2 uniform block.
FfxFloat32x2 MotionVectorScale()
{
    FfxFloat32x2 fValue = cbFSR2.fMotionVectorScale;
    return fValue;
}
|
||||
|
||||
// Accessor for the fDownscaleFactor member of the cbFSR2 uniform block.
FfxFloat32x2 DownscaleFactor()
{
    FfxFloat32x2 fValue = cbFSR2.fDownscaleFactor;
    return fValue;
}
|
||||
|
||||
// Accessor for the fMotionVectorJitterCancellation member of the cbFSR2 uniform block.
FfxFloat32x2 MotionVectorJitterCancellation()
{
    FfxFloat32x2 fValue = cbFSR2.fMotionVectorJitterCancellation;
    return fValue;
}
|
||||
|
||||
// Accessor for the fPreExposure member of the cbFSR2 uniform block.
FfxFloat32 PreExposure()
{
    FfxFloat32 fValue = cbFSR2.fPreExposure;
    return fValue;
}
|
||||
|
||||
// Accessor for the fPreviousFramePreExposure member of the cbFSR2 uniform block.
FfxFloat32 PreviousFramePreExposure()
{
    FfxFloat32 fValue = cbFSR2.fPreviousFramePreExposure;
    return fValue;
}
|
||||
|
||||
// Accessor for the fTanHalfFOV member of the cbFSR2 uniform block.
FfxFloat32 TanHalfFoV()
{
    FfxFloat32 fValue = cbFSR2.fTanHalfFOV;
    return fValue;
}
|
||||
|
||||
// Accessor for the fJitterSequenceLength member of the cbFSR2 uniform block.
FfxFloat32 JitterSequenceLength()
{
    FfxFloat32 fValue = cbFSR2.fJitterSequenceLength;
    return fValue;
}
|
||||
|
||||
// Accessor for the fDeltaTime member of the cbFSR2 uniform block.
FfxFloat32 DeltaTime()
{
    FfxFloat32 fValue = cbFSR2.fDeltaTime;
    return fValue;
}
|
||||
|
||||
// Accessor for the fDynamicResChangeFactor member of the cbFSR2 uniform block.
FfxFloat32 DynamicResChangeFactor()
{
    FfxFloat32 fValue = cbFSR2.fDynamicResChangeFactor;
    return fValue;
}
|
||||
|
||||
// Accessor for the fViewSpaceToMetersFactor member of the cbFSR2 uniform block.
FfxFloat32 ViewSpaceToMetersFactor()
{
    FfxFloat32 fValue = cbFSR2.fViewSpaceToMetersFactor;
    return fValue;
}
|
||||
|
||||
// Separate sampler objects in descriptor set 0. The helpers below combine them
// with individual textures via sampler2D(texture, sampler).
// NOTE(review): the filter/address state implied by the names is configured by
// the host application, not in this file — confirm there.
layout (set = 0, binding = 0) uniform sampler s_PointClamp;
layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
|
||||
|
||||
// SRVs
|
||||
// Read-only texture declarations (descriptor set 1). Each one is compiled only
// when its FSR2_BIND_SRV_* macro is defined; the macro value is used as the
// binding index. The Load*/Sample* helpers further down use the same macros as
// guards so they only exist when their texture does.
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
#endif
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
#endif
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
#endif
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure;
#endif
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
#endif
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
#endif
// Unsigned-integer texture: LoadReconstructedPrevDepth() reinterprets its bits as a float.
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
#endif
#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth;
#endif
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status;
#endif
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma;
#endif
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color;
#endif
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
#endif
#if defined(FSR2_BIND_SRV_RCAS_INPUT)
layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
#endif
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips;
#endif
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut;
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
#endif
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
#endif
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
#endif
|
||||
|
||||
// UAV
|
||||
// Storage image declarations (descriptor set 1). Each one is compiled only when
// its FSR2_BIND_UAV_* macro is defined; the macro value is the binding index.
// The format qualifier in each layout() fixes the texel format the shader
// assumes; images marked writeonly can only be used with imageStore().
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
#endif
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
#endif
#if defined FSR2_BIND_UAV_DILATED_DEPTH
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth;
#endif
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
#endif
#if defined FSR2_BIND_UAV_LOCK_STATUS
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status;
#endif
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma;
#endif
// Not writeonly: LoadRwNewLocks() reads this image back with imageLoad().
#if defined FSR2_BIND_UAV_NEW_LOCKS
layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
#endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color;
#endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
#endif
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
#endif
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
#endif
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure;
#endif
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
#endif

#if defined FSR2_BIND_UAV_AUTOREACTIVE
layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
#endif
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
#endif
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
#endif
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
// Fetches the red channel of r_imgMips at integer texel iPxPos of mip level mipLevel.
FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel)
{
    FfxFloat32 fLuma = texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r;
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
// Samples r_imgMips through s_LinearClamp at fUV with explicit LOD mipLevel
// and returns the red channel.
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel)
{
    FfxFloat32 fLod = FfxFloat32(mipLevel);
    FfxFloat32 fLuma = textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, fLod).r;
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
// Fetches the red channel of r_input_depth at integer texel iPxPos (mip 0).
FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
{
    FfxFloat32 fDepth = texelFetch(r_input_depth, iPxPos, 0).r;
    return fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
// Fetches the red channel of r_reactive_mask at iPxPos (mip 0). When
// FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP is non-zero the value is capped at 0.9.
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
{
    FfxFloat32 fReactive = texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
    fReactive = min(fReactive, 0.9f);
#endif
    return fReactive;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
// Fetches the red channel of r_transparency_and_composition_mask at iPxPos (mip 0).
// The unsigned coordinate is converted to a signed one for texelFetch.
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
{
    FfxInt32x2 iTexel = FfxInt32x2(iPxPos);
    FfxFloat32 fMask = texelFetch(r_transparency_and_composition_mask, iTexel, 0).r;
    return fMask;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
// Fetches the RGB channels of r_input_color_jittered at integer texel iPxPos (mip 0).
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
{
    FfxFloat32x3 fColor = texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
// Samples r_input_color_jittered through s_LinearClamp at fUV (LOD 0) and returns RGB.
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
{
    FfxFloat32x3 fColor = textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb;
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
// Fetches the first three channels of r_prepared_input_color at iPxPos (mip 0).
FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos)
{
    FfxFloat32x3 fPrepared = texelFetch(r_prepared_input_color, iPxPos, 0).xyz;
    return fPrepared;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
// Fetches the xy motion vector at iPxDilatedMotionVectorPos from
// r_input_motion_vectors, multiplies it by MotionVectorScale() and, when
// FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS is non-zero, subtracts
// MotionVectorJitterCancellation().
//
// When FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS is non-zero, a
// fetched vector whose components are both <= -1 is treated as invalid and is
// replaced by the result of
// FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION, evaluated at
// the texel-center UV with the input depth and the reprojection matrix.
FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
{
    FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;

#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
    if (all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))))
    {
        FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
        FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos);
        fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
    }
#endif

    FfxFloat32x2 fScaledMotionVector = fSrcMotionVector * MotionVectorScale();

#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
    fScaledMotionVector = fScaledMotionVector - MotionVectorJitterCancellation();
#endif

    return fScaledMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
// Fetches all four channels of r_internal_upscaled_color at iPxHistory (mip 0).
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
{
    FfxFloat32x4 fHistory = texelFetch(r_internal_upscaled_color, iPxHistory, 0);
    return fHistory;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
// Writes fLumaHistory to rw_luma_history at texel iPxPos.
void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
{
    FfxInt32x2 iTexel = FfxInt32x2(iPxPos);
    imageStore(rw_luma_history, iTexel, fLumaHistory);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
// Samples r_luma_history through s_LinearClamp at fUV (LOD 0); returns all four channels.
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
{
    FfxFloat32x4 fHistory = textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f);
    return fHistory;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
// Writes fHistory to rw_internal_upscaled_color at texel iPxHistory.
void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
    FfxFloat32x4 fTexel = fHistory;
    imageStore(rw_internal_upscaled_color, iPxHistory, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
// Writes fColorAndWeight to rw_internal_upscaled_color at texel iPxPos.
void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
{
    FfxInt32x2 iTexel = FfxInt32x2(iPxPos);
    imageStore(rw_internal_upscaled_color, iTexel, fColorAndWeight);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
// Writes fColor to rw_upscaled_output at texel iPxPos with the fourth channel set to 1.
void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
{
    FfxFloat32x4 fTexel = FfxFloat32x4(fColor, 1.f);
    imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
// Fetches the red and green channels of r_lock_status at texel iPxPos (mip 0).
FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos)
{
    return texelFetch(r_lock_status, iPxPos, 0).rg;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_STATUS)
// Writes fLockstatus into the first two channels of rw_lock_status at iPxPos;
// the remaining two components of the stored vec4 are zero.
void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus)
{
    vec4 fTexel = vec4(fLockstatus, 0.0f, 0.0f);
    imageStore(rw_lock_status, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
// Fetches the red channel of r_lock_input_luma at texel iPxPos (mip 0).
FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos)
{
    FfxFloat32 fLuma = texelFetch(r_lock_input_luma, iPxPos, 0).r;
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
// Writes fLuma into the first channel of rw_lock_input_luma at texel iPxPos.
void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
{
    vec4 fTexel = vec4(fLuma, 0, 0, 0);
    imageStore(rw_lock_input_luma, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
// Fetches the red channel of the read-only r_new_locks texture at iPxPos (mip 0).
FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
{
    FfxFloat32 fLock = texelFetch(r_new_locks, iPxPos, 0).r;
    return fLock;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
// Reads the red channel of the rw_new_locks storage image at texel iPxPos.
FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
{
    FfxFloat32 fLock = imageLoad(rw_new_locks, iPxPos).r;
    return fLock;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
// Writes newLock into the first channel of rw_new_locks at texel iPxPos.
void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
{
    vec4 fTexel = vec4(newLock, 0, 0, 0);
    imageStore(rw_new_locks, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
// Writes fTonemapped (all four channels) to rw_prepared_input_color at texel iPxPos.
void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
{
    FfxFloat32x4 fTexel = fTonemapped;
    imageStore(rw_prepared_input_color, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
// Samples r_prepared_input_color through s_LinearClamp at fUV (LOD 0) and
// returns its fourth (w) channel.
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
{
    FfxFloat32 fDepthClip = textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w;
    return fDepthClip;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
// Samples r_lock_status through s_LinearClamp at fUV (LOD 0); returns red and green.
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
{
    return textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg;
}
#endif
|
||||
|
||||
// Guarded by FSR2_BIND_SRV_INPUT_DEPTH because that is the macro under which
// r_input_depth is declared in this file. The previous guard,
// FSR2_BIND_SRV_DEPTH, does not declare any resource here, so the function was
// either never compiled or referenced an undeclared texture.
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
// Fetches the red channel of r_input_depth at integer texel iPxInput (mip 0).
FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput)
{
    return texelFetch(r_input_depth, iPxInput, 0).r;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
// Fetches the red channel of the unsigned-integer texture
// r_reconstructed_previous_nearest_depth at iPxPos (mip 0) and reinterprets
// its bits as a float.
FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
{
    FfxUInt32 uDepthBits = texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r;
    return uintBitsToFloat(uDepthBits);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
// Atomically merges fDepth into rw_reconstructed_previous_nearest_depth at
// iPxSample. The float is reinterpreted as its raw bits so an integer image
// atomic can be used: atomic min for standard depth, atomic max when
// FFX_FSR2_OPTION_INVERTED_DEPTH is non-zero.
void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
{
    FfxUInt32 uDepthBits = floatBitsToUint(fDepth);

#if !FFX_FSR2_OPTION_INVERTED_DEPTH
    imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepthBits);
#else
    imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepthBits);
#endif
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
// Non-atomically overwrites rw_reconstructed_previous_nearest_depth at
// iPxSample with the raw value uValue.
void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
{
    uvec4 uTexel = uvec4(uValue, 0, 0, 0);
    imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
// Writes fDepth into the first channel of rw_dilatedDepth at texel iPxPos.
void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
    vec4 fTexel = vec4(fDepth, 0.0f, 0.0f, 0.0f);
    imageStore(rw_dilatedDepth, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
// Writes fMotionVector into the first two channels of rw_dilated_motion_vectors at iPxPos.
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
    vec4 fTexel = vec4(fMotionVector, 0.0f, 0.0f);
    imageStore(rw_dilated_motion_vectors, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
// Fetches the red and green channels of r_dilated_motion_vectors at iPxInput (mip 0).
FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
{
    FfxFloat32x2 fMotionVector = texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg;
    return fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
// Samples r_dilated_motion_vectors through s_LinearClamp at fUV (LOD 0); returns red and green.
FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
{
    FfxFloat32x2 fMotionVector = textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg;
    return fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
// Fetches the first two channels of r_previous_dilated_motion_vectors at iPxInput (mip 0).
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput)
{
    FfxFloat32x2 fMotionVector = texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg;
    return fMotionVector;
}

// Samples r_previous_dilated_motion_vectors through s_LinearClamp at fUV
// (LOD 0) and returns the first two channels.
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV)
{
    FfxFloat32x2 fMotionVector = textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy;
    return fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
// Fetches the red channel of r_dilatedDepth at integer texel iPxInput (mip 0).
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
{
    FfxFloat32 fDepth = texelFetch(r_dilatedDepth, iPxInput, 0).r;
    return fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
// Reads the x channel of texel (0, 0) of r_input_exposure. A stored value of
// exactly 0 is replaced by 1.
FfxFloat32 Exposure()
{
    FfxFloat32 fStored = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
    return (fStored == 0.0f) ? 1.0f : fStored;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
// Reads the x channel of texel (0, 0) of r_auto_exposure. A stored value of
// exactly 0 is replaced by 1.
FfxFloat32 AutoExposure()
{
    FfxFloat32 fStored = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x;
    return (fStored == 0.0f) ? 1.0f : fStored;
}
#endif
|
||||
|
||||
// Looks up a weight for x in r_lanczos_lut: the table is sampled through
// s_LinearClamp at u = x / 2, v = 0.5 (LOD 0) and the x channel is returned.
// When FSR2_BIND_SRV_LANCZOS_LUT is not defined the function returns 0.
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
    FfxFloat32 fWeight = 0.f;
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
    FfxFloat32x2 fLutUv = FfxFloat32x2(x / 2.0f, 0.5f);
    fWeight = textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), fLutUv, 0.0f).x;
#endif
    return fWeight;
}
|
||||
|
||||
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
// Samples r_upsample_maximum_bias_lut through s_LinearClamp at abs(uv) * 2
// (LOD 0) and returns twice the red channel.
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
    FfxFloat32 fStored = FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r);
    // Stored as a SNORM, so multiply by 2 to retrieve the actual expected range.
    return FfxFloat32(2.0f) * fStored;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
// Samples r_dilated_reactive_masks through s_LinearClamp at fUV (LOD 0); returns red and green.
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
    FfxFloat32x2 fMasks = textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg;
    return fMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
// Fetches the red and green channels of r_dilated_reactive_masks at iPxPos (mip 0).
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
{
    FfxFloat32x2 fMasks = texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg;
    return fMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
// Writes fDilatedReactiveMasks into the first two channels of
// rw_dilated_reactive_masks at texel iPxPos.
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
    vec4 fTexel = vec4(fDilatedReactiveMasks, 0.0f, 0.0f);
    imageStore(rw_dilated_reactive_masks, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FFX_INTERNAL)
// Samples r_debug_out through s_LinearClamp at fUV (LOD 0); returns all four channels.
// NOTE(review): r_debug_out is not declared in the visible part of this file —
// confirm where it comes from before enabling FFX_INTERNAL for GLSL.
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
{
    FfxFloat32x4 fDebug = textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
    return fDebug;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
// Fetches the first three channels of r_input_opaque_only at iPxPos (mip 0).
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    FfxFloat32x3 fColor = texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
// Fetches the first three channels of r_input_prev_color_pre_alpha at iPxPos (mip 0).
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    FfxFloat32x3 fColor = texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
// Fetches the first three channels of r_input_prev_color_post_alpha at iPxPos (mip 0).
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    FfxFloat32x3 fColor = texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_AUTOREACTIVE) && defined(FSR2_BIND_UAV_AUTOCOMPOSITION)
// Splits fReactive across two images at texel iPxPos: the x component goes to
// the first channel of rw_output_autoreactive and the y component to the first
// channel of rw_output_autocomposition. Only available when both images are bound.
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
    vec4 fReactiveTexel = vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f);
    imageStore(rw_output_autoreactive, iPxPos, fReactiveTexel);

    vec4 fCompositionTexel = vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f);
    imageStore(rw_output_autocomposition, iPxPos, fCompositionTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
// Writes color into the first three channels of rw_output_prev_color_pre_alpha
// at texel iPxPos; the fourth component of the stored vec4 is 0.
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    vec4 fTexel = vec4(color, 0.0f);
    imageStore(rw_output_prev_color_pre_alpha, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
// Writes color into the first three channels of rw_output_prev_color_post_alpha
// at texel iPxPos; the fourth component of the stored vec4 is 0.
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    vec4 fTexel = vec4(color, 0.0f);
    imageStore(rw_output_prev_color_post_alpha, iPxPos, fTexel);
}
#endif
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
799
thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
vendored
Normal file
799
thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
vendored
Normal file
@@ -0,0 +1,799 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "ffx_fsr2_resources.h"
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifdef __hlsl_dx_compiler
|
||||
#pragma dxc diagnostic push
|
||||
#pragma dxc diagnostic ignored "-Wambig-lit-shift"
|
||||
#endif //__hlsl_dx_compiler
|
||||
#include "ffx_core.h"
|
||||
#ifdef __hlsl_dx_compiler
|
||||
#pragma dxc diagnostic pop
|
||||
#endif //__hlsl_dx_compiler
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#ifndef FFX_FSR2_PREFER_WAVE64
|
||||
#define FFX_FSR2_PREFER_WAVE64
|
||||
#endif // #ifndef FFX_FSR2_PREFER_WAVE64
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#pragma warning(disable: 3205) // conversion from larger type to smaller
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
// Register-binding helpers. DECLARE_*_REGISTER paste the register-class letter
// (t, u or b) onto an index. The FFX_FSR2_DECLARE_* wrappers add a level of
// indirection so that an index passed as a macro (for example FSR2_BIND_CB_FSR2)
// is expanded to its value before the ## paste happens, and then wrap the
// result in register(...).
#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
#define DECLARE_CB_REGISTER(regIndex) b##regIndex
#define FFX_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
|
||||
|
||||
#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
// Constant buffer holding the values returned by the getter functions below,
// bound to the b-register given by FSR2_BIND_CB_FSR2. Member order defines the
// buffer layout, so members must not be reordered.
// NOTE(review): presumably mirrors a CPU-side constant structure — confirm
// against the code that fills this buffer before changing anything here.
cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
{
    FfxInt32x2 iRenderSize;
    FfxInt32x2 iMaxRenderSize;
    FfxInt32x2 iDisplaySize;
    FfxInt32x2 iInputColorResourceDimensions;
    FfxInt32x2 iLumaMipDimensions;
    FfxInt32 iLumaMipLevelToUse;
    FfxInt32 iFrameIndex;

    FfxFloat32x4 fDeviceToViewDepth;
    FfxFloat32x2 fJitter;
    FfxFloat32x2 fMotionVectorScale;
    FfxFloat32x2 fDownscaleFactor;
    FfxFloat32x2 fMotionVectorJitterCancellation;
    FfxFloat32 fPreExposure;
    FfxFloat32 fPreviousFramePreExposure;
    FfxFloat32 fTanHalfFOV;
    FfxFloat32 fJitterSequenceLength;
    FfxFloat32 fDeltaTime;
    FfxFloat32 fDynamicResChangeFactor;
    FfxFloat32 fViewSpaceToMetersFactor;
};

// Used as the num32BitConstants count in the root-signature strings below.
#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
#endif
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
|
||||
#define FFX_FSR2_ROOTSIG_STR(p) #p
|
||||
#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
|
||||
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
|
||||
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
|
||||
|
||||
#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
|
||||
|
||||
#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
|
||||
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
|
||||
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
|
||||
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
|
||||
"addressU = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressV = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"addressW = TEXTURE_ADDRESS_CLAMP, " \
|
||||
"comparisonFunc = COMPARISON_NEVER, " \
|
||||
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
|
||||
// When FFX_FSR2_EMBED_ROOTSIG is defined the *_CONTENT macros expand to the
// corresponding root-signature attribute; otherwise they expand to nothing.
#ifdef FFX_FSR2_EMBED_ROOTSIG
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
#else
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
#endif // #ifdef FFX_FSR2_EMBED_ROOTSIG
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
/* Define getter functions in the order they are defined in the CB! */
|
||||
// Constant-buffer getter: returns iRenderSize.
FfxInt32x2 RenderSize()
{
    return iRenderSize;
}
|
||||
|
||||
// Constant-buffer getter: returns iMaxRenderSize.
FfxInt32x2 MaxRenderSize()
{
    return iMaxRenderSize;
}
|
||||
|
||||
// Constant-buffer getter: returns iDisplaySize.
FfxInt32x2 DisplaySize()
{
    return iDisplaySize;
}
|
||||
|
||||
// Constant-buffer getter: returns iInputColorResourceDimensions.
FfxInt32x2 InputColorResourceDimensions()
{
    return iInputColorResourceDimensions;
}
|
||||
|
||||
// Constant-buffer getter: returns iLumaMipDimensions.
FfxInt32x2 LumaMipDimensions()
{
    return iLumaMipDimensions;
}
|
||||
|
||||
// Constant-buffer getter: returns iLumaMipLevelToUse.
FfxInt32 LumaMipLevelToUse()
{
    return iLumaMipLevelToUse;
}
|
||||
|
||||
// Constant-buffer getter: returns iFrameIndex.
FfxInt32 FrameIndex()
{
    return iFrameIndex;
}
|
||||
|
||||
// Constant-buffer getter: returns fJitter.
FfxFloat32x2 Jitter()
{
    return fJitter;
}
|
||||
|
||||
// Constant-buffer getter: returns fDeviceToViewDepth.
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
{
    return fDeviceToViewDepth;
}
|
||||
|
||||
// Constant-buffer getter: returns fMotionVectorScale.
FfxFloat32x2 MotionVectorScale()
{
    return fMotionVectorScale;
}
|
||||
|
||||
// Constant-buffer getter: returns fDownscaleFactor.
FfxFloat32x2 DownscaleFactor()
{
    return fDownscaleFactor;
}
|
||||
|
||||
// Constant-buffer getter: returns fMotionVectorJitterCancellation.
FfxFloat32x2 MotionVectorJitterCancellation()
{
    return fMotionVectorJitterCancellation;
}
|
||||
|
||||
// Constant-buffer getter: returns fPreExposure.
FfxFloat32 PreExposure()
{
    return fPreExposure;
}
|
||||
|
||||
// Constant-buffer getter: returns fPreviousFramePreExposure.
FfxFloat32 PreviousFramePreExposure()
{
    return fPreviousFramePreExposure;
}
|
||||
|
||||
// Constant-buffer getter: returns fTanHalfFOV.
FfxFloat32 TanHalfFoV()
{
    return fTanHalfFOV;
}
|
||||
|
||||
// Constant-buffer getter: returns fJitterSequenceLength.
FfxFloat32 JitterSequenceLength()
{
    return fJitterSequenceLength;
}
|
||||
|
||||
// Constant-buffer getter: returns fDeltaTime.
FfxFloat32 DeltaTime()
{
    return fDeltaTime;
}
|
||||
|
||||
// Constant-buffer getter: returns fDynamicResChangeFactor.
FfxFloat32 DynamicResChangeFactor()
{
    return fDynamicResChangeFactor;
}
|
||||
|
||||
// Constant-buffer getter: returns fViewSpaceToMetersFactor.
FfxFloat32 ViewSpaceToMetersFactor()
{
    return fViewSpaceToMetersFactor;
}
|
||||
|
||||
|
||||
// Samplers bound at s0 / s1. FFX_FSR2_CB2_ROOTSIG declares these slots as
// static samplers: s0 point-filtered, s1 linear-filtered, both clamp-addressed.
SamplerState s_PointClamp : register(s0);
SamplerState s_LinearClamp : register(s1);
|
||||
|
||||
// SRVs
|
||||
#if defined(FFX_INTERNAL)
|
||||
// FFX_INTERNAL build: every SRV is declared unconditionally and bound at the
// register given by its FFX_FSR2_RESOURCE_IDENTIFIER_* value.

// Inputs
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);

// Intermediate resources
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
Texture2D<FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
// NOTE(review): "IDENTITIER" is reproduced as written; presumably it matches
// the macro's definition elsewhere - confirm before renaming.
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);

// Debug
Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
|
||||
|
||||
// UAV declarations (FFX_INTERNAL build): declared unconditionally and bound at
// the register given by the FFX_FSR2_RESOURCE_IDENTIFIER_* value.
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);

globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
RWTexture2D<FfxFloat32x4> rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);

RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
|
||||
|
||||
#else // #if defined(FFX_INTERNAL)
|
||||
// Per-permutation SRVs: each texture is declared only when its
// FSR2_BIND_SRV_* macro is defined, and is bound at the register that macro names.
#ifdef FSR2_BIND_SRV_INPUT_COLOR
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
#endif
#ifdef FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
#endif
#ifdef FSR2_BIND_SRV_INPUT_MOTION_VECTORS
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
#endif
#ifdef FSR2_BIND_SRV_INPUT_DEPTH
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
#endif
#ifdef FSR2_BIND_SRV_INPUT_EXPOSURE
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
#endif
#ifdef FSR2_BIND_SRV_AUTO_EXPOSURE
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
#endif
#ifdef FSR2_BIND_SRV_REACTIVE_MASK
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
#endif
#ifdef FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
#endif
#ifdef FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
#endif
#ifdef FSR2_BIND_SRV_DILATED_MOTION_VECTORS
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
#endif
#ifdef FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
#endif
#ifdef FSR2_BIND_SRV_DILATED_DEPTH
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
#endif
#ifdef FSR2_BIND_SRV_INTERNAL_UPSCALED
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
#endif
#ifdef FSR2_BIND_SRV_LOCK_STATUS
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
#endif
#ifdef FSR2_BIND_SRV_LOCK_INPUT_LUMA
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
#endif
#ifdef FSR2_BIND_SRV_NEW_LOCKS
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
#endif
#ifdef FSR2_BIND_SRV_PREPARED_INPUT_COLOR
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
#endif
#ifdef FSR2_BIND_SRV_LUMA_HISTORY
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
#endif
#ifdef FSR2_BIND_SRV_RCAS_INPUT
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
#endif
#ifdef FSR2_BIND_SRV_LANCZOS_LUT
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
#endif
#ifdef FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
#endif
#ifdef FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
#endif
#ifdef FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
#endif
|
||||
|
||||
#if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR
// SRV read by LoadPrevPreAlpha(). Bound at the register named by
// FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR, the same macro that guards it, so the
// slot follows the per-permutation binding like every other SRV in this branch
// rather than the global FFX_FSR2_RESOURCE_IDENTIFIER_* index.
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR);
#endif
|
||||
#if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR
// SRV read by LoadPrevPostAlpha(). Bound at the register named by
// FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR, the same macro that guards it, so the
// slot follows the per-permutation binding like every other SRV in this branch
// rather than the global FFX_FSR2_RESOURCE_IDENTIFIER_* index.
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR);
#endif
|
||||
|
||||
// UAV declarations: each read/write texture is declared only when its
// FSR2_BIND_UAV_* macro is defined, and is bound at the register that macro names.
#ifdef FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
#endif
#ifdef FSR2_BIND_UAV_DILATED_MOTION_VECTORS
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
#endif
#ifdef FSR2_BIND_UAV_DILATED_DEPTH
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
#endif
#ifdef FSR2_BIND_UAV_INTERNAL_UPSCALED
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
#endif
#ifdef FSR2_BIND_UAV_LOCK_STATUS
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
#endif
#ifdef FSR2_BIND_UAV_LOCK_INPUT_LUMA
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
#endif
#ifdef FSR2_BIND_UAV_NEW_LOCKS
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
#endif
#ifdef FSR2_BIND_UAV_PREPARED_INPUT_COLOR
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
#endif
#ifdef FSR2_BIND_UAV_LUMA_HISTORY
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
#endif
#ifdef FSR2_BIND_UAV_UPSCALED_OUTPUT
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
#endif
#ifdef FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
#endif
#ifdef FSR2_BIND_UAV_EXPOSURE_MIP_5
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
#endif
#ifdef FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
#endif
#ifdef FSR2_BIND_UAV_EXPOSURE
RWTexture2D<FfxFloat32x2> rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
#endif
#ifdef FSR2_BIND_UAV_AUTO_EXPOSURE
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
#endif
#ifdef FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
#endif

#ifdef FSR2_BIND_UAV_AUTOREACTIVE
RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
#endif
#ifdef FSR2_BIND_UAV_AUTOCOMPOSITION
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION);
#endif
#ifdef FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR);
#endif
#ifdef FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
#endif
|
||||
#endif // #if defined(FFX_INTERNAL)
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
// Reads one texel of r_imgMips at pixel iPxPos from mip level mipLevel (no filtering).
FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
{
    const FfxFloat32 fLuma = r_imgMips.mips[mipLevel][iPxPos];
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
// Samples r_imgMips at fUV on mip level mipLevel through s_LinearClamp.
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
{
    const FfxFloat32 fLuma = r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
// Reads r_input_depth at integer pixel position iPxPos.
FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fDepth = r_input_depth[iPxPos];
    return fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
// Samples mip 0 of r_input_depth at fUV through s_LinearClamp.
FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
{
    const FfxFloat32 fDepth = r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
    return fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
// Reads r_reactive_mask at integer pixel position iPxPos.
FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fReactive = r_reactive_mask[iPxPos];
    return fReactive;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
// Reads r_transparency_and_composition_mask at integer pixel position iPxPos.
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fMask = r_transparency_and_composition_mask[iPxPos];
    return fMask;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
// Reads the rgb channels of r_input_color_jittered at pixel iPxPos.
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
{
    const FfxFloat32x4 fTexel = r_input_color_jittered[iPxPos];
    return fTexel.rgb;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
// Samples mip 0 of r_input_color_jittered at fUV through s_LinearClamp; returns rgb.
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
{
    const FfxFloat32x4 fSample = r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0);
    return fSample.rgb;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
// Reads the xyz channels of r_prepared_input_color at pixel iPxPos.
FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
{
    const FfxFloat32x4 fTexel = r_prepared_input_color[iPxPos];
    return fTexel.xyz;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Reads the xy motion vector at the given pixel from r_input_motion_vectors and
// multiplies it by MotionVectorScale(). When FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
// is enabled, MotionVectorJitterCancellation() is subtracted from the scaled vector.
FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
{
    const FfxFloat32x2 fRawVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
    FfxFloat32x2 fScaledVector = fRawVector * MotionVectorScale();

#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
    fScaledVector -= MotionVectorJitterCancellation();
#endif

    return fScaledVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
// Reads r_internal_upscaled_color at integer pixel position iPxHistory.
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
{
    const FfxFloat32x4 fHistory = r_internal_upscaled_color[iPxHistory];
    return fHistory;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
// Writes fLumaHistory to rw_luma_history at pixel iPxPos.
void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
{
    rw_luma_history[iPxPos] = fLumaHistory;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
// Samples mip 0 of r_luma_history at fUV through s_LinearClamp.
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
{
    const FfxFloat32x4 fSample = r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
    return fSample;
}
#endif
|
||||
|
||||
#if defined(FFX_INTERNAL)
// Samples mip 0 of r_debug_out at fUV through s_LinearClamp and returns its
// w channel replicated into all four components.
// NOTE(review): only .w is used although the return type is a 4-vector;
// confirm whether the full sample was intended.
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
{
    const FfxFloat32 fW = r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w;
    return FfxFloat32x4(fW, fW, fW, fW);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
// Writes fHistory to rw_internal_upscaled_color at pixel iPxHistory.
void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
    rw_internal_upscaled_color[iPxHistory] = fHistory;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
// Writes fColorAndWeight to rw_internal_upscaled_color at pixel iPxPos.
void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
{
    rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
// Writes fColor to rw_upscaled_output at pixel iPxPos with the fourth channel set to 1.
void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
{
    const FfxFloat32x4 fOutput = FfxFloat32x4(fColor.x, fColor.y, fColor.z, 1.0f);
    rw_upscaled_output[iPxPos] = fOutput;
}
#endif
|
||||
|
||||
//LOCK_LIFETIME_REMAINING == 0
|
||||
//Should make LockInitialLifetime() return a const 1.0f later
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
// Reads the two-component value of r_lock_status at pixel iPxPos.
FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
{
    const FfxFloat32x2 fLockStatus = r_lock_status[iPxPos];
    return fLockStatus;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
// Writes fLockStatus to rw_lock_status at pixel iPxPos.
void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
{
    rw_lock_status[iPxPos] = fLockStatus;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
// Reads r_lock_input_luma at integer pixel position iPxPos.
FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fLuma = r_lock_input_luma[iPxPos];
    return fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
// Writes fLuma to rw_lock_input_luma at pixel iPxPos.
void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma)
{
    rw_lock_input_luma[iPxPos] = fLuma;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
// Reads the read-only view r_new_locks at pixel iPxPos.
FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fNewLock = r_new_locks[iPxPos];
    return fNewLock;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
// Reads the read/write view rw_new_locks at pixel iPxPos.
FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
{
    const FfxFloat32 fNewLock = rw_new_locks[iPxPos];
    return fNewLock;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
// Writes newLock to rw_new_locks at pixel iPxPos.
void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
{
    rw_new_locks[iPxPos] = newLock;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
// Writes fTonemapped to rw_prepared_input_color at pixel iPxPos.
void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
{
    rw_prepared_input_color[iPxPos] = fTonemapped;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
// Samples mip 0 of r_prepared_input_color at fUV through s_LinearClamp and
// returns its w channel.
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
{
    const FfxFloat32x4 fSample = r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0);
    return fSample.w;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
// Samples mip 0 of r_lock_status at fUV through s_LinearClamp.
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
{
    return r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
// Reads the 32-bit unsigned texel of r_reconstructed_previous_nearest_depth at
// iPxPos and reinterprets its bits as a float.
FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
{
    const FfxUInt32 uDepthBits = r_reconstructed_previous_nearest_depth[iPxPos];
    return asfloat(uDepthBits);
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
// Atomically merges fDepth (as raw float bits) into
// rw_reconstructed_previous_nearest_depth at iPxSample:
// min for standard, max for inverted depth.
void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
{
    const FfxUInt32 uDepthBits = asuint(fDepth);

#if FFX_FSR2_OPTION_INVERTED_DEPTH
    InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepthBits);
#else
    InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepthBits);
#endif
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
// Overwrites rw_reconstructed_previous_nearest_depth at iPxSample with uValue
// (plain, non-atomic store).
void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
{
    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
// Writes fDepth to rw_dilatedDepth at pixel iPxPos.
void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
    rw_dilatedDepth[iPxPos] = fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Writes fMotionVector to rw_dilated_motion_vectors at pixel iPxPos.
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
    rw_dilated_motion_vectors[iPxPos] = fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Reads the xy value of r_dilated_motion_vectors at pixel iPxInput.
FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
{
    const FfxFloat32x2 fMotionVector = r_dilated_motion_vectors[iPxInput].xy;
    return fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
// Reads the xy value of r_previous_dilated_motion_vectors at pixel iPxInput.
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
{
    const FfxFloat32x2 fMotionVector = r_previous_dilated_motion_vectors[iPxInput].xy;
    return fMotionVector;
}

// Samples mip 0 of r_previous_dilated_motion_vectors at uv through s_LinearClamp.
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
{
    const FfxFloat32x2 fMotionVector = r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
    return fMotionVector;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
// Reads r_dilatedDepth at integer pixel position iPxInput.
FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
{
    const FfxFloat32 fDepth = r_dilatedDepth[iPxInput];
    return fDepth;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
// Returns the x channel of texel (0, 0) of r_input_exposure, substituting 1.0
// when the stored value is exactly zero.
FfxFloat32 Exposure()
{
    const FfxFloat32 fStored = r_input_exposure[FfxUInt32x2(0, 0)].x;
    return (fStored == 0.0f) ? 1.0f : fStored;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
// Returns the x channel of texel (0, 0) of r_auto_exposure, substituting 1.0
// when the stored value is exactly zero.
FfxFloat32 AutoExposure()
{
    const FfxFloat32 fStored = r_auto_exposure[FfxUInt32x2(0, 0)].x;
    return (fStored == 0.0f) ? 1.0f : fStored;
}
#endif
|
||||
|
||||
// Looks up r_lanczos_lut at u = x / 2, v = 0.5 through s_LinearClamp.
// When the LUT is not bound (and this is not an FFX_INTERNAL build) the
// function is still defined and returns 0.
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
    const FfxFloat32x2 fLutUv = FfxFloat32x2(x / 2, 0.5f);
    return r_lanczos_lut.SampleLevel(s_LinearClamp, fLutUv, 0);
#else
    return 0.f;
#endif
}
|
||||
|
||||
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
// Samples r_upsample_maximum_bias_lut at abs(uv) * 2 through s_LinearClamp and
// doubles the result.
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
    const FfxFloat32 fStoredBias = r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
    return FfxFloat32(2.0) * fStoredBias;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Samples mip 0 of r_dilated_reactive_masks at fUV through s_LinearClamp.
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
    const FfxFloat32x2 fMasks = r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
    return fMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Reads the two-component value of r_dilated_reactive_masks at pixel iPxPos.
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
{
    const FfxFloat32x2 fMasks = r_dilated_reactive_masks[iPxPos];
    return fMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
// Writes fDilatedReactiveMasks to rw_dilated_reactive_masks at pixel iPxPos.
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
    rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
// Reads the xyz channels of r_input_opaque_only at pixel iPxPos.
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    const FfxFloat32x4 fTexel = r_input_opaque_only[iPxPos];
    return fTexel.xyz;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Reads r_input_prev_color_pre_alpha at pixel iPxPos.
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    const FfxFloat32x3 fColor = r_input_prev_color_pre_alpha[iPxPos];
    return fColor;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Reads r_input_prev_color_post_alpha at pixel iPxPos.
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
    const FfxFloat32x3 fColor = r_input_prev_color_post_alpha[iPxPos];
    return fColor;
}
#endif
|
||||
|
||||
// Only available when both the auto-reactive and auto-composition UAVs are
// bound (or in an FFX_INTERNAL build).
#if (defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)) && (defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL))
// Writes fReactive.x to rw_output_autoreactive and fReactive.y to
// rw_output_autocomposition, both at pixel iPxPos.
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
    rw_output_autoreactive[iPxPos] = fReactive.x;
    rw_output_autocomposition[iPxPos] = fReactive.y;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Writes color to rw_output_prev_color_pre_alpha at pixel iPxPos.
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    rw_output_prev_color_pre_alpha[iPxPos] = color;
}
#endif
|
||||
|
||||
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
// Writes color to rw_output_prev_color_post_alpha at pixel iPxPos.
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
    rw_output_prev_color_post_alpha[iPxPos] = color;
}
#endif
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
565
thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
vendored
Normal file
565
thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
vendored
Normal file
@@ -0,0 +1,565 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#if !defined(FFX_FSR2_COMMON_H)
|
||||
#define FFX_FSR2_COMMON_H
|
||||
|
||||
#if defined(FFX_CPU) || defined(FFX_GPU)
// Locks: component indices into the two-component lock-status value
// (KillLock() clears component LOCK_LIFETIME_REMAINING).
#define LOCK_LIFETIME_REMAINING 0
#define LOCK_TEMPORAL_LUMA 1
#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
|
||||
#if defined(FFX_GPU)
|
||||
// Numeric limits and epsilons shared by the FSR2 shaders.
// FSR2_TONEMAP_EPSILON is defined as the reciprocal of FSR2_FP16_MAX.
FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f;
FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f;
FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f;
FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX;
FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f;
FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f;
|
||||
|
||||
// Vector truncation (warning 3206) is promoted to a hard error.
#pragma warning(error: 3206)

// Warnings that are expected in this code and therefore silenced.
#pragma warning(disable: 3205) // conversion from larger type to smaller
#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative
|
||||
|
||||
// Reconstructed depth usage: bilinear taps at or below this weight are ignored.
FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;

// Accumulation: all Lanczos-weight constants are expressed relative to the upsample weight scale.
FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;

// Auto exposure: the luminance pyramid pass only smooths towards the previous
// average when that previous value is below this threshold.
FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
|
||||
|
||||
// Parameter bundle for the accumulation pass.
// NOTE(review): the code that fills and reads these fields is outside this chunk;
// the grouping below is inferred from field names only -- confirm against the pass.
struct AccumulationPassCommonParams
{
    // Positions / coordinates
    FfxInt32x2   iPxHrPos;
    FfxFloat32x2 fHrUv;
    FfxFloat32x2 fLrUv_HwSampler;
    FfxFloat32x2 fMotionVector;
    FfxFloat32x2 fReprojectedHrUv;

    // Scalar factors
    FfxFloat32 fHrVelocity;
    FfxFloat32 fDepthClipFactor;
    FfxFloat32 fDilatedReactiveFactor;
    FfxFloat32 fAccumulationMask;

    // State flags
    FfxBoolean bIsResetFrame;
    FfxBoolean bIsExistingSample;
    FfxBoolean bIsNewSample;
};
|
||||
|
||||
// Pair of flags describing the lock state of a pixel.
struct LockState
{
    FfxBoolean NewLock;            // Set for both unique new and re-locked new
    FfxBoolean WasLockedPrevFrame; // Set to identify if the pixel was already locked (relock)
};
|
||||
|
||||
// Writes an all-zero lock status into the output parameter.
void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus)
{
    const FfxFloat32x2 fCleared = FfxFloat32x2(0, 0);
    fLockStatus = fCleared;
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: writes an all-zero lock status into the output parameter.
void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus)
{
    const FFX_MIN16_F2 fCleared = FFX_MIN16_F2(0, 0);
    fLockStatus = fCleared;
}
|
||||
#endif
|
||||
|
||||
|
||||
// Zeroes the remaining-lifetime component of the lock status; the other
// component is left untouched.
void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus)
{
    fLockStatus[LOCK_LIFETIME_REMAINING] = FfxFloat32(0);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: zeroes the remaining-lifetime component of the
// lock status; the other component is left untouched.
void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus)
{
    const FFX_MIN16_F fExpired = FFX_MIN16_F(0);
    fLockStatus[LOCK_LIFETIME_REMAINING] = fExpired;
}
|
||||
#endif
|
||||
|
||||
// Running statistics over a set of weighted colour samples.
// While samples are being added, boxCenter / boxVec hold weighted sums; after
// RectificationBoxComputeVarianceBoxData() they hold the mean and the standard deviation.
struct RectificationBox
{
    FfxFloat32x3 boxCenter;        // weighted sum of samples (mean after finalisation)
    FfxFloat32x3 boxVec;           // weighted sum of squared samples (std-dev after finalisation)
    FfxFloat32x3 aabbMin;          // component-wise minimum of all samples
    FfxFloat32x3 aabbMax;          // component-wise maximum of all samples
    FfxFloat32   fBoxCenterWeight; // sum of sample weights
};
|
||||
#if FFX_HALF
|
||||
// Half-precision counterpart of RectificationBox: running statistics over a
// set of weighted colour samples (sums while accumulating, mean / std-dev
// after RectificationBoxComputeVarianceBoxData()).
struct RectificationBoxMin16
{
    FFX_MIN16_F3 boxCenter;        // weighted sum of samples (mean after finalisation)
    FFX_MIN16_F3 boxVec;           // weighted sum of squared samples (std-dev after finalisation)
    FFX_MIN16_F3 aabbMin;          // component-wise minimum of all samples
    FFX_MIN16_F3 aabbMax;          // component-wise maximum of all samples
    FFX_MIN16_F  fBoxCenterWeight; // sum of sample weights
};
|
||||
#endif
|
||||
|
||||
// Puts the box into its empty state: zero sums and weight, and an inverted
// AABB (min = +FLT_MAX, max = -FLT_MAX) so that any sample replaces both bounds.
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
    const FfxFloat32x3 fHuge = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);

    rectificationBox.fBoxCenterWeight = FfxFloat32(0);
    rectificationBox.boxCenter = fZero;
    rectificationBox.boxVec = fZero;
    rectificationBox.aabbMin = fHuge;
    rectificationBox.aabbMax = -fHuge;
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: zero sums and weight, and an inverted AABB
// (min = +FP16_MAX, max = -FP16_MAX) so that any sample replaces both bounds.
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
{
    const FFX_MIN16_F3 fZero = FFX_MIN16_F3(0, 0, 0);
    const FFX_MIN16_F3 fHuge = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);

    rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
    rectificationBox.boxCenter = fZero;
    rectificationBox.boxVec = fZero;
    rectificationBox.aabbMin = fHuge;
    rectificationBox.aabbMax = -fHuge;
}
|
||||
#endif
|
||||
|
||||
// Seeds the box with its first sample: every field is overwritten, so the
// previous contents of the box do not matter.
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;

    rectificationBox.fBoxCenterWeight = fSampleWeight;
    rectificationBox.boxCenter = fWeighted;
    rectificationBox.boxVec = colorSample * fWeighted; // weight * sample^2

    rectificationBox.aabbMin = colorSample;
    rectificationBox.aabbMax = colorSample;
}
|
||||
|
||||
// Adds one weighted sample to the box. When bInitialSample is set the box is
// (re)seeded from the sample; otherwise the sample is merged into the
// existing bounds and running sums.
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
    if (bInitialSample)
    {
        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
        return;
    }

    // Grow the bounding box.
    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);

    // Accumulate first and second moments plus total weight.
    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
    rectificationBox.boxCenter += fWeighted;
    rectificationBox.boxVec += colorSample * fWeighted;
    rectificationBox.fBoxCenterWeight += fSampleWeight;
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: seeds the box with its first sample, overwriting
// every field.
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
{
    const FFX_MIN16_F3 fWeighted = colorSample * fSampleWeight;

    rectificationBox.fBoxCenterWeight = fSampleWeight;
    rectificationBox.boxCenter = fWeighted;
    rectificationBox.boxVec = colorSample * fWeighted; // weight * sample^2

    rectificationBox.aabbMin = colorSample;
    rectificationBox.aabbMax = colorSample;
}
|
||||
|
||||
// Half-precision overload: adds one weighted sample to the box, reseeding it
// when bInitialSample is set and merging into the running state otherwise.
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
{
    if (bInitialSample)
    {
        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
        return;
    }

    // Grow the bounding box.
    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);

    // Accumulate first and second moments plus total weight.
    const FFX_MIN16_F3 fWeighted = colorSample * fSampleWeight;
    rectificationBox.boxCenter += fWeighted;
    rectificationBox.boxVec += colorSample * fWeighted;
    rectificationBox.fBoxCenterWeight += fSampleWeight;
}
|
||||
#endif
|
||||
|
||||
// Finalises the accumulated sums: boxCenter becomes the weighted mean and
// boxVec the per-channel standard deviation sqrt(|E[x^2] - E[x]^2|).
// A total weight whose magnitude does not exceed FSR2_EPSILON is replaced by 1
// to avoid dividing by (near) zero.
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
    FfxFloat32 fTotalWeight = rectificationBox.fBoxCenterWeight;
    if (!(abs(fTotalWeight) > FfxFloat32(FSR2_EPSILON)))
    {
        fTotalWeight = FfxFloat32(1.f);
    }
    rectificationBox.fBoxCenterWeight = fTotalWeight;

    const FfxFloat32x3 fMean = rectificationBox.boxCenter / fTotalWeight;
    const FfxFloat32x3 fMeanOfSquares = rectificationBox.boxVec / fTotalWeight;

    rectificationBox.boxCenter = fMean;
    rectificationBox.boxVec = sqrt(abs(fMeanOfSquares - fMean * fMean));
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: turns the accumulated sums into the weighted mean
// (boxCenter) and the per-channel standard deviation (boxVec). A total weight
// whose magnitude does not exceed FSR2_EPSILON is replaced by 1.
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
{
    FFX_MIN16_F fTotalWeight = rectificationBox.fBoxCenterWeight;
    if (!(abs(fTotalWeight) > FFX_MIN16_F(FSR2_EPSILON)))
    {
        fTotalWeight = FFX_MIN16_F(1.f);
    }
    rectificationBox.fBoxCenterWeight = fTotalWeight;

    const FFX_MIN16_F3 fMean = rectificationBox.boxCenter / fTotalWeight;
    const FFX_MIN16_F3 fMeanOfSquares = rectificationBox.boxVec / fTotalWeight;

    rectificationBox.boxCenter = fMean;
    rectificationBox.boxVec = sqrt(abs(fMeanOfSquares - fMean * fMean));
}
|
||||
#endif
|
||||
|
||||
// Component-wise reciprocal of v. If ANY component of v is zero the whole
// result is (0, 0, 0) -- the check is all-or-nothing, not per component.
FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
{
    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
    if (all(FFX_NOT_EQUAL(v, fZero)))
    {
        return FfxFloat32x3(1, 1, 1) / v;
    }
    return fZero;
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: component-wise reciprocal of v, or (0, 0, 0) when
// ANY component of v is zero (all-or-nothing check).
FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v)
{
    const FFX_MIN16_F3 fZero = FFX_MIN16_F3(0, 0, 0);
    if (all(FFX_NOT_EQUAL(v, fZero)))
    {
        return FFX_MIN16_F3(1, 1, 1) / v;
    }
    return fZero;
}
|
||||
#endif
|
||||
|
||||
// Returns min(v0, v1) / max(v0, v1), or 0 when the larger value is exactly 0.
FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
{
    const FfxFloat32 fLarger = ffxMax(v0, v1);
    if (fLarger != 0)
    {
        return ffxMin(v0, v1) / fLarger;
    }
    return FfxFloat32(0);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: min(v0, v1) / max(v0, v1), or 0 when the larger
// value is exactly 0.
FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
{
    const FFX_MIN16_F fLarger = ffxMax(v0, v1);
    if (fLarger != FFX_MIN16_F(0))
    {
        return ffxMin(v0, v1) / fLarger;
    }
    return FFX_MIN16_F(0);
}
|
||||
#endif
|
||||
|
||||
// Converts a colour from YCoCg (x = Y, y = Co, z = Cg) to RGB.
FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
{
    const FfxFloat32 fY = fYCoCg.x;
    const FfxFloat32 fCo = fYCoCg.y;
    const FfxFloat32 fCg = fYCoCg.z;

    return FfxFloat32x3(
        fY + fCo - fCg,
        fY + fCg,
        fY - fCo - fCg);
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: converts YCoCg (x = Y, y = Co, z = Cg) to RGB.
FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
{
    const FFX_MIN16_F fY = fYCoCg.x;
    const FFX_MIN16_F fCo = fYCoCg.y;
    const FFX_MIN16_F fCg = fYCoCg.z;

    return FFX_MIN16_F3(
        fY + fCo - fCg,
        fY + fCg,
        fY - fCo - fCg);
}
|
||||
#endif
|
||||
|
||||
// Converts an RGB colour to YCoCg (returned as x = Y, y = Co, z = Cg).
FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
{
    const FfxFloat32 fY = 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b;
    const FfxFloat32 fCo = 0.5f * fRgb.r - 0.5f * fRgb.b;
    const FfxFloat32 fCg = -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b;

    return FfxFloat32x3(fY, fCo, fCg);
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: converts an RGB colour to YCoCg
// (returned as x = Y, y = Co, z = Cg).
FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
{
    // The three rows of the RGB -> YCoCg matrix, one per output component.
    // Literals are deliberately left unsuffixed, as in the reference code.
    return FFX_MIN16_F3(
        0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,   // Y
        0.5 * fRgb.r - 0.5 * fRgb.b,                    // Co
        -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); // Cg
}
|
||||
#endif
|
||||
|
||||
// Luminance of a linear RGB colour: weighted sum with the coefficients
// (0.2126, 0.7152, 0.0722).
FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
{
    const FfxFloat32x3 fLumaWeights = FfxFloat32x3(0.2126f, 0.7152f, 0.0722f);
    return dot(fLinearRgb, fLumaWeights);
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: luminance of a linear RGB colour, weighted with
// the coefficients (0.2126, 0.7152, 0.0722).
FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
{
    const FFX_MIN16_F3 fLumaWeights = FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f);
    return dot(fLinearRgb, fLumaWeights);
}
|
||||
#endif
|
||||
|
||||
// Maps linear RGB to a perceptual lightness value, scaled by 0.01.
// Luminance at or below 216/24389 uses the linear segment (slope 24389/27);
// above it the cube-root segment 116 * Y^(1/3) - 16 is used.
FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
{
    const FfxFloat32 fLuma = RGBToLuma(fLinearRgb);

    FfxFloat32 fLightness;
    if (fLuma <= 216.0f / 24389.0f)
    {
        fLightness = fLuma * (24389.0f / 27.0f);
    }
    else
    {
        fLightness = ffxPow(fLuma, 1.0f / 3.0f) * 116.0f - 16.0f;
    }

    return fLightness * 0.01f;
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: maps linear RGB to a perceptual lightness value,
// scaled by 0.01. Luminance at or below 216/24389 uses the linear segment
// (slope 24389/27); above it the cube-root segment 116 * Y^(1/3) - 16 is used.
FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
{
    const FFX_MIN16_F fLuma = RGBToLuma(fLinearRgb);

    FFX_MIN16_F fLightness;
    if (fLuma <= FFX_MIN16_F(216.0f / 24389.0f))
    {
        fLightness = fLuma * FFX_MIN16_F(24389.0f / 27.0f);
    }
    else
    {
        fLightness = ffxPow(fLuma, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f);
    }

    return fLightness * FFX_MIN16_F(0.01f);
}
|
||||
#endif
|
||||
|
||||
// Compresses a colour by dividing it by (1 + its largest channel), where the
// largest channel is clamped to be at least 0.
FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
{
    const FfxFloat32 fPeak = ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b));
    const FfxFloat32 fDivisor = fPeak + 1.f;
    return fRgb / FfxFloat32x3(fDivisor, fDivisor, fDivisor);
}
|
||||
|
||||
// Expands a colour by dividing it by (1 - its largest channel). The divisor
// is floored at FSR2_TONEMAP_EPSILON so that channels near 1 stay finite.
FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
{
    const FfxFloat32 fPeak = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
    const FfxFloat32 fDivisor = ffxMax(FSR2_TONEMAP_EPSILON, 1.f - fPeak);
    return fRgb / FfxFloat32x3(fDivisor, fDivisor, fDivisor);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: divides the colour by (1 + its largest channel),
// with the largest channel clamped to be at least 0.
FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
{
    const FFX_MIN16_F fPeak = ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b));
    const FFX_MIN16_F fDivisor = fPeak + FFX_MIN16_F(1.f);
    return fRgb / FFX_MIN16_F3(fDivisor, fDivisor, fDivisor);
}
|
||||
|
||||
// Half-precision overload: divides the colour by (1 - its largest channel),
// with the divisor floored at FSR2_TONEMAP_EPSILON.
FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
{
    const FFX_MIN16_F fPeak = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
    const FFX_MIN16_F fDivisor = ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - fPeak);
    return fRgb / FFX_MIN16_F3(fDivisor, fDivisor, fDivisor);
}
|
||||
#endif
|
||||
|
||||
// Returns iPxSample + iPxOffset, clamped to the texture edge that lies in the
// direction of the offset. Only that edge is checked per axis: a negative
// offset clamps against 0, a positive one against size - 1, and a zero offset
// leaves the coordinate unclamped.
FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iPos = iPxSample + iPxOffset;

    if (iPxOffset.x < 0)
    {
        iPos.x = ffxMax(iPos.x, 0);
    }
    if (iPxOffset.x > 0)
    {
        iPos.x = ffxMin(iPos.x, iTextureSize.x - 1);
    }

    if (iPxOffset.y < 0)
    {
        iPos.y = ffxMax(iPos.y, 0);
    }
    if (iPxOffset.y > 0)
    {
        iPos.y = ffxMin(iPos.y, iTextureSize.y - 1);
    }

    return iPos;
}
|
||||
#if FFX_HALF
|
||||
// 16-bit overload: returns iPxSample + iPxOffset, clamped per axis to the
// texture edge that lies in the direction of the offset (0 for a negative
// offset, size - 1 for a positive one; a zero offset is not clamped).
FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
    FFX_MIN16_I2 iPos = iPxSample + iPxOffset;

    if (iPxOffset.x < 0)
    {
        iPos.x = ffxMax(iPos.x, FFX_MIN16_I(0));
    }
    if (iPxOffset.x > 0)
    {
        iPos.x = ffxMin(iPos.x, iTextureSize.x - FFX_MIN16_I(1));
    }

    if (iPxOffset.y < 0)
    {
        iPos.y = ffxMax(iPos.y, FFX_MIN16_I(0));
    }
    if (iPxOffset.y > 0)
    {
        iPos.y = ffxMin(iPos.y, iTextureSize.y - FFX_MIN16_I(1));
    }

    return iPos;
}
|
||||
#endif
|
||||
|
||||
// Clamps a UV so that the sample position stays between the centres of the
// first and last texel of an iTextureSize region, then re-normalises it by
// iResourceSize (the divisor differs from the multiplier on purpose).
FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
{
    const FfxFloat32x2 fHalfTexel = FfxFloat32x2(0.5f, 0.5f);
    const FfxFloat32x2 fUpperBound = FfxFloat32x2(iTextureSize) - fHalfTexel;

    const FfxFloat32x2 fTexelPos = fUv * iTextureSize;
    const FfxFloat32x2 fClampedPos = ffxMax(fHalfTexel, ffxMin(fTexelPos, fUpperBound));

    return fClampedPos / FfxFloat32x2(iResourceSize);
}
|
||||
|
||||
// True when 0 <= pos < size on both axes. Both vectors are reinterpreted as
// unsigned, so a negative coordinate becomes a huge value and fails the
// single less-than test.
FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
{
    const FfxUInt32x2 uPos = FfxUInt32x2(pos);
    const FfxUInt32x2 uSize = FfxUInt32x2(size);
    return all(FFX_LESS_THAN(uPos, uSize));
}
|
||||
#if FFX_HALF
|
||||
// 16-bit overload: true when pos < size on both axes after converting both
// vectors to unsigned.
FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
{
    const FFX_MIN16_U2 uPos = FFX_MIN16_U2(pos);
    const FFX_MIN16_U2 uSize = FFX_MIN16_U2(size);
    return all(FFX_LESS_THAN(uPos, uSize));
}
|
||||
#endif
|
||||
|
||||
// Derives an exposure scale from Lavg. The input is exponentiated first, so
// it is taken to be a logarithmic average. The result is 1 / Lmax, with
// Lmax = (78 / (q * S)) * 2^EV100 and EV100 = log2(L * S / K).
FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
{
    const FfxFloat32 fIsoSpeed = 100.0f; //ISO arithmetic speed
    const FfxFloat32 fK = 12.5f;
    const FfxFloat32 fQ = 0.65f;

    const FfxFloat32 fLinearLavg = exp(Lavg);
    const FfxFloat32 fExposureIso100 = log2((fLinearLavg * fIsoSpeed) / fK);
    const FfxFloat32 fLmax = (78.0f / (fQ * fIsoSpeed)) * ffxPow(2.0f, fExposureIso100);

    return 1 / fLmax;
}
|
||||
#if FFX_HALF
|
||||
// Half-precision overload: derives an exposure scale 1 / Lmax from Lavg,
// which is exponentiated first. Lmax = (78 / (q * S)) * 2^EV100 with
// EV100 = log2(L * S / K).
FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
{
    const FFX_MIN16_F fIsoSpeed = FFX_MIN16_F(100.0f); //ISO arithmetic speed
    const FFX_MIN16_F fK = FFX_MIN16_F(12.5f);
    const FFX_MIN16_F fQ = FFX_MIN16_F(0.65f);

    const FFX_MIN16_F fLinearLavg = exp(Lavg);
    const FFX_MIN16_F fExposureIso100 = log2((fLinearLavg * fIsoSpeed) / fK);
    const FFX_MIN16_F fLmax = (FFX_MIN16_F(78.0f) / (fQ * fIsoSpeed)) * ffxPow(FFX_MIN16_F(2.0f), fExposureIso100);

    return FFX_MIN16_F(1) / fLmax;
}
|
||||
#endif
|
||||
|
||||
// Maps a render-resolution pixel to the display-resolution pixel containing
// its jitter-compensated centre: (pos + 0.5 - jitter) is rescaled from
// RenderSize() to DisplaySize() and floored.
FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
{
    const FfxFloat32x2 fLrCentre = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
    const FfxFloat32x2 fHrCentre = (fLrCentre / RenderSize()) * DisplaySize();
    return FfxInt32x2(floor(fHrCentre));
}
|
||||
#if FFX_HALF
|
||||
// 16-bit overload: maps a render-resolution pixel to the display-resolution
// pixel containing its jitter-compensated centre.
FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
{
    const FFX_MIN16_F2 fLrCentre = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
    const FFX_MIN16_F2 fHrCentre = (fLrCentre / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
    return FFX_MIN16_I2(floor(fHrCentre));
}
|
||||
#endif
|
||||
|
||||
// Converts a pixel position to normalised device coordinates: x maps
// [0, size.x] to [-1, 1] and y maps [0, size.y] to [1, -1] (y is flipped).
FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
{
    const FfxFloat32x2 fScale = FfxFloat32x2(2.0f, -2.0f);
    const FfxFloat32x2 fBias = FfxFloat32x2(-1.0f, 1.0f);
    return fPxPos / FfxFloat32x2(iSize) * fScale + fBias;
}
|
||||
|
||||
// Converts a device (depth-buffer) value to view-space depth using the first
// two device-to-view factors: factors[1] / (depth - factors[0]).
FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
{
    // fDeviceToViewDepth details found in ffx_fsr2.cpp
    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
    const FfxFloat32 fDenominator = fDeviceDepth - fFactors[0];
    return (fFactors[1] / fDenominator);
}
|
||||
|
||||
// View-space depth for a device depth value, scaled by ViewSpaceToMetersFactor().
FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
{
    const FfxFloat32 fViewDepth = GetViewSpaceDepth(fDeviceDepth);
    return fViewDepth * ViewSpaceToMetersFactor();
}
|
||||
|
||||
// Reconstructs a view-space position from a viewport pixel and its device
// depth: Z comes from GetViewSpaceDepth(), X and Y are the NDC coordinates
// scaled by Z and by device-to-view factors [2] and [3].
FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
{
    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
    const FfxFloat32 fViewZ = GetViewSpaceDepth(fDeviceDepth);
    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);

    return FfxFloat32x3(
        fFactors[2] * fNdc.x * fViewZ,
        fFactors[3] * fNdc.y * fViewZ,
        fViewZ);
}
|
||||
|
||||
// View-space position of a viewport pixel, scaled by ViewSpaceToMetersFactor().
FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
{
    const FfxFloat32x3 fViewPos = GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth);
    return fViewPos * ViewSpaceToMetersFactor();
}
|
||||
|
||||
// View-space depth, in metres, of the device depth value used as the far
// limit: 0.0 when FFX_FSR2_OPTION_INVERTED_DEPTH is set, 1.0 otherwise.
FfxFloat32 GetMaxDistanceInMeters()
{
#if FFX_FSR2_OPTION_INVERTED_DEPTH
    const FfxFloat32 fFarDeviceDepth = 0.0f;
#else
    const FfxFloat32 fFarDeviceDepth = 1.0f;
#endif
    return GetViewSpaceDepth(fFarDeviceDepth) * ViewSpaceToMetersFactor();
}
|
||||
|
||||
// Removes the pre-exposure from a colour, applies the exposure and clamps the
// result to [0, FSR2_FP16_MAX].
FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
{
    const FfxFloat32x3 fExposed = (fRgb / fPreExposure) * fExposure;
    return clamp(fExposed, 0.0f, FSR2_FP16_MAX);
}
|
||||
|
||||
// Undoes the exposure applied by PrepareRgb(): divides by fExposure and
// multiplies by PreExposure(). No clamping is applied.
FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
{
    const FfxFloat32x3 fUnexposed = fRgb / fExposure;
    return fUnexposed * PreExposure();
}
|
||||
|
||||
|
||||
// The four taps of a bilinear footprint, as produced by GetBilinearSamplingData().
struct BilinearSamplingData
{
    FfxInt32x2 iOffsets[4]; // integer offset of each tap relative to iBasePos
    FfxFloat32 fWeights[4]; // bilinear weight of each tap
    FfxInt32x2 iBasePos;    // integer position of the first tap (offset 0,0)
};
|
||||
|
||||
// Computes the 2x2 bilinear footprint for a UV in a texture of iSize texels:
// the base texel, the four tap offsets and their weights.
BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
{
    // Shift by half a texel so the integer part addresses the first tap.
    const FfxFloat32x2 fTexelPos = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
    const FfxFloat32x2 fFrac = ffxFract(fTexelPos);

    BilinearSamplingData result;
    result.iBasePos = FfxInt32x2(floor(fTexelPos));

    result.iOffsets[0] = FfxInt32x2(0, 0);
    result.fWeights[0] = (1 - fFrac.x) * (1 - fFrac.y);

    result.iOffsets[1] = FfxInt32x2(1, 0);
    result.fWeights[1] = (fFrac.x) * (1 - fFrac.y);

    result.iOffsets[2] = FfxInt32x2(0, 1);
    result.fWeights[2] = (1 - fFrac.x) * (fFrac.y);

    result.iOffsets[3] = FfxInt32x2(1, 1);
    result.fWeights[3] = (fFrac.x) * (fFrac.y);

    return result;
}
|
||||
|
||||
// A plane in the form dot(fNormal, p) + fDistanceFromOrigin = 0.
struct PlaneData
{
    FfxFloat32x3 fNormal;             // normalised plane normal
    FfxFloat32   fDistanceFromOrigin; // -dot(point on plane, fNormal)
};
|
||||
|
||||
// Builds the plane through three points. The normal is the normalised cross
// product of (fP0 - fP1) and (fP0 - fP2); no check is made for collinear points.
PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
{
    const FfxFloat32x3 fEdge01 = fP0 - fP1;
    const FfxFloat32x3 fEdge02 = fP0 - fP2;

    PlaneData result;
    result.fNormal = normalize(cross(fEdge01, fEdge02));
    result.fDistanceFromOrigin = -dot(fP0, result.fNormal);
    return result;
}
|
||||
|
||||
// Unsigned distance from fPoint to the plane.
FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
{
    const FfxFloat32 fSignedDistance = dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin;
    return abs(fSignedDistance);
}
|
||||
|
||||
#endif // #if defined(FFX_GPU)
|
||||
|
||||
#endif //!defined(FFX_FSR2_COMMON_H)
|
189
thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
vendored
Normal file
189
thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// Group-shared copy of the SPD atomic counter value; written by
// SpdIncreaseAtomicCounter() and read back through SpdGetAtomicCounter().
FFX_GROUPSHARED FfxUInt32 spdCounter;
|
||||
|
||||
#ifndef SPD_PACKED_ONLY
|
||||
// Group-shared 16x16 scratch storage, one array per channel, accessed through
// SpdLoadIntermediate() / SpdStoreIntermediate().
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
|
||||
|
||||
// SPD source fetch: samples the input colour at the jittered centre of texel
// `tex`, removes the pre-exposure and returns log(luma) in .x (other channels 0).
// Texels outside RenderSize() return 0. `slice` is not used.
FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
{
    const FfxFloat32x2 fJitteredUv = (tex + 0.5f + Jitter()) / RenderSize();
    const FfxFloat32x2 fUv = ClampUv(fJitteredUv, RenderSize(), InputColorResourceDimensions());

    const FfxFloat32x3 fRgb = SampleInputColor(fUv) / PreExposure();

    //compute log luma (luma is floored at FSR2_EPSILON so the log stays finite)
    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)));

    // Make sure out of screen pixels contribute no value to the end result
    const FfxBoolean bInside = all(FFX_LESS_THAN(tex, RenderSize()));
    const FfxFloat32 fResult = bInside ? fLogLuma : 0.0f;

    return FfxFloat32x4(fResult, 0, 0, 0);
}
|
||||
|
||||
// SPD callback: reads a texel back through SPD_LoadMipmap5(). `slice` is not used.
FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
{
    const FfxFloat32x4 fTexel = SPD_LoadMipmap5(tex);
    return fTexel;
}
|
||||
|
||||
// SPD store callback.
//  * Mip `index` is written out only when it is LumaMipLevelToUse() or mip 5.
//  * For texel (0,0) of the last mip (MipCount() - 1) the value is blended
//    with the previous average and written, together with the derived
//    exposure, to the exposure buffer. `slice` is not used.
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
{
    const FfxBoolean bStoreMip = (index == LumaMipLevelToUse()) || (index == 5);
    if (bStoreMip)
    {
        SPD_SetMipmap(pix, index, outValue.r);
    }

    //accumulate on 1x1 level
    if (index != MipCount() - 1)
    {
        return;
    }
    if (!all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
    {
        return;
    }

    const FfxFloat32 fPrevLavg = SPD_LoadExposureBuffer().y;
    FfxFloat32 fLavg = outValue.r;

    if (fPrevLavg < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
    {
        // Exponential smoothing towards the new value, driven by the frame time.
        const FfxFloat32 rate = 1.0f;
        fLavg = fPrevLavg + (fLavg - fPrevLavg) * (1 - exp(-DeltaTime() * rate));
    }

    SPD_SetExposureBuffer(FfxFloat32x2(ComputeAutoExposureFromLavg(fLavg), fLavg));
}
|
||||
|
||||
// SPD callback: forwards to SPD_IncreaseAtomicCounter(), which receives the
// group-shared spdCounter as its in/out argument. `slice` is not used.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    SPD_IncreaseAtomicCounter(spdCounter);
}
|
||||
|
||||
// SPD callback: returns the value currently held in the group-shared spdCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    return spdCounter;
}
|
||||
|
||||
// SPD callback: forwards to SPD_ResetAtomicCounter(). `slice` is not used.
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    SPD_ResetAtomicCounter();
}
|
||||
|
||||
// Gathers the four per-channel group-shared values at [x][y] into one vector.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat32 fR = spdIntermediateR[x][y];
    const FfxFloat32 fG = spdIntermediateG[x][y];
    const FfxFloat32 fB = spdIntermediateB[x][y];
    const FfxFloat32 fA = spdIntermediateA[x][y];
    return FfxFloat32x4(fR, fG, fB, fA);
}
|
||||
// Scatters the four components of `value` into the per-channel group-shared
// arrays at [x][y].
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    spdIntermediateR[x][y] = value.x;
    spdIntermediateG[x][y] = value.y;
    spdIntermediateB[x][y] = value.z;
    spdIntermediateA[x][y] = value.w;
}
|
||||
// SPD reduction kernel: arithmetic mean of four values.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
    const FfxFloat32x4 fSum = v0 + v1 + v2 + v3;
    return fSum * 0.25f;
}
|
||||
#endif
|
||||
|
||||
// define fetch and store functions Packed
|
||||
#if FFX_HALF
|
||||
#error Callback must be implemented
|
||||
|
||||
// Packed half-precision group-shared scratch storage: two channels per array,
// accessed through SpdLoadIntermediateH() / SpdStoreIntermediateH().
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
|
||||
|
||||
// Packed (half) source fetch: reads imgDst[0] at (tex, slice).
// NOTE(review): this FFX_HALF section sits behind an "#error Callback must be
// implemented" directive and references imgDst, which is not declared in the
// visible code -- it looks like an unfinished template.
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
{
    const FfxFloat32x3 fCoord = FfxFloat32x3(tex, slice);
    return FfxFloat16x4(imgDst[0][fCoord]);
}
|
||||
// Packed (half) load: reads imgDst6 at (p, slice).
// NOTE(review): part of the FFX_HALF section guarded by "#error Callback must
// be implemented"; imgDst6 is not declared in the visible code.
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
{
    const FfxUInt32x3 uCoord = FfxUInt32x3(p, slice);
    return FfxFloat16x4(imgDst6[uCoord]);
}
|
||||
// Packed (half) store: writes `value` to imgDst6 when `mip` is
// LumaMipLevelToUse() or 5, otherwise to imgDst[mip + 1].
// The condition previously tested an undeclared identifier `index`; the mip
// level is passed in the `mip` parameter, so that is what is compared now.
// NOTE(review): part of the FFX_HALF section guarded by "#error Callback must
// be implemented"; imgDst / imgDst6 are not declared in the visible code.
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
    if (mip == LumaMipLevelToUse() || mip == 5)
    {
        imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
        return;
    }
    imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
}
|
||||
// Packed-path variant: atomically adds 1 to the per-slice global counter and
// stores the value returned by InterlockedAdd in the group-shared spdCounter.
// NOTE(review): part of the FFX_HALF section guarded by "#error Callback must
// be implemented"; it has the same signature as the non-packed definition
// earlier in this file.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
}
|
||||
// Packed-path variant: returns the value held in the group-shared spdCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    return spdCounter;
}
|
||||
// Packed-path variant: writes 0 to the per-slice global counter.
// NOTE(review): part of the FFX_HALF section guarded by "#error Callback must
// be implemented".
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
}
|
||||
// Reassembles a four-component half vector from the two packed group-shared
// arrays at [x][y] (RG supplies .xy, BA supplies .zw).
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat16x2 fRG = spdIntermediateRG[x][y];
    const FfxFloat16x2 fBA = spdIntermediateBA[x][y];
    return FfxFloat16x4(fRG.x, fRG.y, fBA.x, fBA.y);
}
|
||||
// Splits `value` into its .xy and .zw halves and stores them in the two
// packed group-shared arrays at [x][y].
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
    spdIntermediateRG[x][y] = value.xy;
    spdIntermediateBA[x][y] = value.zw;
}
|
||||
// Packed (half) SPD reduction kernel: arithmetic mean of four values.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
    const FfxFloat16x4 fSum = v0 + v1 + v2 + v3;
    return fSum * FfxFloat16(0.25);
}
|
||||
#endif
|
||||
|
||||
#include "ffx_spd.h"
|
||||
|
||||
// Entry point of the luminance pyramid pass: runs the SPD downsampler for
// this work group. The packed (half) variant is used when FFX_HALF is set;
// both variants receive the same arguments.
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
    const FfxUInt32x2 uGroupXY = FfxUInt32x2(WorkGroupId.xy);
    const FfxUInt32 uLocalIndex = FfxUInt32(LocalThreadIndex);
    const FfxUInt32 uMipCount = FfxUInt32(MipCount());
    const FfxUInt32 uNumGroups = FfxUInt32(NumWorkGroups());
    const FfxUInt32 uSlice = FfxUInt32(WorkGroupId.z);
    const FfxUInt32x2 uGroupOffset = FfxUInt32x2(WorkGroupOffset());

#if FFX_HALF
    SpdDownsampleH(uGroupXY, uLocalIndex, uMipCount, uNumGroups, uSlice, uGroupOffset);
#else
    SpdDownsample(uGroupXY, uLocalIndex, uMipCount, uNumGroups, uSlice, uGroupOffset);
#endif
}
|
134
thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
vendored
Normal file
134
thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
// Binding indices of the resources used by this pass. They are defined ahead
// of the ffx_fsr2_callbacks_glsl.h include that follows.
#define FSR2_BIND_SRV_INPUT_COLOR                0
#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC          1
#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE   2
#define FSR2_BIND_UAV_EXPOSURE_MIP_5             3
#define FSR2_BIND_UAV_AUTO_EXPOSURE              4
#define FSR2_BIND_CB_FSR2                        5
#define FSR2_BIND_CB_SPD                         6
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
#if defined(FSR2_BIND_CB_SPD)
|
||||
// SPD constants (std140 uniform block in descriptor set 1). Each member is
// exposed through the accessor function noted beside it.
layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
{
    uint  mips;            // MipCount()
    uint  numWorkGroups;   // NumWorkGroups()
    uvec2 workGroupOffset; // WorkGroupOffset()
    uvec2 renderSize;      // SPD_RenderSize()
} cbSPD;
|
||||
|
||||
// Returns cbSPD.mips.
uint MipCount()
{
    return cbSPD.mips;
}
|
||||
|
||||
// Returns cbSPD.numWorkGroups.
uint NumWorkGroups()
{
    return cbSPD.numWorkGroups;
}
|
||||
|
||||
// Returns cbSPD.workGroupOffset.
uvec2 WorkGroupOffset()
{
    return cbSPD.workGroupOffset;
}
|
||||
|
||||
// Returns cbSPD.renderSize.
uvec2 SPD_RenderSize()
{
    return cbSPD.renderSize;
}
|
||||
#endif
|
||||
|
||||
// Reads the first two components of texel (0,0) of the auto-exposure image.
vec2 SPD_LoadExposureBuffer()
{
    const ivec2 iTexel = ivec2(0,0);
    return imageLoad(rw_auto_exposure, iTexel).xy;
}
|
||||
|
||||
// Writes `value` into .xy of texel (0,0) of the auto-exposure image; .zw are
// set to 0.
void SPD_SetExposureBuffer(vec2 value)
{
    const ivec2 iTexel = ivec2(0,0);
    imageStore(rw_auto_exposure, iTexel, vec4(value, 0.0f, 0.0f));
}
|
||||
|
||||
// Loads the .x channel of rw_img_mip_5 at iPxPos; the other channels are 0.
vec4 SPD_LoadMipmap5(ivec2 iPxPos)
{
    const float fValue = imageLoad(rw_img_mip_5, iPxPos).x;
    return vec4(fValue, 0.0f, 0.0f, 0.0f);
}
|
||||
|
||||
// Stores `value` in the image selected by `slice`:
//   FFX_FSR2_SHADING_CHANGE_MIP_LEVEL -> rw_img_mip_shading_change
//   5                                 -> rw_img_mip_5
// Any other selector rewrites the texel's current value unchanged.
// NOTE(review): despite its name, SpdStore() passes a mip index as `slice`.
void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
{
    const vec4 fTexel = vec4(value, 0.0f, 0.0f, 0.0f);

    switch (slice)
    {
    case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
        imageStore(rw_img_mip_shading_change, iPxPos, fTexel);
        break;

    case 5:
        imageStore(rw_img_mip_5, iPxPos, fTexel);
        break;

    default:
        // avoid flattened side effect
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
        imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
        imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
#endif
        break;
    }
}
|
||||
|
||||
// Atomically adds 1 to texel (0,0) of the global atomic image and returns the
// result of imageAtomicAdd through the in/out parameter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
    const ivec2 iTexel = ivec2(0,0);
    spdCounter = imageAtomicAdd(rw_spd_global_atomic, iTexel, 1);
}
|
||||
|
||||
// Writes 0 to texel (0,0) of the global atomic image.
void SPD_ResetAtomicCounter()
{
    const ivec2 iTexel = ivec2(0,0);
    imageStore(rw_spd_global_atomic, iTexel, uvec4(0));
}
|
||||
|
||||
#include "ffx_fsr2_compute_luminance_pyramid.h"
|
||||
|
||||
// Work-group dimensions: 256 x 1 x 1 unless a dimension was already defined.
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 256
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH

#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT

#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH

// Expands to the compute-shader local-size layout declaration.
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
// Compute-shader entry point: declares the work-group size and hands the work
// group id and flattened local invocation index to ComputeAutoExposure().
FFX_FSR2_NUM_THREADS
void main()
{
    ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex);
}
|
258
thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
vendored
Normal file
258
thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_DEPTH_CLIP_H
|
||||
#define FFX_FSR2_DEPTH_CLIP_H
|
||||
|
||||
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;
|
||||
|
||||
// Computes the depth-clip factor for the sample at fUvSample.
//
// The four bilinear taps around fUvSample are read from the reconstructed previous depth.
// A tap contributes when it is on screen, its bilinear weight exceeds
// fReconstructedDepthBilinearWeightThreshold, and its view-space depth is smaller than the
// view-space depth of fCurrentDepthSample. Each contributing tap adds
// pow(saturate(requiredSeparation / depthDifference), power) scaled by its weight.
//
// fUvSample           - UV at which the previous depth is sampled.
// fCurrentDepthSample - depth value of the current pixel (not yet converted to view space).
//
// Returns saturate(1 - weighted average of the tap terms), or 0 when no tap contributed.
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
{
    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());

    // Loop-invariant terms: they depend only on RenderSize(), so compute them once.
    // fRenderSizeLength is the length of the (width, height) render-size vector.
    const FfxFloat32 fRenderSizeLength = length(FfxFloat32x2(RenderSize()));
    // The exponent goes from 1 to 3 as the render size approaches 1920x1080.
    const FfxFloat32 fResolutionFactor = ffxSaturate(fRenderSizeLength / length(FfxFloat32x2(1920.0f, 1080.0f)));
    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
    const FfxFloat32 Ksep = 1.37e-05f;

    FfxFloat32 fDepth = 0.0f;
    FfxFloat32 fWeightSum = 0.0f;
    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {

        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;

        if (IsOnScreen(iSamplePos, RenderSize())) {
            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {

                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);

                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

                if (fDepthDiff > 0.0f) {

                    // Pick one of the two raw depth samples; which one depends on the depth convention.
#if FFX_FSR2_OPTION_INVERTED_DEPTH
                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
#else
                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
#endif

                    // View-space positions of the screen centre and of pixel (0, 0) at that depth;
                    // the ratio of their lengths is the Kfov term.
                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);

                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);

                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fRenderSizeLength * fDepthThreshold;

                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
                    fWeightSum += fWeight;
                }
            }
        }
    }

    return (fWeightSum > 0.0f) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
}
|
||||
|
||||
// Measures how much the input motion vectors in the 3x3 neighbourhood of iPxPos
// disagree in direction with the motion vector at iPxPos.
//
// iPxPos                   - pixel whose neighbourhood is inspected.
// iPxInputMotionVectorSize - size handed to ClampLoad when clamping neighbour coordinates.
//
// Returns saturate(1 - lowest alignment) * saturate(largest UV speed / 0.01). The
// neighbourhood is only visited when the centre vector, scaled by RenderSize(), is longer
// than 1e-02; otherwise the alignment stays at 1 and the result is 0 * the speed term.
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
    const FfxFloat32 fVelocityEpsilonLr = 1e-02f;

    const FfxFloat32x2 fCenterMv = LoadInputMotionVector(iPxPos);
    const FfxFloat32 fCenterSpeedLr = length(fCenterMv * RenderSize());

    FfxFloat32 fLargestSpeedUv = length(fCenterMv);
    FfxFloat32 fLowestAlignment = 1.0f;

    if (fCenterSpeedLr > fVelocityEpsilonLr) {
        // Taps are visited row by row (y outer, x inner); the order matters because the
        // running maximum speed is used to normalise each tap.
        for (FfxInt32 iTap = 0; iTap < 9; ++iTap) {
            const FfxInt32x2 iTapOffset = FfxInt32x2(iTap % 3 - 1, iTap / 3 - 1);
            const FfxInt32x2 iTapPos = ClampLoad(iPxPos, iTapOffset, iPxInputMotionVectorSize);

            const FfxFloat32x2 fTapMv = LoadInputMotionVector(iTapPos);
            FfxFloat32 fTapSpeedUv = length(fTapMv);

            fLargestSpeedUv = ffxMax(fTapSpeedUv, fLargestSpeedUv);
            // Normalise both vectors by the largest speed seen so far, not by their own lengths.
            fTapSpeedUv = ffxMax(fTapSpeedUv, fLargestSpeedUv);

            const FfxFloat32 fAlignment = dot(fTapMv / fTapSpeedUv, fCenterMv / fTapSpeedUv);
            fLowestAlignment = ffxMin(fLowestAlignment, fAlignment);
        }
    }

    return ffxSaturate(1.0f - fLowestAlignment) * ffxSaturate(fLargestSpeedUv / 0.01f);
}
|
||||
|
||||
// Compares the smallest and largest dilated depth (in meters) in the 3x3 neighbourhood
// of iPxPos.
//
// Off-screen taps contribute a depth of 0. If any tap equals GetMaxDistanceInMeters()
// the result is multiplied by 0; otherwise it is 1 - min / max.
// NOTE(review): when every tap yields 0 the division below is 0 / 0 - confirm callers
// never reach that case or tolerate the result.
FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();

    FfxFloat32 fNearest = fMaxDistInMeters;
    FfxFloat32 fFarthest = 0.0f;
    FfxBoolean bMaxDistFound = false;

    for (FfxInt32 iTap = 0; iTap < 9; iTap++) {
        const FfxInt32x2 iTapPos = iPxPos + FfxInt32x2(iTap % 3 - 1, iTap / 3 - 1);

        // Zero out the depth of taps that fall outside the render area.
        const FfxFloat32 fOnScreenFactor = IsOnScreen(iTapPos, RenderSize()) ? 1.0f : 0.0f;
        const FfxFloat32 fTapDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iTapPos)) * fOnScreenFactor;

        bMaxDistFound = bMaxDistFound || (fMaxDistInMeters == fTapDepth);

        fNearest = ffxMin(fNearest, fTapDepth);
        fFarthest = ffxMax(fFarthest, fTapDepth);
    }

    return (1.0f - fNearest / fFarthest) * (bMaxDistFound ? 0.0f : 1.0f);
}
|
||||
|
||||
// Compares the dilated motion vector at iPxPos with the previous frame's dilated motion
// vector sampled at the reprojected UV.
//
// Returns 0 when the current vector, scaled by DisplaySize(), is at most one pixel long.
// Otherwise returns (1 - saturate(|previous| / |current|)) blended in from 0 by
// saturate((pixel distance / 20)^3).
FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();

    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
    FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
    fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize());
    FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);

    // FfxFloat32 and float literals keep this consistent with the rest of the file and
    // give both ternary arms the same type.
    const FfxFloat32 fPxDistance = length(fMotionVector * DisplaySize());
    return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0.0f;
}
|
||||
|
||||
// Builds the dilated reactive masks for iPxLrPos from its 3x3 neighbourhood and stores
// them with StoreDilatedReactiveMasks.
//
// The stored value has two channels: x starts at 0 and accumulates the reactive mask,
// y starts at fMotionDivergence and accumulates the transparency-and-composition mask.
// Each tap's mask values are raised to a power between 1 and 7 that grows as the tap's
// colour becomes less similar to the centre colour, and the per-channel maximum is kept.
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
    // Compensate for bilinear sampling in accumulation pass

    const FfxFloat32x3 fCenterColor = LoadInputColor(iPxLrPos).xyz;

    FfxFloat32x3 fTapColors[9];
    FfxFloat32 fTapReactive[9];
    FfxFloat32 fTapTransparencyAndComposition[9];

    float fMaskTotal = 0.0f;

    // Gather pass: taps are visited row by row, tap index = (y + 1) * 3 + (x + 1).
    FFX_UNROLL
    for (FfxInt32 iTap = 0; iTap < 9; iTap++) {
        const FfxInt32x2 iTapOffset = FfxInt32x2(iTap % 3 - 1, iTap / 3 - 1);
        const FfxInt32x2 iTapPos = ClampLoad(iPxLrPos, iTapOffset, FfxInt32x2(RenderSize()));

        fTapColors[iTap] = LoadInputColor(iTapPos).xyz;
        fTapReactive[iTap] = LoadReactiveMask(iTapPos);
        fTapTransparencyAndComposition[iTap] = LoadTransparencyAndCompositionMask(iTapPos);

        fMaskTotal += (fTapReactive[iTap] + fTapTransparencyAndComposition[iTap]);
    }

    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);

    // Skip the weighting pass entirely when no mask value was set in the neighbourhood.
    if (fMaskTotal > 0) {
        // Increase power for non-similar samples
        const FfxFloat32 fPowerBiasMax = 6.0f;

        for (FfxInt32 iTap = 0; iTap < 9; iTap++) {
            const FfxFloat32x3 fTapColor = fTapColors[iTap];

            // NOTE(review): if both colours are exactly zero this divides 0 by 0 - confirm
            // that inputs with mask coverage are never fully black or that the result is tolerated.
            const FfxFloat32 fLargestLenSq = ffxMax(dot(fCenterColor, fCenterColor), dot(fTapColor, fTapColor));
            const FfxFloat32 fSimilarity = dot(fCenterColor, fTapColor) / fLargestLenSq;

            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
            const FfxFloat32x2 fWeightedMasks = FfxFloat32x2(
                ffxPow(fTapReactive[iTap], fSimilarityPower),
                ffxPow(fTapTransparencyAndComposition[iTap], fSimilarityPower));

            fReactiveFactor = ffxMax(fReactiveFactor, fWeightedMasks);
        }
    }

    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
}
|
||||
|
||||
// Loads the input colour at iPxLrPos, clamps negative components to zero, applies
// PrepareRgb with the current exposure and pre-exposure, and returns the result in YCoCg.
FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
{
    // The input is assumed to be linear. Non-linear input (sRGB, ...) should be
    // converted to linear first and converted back on output.
    const FfxFloat32x3 fClampedRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fClampedRgb, Exposure(), PreExposure());

    return RGBToYCoCg(fPreparedRgb);
}
|
||||
|
||||
// Checks the reconstructed previous depth of the pixel above, at, and below iPxPos.
//
// Returns 0 when the view-space depth drops by more than 1% at both steps
// (above -> centre and centre -> below), and 1 otherwise.
// fMotionVector is not used by this function.
FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
{
    const FfxFloat32 fDepthAbove = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
    const FfxFloat32 fDepthCenter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
    const FfxFloat32 fDepthBelow = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));

    const FfxBoolean bUpperStep = (fDepthAbove - fDepthCenter) > (fDepthCenter * 0.01f);
    const FfxBoolean bLowerStep = (fDepthCenter - fDepthBelow) > (fDepthBelow * 0.01f);

    return (bUpperStep && bLowerStep) ? 0.0f : 1.0f;
}
|
||||
|
||||
// Per-pixel body of the depth clip pass.
//
// For iPxPos it:
//   1. computes the depth-clip factor (ComputeDepthClip * EvaluateSurface),
//   2. stores the prepared YCoCg input colour with the depth-clip factor in the w channel,
//   3. computes motion divergence terms and stores the dilated reactive masks.
void DepthClip(FfxInt32x2 iPxPos)
{
    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);

    // Discard tiny mvs
    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);

    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
    // NOTE(review): this value is not read below; the load is kept so the set of
    // resources referenced by this pass does not change.
    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos));

    // Compute prepared input color and depth clip
    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));

    // Compute dilated reactive mask
    // The size passed to ComputeMotionDivergence is used to clamp neighbour coordinates
    // in the input motion vectors, so it has to match the space iSamplePos lives in.
    // NOTE(review): assumes input motion vectors are DisplaySize() large when
    // FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS is off - confirm against the resource setup.
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    FfxInt32x2 iSamplePos = iPxPos;
    const FfxInt32x2 iInputMotionVectorSize = FfxInt32x2(RenderSize());
#else
    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
    const FfxInt32x2 iInputMotionVectorSize = FfxInt32x2(DisplaySize());
#endif

    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, iInputMotionVectorSize);
    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));

    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_DEPTH_CLIP_H )
|
66
thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
vendored
Normal file
66
thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0
|
||||
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1
|
||||
#define FSR2_BIND_SRV_DILATED_DEPTH 2
|
||||
#define FSR2_BIND_SRV_REACTIVE_MASK 3
|
||||
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4
|
||||
#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 8
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 9
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 10
|
||||
|
||||
#define FSR2_BIND_UAV_DEPTH_CLIP 11
|
||||
#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12
|
||||
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 14
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_depth_clip.h"
|
||||
|
||||
// Default work-group dimensions for this pass (8 x 8 x 1).
// Each value is only defined here when it has not been defined earlier.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

// Expands to the compute-shader local size declaration built from the three values above.
#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // !defined(FFX_FSR2_NUM_THREADS)

FFX_FSR2_NUM_THREADS
// Entry point: runs DepthClip for the pixel addressed by the global invocation id.
void main()
{
    const ivec2 iPxPos = ivec2(gl_GlobalInvocationID.xy);
    DepthClip(iPxPos);
}
|
1
thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
vendored
Normal file
1
thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
vendored
Normal file
@@ -0,0 +1 @@
|
||||
// This file doesn't exist in this version of FSR.
|
1
thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
vendored
Normal file
1
thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
vendored
Normal file
@@ -0,0 +1 @@
|
||||
// This file doesn't exist in this version of FSR.
|
115
thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
vendored
Normal file
115
thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_LOCK_H
|
||||
#define FFX_FSR2_LOCK_H
|
||||
|
||||
// Resets the reconstructed depth at iPxHrPos to the far-plane value.
// Positions that are not strictly inside RenderSize() on both axes are left untouched.
void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
{
    // Far depth as raw bits: 0 with inverted depth, otherwise 0x3f800000
    // (the IEEE-754 bit pattern of 1.0f).
#if FFX_FSR2_OPTION_INVERTED_DEPTH
    const FfxUInt32 uFarDepthBits = 0x0;
#else
    const FfxUInt32 uFarDepthBits = 0x3f800000;
#endif

    const FfxInt32x2 iRenderSize = FfxInt32x2(RenderSize());
    if (!all(FFX_LESS_THAN(iPxHrPos, iRenderSize)))
    {
        return;
    }

    SetReconstructedDepth(iPxHrPos, uFarDepthBits);
}
|
||||
|
||||
// Decides whether the pixel at pos looks like a thin feature (a ridge) in the lock input luma.
//
// Each of the 8 neighbours is classified as similar (luma ratio max/min below 1.05) or
// dissimilar. The pixel is a ridge when its luma is above every dissimilar neighbour or
// below every dissimilar neighbour. A ridge is rejected when the similar pixels fill any
// complete 2x2 quadrant that contains the centre.
//
// Returns true for an accepted thin feature, false otherwise.
FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
{
    const FfxInt32 RADIUS = 1;

    FfxFloat32 fNucleus = LoadLockInputLuma(pos);

    FfxFloat32 similar_threshold = 1.05f;
    FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
    FfxFloat32 dissimilarLumaMax = 0;

    /*
     Bit index of each pixel in the 3x3 neighbourhood:
     0 1 2
     3 4 5
     6 7 8
    */

    // The argument is parenthesized so that an expression argument keeps its meaning.
    #define SETBIT(x) (1U << (x))

    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar

    const FfxUInt32 uNumRejectionMasks = 4;
    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
    };

    FfxInt32 idx = 0;
    FFX_UNROLL
    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
            // The centre is already flagged; idx still advances through the loop increment.
            if (x == 0 && y == 0) continue;

            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
            // Ratio of the larger to the smaller luma.
            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);

            if (difference > 0 && (difference < similar_threshold)) {
                mask |= SETBIT(idx);
            } else {
                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
            }
        }
    }

    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;

    if (!isRidge) {
        return false;
    }

    // Reject when all four pixels of any quadrant around the centre are similar.
    FFX_UNROLL
    for (FfxUInt32 i = 0; i < uNumRejectionMasks; i++) {
        if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
// Per-pixel body of the lock pass: stores a new lock (value 1) at the high-resolution
// position of iPxLrPos when the pixel is classified as a thin feature, then clears the
// reconstructed depth at iPxLrPos for the next frame.
void ComputeLock(FfxInt32x2 iPxLrPos)
{
    const FfxBoolean bThinFeature = ComputeThinFeatureConfidence(iPxLrPos);

    if (bThinFeature)
    {
        const FfxInt32x2 iPxHrPos = ComputeHrPosFromLrPos(iPxLrPos);
        StoreNewLocks(iPxHrPos, 1.f);
    }

    ClearResourcesForNextFrame(iPxLrPos);
}
|
||||
|
||||
#endif // FFX_FSR2_LOCK_H
|
56
thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
vendored
Normal file
56
thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0
|
||||
#define FSR2_BIND_UAV_NEW_LOCKS 1
|
||||
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2
|
||||
#define FSR2_BIND_CB_FSR2 3
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_lock.h"
|
||||
|
||||
// Default work-group dimensions for this pass (8 x 8 x 1).
// Each value is only defined here when it has not been defined earlier.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

// Expands to the compute-shader local size declaration built from the three values above.
#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // !defined(FFX_FSR2_NUM_THREADS)

FFX_FSR2_NUM_THREADS
// Entry point: derives the dispatch thread id from the work-group id, the configured
// group width/height and the local invocation id, then runs ComputeLock for that pixel.
void main()
{
    const uvec2 uGroupSize = uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    const uvec2 uDispatchThreadId = gl_WorkGroupID.xy * uGroupSize + gl_LocalInvocationID.xy;

    ComputeLock(ivec2(uDispatchThreadId));
}
|
106
thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
vendored
Normal file
106
thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
|
||||
#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
|
||||
|
||||
// Fetch wrapper used by the custom sampler declarations below: loads the luma of the
// mip level selected by LumaMipLevelToUse() at iPxSample and returns it in the x channel
// of a four-component vector whose other channels are 0.
FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
{
    return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
}

#if FFX_HALF
// Min-16 variant of the wrapper above; only compiled when FFX_HALF is enabled.
FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
{
    return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
}
#endif

// Declares FetchShadingChangeLumaSamples on top of WrapShadingChangeLuma (the min-16
// bilinear form when both half options are enabled, the bicubic form otherwise) and the
// Lanczos2 sampler ShadingChangeLumaSample that uses it.
#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
#else
DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
#endif
DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples)
|
||||
|
||||
// Returns the shading-change luma for fUvCoord: Exposure() * exp(sampled mip luma),
// raised to the power 1/6.
//
// iPxHrPos is not used by the active code path.
FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
{
    FfxFloat32 fExposedLuma = 0;

#if 0
    // Disabled alternative that samples through the custom Lanczos2 sampler.
    fExposedLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
#else

    // Render size divided by 2^(mip level + 1) is the size used to clamp the UV.
    const FfxFloat32 fMipDivisor = FfxFloat32(2 << LumaMipLevelToUse());
    const FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fMipDivisor);

    fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions());
    fExposedLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse())));
#endif

    return ffxPow(fExposedLuma, 1.0f / 6.0f);
}
|
||||
|
||||
// Updates the lock status of one pixel and derives this frame's lock contribution.
//
// params                     - common accumulation-pass parameters; iPxHrPos, fHrUv,
//                              fAccumulationMask and fDepthClipFactor are read.
// fReactiveFactor            - in/out; raised to at least saturate((luminance diff - 0.1) * 10).
// state                      - lock state; only NewLock is read.
// fLockStatus                - in/out; the LOCK_TEMPORAL_LUMA and LOCK_LIFETIME_REMAINING
//                              entries are updated.
// fLockContributionThisFrame - out; saturated lifetime term times the luma-agreement term.
// fLuminanceDiff             - out; 1 - min/max of the stored and the current shading-change luma.
void UpdateLockStatus(AccumulationPassCommonParams params,
    FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
    FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
    FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
    FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) {

    const FfxFloat32 fCurrentLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);

    //init temporal shading change factor, init to -1 or so in reproject to know if "true new"?
    if (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) {
        fLockStatus[LOCK_TEMPORAL_LUMA] = fCurrentLuma;
    }

    const FfxFloat32 fStoredLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
    fLuminanceDiff = 1.0f - MinDividedByMax(fStoredLuma, fCurrentLuma);

    if (state.NewLock) {
        // Fresh lock: take the current luma and (re)start the lifetime.
        fLockStatus[LOCK_TEMPORAL_LUMA] = fCurrentLuma;
        fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
    }
    else if (fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
        // Low remaining lifetime: move the stored luma halfway towards the current one.
        fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fCurrentLuma), 0.5f);
    }
    else if (fLuminanceDiff > 0.1f) {
        KillLock(fLockStatus);
    }

    fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));

    // Attenuate the remaining lifetime by reactivity, accumulation mask and depth clip,
    // one factor at a time.
    fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
    fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
    fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);

    // Compute this frame lock contribution
    const FfxFloat32 fLifetimeTerm = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
    const FfxFloat32 fLumaAgreementTerm = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fCurrentLuma));

    fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeTerm * 4.0f) * fLumaAgreementTerm);
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H )
|
67
thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
vendored
Normal file
67
thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#define GROUP_SIZE 8
|
||||
|
||||
#define FSR_RCAS_DENOISE 1
|
||||
|
||||
// Stores fUpscaledColor at iPxHrPos, converting the unsigned position to the signed
// min-16 vector type expected by StoreUpscaledOutput.
void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
{
    const FFX_MIN16_I2 iPxSignedPos = FFX_MIN16_I2(iPxHrPos);
    StoreUpscaledOutput(iPxSignedPos, fUpscaledColor);
}
|
||||
|
||||
#define FSR_RCAS_F
|
||||
// Loads the RCAS input texel at p and returns it with PrepareRgb applied to the rgb
// channels (using the current exposure and pre-exposure); the alpha channel is passed through.
FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
{
    const FfxFloat32x4 fLoaded = LoadRCAS_Input(p);
    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fLoaded.rgb, Exposure(), PreExposure());

    return FfxFloat32x4(fPreparedRgb, fLoaded.a);
}
|
||||
|
||||
// Intentionally empty: leaves r, g and b unchanged.
// NOTE(review): presumably the input-transform hook expected by the RCAS code in
// ffx_fsr1.h - confirm against that header.
void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b)
{
}
|
||||
|
||||
#include "ffx_fsr1.h"
|
||||
|
||||
|
||||
// Runs FsrRcasF for the pixel at pos, applies UnprepareRgb with the current exposure
// to the result and writes it to the upscaled output.
void CurrFilter(FFX_MIN16_U2 pos)
{
    FfxFloat32x3 fSharpened;
    FsrRcasF(fSharpened.r, fSharpened.g, fSharpened.b, pos, RCASConfig());

    WriteUpscaledOutput(pos, UnprepareRgb(fSharpened, Exposure()));
}
|
||||
|
||||
// Filters four pixels per invocation inside the 16x16 tile owned by the work group:
// the quad-remapped base position, then the positions 8 pixels to the right,
// 8 right and 8 down, and 8 down, in that order.
//
// Dtid is not used by this function.
void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
{
    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
    const FfxUInt32x2 uTileOrigin = FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
    const FfxUInt32x2 uBasePos = ffxRemapForQuad(LocalThreadId.x) + uTileOrigin;

    CurrFilter(FFX_MIN16_U2(uBasePos));
    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(8u, 0u)));
    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(8u, 8u)));
    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(0u, 8u)));
}
|
80
thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
vendored
Normal file
80
thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
// Needed for rw_upscaled_output declaration
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
// Binding indices for the resources used by the RCAS pass.
// FSR2_BIND_CB_RCAS is used by the cbRCAS uniform block declared later in this file.
// NOTE(review): the other indices are presumably consumed by
// ffx_fsr2_callbacks_glsl.h, which is not visible here - confirm.
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
|
||||
#define FSR2_BIND_SRV_RCAS_INPUT 1
|
||||
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2
|
||||
#define FSR2_BIND_CB_FSR2 3
|
||||
#define FSR2_BIND_CB_RCAS 4
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
//Move to prototype shader!
|
||||
// RCAS constant buffer; only declared when a binding slot was assigned to it.
#if defined(FSR2_BIND_CB_RCAS)
layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
{
    uvec4 rcasConfig;
} cbRCAS;
#endif

// Returns the RCAS configuration constants from cbRCAS, or an all-zero vector
// when FSR2_BIND_CB_RCAS is not defined.
uvec4 RCASConfig()
{
#if defined(FSR2_BIND_CB_RCAS)
    return cbRCAS.rcasConfig;
#else
    return uvec4(0);
#endif
}
|
||||
|
||||
// Fetches the texel at integer position iPxPos from mip level 0 of r_rcas_input.
vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
{
    vec4 fTexel = texelFetch(r_rcas_input, iPxPos, 0);
    return fTexel;
}
|
||||
|
||||
#include "ffx_fsr2_rcas.h"
|
||||
|
||||
// Thread-group configuration for this pass. Each value can be pre-defined by the
// build; the defaults below give a 64x1x1 work group.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 64
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

// Expands to the compute local-size layout declaration built from the three sizes above.
#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #if !defined(FFX_FSR2_NUM_THREADS)
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
// Compute entry point: forwards the built-in local, work-group and global
// invocation IDs to RCAS().
void main()
{
    RCAS(gl_LocalInvocationID, gl_WorkGroupID, gl_GlobalInvocationID);
}
|
145
thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
vendored
Normal file
145
thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
|
||||
#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
|
||||
|
||||
// Writes fDepth to the positions this pixel reprojects to.
//   iPxPos        - pixel position of the current sample
//   fDepth        - depth value to store
//   fMotionVector - motion vector added to the pixel's UV to get the reprojected UV
//   iPxDepthSize  - size used to normalise iPxPos and to bounds-check store positions
void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
{
    // Zero the motion vector unless its length, scaled by DisplaySize(), exceeds 0.1.
    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);

    // UV of the pixel centre, shifted by the (possibly zeroed) motion vector.
    FfxFloat32x2 fPixelCenterUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
    FfxFloat32x2 fTargetUv = fPixelCenterUv + fMotionVector;

    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fTargetUv, RenderSize());

    // Project current depth into previous frame locations.
    // Push to all pixels having some contribution if reprojection is using bilinear logic.
    for (FfxInt32 iTap = 0; iTap < 4; iTap++) {
        FfxFloat32 fTapWeight = bilinearInfo.fWeights[iTap];
        FfxInt32x2 iStorePos = bilinearInfo.iBasePos + bilinearInfo.iOffsets[iTap];

        // Only taps above the weight threshold and inside iPxDepthSize are written.
        if (fTapWeight > fReconstructedDepthBilinearWeightThreshold) {
            if (IsOnScreen(iStorePos, iPxDepthSize)) {
                StoreReconstructedDepth(iStorePos, fDepth);
            }
        }
    }
}
|
||||
|
||||
// Finds the nearest depth in the 3x3 neighbourhood around iPxPos.
//   iPxPos             - centre pixel
//   iPxSize            - size used to reject neighbours that are off screen
//   fNearestDepth      - out: nearest depth value found
//   fNearestDepthCoord - out: pixel coordinate that depth was loaded from
// "Nearest" means the largest value when FFX_FSR2_OPTION_INVERTED_DEPTH is set and
// the smallest value otherwise. The centre sample is always the initial candidate.
void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
{
    const FfxInt32 iSampleCount = 9;
    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
        FfxInt32x2(+0, +0),
        FfxInt32x2(+1, +0),
        FfxInt32x2(+0, +1),
        FfxInt32x2(+0, -1),
        FfxInt32x2(-1, +0),
        FfxInt32x2(-1, +1),
        FfxInt32x2(+1, +1),
        FfxInt32x2(-1, -1),
        FfxInt32x2(+1, -1),
    };

    // pull out the depth loads to allow SC to batch them
    // Sized by iSampleCount so the two arrays cannot drift apart.
    FfxFloat32 depth[iSampleCount];
    FfxInt32 iSampleIndex = 0;
    FFX_UNROLL
    for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
        depth[iSampleIndex] = LoadInputDepth(iPxPos + iSampleOffsets[iSampleIndex]);
    }

    // find closest depth
    fNearestDepthCoord = iPxPos;
    fNearestDepth = depth[0];
    FFX_UNROLL
    for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {

        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
        if (IsOnScreen(iPos, iPxSize)) {

            FfxFloat32 fNdDepth = depth[iSampleIndex];

            // Only the comparison direction depends on the depth convention.
#if FFX_FSR2_OPTION_INVERTED_DEPTH
            FfxBoolean bIsNearer = (fNdDepth > fNearestDepth);
#else
            FfxBoolean bIsNearer = (fNdDepth < fNearestDepth);
#endif
            if (bIsNearer) {
                fNearestDepthCoord = iPos;
                fNearestDepth = fNdDepth;
            }
        }
    }
}
|
||||
|
||||
// Computes the luma value used by the locking logic for the pixel at iPxLrPos.
// The input colour is clamped to be non-negative, divided by PreExposure(),
// multiplied by Exposure(), tonemapped when FFX_FSR2_OPTION_HDR_COLOR_INPUT is set,
// and its perceived luma is raised to the power 1/6.
FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
{
    //We assume linear data. if non-linear input (sRGB, ...),
    //then we should convert to linear first and back to sRGB on output.
    FfxFloat32x3 fColor = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));

    // Use internal auto exposure for locking logic
    fColor = (fColor / PreExposure()) * Exposure();

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fColor = Tonemap(fColor);
#endif

    //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved!
    return ffxPow(RGBToPerceivedLuma(fColor), FfxFloat32(1.0 / 6.0));
}
|
||||
|
||||
// Per-pixel body of the reconstruct-and-dilate pass for pixel iPxLrPos:
//   1. finds the nearest depth in the 3x3 neighbourhood (FindNearestDepth),
//   2. loads the motion vector at that nearest-depth position,
//   3. stores the dilated depth and dilated motion vector,
//   4. pushes the depth to its reprojected locations (ReconstructPrevDepth),
//   5. stores the lock input luma for the pixel.
void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
{
    FfxFloat32 fDilatedDepth;
    FfxInt32x2 iNearestDepthCoord;

    FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);

    // The motion vector is read where the nearest depth was found. With
    // low-resolution motion vectors the coordinate is used directly, otherwise it
    // is converted with ComputeHrPosFromLrPos().
    // (The former iSamplePos local was never read and has been removed; this
    // assumes ComputeHrPosFromLrPos() has no side effects.)
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
#else
    FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
#endif

    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);

    StoreDilatedDepth(iPxLrPos, fDilatedDepth);
    StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);

    ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());

    FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
    StoreLockInputLuma(iPxLrPos, fLockInputLuma);
}
|
||||
|
||||
|
||||
#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
|
65
thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
vendored
Normal file
65
thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
// Binding indices for the resources used by the reconstruct-previous-depth pass:
// SRV inputs (0-4), UAV outputs (5-11) and the FSR2 constant buffer (12).
// NOTE(review): presumably consumed by ffx_fsr2_callbacks_glsl.h, which is not
// visible here - confirm.
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 1
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 2
|
||||
#define FSR2_BIND_SRV_INPUT_EXPOSURE 3
|
||||
#define FSR2_BIND_SRV_LUMA_HISTORY 4
|
||||
|
||||
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5
|
||||
#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6
|
||||
#define FSR2_BIND_UAV_DILATED_DEPTH 7
|
||||
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8
|
||||
#define FSR2_BIND_UAV_LUMA_HISTORY 9
|
||||
#define FSR2_BIND_UAV_LUMA_INSTABILITY 10
|
||||
#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 12
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
#include "ffx_fsr2_sample.h"
|
||||
#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
|
||||
|
||||
// Thread-group configuration for this pass. Each value can be pre-defined by the
// build; the defaults below give an 8x8x1 work group.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

// Expands to the compute local-size layout declaration built from the three sizes above.
#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #if !defined(FFX_FSR2_NUM_THREADS)
|
||||
|
||||
FFX_FSR2_NUM_THREADS
|
||||
// Compute entry point: runs ReconstructAndDilate() for the pixel addressed by the
// xy components of the global invocation ID.
void main()
{
    FFX_MIN16_I2 iPxLrPos = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
    ReconstructAndDilate(iPxLrPos);
}
|
136
thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
vendored
Normal file
136
thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_REPROJECT_H
|
||||
#define FFX_FSR2_REPROJECT_H
|
||||
|
||||
// Lanczos variant used by the reprojection samplers; defaults to 0 unless the
// build already defined it.
#if !defined(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE)
#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
#endif
|
||||
|
||||
// Fetch callback for the history sampler: returns LoadHistory() at iPxSample.
FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
{
    FfxFloat32x4 fHistorySample = LoadHistory(iPxSample);
    return fHistorySample;
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Min16 overload of the history fetch callback: returns LoadHistory() at iPxSample
// converted to FFX_MIN16_F4.
FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
{
    FFX_MIN16_F4 fHistorySample = FFX_MIN16_F4(LoadHistory(iPxSample));
    return fHistorySample;
}
|
||||
#endif
|
||||
|
||||
|
||||
// Instantiates the history sampler: FetchHistorySamples (bicubic fetch built on
// WrapHistory) and HistorySample (texture sample using the Lanczos sampler chosen
// by FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE). The Min16 macro variants are used
// only when both FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF and FFX_HALF are
// non-zero.
// NOTE(review): the DeclareCustom* macros are defined outside this view
// (presumably in ffx_fsr2_sample.h) - confirm the generated signatures there.
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
|
||||
DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
|
||||
#else
|
||||
DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
|
||||
DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
|
||||
#endif
|
||||
|
||||
// Fetch callback for the lock-status sampler: places LoadLockStatus() at iPxSample
// in the leading components of a four-component vector and zeroes the last two.
FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
{
    return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Min16 overload of the lock-status fetch callback: places LoadLockStatus() at
// iPxSample in the leading components of an FFX_MIN16_F4 and zeroes the last two.
FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
{
    return FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
}
|
||||
#endif
|
||||
|
||||
// Instantiates the lock-status sampler: FetchLockStatusSamples (built on
// WrapLockStatus) and LockStatusSample.
// The outer `#if 1` hard-selects the bilinear variant; the `#else` branch
// (bicubic fetch with the Lanczos sampler) is currently compiled out.
// Within each variant the Min16 macros are used only when both
// FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF and FFX_HALF are non-zero.
// NOTE(review): the DeclareCustom* macros are defined outside this view
// (presumably in ffx_fsr2_sample.h) - confirm the generated signatures there.
#if 1
|
||||
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
|
||||
#else
|
||||
DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
|
||||
#endif
|
||||
#else
|
||||
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
|
||||
DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
|
||||
#else
|
||||
DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
|
||||
DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Returns the motion vector for an output pixel.
// With FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS the dilated motion vector is
// loaded at fHrUv scaled by RenderSize(); otherwise the input motion vector is
// loaded directly at iPxHrPos.
FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
{
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    return LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize()));
#else
    return LoadInputMotionVector(iPxHrPos);
#endif
}
|
||||
|
||||
// True when both components of fUv lie in the closed range [0, 1].
FfxBoolean IsUvInside(FfxFloat32x2 fUv)
{
    FfxBoolean bInsideX = (fUv.x >= 0.0f) && (fUv.x <= 1.0f);
    FfxBoolean bInsideY = (fUv.y >= 0.0f) && (fUv.y <= 1.0f);
    return bInsideX && bInsideY;
}
|
||||
|
||||
// Computes the reprojected UV (params.fHrUv + params.fMotionVector) and reports
// whether it falls inside the [0, 1] range on both axes.
//   fReprojectedHrUv  - out: reprojected UV
//   bIsExistingSample - out: result of IsUvInside() for that UV
void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
{
    FfxFloat32x2 fShiftedUv = params.fHrUv + params.fMotionVector;

    fReprojectedHrUv = fShiftedUv;
    bIsExistingSample = IsUvInside(fShiftedUv);
}
|
||||
|
||||
// Samples the history buffer at params.fReprojectedHrUv and decodes it.
//   fHistoryColor           - out: history RGB passed through PrepareRgb() (with
//                             Exposure() and PreviousFramePreExposure()) and then
//                             converted with RGBToYCoCg()
//   fTemporalReactiveFactor - out: saturate(|history.w|)
//   bInMotionLastFrame      - out: true when history.w is negative
void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
{
    FfxFloat32x4 fHistorySample = HistorySample(params.fReprojectedHrUv, DisplaySize());

    fHistoryColor = RGBToYCoCg(PrepareRgb(fHistorySample.rgb, Exposure(), PreviousFramePreExposure()));

    //Compute temporal reactivity info
    // The w channel carries the magnitude as |w| and the motion flag in its sign.
    fTemporalReactiveFactor = ffxSaturate(abs(fHistorySample.w));
    bInMotionLastFrame = (fHistorySample.w < 0.0f);
}
|
||||
|
||||
// Builds the lock state for the current pixel and samples the reprojected lock status.
//   params                 - uses iPxHrPos (new-lock lookup) and fReprojectedHrUv
//                            (lock-status sample position)
//   fReprojectedLockStatus - out: SampleLockStatus() at the reprojected UV
// Returns a LockState where NewLock is set when the new-lock intensity exceeds
// 127/255, and WasLockedPrevFrame is set when the reprojected status has a
// non-zero LOCK_LIFETIME_REMAINING component.
LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
{
    LockState state = { FFX_FALSE, FFX_FALSE };

    const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);

    // The unused fInPlaceLockLifetime local that used to be computed here was
    // removed; it was never read.

    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);

    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
        state.WasLockedPrevFrame = true;
    }

    return state;
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_REPROJECT_H )
|
105
thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
vendored
Normal file
105
thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_RESOURCES_H
|
||||
#define FFX_FSR2_RESOURCES_H
|
||||
|
||||
#if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
// Resource identifiers. Values are contiguous from 0 to
// FFX_FSR2_RESOURCE_IDENTIFIER_COUNT - 1 (with SCENE_LUMINANCE aliasing mip 0).
#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL                                           0
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY                              1
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR                                    2
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS                           3
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH                                    4
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE                                 5
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK                            6
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK        7
#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH           8
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS                         9
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH                                  10
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR                        11
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS                                    12
#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS                                      13
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR                           14
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY                                   15
#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT                                   16
#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT                                    17
#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT                               18
#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT                                19
#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT                                     20
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1                                  21
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2                                  22
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1                      23
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2                      24
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY                    25
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION  26
// The original name is misspelled ("IDENTITIER"). It is kept for existing users;
// the correctly spelled alias below resolves to the same value.
#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT                      27
#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSAMPLE_MAXIMUM_BIAS_LUT                      FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS                         28
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE                                29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0                       29
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1                       30
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2                       31
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3                       32
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4                       33
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5                       34
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6                       35
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7                       36
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8                       37
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9                       38
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10                      39
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11                      40
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12                      41
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE                      42
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE                                  43
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE                                   44
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION                                45

#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR                           46
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR                          47
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1                         48
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1                        49
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2                         50
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2                        51
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS                52
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1              53
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2              54
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1                                 55
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2                                 56
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA                                57

// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE          FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
// Mip index (relative to mip 0 of the scene luminance resource) of the level above.
#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL                                           (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE)

// Number of resource identifiers; must stay one greater than the highest value above.
#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT                                          58

// Constant buffer identifiers.
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2                                     0
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD                                      1
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS                                     2
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE                              3

// Auto-reactive generation flags; each value is a distinct bit so they can be OR-ed.
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP                                    1
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP                             2
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD                                  4
#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX                               8
|
||||
|
||||
#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
|
||||
|
||||
#endif //!defined( FFX_FSR2_RESOURCES_H )
|
605
thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
vendored
Normal file
605
thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
vendored
Normal file
@@ -0,0 +1,605 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_SAMPLE_H
|
||||
#define FFX_FSR2_SAMPLE_H
|
||||
|
||||
// suppress warnings
|
||||
#ifdef FFX_HLSL
|
||||
#pragma warning(disable: 4008) // potentially divide by zero
|
||||
#endif //FFX_HLSL
|
||||
|
||||
// The four samples of a 2x2 bilinear footprint. In fColorXY the first digit is the
// x offset and the second the y offset (Bilinear() blends 00/10 and 01/11 along x).
struct FetchedBilinearSamples {
    FfxFloat32x4 fColor00, fColor10;
    FfxFloat32x4 fColor01, fColor11;
};
|
||||
|
||||
// The sixteen samples of a 4x4 bicubic footprint, one group of four per line.
// NOTE(review): presumably fColorXY uses the same x/y digit convention as
// FetchedBilinearSamples - confirm against the fetch macros.
struct FetchedBicubicSamples {
    FfxFloat32x4 fColor00, fColor10, fColor20, fColor30;
    FfxFloat32x4 fColor01, fColor11, fColor21, fColor31;
    FfxFloat32x4 fColor02, fColor12, fColor22, fColor32;
    FfxFloat32x4 fColor03, fColor13, fColor23, fColor33;
};
|
||||
|
||||
#if FFX_HALF
|
||||
// FFX_MIN16_F4 counterpart of FetchedBilinearSamples: the four samples of a 2x2
// bilinear footprint.
struct FetchedBilinearSamplesMin16 {
    FFX_MIN16_F4 fColor00, fColor10;
    FFX_MIN16_F4 fColor01, fColor11;
};
|
||||
|
||||
// FFX_MIN16_F4 counterpart of FetchedBicubicSamples: the sixteen samples of a 4x4
// bicubic footprint, one group of four per line.
struct FetchedBicubicSamplesMin16 {
    FFX_MIN16_F4 fColor00, fColor10, fColor20, fColor30;
    FFX_MIN16_F4 fColor01, fColor11, fColor21, fColor31;
    FFX_MIN16_F4 fColor02, fColor12, fColor22, fColor32;
    FFX_MIN16_F4 fColor03, fColor13, fColor23, fColor33;
};
|
||||
#else //FFX_HALF
|
||||
// Without FFX_HALF the Min16 sample structs are plain aliases of the
// full-precision structs.
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
|
||||
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Linear interpolation from A (t = 0) to B (t = 1), computed as A + (B - A) * t.
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}
|
||||
|
||||
// Bilinear blend of a 2x2 footprint: interpolates each row along x by fPxFrac.x,
// then blends the two rows along y by fPxFrac.y.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Min16 overload: linear interpolation from A (t = 0) to B (t = 1),
// computed as A + (B - A) * t.
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
{
    FFX_MIN16_F4 fDelta = B - A;
    return A + fDelta * t;
}
|
||||
|
||||
// Min16 overload: interpolates each row of the 2x2 footprint along x by fPxFrac.x,
// then blends the two rows along y by fPxFrac.y.
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
|
||||
#endif
|
||||
|
||||
// Lanczos-2 kernel without input clamping:
//   (sin(pi x) / (pi x)) * (sin(pi x / 2) / (pi x / 2)),
// returning 1 when |x| is below FSR2_EPSILON to avoid the 0/0 case.
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

    if (abs(x) < FSR2_EPSILON) {
        return 1.f;
    }

    FfxFloat32 fPiX = PI * x;
    FfxFloat32 fHalfPiX = 0.5f * PI * x;
    return (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
}
|
||||
|
||||
// Lanczos-2 kernel evaluated at min(|x|, 2).
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
}
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
// Disabled (`#if 0`) min16 overload of Lanczos2NoClamp, kept for reference.
// NOTE(review): with this compiled out, the min16 Lanczos2() below presumably
// resolves to the FfxFloat32 overload via implicit conversion - confirm.
#if 0
|
||||
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
|
||||
{
|
||||
const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
|
||||
return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Min16 overload: Lanczos-2 kernel evaluated at min(|x|, 2), with the result
// converted to FFX_MIN16_F.
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
    FFX_MIN16_F fClamped = ffxMin(abs(x), FFX_MIN16_F(2.0f));
    return FFX_MIN16_F(Lanczos2NoClamp(fClamped));
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// FSR1 lanczos approximation. Input is x*x and must be <= 4.
|
||||
// Polynomial Lanczos-2 approximation taking the squared distance x2 = x * x.
// The input is not clamped; callers must keep x2 <= 4.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;

    FfxFloat32 fFactorA = (25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1);
    FfxFloat32 fFactorB = fTermB * fTermB;
    return fFactorA * fFactorB;
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Min16 overload of the polynomial Lanczos-2 approximation taking the squared
// distance x2 = x * x. The input is not clamped.
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
{
    FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
    FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);

    FFX_MIN16_F fFactorA = FFX_MIN16_F(25.0f / 16.0f) * fTermA * fTermA - FFX_MIN16_F(25.0f / 16.0f - 1);
    FFX_MIN16_F fFactorB = fTermB * fTermB;
    return fFactorA * fFactorB;
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Lanczos-2 approximation on a squared distance, with x2 clamped to at most 4.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Min16 overload: Lanczos-2 approximation on a squared distance, with x2 clamped
// to at most 4.
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Approximate Lanczos-2 weight for a signed distance x, without clamping.
// The distance is squared and handed to the polynomial; the caller must keep |x| <= 2.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    FfxFloat32 fDistanceSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistanceSq);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision approximate Lanczos-2 weight for a signed distance x, without clamping.
// The distance is squared and handed to the polynomial; the caller must keep |x| <= 2.
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
{
    FFX_MIN16_F fDistanceSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistanceSq);
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Approximate Lanczos-2 weight for a signed distance x.
// The squared distance is clamped by Lanczos2ApproxSq, so any x is accepted.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    FfxFloat32 fDistanceSq = x * x;
    return Lanczos2ApproxSq(fDistanceSq);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision approximate Lanczos-2 weight for a signed distance x.
// The squared distance is clamped by Lanczos2ApproxSq before evaluation.
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
{
    FFX_MIN16_F fDistanceSq = x * x;
    return Lanczos2ApproxSq(fDistanceSq);
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Lanczos-2 weight fetched through SampleLanczos2Weight instead of being computed.
// Only the magnitude of x is passed to the lookup.
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    FfxFloat32 fDistance = abs(x);
    return SampleLanczos2Weight(fDistance);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision Lanczos-2 weight fetched through SampleLanczos2Weight.
// Only the magnitude of x is passed to the lookup; the result is converted to FFX_MIN16_F.
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
    FFX_MIN16_F fDistance = abs(x);
    return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// 1D four-tap Lanczos-2 filter using looked-up weights.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The result is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
#if FFX_HALF
|
||||
// Reduced-precision 1D four-tap Lanczos-2 filter using looked-up weights.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The result is normalised by the sum of the weights.
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
#endif
|
||||
|
||||
// 1D four-tap Lanczos-2 filter with analytically computed weights.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The result is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2(-1.f - t);
    FfxFloat32 fW1 = Lanczos2(-0.f - t);
    FfxFloat32 fW2 = Lanczos2(+1.f - t);
    FfxFloat32 fW3 = Lanczos2(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
|
||||
// Separable 4x4 Lanczos-2 reconstruction followed by deringing.
// Samples holds the 4x4 neighbourhood (fColorXY: X = column, Y = row); fPxFrac is the
// fractional sample position inside the central 2x2 texels.
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision 1D four-tap Lanczos-2 filter with computed weights.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The result is normalised by the sum of the weights.
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
|
||||
// Reduced-precision separable 4x4 Lanczos-2 reconstruction followed by deringing.
// Samples holds the 4x4 neighbourhood (fColorXY: X = column, Y = row); fPxFrac is the
// fractional sample position inside the central 2x2 texels.
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
// Separable 4x4 Lanczos-2 reconstruction using looked-up weights, followed by deringing.
// Samples holds the 4x4 neighbourhood (fColorXY: X = column, Y = row); fPxFrac is the
// fractional sample position inside the central 2x2 texels.
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision separable 4x4 Lanczos-2 reconstruction using looked-up weights,
// followed by deringing. Samples holds the 4x4 neighbourhood (fColorXY: X = column, Y = row);
// fPxFrac is the fractional sample position inside the central 2x2 texels.
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
|
||||
// 1D four-tap filter using the polynomial Lanczos-2 approximation.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The unclamped kernel is used, so this relies on every
// |offset - t| staying <= 2 (true for t in [0, 1]).
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
    FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
    FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
    FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision 1D four-tap filter using the polynomial Lanczos-2 approximation.
// fColor0..fColor3 are the taps at offsets -1, 0, +1 and +2; t is the sample position
// relative to the tap at offset 0. The unclamped kernel is used, so this relies on every
// |offset - t| staying <= 2 (true for t in [0, 1]).
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightSum   = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightSum;
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
// Separable 4x4 reconstruction using the polynomial Lanczos-2 approximation, followed by
// deringing. Samples holds the 4x4 neighbourhood (fColorXY: X = column, Y = row); fPxFrac is
// the fractional sample position inside the central 2x2 texels.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
|
||||
#if FFX_HALF
|
||||
// Reduced-precision separable 4x4 reconstruction using the polynomial Lanczos-2
// approximation, followed by deringing. Samples holds the 4x4 neighbourhood
// (fColorXY: X = column, Y = row); fPxFrac is the fractional sample position inside the
// central 2x2 texels.
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: filter each of the four rows along x.
    FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass: filter the four row results along y.
    FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: restrict the result to the per-channel range of the central 2x2 texels.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
||||
#endif
|
||||
|
||||
// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
|
||||
// Returns iPxSample + iPxOffset, clamped only on the side the offset points towards:
// a negative offset is clamped at 0, a positive one at iTextureSize - 1, and a zero
// offset is passed through untouched (hence iPxSample itself must already be in range).
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x < 0)
    {
        iCoord.x = ffxMax(iCoord.x, 0);
    }
    if (iPxOffset.x > 0)
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
    }

    if (iPxOffset.y < 0)
    {
        iCoord.y = ffxMax(iCoord.y, 0);
    }
    if (iPxOffset.y > 0)
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
    }

    return iCoord;
}
|
||||
#if FFX_HALF
|
||||
// Reduced-precision integer variant: returns iPxSample + iPxOffset, clamped only on the
// side the offset points towards (negative -> 0, positive -> iTextureSize - 1). A zero
// offset is passed through untouched, so iPxSample itself must already be in range.
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
    FFX_MIN16_I2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x < FFX_MIN16_I(0))
    {
        iCoord.x = ffxMax(iCoord.x, FFX_MIN16_I(0));
    }
    if (iPxOffset.x > FFX_MIN16_I(0))
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FFX_MIN16_I(1));
    }

    if (iPxOffset.y < FFX_MIN16_I(0))
    {
        iCoord.y = ffxMax(iCoord.y, FFX_MIN16_I(0));
    }
    if (iPxOffset.y > FFX_MIN16_I(0))
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FFX_MIN16_I(1));
    }

    return iCoord;
}
|
||||
#endif //FFX_HALF
|
||||
|
||||
|
||||
// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, a function that
// loads the 4x4 texel neighbourhood spanning offsets -1..+2 around iPxSample through
// LoadTexture. Each coordinate goes through ClampCoord and each texel is converted to
// TextureType. Field fColorXY holds the texel at column X, row Y of the neighbourhood.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        \
        /* Row at y - 1 */ \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
        \
        /* Row at y */ \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
        \
        /* Row at y + 1 */ \
        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
        \
        /* Row at y + 2 */ \
        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
        \
        return Samples; \
    }
|
||||
|
||||
// Full-precision 4x4 fetch function: FetchedBicubicSamples of FfxFloat32x4, addressed with FfxInt32x2.
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Reduced-precision 4x4 fetch function: FetchedBicubicSamplesMin16 of FFX_MIN16_F4, still addressed with FfxInt32x2.
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
|
||||
|
||||
// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, a function that
// loads the 2x2 texel footprint at offsets (0..1, 0..1) from iPxSample through LoadTexture.
// Each coordinate goes through ClampCoord and each texel is converted to TextureType.
// Field fColorXY holds the texel at column X, row Y of the footprint.
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        /* Row at y */ \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        /* Row at y + 1 */ \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        return Samples; \
    }
|
||||
|
||||
// Full-precision 2x2 fetch function: FetchedBilinearSamples of FfxFloat32x4, addressed with FfxInt32x2.
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Reduced-precision 2x2 fetch function: FetchedBilinearSamplesMin16 of FFX_MIN16_F4, still addressed with FfxInt32x2.
#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
|
||||
|
||||
// BE CAREFUL: there are some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
|
||||
// is common, so iPxSample can "jitter"
|
||||
// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// The generated function converts the UV to texel space (texel centres at integer
// coordinates), splits it into an integer base texel and a fractional part, fetches the
// neighbourhood with FetchSamples and filters it with InterpolateSamples.
//
// The float clamp allows fPxSample to reach iTextureSize exactly, which would make the base
// texel equal to the texture size. ClampCoord leaves a zero offset unclamped, so the base
// texel is clamped to iTextureSize - 1 here to keep the (+0, +0) tap inside the texture.
// Samples whose position is below iTextureSize are unaffected.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* Integer base texel, kept inside the texture */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
        iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
        FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }
|
||||
|
||||
// Declares `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// Coordinates are computed in full precision; only the fractional position and the
// filtered colour are converted to reduced precision.
//
// The float clamp allows fPxSample to reach iTextureSize exactly, which would make the base
// texel equal to the texture size. ClampCoord leaves a zero offset unclamped, so the base
// texel is clamped to iTextureSize - 1 here to keep the (+0, +0) tap inside the texture.
// Samples whose position is below iTextureSize are unaffected.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* Integer base texel, kept inside the texture */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
        iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }
|
||||
|
||||
// Two-level token pasting: the outer macro expands its arguments before the inner one
// glues them together, so a macro can be passed as either argument.
#define FFX_FSR2_CONCAT_ID(a, b) a ## b
#define FFX_FSR2_CONCAT(a, b) FFX_FSR2_CONCAT_ID(a, b)

// Sampler implementations selectable by index.
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

// Resolves index 0, 1 or 2 to the matching sampler function name above.
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(index) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, index)
|
||||
|
||||
#endif //!defined( FFX_FSR2_SAMPLE_H )
|
250
thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
vendored
Normal file
250
thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
vendored
Normal file
@@ -0,0 +1,250 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// When non-zero, the functions in this file convert every loaded colour with RGBToYCoCg
// before comparing it; set to 0 to compare the loaded values as they are.
#define USE_YCOCG 1
|
||||
|
||||
// Per-channel threshold on |post-alpha - pre-alpha| above which a pixel counts as having an
// alpha contribution; also used as the lower bound of the divisor in ComputeAutoTC_02.
#define fAutogenEpsilon 0.01f
|
||||
|
||||
// EXPERIMENTAL
|
||||
|
||||
// Binary transparency-and-composition estimate for one pixel.
// Compares how much the alpha pass changed the colour this frame (post-alpha minus pre-alpha
// at uDispatchThreadId) with how much it changed it last frame (same difference at iPrevIdx).
// Returns 1 when the summed per-channel difference of those two magnitudes reaches
// getTcThreshold(), otherwise 0.
FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha      = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha     = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha  = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha      = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha     = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha  = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    // Magnitude of the alpha contribution in the current and in the previous frame.
    FfxFloat32x3 alphaMagnitudeCurr = abs(colorPostAlpha - colorPreAlpha);
    FfxFloat32x3 alphaMagnitudePrev = abs(colorPrevPostAlpha - colorPrevPreAlpha);

    // Sum over the three channels of how much that contribution changed, saturated to [0, 1].
    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(alphaMagnitudeCurr - alphaMagnitudePrev), FfxFloat32x3(1, 1, 1))));

    // cleanup very small values
    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);

    return retVal;
}
|
||||
|
||||
// works ok: thin edges
|
||||
// Continuous transparency-and-composition estimate for one pixel, in [0, 1].
// If neither the current nor the previous frame shows an alpha contribution above
// fAutogenEpsilon in any channel, the result is 0. Otherwise the frame-to-frame change of
// the final colour is divided per channel by the frame-to-frame change of the opaque-only
// colour (floored at fAutogenEpsilon). The largest channel is then scaled by the length of
// the final-colour change and saturated.
FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha      = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha     = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha  = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha      = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha     = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha  = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    FfxFloat32x3 epsilon3 = FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon);

    // Does the alpha pass visibly change this pixel now / did it in the previous frame?
    FfxFloat32x3 colorDelta     = colorPostAlpha - colorPreAlpha;
    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), epsilon3));
    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), epsilon3));

    // Frame-to-frame change of the opaque-only colour (N) and of the final colour (NminusNA).
    FfxFloat32x3 N        = colorPreAlpha - colorPrevPreAlpha;
    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;

    // The divisor is floored at epsilon, so channels where N is small or negative divide by epsilon.
    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(epsilon3, N) : FfxFloat32x3(0, 0, 0);

    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );

    // only pixels that have significantly changed in color should be considered
    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) );

    return retVal;
}
|
||||
|
||||
// This function computes the TransparencyAndComposition mask:
|
||||
// This mask indicates pixels that should discard locks and apply color clamping.
|
||||
//
|
||||
// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of
|
||||
// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
|
||||
// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
|
||||
//
|
||||
// This function takes an opaque-only and a final texture and uses internal copies of those textures from the last frame.
|
||||
// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
|
||||
// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
|
||||
// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
|
||||
//
|
||||
// In the final step it stores the current textures in internal textures for the next frame
|
||||
|
||||
// Two-stage mask evaluation: the continuous estimate (ComputeAutoTC_02) is computed first;
// only when it exceeds 0.01 is the pixel re-evaluated with the thresholded estimate
// (ComputeAutoTC_01), whose result then replaces it.
FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FFX_MIN16_F fFirstEstimate = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);

    // [branch]
    if (fFirstEstimate > FFX_MIN16_F(0.01f))
    {
        return ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
    }

    return fFirstEstimate;
}
|
||||
|
||||
// Edge measure on the opaque-only image.
// For each texel of the 3x3 neighbourhood, takes the length of the colour difference between
// the current opaque-only colour (around curPos) and the previous one (around prevPos).
// Returns the fourth root of the product of the four absolute differences between the centre
// value and its left, right, upper and lower neighbours.
float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 grid: index 4 is the centre, 3 / 5 the x neighbours, 1 / 7 the y neighbours.
    float change[9];
    int slot = 0;
    for (int dy = -1; dy < 2; ++dy)
    {
        for (int dx = -1; dx < 2; ++dx)
        {
            FfxFloat32x3 curCol  = LoadOpaqueOnly(curPos + FFX_MIN16_I2(dx, dy)).rgb;
            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb;
            change[slot] = length(curCol - prevCol);
            ++slot;
        }
    }

    float horizontal = abs(change[3] - change[4]) * abs(change[5] - change[4]);
    float vertical   = abs(change[1] - change[4]) * abs(change[7] - change[4]);

    return sqrt(sqrt(horizontal * vertical));
}
|
||||
|
||||
// Edge measure on the alpha contribution.
// For each texel of the 3x3 neighbourhood, takes |final - opaque-only| for the current frame
// (around curPos) and for the previous frame (around prevPos), and stores the length of their
// difference. Returns the fourth root of the product of the four absolute differences between
// the centre value and its left, right, upper and lower neighbours.
float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 grid: index 4 is the centre, 3 / 5 the x neighbours, 1 / 7 the y neighbours.
    float change[9];
    int slot = 0;
    for (int dy = -1; dy < 2; ++dy)
    {
        for (int dx = -1; dx < 2; ++dx)
        {
            FfxFloat32x3 curCol  = abs(LoadInputColor(curPos + FFX_MIN16_I2(dx, dy)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(dx, dy)).rgb);
            FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb);
            change[slot] = length(curCol - prevCol);
            ++slot;
        }
    }

    float horizontal = abs(change[3] - change[4]) * abs(change[5] - change[4]);
    float vertical   = abs(change[1] - change[4]) * abs(change[7] - change[4]);

    return sqrt(sqrt(horizontal * vertical));
}
|
||||
|
||||
// Fraction of current-frame colour channels that fall outside last frame's colour box.
// Builds the per-channel min/max (an AABB in colour space) of the previous final colour over
// the 3x3 neighbourhood of iPrevIdx, then counts, over the 3x3 neighbourhood of
// uDispatchThreadId and all three channels, how many current final-colour values lie outside
// that box. Returns count / 27, i.e. a value in [0, 1].
//
// Only the two neighbourhood scans contribute to the result, so the centre-pixel colour and
// motion-vector fetches that were previously made here without being used have been dropped.
FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // Start from an inverted box so that the first sample initialises both bounds.
    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
    for (int py = -1; py < 2; ++py)
    {
        for (int px = -1; px < 2; ++px)
        {
            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(px, py));

#if USE_YCOCG
            W = RGBToYCoCg(W);
#endif
            minPrev = min(minPrev, W);
            maxPrev = max(maxPrev, W);
        }
    }

    // instead of computing the overlap: simply count how many samples are outside
    // set reactive based on that
    FFX_MIN16_F count = FFX_MIN16_F(0.f);
    for (int cy = -1; cy < 2; ++cy)
    {
        for (int cx = -1; cx < 2; ++cx)
        {
            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(cx, cy));

#if USE_YCOCG
            Y = RGBToYCoCg(Y);
#endif
            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
        }
    }

    // 9 samples x 3 channels = 27 comparisons.
    return count / FFX_MIN16_F(27.f);
}
|
||||
|
||||
|
||||
// This function computes the Reactive mask:
|
||||
// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
|
||||
// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
|
||||
// As a result history would not be trustworthy.
|
||||
// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
|
||||
// For mirrors we may assume the pre-alpha is pretty uniform color.
|
||||
//
|
||||
// This works well generally, but also marks edge pixels
|
||||
// Reactive value for one pixel: the alpha-contribution edge measure minus the opaque-only
// edge measure, saturated to [0, 1].
FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // we only get here if alpha has a significant contribution and has changed since last frame.

    // mark pixels with huge variance in alpha as reactive
    FFX_MIN16_F fAlphaEdge  = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
    FFX_MIN16_F fOpaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));

    // the above also marks edge pixels due to jitter, so we need to cancel those out
    // (no such cancellation is performed here yet)
    FFX_MIN16_F fReactive = ffxSaturate(fAlphaEdge - fOpaqueEdge);
    return fReactive;
}
|
120
thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
vendored
Normal file
120
thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//#version 450
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
|
||||
#define FSR2_BIND_SRV_INPUT_COLOR 1
|
||||
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
|
||||
#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3
|
||||
#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4
|
||||
#define FSR2_BIND_SRV_REACTIVE_MASK 5
|
||||
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6
|
||||
|
||||
#define FSR2_BIND_UAV_AUTOREACTIVE 7
|
||||
#define FSR2_BIND_UAV_AUTOCOMPOSITION 8
|
||||
#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9
|
||||
#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10
|
||||
|
||||
#define FSR2_BIND_CB_FSR2 11
|
||||
#define FSR2_BIND_CB_REACTIVE 12
|
||||
|
||||
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
|
||||
#define FSR2_BIND_SRV_INPUT_DEPTH 13
|
||||
#endif
|
||||
|
||||
#include "ffx_fsr2_callbacks_glsl.h"
|
||||
#include "ffx_fsr2_common.h"
|
||||
|
||||
// Constant block for this pass; only declared when a binding slot is provided
// through FSR2_BIND_CB_REACTIVE.
#ifdef FSR2_BIND_CB_REACTIVE
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
{
    float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
    float fTcScale;
    float fReactiveScale;
    float fReactiveMax;
} cbGenerateReactive;
#endif

// Returns the threshold stored in cbGenerateReactive.fTcThreshold when the constant
// block is bound, otherwise the built-in fallback of 0.05.
float getTcThreshold()
{
#ifdef FSR2_BIND_CB_REACTIVE
    return cbGenerateReactive.fTcThreshold;
#else
    return 0.05f;
#endif
}
|
||||
|
||||
#include "ffx_fsr2_tcr_autogen.h"
|
||||
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
|
||||
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
|
||||
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
|
||||
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
|
||||
#ifndef FFX_FSR2_NUM_THREADS
|
||||
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
|
||||
#endif // #ifndef FFX_FSR2_NUM_THREADS
|
||||
|
||||
FFX_FSR2_NUM_THREADS
// Entry point: one invocation per pixel at gl_GlobalInvocationID.xy.
// Produces the (reactive, transparency-and-composition) pair for the pixel, merges it with
// the already supplied masks via max, stores it, and stores the current pre-/post-alpha
// colors through StorePrevPreAlpha() / StorePrevPostAlpha().
void main()
{
    FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy);

    // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV)
    // Offset the pixel-center UV by the motion vector and convert back to a pixel index.
    FFX_MIN16_F2 fCurrentUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() );
    FFX_MIN16_F2 fPreviousUv = fCurrentUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) );
    FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPreviousUv * FFX_MIN16_F2(RenderSize()) - 0.5f);

    // Fetch pre- and post-alpha color values.
    FFX_MIN16_F3 fOpaqueColor = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) );
    FFX_MIN16_F3 fFinalColor = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) );

    // x = reactive, y = transparency and composition.
    FFX_MIN16_F2 fMasks = FFX_MIN16_F2( 0.f, 0.f );

    fMasks.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx);

    // The reactive value is only computed where the T&C value exceeds 0.5; it is then scaled
    // and limited to fReactiveMax.
    if (fMasks.y > 0.5f)
    {
        fMasks.x = ComputeReactive(uDispatchThreadId, iPrevIdx);
        fMasks.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale);
        fMasks.x = fMasks.x < cbGenerateReactive.fReactiveMax ? fMasks.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax );
    }

    fMasks.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale);

    // Never go below the masks that were already provided as inputs.
    fMasks.x = ffxMax(fMasks.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId)));
    fMasks.y = ffxMax(fMasks.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId)));

    StoreAutoReactive(uDispatchThreadId, fMasks);

    StorePrevPreAlpha(uDispatchThreadId, fOpaqueColor);
    StorePrevPostAlpha(uDispatchThreadId, fFinalColor);
}
|
194
thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
vendored
Normal file
194
thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifndef FFX_FSR2_UPSAMPLE_H
|
||||
#define FFX_FSR2_UPSAMPLE_H
|
||||
|
||||
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16;
|
||||
|
||||
// Clamps fColor component-wise into [clippingBox.aabbMin, clippingBox.aabbMax].
// fColor is an in/out parameter and is overwritten with the clamped value.
void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
|
||||
{
|
||||
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
|
||||
}
|
||||
#if FFX_HALF
|
||||
// Same clamp as the overload above, for the RectificationBoxMin16 / FFX_MIN16_F3 types.
// Only compiled when FFX_HALF is non-zero.
void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
|
||||
{
|
||||
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
|
||||
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
|
||||
#endif
|
||||
|
||||
// Returns the upsampling kernel weight for a sample at fSrcSampleOffset.
// The offset is first multiplied by fKernelWeight on both axes; the kernel evaluated on the
// scaled offset is selected at compile time by FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE:
//   0 - Lanczos2() on the offset length
//   1 - Lanczos2_UseLUT() on the offset length
//   2 - Lanczos2ApproxSq() on the squared offset length
// Any other value is a compile error.
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
{
    const FfxFloat32x2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    return Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    return Lanczos2_UseLUT(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
#else
#error "Invalid Lanczos type"
#endif
}
|
||||
|
||||
#if FFX_HALF
// FFX_MIN16_F overload of GetUpsampleLanczosWeight(); only compiled when FFX_HALF is non-zero.
// The offset is multiplied by fKernelWeight on both axes and fed to the kernel selected by
// FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE (0: Lanczos2, 1: Lanczos2_UseLUT,
// 2: Lanczos2ApproxSq on the squared length). Any other value is a compile error.
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
    const FFX_MIN16_F2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    return Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    return Lanczos2_UseLUT(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    // To test: a squared-distance LUT lookup would save the reciprocal sqrt compute, e.g.
    //   Lanczos2Sq_UseLUT(dot(fScaledOffset, fScaledOffset))
    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
#else
#error "Invalid Lanczos type"
#endif
}
#endif
|
||||
|
||||
// Returns min(1.99, 1 + (1 / DownscaleFactor().x - 1) * fKernelSizeBias), with the bias fixed at 1.
FfxFloat32 ComputeMaxKernelWeight()
{
    const FfxFloat32 fKernelSizeBias = 1.0f;

    // Per-axis (1 / downscale factor) - 1; only the x component is used below.
    FfxFloat32x2 fInvDownscaleMinusOne = FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1);
    FfxFloat32 fUnclampedWeight = FfxFloat32(1) + fInvDownscaleMinusOne.x * FfxFloat32(fKernelSizeBias);

    return ffxMin(FfxFloat32(1.99f), fUnclampedWeight);
}
|
||||
|
||||
// Accumulates a kernel-weighted color over a 3x3 footprint of prepared input samples and
// feeds the same samples into clippingBox.
//
// params          : iPxHrPos, bIsNewSample, fDepthClipFactor and fHrVelocity are read.
// clippingBox     : in/out; receives every sample through RectificationBoxAddSample() and is
//                   finalized with RectificationBoxComputeVarianceBoxData().
// fReactiveFactor : combined (max) with params.bIsNewSample; the result scales the maximum
//                   kernel bias by (1 - factor) and is one input of the bias lerp factor.
// Returns xyz = accumulated color, w = accumulated weight. When w exceeds FSR2_EPSILON the
// color is divided by w and clamped to the box, and w is multiplied by
// fUpsampleLanczosWeightScale; otherwise w is zeroed and xyz is left as the raw weighted sum.
FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
{
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
#endif
    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position
    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position
    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors...

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    // Samples are stored with a row stride of 4; only columns/rows 0..2 are filled and read.
    FfxFloat32x3 fTaps[iLanczos2SampleCount];

    // This is the un-jittered position of the sample at offset 0,0
    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter();

    // If fSrcUnjitteredPos.y > fSrcOutputPos.y the top-left offset is -2, so sample offset Y
    // would be [-2, 1] and the clipbox rows [1, 3]. Row numbers are flipped for the sampling
    // offset in that case, so rows 0~2 of the sampled array can always be used for computing
    // the clipbox (same for columns). This reduces branch or cmove on sampled colors, moving
    // that overhead to the sample position / weight calculation, which applies to fewer values.
    const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
    const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;

    FfxInt32x2 iTopLeftOffset;
    iTopLeftOffset.x = bFlipCol ? FfxInt32(-2) : FfxInt32(-1);
    iTopLeftOffset.y = bFlipRow ? FfxInt32(-2) : FfxInt32(-1);

    FfxFloat32x2 fTopLeftOffset = FfxFloat32x2(iTopLeftOffset);

    // Load samples
    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {

        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iTap = col + (row << 2);

            FfxInt32x2 iColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            FfxInt32x2 iTapPos = FfxInt32x2(iSrcInputPos) + iTopLeftOffset + iColRow;

            const FfxInt32x2 iLoadPos = ClampLoad(iTapPos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));

            fTaps[iTap] = LoadPreparedInputColor(FfxInt32x2(iLoadPos));
        }
    }

    FfxFloat32x4 fAccum = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);

    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);

    // Identify how much of each upsampled color to be used for this frame
    const FfxFloat32 fReactiveOrNew = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
    const FfxFloat32 fBiasMax = ComputeMaxKernelWeight() * (1.0f - fReactiveOrNew);

    const FfxFloat32 fBiasMin = ffxMax(1.0f, ((1.0f + fBiasMax) * 0.3f));
    const FfxFloat32 fBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fReactiveOrNew));
    const FfxFloat32 fKernelBias = ffxLerp(fBiasMax, fBiasMin, fBiasFactor);

    // Exponent scale of the box weight: lerps from -2 to -3 as fHrVelocity / 50 saturates.
    const FfxFloat32 fBoxCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));

    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {
        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iTap = col + (row << 2);

            const FfxInt32x2 iColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            const FfxFloat32x2 fTapOffset = fTopLeftOffset + FfxFloat32x2(iColRow);
            FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fTapOffset;

            FfxInt32x2 iTapPos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(iTopLeftOffset) + iColRow;

            // Off-screen taps contribute zero weight to the color accumulation.
            const FfxFloat32 fOnScreen = FfxFloat32(IsOnScreen(FfxInt32x2(iTapPos), FfxInt32x2(RenderSize())));
            FfxFloat32 fTapWeight = fOnScreen * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));

            fAccum += FfxFloat32x4(fTaps[iTap] * fTapWeight, fTapWeight);

            // Update rectification box
            {
                const FfxFloat32 fOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
                const FfxFloat32 fBoxWeight = exp(fBoxCurveBias * fOffsetSq);

                const FfxBoolean bFirstTap = (row == 0) && (col == 0);
                RectificationBoxAddSample(bFirstTap, clippingBox, fTaps[iTap], fBoxWeight);
            }
        }
    }

    RectificationBoxComputeVarianceBoxData(clippingBox);

    // Zero out weights that do not exceed the epsilon.
    fAccum.w *= FfxFloat32(fAccum.w > FSR2_EPSILON);

    if (fAccum.w > FSR2_EPSILON) {
        // Normalize for deringing (we need to compare colors)
        fAccum.xyz = fAccum.xyz / fAccum.w;
        fAccum.w *= fUpsampleLanczosWeightScale;

        Deringing(clippingBox, fAccum.xyz);
    }

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    return fAccum;
}
|
||||
|
||||
#endif //!defined( FFX_FSR2_UPSAMPLE_H )
|
936
thirdparty/amd-fsr2/shaders/ffx_spd.h
vendored
Normal file
936
thirdparty/amd-fsr2/shaders/ffx_spd.h
vendored
Normal file
@@ -0,0 +1,936 @@
|
||||
// This file is part of the FidelityFX SDK.
|
||||
//
|
||||
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifdef FFX_CPU
|
||||
// Derives the SPD dispatch parameters for the rectangle rectInfo = (left, top, width, height),
// using 64x64 texel tiles.
//
// dispatchThreadGroupCountXY : [0]/[1] are assigned the number of tiles the rectangle touches in x / y.
// workGroupOffset            : [0]/[1] are assigned left / 64 and top / 64.
// numWorkGroupsAndMips       : [0] is assigned the product of the two tile counts, [1] the mip count.
// mips                       : used as the mip count when >= 0; any negative value (documented
//                              as -1) derives it as min(floor(log2(max(width, height))), 12).
// NOTE(review): the results are written by indexing the first three parameters; this assumes
// those types let the writes reach the caller - confirm against the type definitions.
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
                         FfxUInt32x2 workGroupOffset,            // GPU side: pass in as constant
                         FfxUInt32x2 numWorkGroupsAndMips,       // GPU side: pass in as constant
                         FfxUInt32x4 rectInfo,                   // left, top, width, height
                         FfxInt32    mips)                       // optional: if -1, calculate based on rect width and height
{
    // Index of the first tile covered by the rectangle.
    workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left
    workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top

    // Index of the last tile covered by the rectangle (inclusive).
    FfxUInt32 lastTileX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width
    FfxUInt32 lastTileY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height

    dispatchThreadGroupCountXY[0] = lastTileX + 1 - workGroupOffset[0];
    dispatchThreadGroupCountXY[1] = lastTileY + 1 - workGroupOffset[1];

    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);

    if (mips < 0)
    {
        // calculate based on rect width and height
        FfxUInt32 longestSide = ffxMax(rectInfo[2], rectInfo[3]);
        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(longestSide))), FfxFloat32(12))));
    }
    else
    {
        numWorkGroupsAndMips[1] = FfxUInt32(mips);
    }
}
|
||||
|
||||
// Convenience overload: forwards to the five-argument SpdSetup() with mips = -1, so the mip
// count is derived from the rectangle's width and height.
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
|
||||
FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant
|
||||
FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant
|
||||
FfxUInt32x4 rectInfo) // left, top, width, height
|
||||
{
|
||||
SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1);
|
||||
}
|
||||
#endif // #ifdef FFX_CPU
|
||||
|
||||
|
||||
//==============================================================================================================================
|
||||
// NON-PACKED VERSION
|
||||
//==============================================================================================================================
|
||||
#ifdef FFX_GPU
|
||||
// When SPD_PACKED_ONLY is defined, the non-packed callbacks below are provided as empty
// placeholders: loads and the reduction return an all-zero vector, stores do nothing.
#ifdef SPD_PACKED_ONLY
|
||||
// Placeholder definitions so the non-packed code path still compiles (avoids compiler errors).
|
||||
// Placeholder source-image load: ignores its arguments and returns zero.
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
|
||||
{
|
||||
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
|
||||
// Placeholder load: ignores its arguments and returns zero.
FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
|
||||
{
|
||||
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
// Placeholder store: intentionally empty.
void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
|
||||
{
|
||||
}
|
||||
// Placeholder intermediate load: ignores its arguments and returns zero.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
|
||||
{
|
||||
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
// Placeholder intermediate store: intentionally empty.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
|
||||
{
|
||||
}
|
||||
// Placeholder 4-to-1 reduction: ignores its inputs and returns zero.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
|
||||
{
|
||||
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
#endif // #ifdef SPD_PACKED_ONLY
|
||||
|
||||
//_____________________________________________________________/\_______________________________________________________________
|
||||
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
|
||||
#extension GL_KHR_shader_subgroup_quad:require
|
||||
#endif
|
||||
|
||||
// Issues the workgroup barrier of the active shading language:
// barrier() under FFX_GLSL, GroupMemoryBarrierWithGroupSync() under FFX_HLSL.
// The two checks are independent; with neither macro defined the function is empty.
void SpdWorkgroupShuffleBarrier()
{
#if defined(FFX_GLSL)
    barrier();
#endif
#if defined(FFX_HLSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
|
||||
|
||||
// Only last active workgroup should proceed.
//
// Invocation 0 of the workgroup calls SpdIncreaseAtomicCounter(slice); after a workgroup
// barrier every invocation compares SpdGetAtomicCounter() with numWorkGroups - 1.
// Returns true when the two differ and false when they are equal.
// NOTE(review): presumably callers exit when this returns true - confirm at the call sites.
bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
{
    // global atomic counter: bumped once per workgroup
    if (localInvocationIndex == 0)
    {
        SpdIncreaseAtomicCounter(slice);
    }

    SpdWorkgroupShuffleBarrier();

    const FfxUInt32 uLastGroupCount = numWorkGroups - 1;
    return (SpdGetAtomicCounter() != uLastGroupCount);
}
|
||||
|
||||
// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
//
// Reduces v across a group of four lanes with SpdReduce4():
//   - GLSL with wave operations: v plus its horizontal, vertical and diagonal quad swaps.
//   - HLSL with wave operations: v plus the values read from lanes 1, 2 and 3 of the
//     4-aligned lane group this lane belongs to.
//   - otherwise: v is returned unchanged.
FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    FfxFloat32x4 fHorizontal = subgroupQuadSwapHorizontal(v);
    FfxFloat32x4 fVertical = subgroupQuadSwapVertical(v);
    FfxFloat32x4 fDiagonal = subgroupQuadSwapDiagonal(v);
    return SpdReduce4(v, fHorizontal, fVertical, fDiagonal);

#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    // requires SM6.0
    FfxUInt32 uQuadBase = WaveGetLaneIndex() & (~0x3);
    FfxFloat32x4 fLane1 = WaveReadLaneAt(v, uQuadBase | 1);
    FfxFloat32x4 fLane2 = WaveReadLaneAt(v, uQuadBase | 2);
    FfxFloat32x4 fLane3 = WaveReadLaneAt(v, uQuadBase | 3);
    return SpdReduce4(v, fLane1, fLane2, fLane3);

    // If SM6.0 is not available, the AMD shader intrinsics can be used instead (works for DX11).
    // They are available in the AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // Build v1, v2 and v3 per component (x, y, z, w) with AmdExtD3DShaderIntrinsics_SwizzleF(),
    // using AmdExtD3DShaderIntrinsicsSwizzle_SwapX1 for v1, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2
    // for v2 and AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4 for v3, then
    // return SpdReduce4(v, v1, v2, v3).

#else

    return v;

#endif
}
|
||||
|
||||
// Loads four values with SpdLoadIntermediate() at the (x, y) coordinates i0..i3 and
// returns their SpdReduce4() reduction.
FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    return SpdReduce4(SpdLoadIntermediate(i0.x, i0.y),
                      SpdLoadIntermediate(i1.x, i1.y),
                      SpdLoadIntermediate(i2.x, i2.y),
                      SpdLoadIntermediate(i3.x, i3.y));
}
|
||||
|
||||
// Loads four values with SpdLoad() at coordinates i0..i3 of the given slice and returns
// their SpdReduce4() reduction.
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    return SpdReduce4(SpdLoad(FfxInt32x2(i0), slice),
                      SpdLoad(FfxInt32x2(i1), slice),
                      SpdLoad(FfxInt32x2(i2), slice),
                      SpdLoad(FfxInt32x2(i3), slice));
}
|
||||
|
||||
// Reduces the 2x2 block whose first coordinate is `base`: the offsets (0,0), (0,1), (1,0)
// and (1,1) are passed, in that order, to the four-coordinate SpdReduceLoad4().
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 uP00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 uP01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 uP10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 uP11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));
    return SpdReduceLoad4(uP00, uP01, uP10, uP11, slice);
}
|
||||
|
||||
// Loads four values with SpdLoadSourceImage() at coordinates i0..i3 of the given slice and
// returns their SpdReduce4() reduction.
FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    return SpdReduce4(SpdLoadSourceImage(FfxInt32x2(i0), slice),
                      SpdLoadSourceImage(FfxInt32x2(i1), slice),
                      SpdLoadSourceImage(FfxInt32x2(i2), slice),
                      SpdLoadSourceImage(FfxInt32x2(i3), slice));
}
|
||||
|
||||
// Produces one reduced value for the 2x2 source block starting at `base`.
// With SPD_LINEAR_SAMPLER defined a single SpdLoadSourceImage() call at `base` is used;
// otherwise the offsets (0,0), (0,1), (1,0) and (1,1) are loaded and reduced through
// SpdReduceLoadSourceImage4().
FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifndef SPD_LINEAR_SAMPLER
    FfxUInt32x2 uP00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 uP01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 uP10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 uP11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));
    return SpdReduceLoadSourceImage4(uP00, uP01, uP10, uP11, slice);
#else
    return SpdLoadSourceImage(FfxInt32x2(base), slice);
#endif
}
|
||||
|
||||
// Wave-operation path for the first two output levels of a 64x64 source tile.
// Each invocation reduces four 2x2 source blocks, one per 32x32 source quadrant, and stores
// them with mip index 0. When `mip` is greater than 1 the four values are further reduced
// with SpdReduceQuad(); invocations whose localInvocationIndex is a multiple of 4 store the
// results with mip index 1 and also through SpdStoreIntermediate().
void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxFloat32x4 v[4];

    // Quadrant q: bit 0 selects the right half (+32 source / +16 destination in x),
    // bit 1 selects the bottom half (same offsets in y).
    for (FfxUInt32 q = 0; q < 4; q++)
    {
        FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + (q % 2) * 32, y * 2 + (q / 2) * 32);
        FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + (q % 2) * 16, y + (q / 2) * 16);
        v[q] = SpdReduceLoadSourceImage(tex, slice);
        SpdStore(pix, v[q], 0, slice);
    }

    if (mip <= 1)
        return;

    for (FfxUInt32 r = 0; r < 4; r++)
    {
        v[r] = SpdReduceQuad(v[r]);
    }

    if ((localInvocationIndex % 4) == 0)
    {
        for (FfxUInt32 s = 0; s < 4; s++)
        {
            // Destination inside the 16x16 output of this workgroup; quadrants are 8 apart.
            FfxUInt32 uDstX = x / 2 + (s % 2) * 8;
            FfxUInt32 uDstY = y / 2 + (s / 2) * 8;
            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(uDstX, uDstY), v[s], 1, slice);
            SpdStoreIntermediate(uDstX, uDstY, v[s]);
        }
    }
}
|
||||
|
||||
// Path without wave operations for the first two output levels of a 64x64 source tile.
// Each invocation reduces four 2x2 source blocks, one per 32x32 source quadrant, and stores
// them with mip index 0. When `mip` is greater than 1, each quadrant in turn is written
// through SpdStoreIntermediate(), reduced 2x2 by the first 64 invocations and stored with
// mip index 1; finally those invocations write their four results back through
// SpdStoreIntermediate() at offsets of 8.
void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxFloat32x4 v[4];

    // Quadrant q: bit 0 selects the right half (+32 source / +16 destination in x),
    // bit 1 selects the bottom half (same offsets in y).
    for (FfxUInt32 q = 0; q < 4; q++)
    {
        FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + (q % 2) * 32, y * 2 + (q / 2) * 32);
        FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + (q % 2) * 16, y + (q / 2) * 16);
        v[q] = SpdReduceLoadSourceImage(tex, slice);
        SpdStore(pix, v[q], 0, slice);
    }

    if (mip <= 1)
        return;

    for (FfxUInt32 i = 0; i < 4; i++)
    {
        SpdStoreIntermediate(x, y, v[i]);
        // Barrier before other invocations read what was just stored.
        SpdWorkgroupShuffleBarrier();
        if (localInvocationIndex < 64)
        {
            FfxUInt32 uX2 = x * 2;
            FfxUInt32 uY2 = y * 2;
            v[i] = SpdReduceIntermediate(FfxUInt32x2(uX2 + 0, uY2 + 0), FfxUInt32x2(uX2 + 1, uY2 + 0), FfxUInt32x2(uX2 + 0, uY2 + 1), FfxUInt32x2(uX2 + 1, uY2 + 1));
            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
        }
        // Barrier before the next iteration overwrites the intermediate values.
        SpdWorkgroupShuffleBarrier();
    }

    if (localInvocationIndex < 64)
    {
        for (FfxUInt32 k = 0; k < 4; k++)
        {
            SpdStoreIntermediate(x + (k % 2) * 8, y + (k / 2) * 8, v[k]);
        }
    }
}
|
||||
|
||||
// Dispatches to the wave-operation implementation, or to the LDS implementation when
// SPD_NO_WAVE_OPERATIONS is defined. All arguments are forwarded unchanged.
void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifndef SPD_NO_WAVE_OPERATIONS
    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
#else
    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
#endif
}
|
||||
|
||||
|
||||
// Reduces the intermediate values 2x2, stores the result at mip index `mip` (destination
// origin workGroupID.xy * 8), and writes it back through SpdStoreIntermediate() in a
// staggered layout.
void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifndef SPD_NO_WAVE_OPERATIONS
    FfxFloat32x4 fReduced = SpdLoadIntermediate(x, y);
    fReduced = SpdReduceQuad(fReduced);
    // quad index 0 stores result
    if (localInvocationIndex % 4 == 0)
    {
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), fReduced, mip, slice);
        SpdStoreIntermediate(x + (y / 2) % 2, y, fReduced);
    }
#else
    if (localInvocationIndex < 64)
    {
        FfxUInt32 uX2 = x * 2;
        FfxUInt32 uY2 = y * 2;
        FfxFloat32x4 fReduced = SpdReduceIntermediate(FfxUInt32x2(uX2 + 0, uY2 + 0), FfxUInt32x2(uX2 + 1, uY2 + 0), FfxUInt32x2(uX2 + 0, uY2 + 1), FfxUInt32x2(uX2 + 1, uY2 + 1));
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), fReduced, mip, slice);
        // store to LDS, try to reduce bank conflicts
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // ...
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        SpdStoreIntermediate(uX2 + y % 2, uY2, fReduced);
    }
#endif
}
|
||||
|
||||
// Next reduction level after SpdDownsampleMip_2: reads the staggered values
// that stage left in intermediate storage, reduces them, stores the result to
// destination mip `mip` (output base is workGroupID * 4) and writes it back to
// intermediate storage in a new layout.
void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 16)
    {
        // Source pattern inside each 4x4 cell (x marks a value to read):
        //   x 0 x 0
        //   0 0 0 0
        //   0 x 0 x
        //   0 0 0 0
        FfxUInt32x2 cell = FfxUInt32x2(x * 4, y * 4);
        FfxFloat32x4 reduced = SpdReduceIntermediate(cell,
                                                     cell + FfxUInt32x2(2, 0),
                                                     cell + FfxUInt32x2(1, 2),
                                                     cell + FfxUInt32x2(3, 2));

        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), reduced, mip, slice);

        // Write back to LDS, one value per 4x4 cell, shifted right by `y` columns:
        //   x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        //   (three empty rows)
        //   0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        //   ...
        //   0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        //   ...
        //   0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        SpdStoreIntermediate(x * 4 + y, y * 4, reduced);
    }
#else
    if (localInvocationIndex < 64)
    {
        // All 64 participating invocations join the quad reduction.
        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(x * 2 + y % 2, y * 2));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediate(x * 2 + y / 2, y * 2, reduced);
        }
    }
#endif
}
|
||||
|
||||
// Next reduction level after SpdDownsampleMip_3: reduces the values that stage
// left in intermediate storage, stores the result to destination mip `mip`
// (output base is workGroupID * 2) and packs it into row 0 of intermediate
// storage for the final stage.
void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 4)
    {
        // Source pattern (x marks a value to read):
        //   x 0 0 0 x 0 0 0
        //   ...
        //   0 x 0 0 0 x 0 0
        // The column offset `y * 2` follows the shift applied when the
        // previous stage wrote these values.
        FfxUInt32x2 cell = FfxUInt32x2(x * 8 + y * 2, y * 8);
        FfxFloat32x4 reduced = SpdReduceIntermediate(cell,
                                                     cell + FfxUInt32x2(4, 0),
                                                     cell + FfxUInt32x2(1, 4),
                                                     cell + FfxUInt32x2(5, 4));

        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), reduced, mip, slice);

        // Write back to LDS: the four results sit side by side in row 0.
        //   x x x x 0 ...
        //   0 ...
        SpdStoreIntermediate(x + y * 2, 0, reduced);
    }
#else
    if (localInvocationIndex < 16)
    {
        // All 16 participating invocations join the quad reduction.
        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(x * 4 + y, y * 4));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediate(x / 2 + y, 0, reduced);
        }
    }
#endif
}
|
||||
|
||||
// Final stage of a four-level pass: collapses the four values held in row 0 of
// intermediate storage into one and stores it at `workGroupID` in destination
// mip `mip`. Nothing is written back to intermediate storage.
void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    // Only invocation 0 does the work.
    if (localInvocationIndex == 0)
    {
        // Source layout:
        //   x x x x 0 ...
        //   0 ...
        FfxFloat32x4 reduced = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
        SpdStore(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
    }
#else
    if (localInvocationIndex < 4)
    {
        // Each of the four invocations loads one value, then the quad reduces.
        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(localInvocationIndex, 0));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
        }
    }
#endif
}
|
||||
|
||||
// Produces mips 6 and 7 for one invocation.
//
// For each entry of a 2x2 block the invocation reduces four source texels with
// SpdReduceLoad4 and stores the result to mip 6. When more than 7 mips are
// requested, the four mip-6 values are reduced again, stored to mip 7 at
// (x, y), and mirrored into intermediate storage for the following stages.
void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat32x4 quadrant[4];

    // i visits the 2x2 block in the order (0,0), (1,0), (0,1), (1,1).
    for (FfxInt32 i = 0; i < 4; i++)
    {
        FfxInt32x2 srcTexel = FfxInt32x2(x * 4 + (i % 2) * 2, y * 4 + (i / 2) * 2);
        FfxInt32x2 dstTexel = FfxInt32x2(x * 2 + (i % 2), y * 2 + (i / 2));

        quadrant[i] = SpdReduceLoad4(srcTexel, slice);
        SpdStore(dstTexel, quadrant[i], 6, slice);
    }

    if (mips <= 7)
    {
        return;
    }

    // No barrier needed: the four inputs were all produced by this invocation.
    FfxFloat32x4 merged = SpdReduce4(quadrant[0], quadrant[1], quadrant[2], quadrant[3]);
    SpdStore(FfxInt32x2(x, y), merged, 7, slice);
    SpdStoreIntermediate(x, y, merged);
}
|
||||
|
||||
// Runs up to four consecutive reduction stages, writing destination mips
// baseMip .. baseMip + 3. A stage runs only while `mips` exceeds its mip index,
// and SpdWorkgroupShuffleBarrier() is issued before each stage that runs.
void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    if (mips > baseMip)
    {
        SpdWorkgroupShuffleBarrier();
        SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);

        if (mips > baseMip + 1)
        {
            SpdWorkgroupShuffleBarrier();
            SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);

            if (mips > baseMip + 2)
            {
                SpdWorkgroupShuffleBarrier();
                SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);

                if (mips > baseMip + 3)
                {
                    SpdWorkgroupShuffleBarrier();
                    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice);
                }
            }
        }
    }
}
|
||||
|
||||
// Top-level driver of the downsample.
//
//   workGroupID           workgroup coordinate used to place this group's output
//   localInvocationIndex  flat index of the invocation inside the workgroup
//   mips                  number of mips to generate
//   numWorkGroups, slice  forwarded to SpdExitWorkgroup / the load-store callbacks
//
// Sequence: the mip 0/1 stage, then up to four stages starting at mip 2. If more
// than 6 mips are requested, SpdExitWorkgroup decides whether this workgroup
// carries on; the one that does resets the atomic counter and generates the
// remaining mips starting at mip 6.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    // The low 6 bits of the index are remapped to a 2D position; bit 6 adds 8
    // to x and the bits above it add 8 each to y.
    FfxUInt32x2 remapped = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32 block = localInvocationIndex >> 6;
    FfxUInt32 x = remapped.x + 8 * (block % 2);
    FfxUInt32 y = remapped.y + 8 * (block / 2);

    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    if (mips <= 6)
    {
        return;
    }

    // Kept as its own statement: SpdExitWorkgroup must only run when mips > 6.
    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
    {
        return;
    }

    SpdResetAtomicCounter(slice);

    // After mip 6 only a single workgroup is left; it downsamples the
    // remaining (up to 64x64) texels, so the workgroup ID is fixed at (0, 0).
    SpdDownsampleMips_6_7(x, y, mips, slice);
    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
|
||||
|
||||
// Overload that shifts the workgroup coordinate by `workGroupOffset` before
// running the regular SpdDownsample; all other arguments pass through unchanged.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
    FfxUInt32x2 shiftedGroupID = workGroupID + workGroupOffset;
    SpdDownsample(shiftedGroupID, localInvocationIndex, mips, numWorkGroups, slice);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//==============================================================================================================================
|
||||
// PACKED VERSION
|
||||
//==============================================================================================================================
|
||||
|
||||
#if FFX_HALF
|
||||
|
||||
#ifdef FFX_GLSL
|
||||
#extension GL_EXT_shader_subgroup_extended_types_float16:require
|
||||
#endif
|
||||
|
||||
// Half-precision quad reduction: combines `v` with the values held by the other
// three lanes of the same quad and returns SpdReduce4H of the four.
//
// Returns a zero vector when SPD_NO_WAVE_OPERATIONS is defined, or when neither
// FFX_GLSL nor FFX_HLSL is defined.
FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    return FfxFloat16x4(0.0, 0.0, 0.0, 0.0);
#elif defined(FFX_GLSL)
    FfxFloat16x4 own        = v;
    FfxFloat16x4 horizontal = subgroupQuadSwapHorizontal(v);
    FfxFloat16x4 vertical   = subgroupQuadSwapVertical(v);
    FfxFloat16x4 diagonal   = subgroupQuadSwapDiagonal(v);
    return SpdReduce4H(own, horizontal, vertical, diagonal);
#elif defined(FFX_HLSL)
    // Requires SM6.0.
    //
    // If SM6.0 is not available, the AMD shader intrinsics can be used instead
    // (works for DX11). They ship with the AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // In that variant each component of v (x, y, z, w) is passed through
    // AmdExtD3DShaderIntrinsics_SwizzleF with
    //   AmdExtD3DShaderIntrinsicsSwizzle_SwapX1    to build v1,
    //   AmdExtD3DShaderIntrinsicsSwizzle_SwapX2    to build v2,
    //   AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4 to build v3,
    // and the result is SpdReduce4H(v, v1, v2, v3).
    FfxUInt32 quadBase = WaveGetLaneIndex() & (~0x3);
    FfxFloat16x4 lane0 = v;
    FfxFloat16x4 lane1 = WaveReadLaneAt(v, quadBase | 1);
    FfxFloat16x4 lane2 = WaveReadLaneAt(v, quadBase | 2);
    FfxFloat16x4 lane3 = WaveReadLaneAt(v, quadBase | 3);
    return SpdReduce4H(lane0, lane1, lane2, lane3);
#else
    return FfxFloat16x4(0.0, 0.0, 0.0, 0.0);
#endif
}
|
||||
|
||||
// Loads four half-precision values from intermediate storage at coordinates
// i0..i3 (in that order) and returns their SpdReduce4H reduction.
FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    FfxFloat16x4 sampleA = SpdLoadIntermediateH(i0.x, i0.y);
    FfxFloat16x4 sampleB = SpdLoadIntermediateH(i1.x, i1.y);
    FfxFloat16x4 sampleC = SpdLoadIntermediateH(i2.x, i2.y);
    FfxFloat16x4 sampleD = SpdLoadIntermediateH(i3.x, i3.y);

    return SpdReduce4H(sampleA, sampleB, sampleC, sampleD);
}
|
||||
|
||||
// Loads four half-precision values through SpdLoadH at coordinates i0..i3
// (in that order, for the given slice) and returns their SpdReduce4H reduction.
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 sampleA = SpdLoadH(FfxInt32x2(i0), slice);
    FfxFloat16x4 sampleB = SpdLoadH(FfxInt32x2(i1), slice);
    FfxFloat16x4 sampleC = SpdLoadH(FfxInt32x2(i2), slice);
    FfxFloat16x4 sampleD = SpdLoadH(FfxInt32x2(i3), slice);

    return SpdReduce4H(sampleA, sampleB, sampleC, sampleD);
}
|
||||
|
||||
// Convenience overload: reduces the 2x2 footprint starting at `base`. The
// coordinates are passed on in the order (0,0), (0,1), (1,0), (1,1).
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));

    return SpdReduceLoad4H(p00, p01, p10, p11, slice);
}
|
||||
|
||||
// Loads four half-precision values through SpdLoadSourceImageH at coordinates
// i0..i3 (in that order, for the given slice) and returns their SpdReduce4H
// reduction.
FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 sampleA = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
    FfxFloat16x4 sampleB = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
    FfxFloat16x4 sampleC = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
    FfxFloat16x4 sampleD = SpdLoadSourceImageH(FfxInt32x2(i3), slice);

    return SpdReduce4H(sampleA, sampleB, sampleC, sampleD);
}
|
||||
|
||||
// Returns the reduced source value for the footprint starting at `base`.
// With SPD_LINEAR_SAMPLER defined, a single SpdLoadSourceImageH call at `base`
// is returned as is. Otherwise the four coordinates (0,0), (0,1), (1,0), (1,1)
// relative to `base` are loaded and reduced explicitly.
FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifndef SPD_LINEAR_SAMPLER
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));
    return SpdReduceLoadSourceImage4H(p00, p01, p10, p11, slice);
#else
    return SpdLoadSourceImageH(FfxInt32x2(base), slice);
#endif
}
|
||||
|
||||
// Half-precision mip 0/1 stage using quad operations.
//
// Each invocation reduces four source footprints (offset by 0 or 32 source
// texels in x and y from workGroupID * 64) and stores the results to mip 0
// (offset by 0 or 16 from workGroupID * 32). When more than one mip is
// requested, each of the four values is quad-reduced, and the invocations whose
// index is a multiple of 4 store the results to mip 1 and to intermediate
// storage.
void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 tile[4];

    // i visits the four tiles in the order (0,0), (1,0), (0,1), (1,1).
    for (FfxInt32 i = 0; i < 4; i++)
    {
        FfxInt32x2 srcTexel = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + (i % 2) * 32, y * 2 + (i / 2) * 32);
        FfxInt32x2 dstTexel = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + (i % 2) * 16, y + (i / 2) * 16);

        tile[i] = SpdReduceLoadSourceImageH(srcTexel, slice);
        SpdStoreH(dstTexel, tile[i], 0, slice);
    }

    if (mips <= 1)
    {
        return;
    }

    // Every invocation takes part in the quad reductions.
    for (FfxInt32 i = 0; i < 4; i++)
    {
        tile[i] = SpdReduceQuadH(tile[i]);
    }

    // One invocation in four writes the reduced values.
    if ((localInvocationIndex % 4) == 0)
    {
        for (FfxInt32 i = 0; i < 4; i++)
        {
            FfxUInt32 outX = x / 2 + (i % 2) * 8;
            FfxUInt32 outY = y / 2 + (i / 2) * 8;

            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(outX, outY), tile[i], 1, slice);
            SpdStoreIntermediateH(outX, outY, tile[i]);
        }
    }
}
|
||||
|
||||
// Half-precision mip 0/1 stage that exchanges data through intermediate
// storage instead of quad operations.
//
// Mip 0 is produced exactly as in the intrinsics variant. For mip 1 the four
// tiles are handled one after another: every invocation publishes its value,
// a barrier follows, the first 64 invocations reduce a 2x2 footprint and store
// it to mip 1, and a second barrier closes the round so the storage can be
// reused. Finally the first 64 invocations write all four mip-1 values back to
// intermediate storage.
void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 tile[4];

    // i visits the four tiles in the order (0,0), (1,0), (0,1), (1,1).
    for (FfxInt32 i = 0; i < 4; i++)
    {
        FfxInt32x2 srcTexel = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + (i % 2) * 32, y * 2 + (i / 2) * 32);
        FfxInt32x2 dstTexel = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + (i % 2) * 16, y + (i / 2) * 16);

        tile[i] = SpdReduceLoadSourceImageH(srcTexel, slice);
        SpdStoreH(dstTexel, tile[i], 0, slice);
    }

    if (mips <= 1)
    {
        return;
    }

    // First coordinate of the 2x2 footprint this invocation reduces.
    FfxUInt32x2 corner = FfxUInt32x2(x * 2, y * 2);

    for (FfxInt32 i = 0; i < 4; i++)
    {
        SpdStoreIntermediateH(x, y, tile[i]);
        SpdWorkgroupShuffleBarrier();

        if (localInvocationIndex < 64)
        {
            tile[i] = SpdReduceIntermediateH(corner,
                                             corner + FfxUInt32x2(1, 0),
                                             corner + FfxUInt32x2(0, 1),
                                             corner + FfxUInt32x2(1, 1));
            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), tile[i], 1, slice);
        }

        // Second barrier: the next round overwrites the same storage.
        SpdWorkgroupShuffleBarrier();
    }

    if (localInvocationIndex < 64)
    {
        for (FfxInt32 i = 0; i < 4; i++)
        {
            SpdStoreIntermediateH(x + (i % 2) * 8, y + (i / 2) * 8, tile[i]);
        }
    }
}
|
||||
|
||||
// Half-precision entry point for the mip 0/1 stage. Every argument is forwarded
// unchanged to SpdDownsampleMips_0_1_IntrinsicsH, or to
// SpdDownsampleMips_0_1_LDSH when SPD_NO_WAVE_OPERATIONS is defined.
void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
#ifndef SPD_NO_WAVE_OPERATIONS
    SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
#else
    SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
#endif
}
|
||||
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleMip_2: reduces the intermediate
// data by one level, stores the result to destination mip `mip` (output base is
// workGroupID * 8) and writes it back to intermediate storage.
void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 64)
    {
        // Reduce the 2x2 footprint whose first entry is (2x, 2y).
        FfxUInt32x2 corner = FfxUInt32x2(x * 2, y * 2);
        FfxFloat16x4 reduced = SpdReduceIntermediateH(corner,
                                                      corner + FfxUInt32x2(1, 0),
                                                      corner + FfxUInt32x2(0, 1),
                                                      corner + FfxUInt32x2(1, 1));

        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), reduced, mip, slice);

        // Write back to LDS. Results from odd `y` are shifted one column to the
        // right, an arrangement intended to reduce bank conflicts:
        //   x 0 x 0 x 0 x 0 ...
        //   0 0 0 0 0 0 0 0 ...
        //   0 x 0 x 0 x 0 x ...
        //   0 0 0 0 0 0 0 0 ...
        //   x 0 x 0 x 0 x 0 ...
        SpdStoreIntermediateH(x * 2 + y % 2, y * 2, reduced);
    }
#else
    // Every invocation takes part in the quad reduction; the branch below only
    // limits who writes the result.
    FfxFloat16x4 reduced = SpdReduceQuadH(SpdLoadIntermediateH(x, y));

    // Quad index 0 stores the result.
    if (localInvocationIndex % 4 == 0)
    {
        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
        SpdStoreIntermediateH(x + (y / 2) % 2, y, reduced);
    }
#endif
}
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleMip_3: reduces the staggered
// values left by the previous stage, stores the result to destination mip `mip`
// (output base is workGroupID * 4) and writes it back to intermediate storage
// in a new layout.
void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 16)
    {
        // Source pattern inside each 4x4 cell (x marks a value to read):
        //   x 0 x 0
        //   0 0 0 0
        //   0 x 0 x
        //   0 0 0 0
        FfxUInt32x2 cell = FfxUInt32x2(x * 4, y * 4);
        FfxFloat16x4 reduced = SpdReduceIntermediateH(cell,
                                                      cell + FfxUInt32x2(2, 0),
                                                      cell + FfxUInt32x2(1, 2),
                                                      cell + FfxUInt32x2(3, 2));

        SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), reduced, mip, slice);

        // Write back to LDS, one value per 4x4 cell, shifted right by `y` columns:
        //   x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        //   (three empty rows)
        //   0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        //   ...
        //   0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        //   ...
        //   0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        SpdStoreIntermediateH(x * 4 + y, y * 4, reduced);
    }
#else
    if (localInvocationIndex < 64)
    {
        // All 64 participating invocations join the quad reduction.
        FfxFloat16x4 reduced = SpdReduceQuadH(SpdLoadIntermediateH(x * 2 + y % 2, y * 2));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediateH(x * 2 + y / 2, y * 2, reduced);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleMip_4: reduces the values left by
// the previous stage, stores the result to destination mip `mip` (output base
// is workGroupID * 2) and packs it into row 0 of intermediate storage.
void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 4)
    {
        // Source pattern (x marks a value to read):
        //   x 0 0 0 x 0 0 0
        //   ...
        //   0 x 0 0 0 x 0 0
        // The column offset `y * 2` follows the shift applied when the
        // previous stage wrote these values.
        FfxUInt32x2 cell = FfxUInt32x2(x * 8 + y * 2, y * 8);
        FfxFloat16x4 reduced = SpdReduceIntermediateH(cell,
                                                      cell + FfxUInt32x2(4, 0),
                                                      cell + FfxUInt32x2(1, 4),
                                                      cell + FfxUInt32x2(5, 4));

        SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), reduced, mip, slice);

        // Write back to LDS: the four results sit side by side in row 0.
        //   x x x x 0 ...
        //   0 ...
        SpdStoreIntermediateH(x + y * 2, 0, reduced);
    }
#else
    if (localInvocationIndex < 16)
    {
        // All 16 participating invocations join the quad reduction.
        FfxFloat16x4 reduced = SpdReduceQuadH(SpdLoadIntermediateH(x * 4 + y, y * 4));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediateH(x / 2 + y, 0, reduced);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleMip_5: collapses the four values
// held in row 0 of intermediate storage into one and stores it at `workGroupID`
// in destination mip `mip`. Nothing is written back to intermediate storage.
void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    // Only invocation 0 does the work.
    if (localInvocationIndex == 0)
    {
        // Source layout:
        //   x x x x 0 ...
        //   0 ...
        FfxFloat16x4 reduced = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
        SpdStoreH(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
    }
#else
    if (localInvocationIndex < 4)
    {
        // Each of the four invocations loads one value, then the quad reduces.
        FfxFloat16x4 reduced = SpdReduceQuadH(SpdLoadIntermediateH(localInvocationIndex, 0));

        // Quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
        }
    }
#endif
}
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleMips_6_7.
//
// For each entry of a 2x2 block the invocation reduces four source texels with
// SpdReduceLoad4H and stores the result to mip 6. When more than 7 mips are
// requested, the four mip-6 values are reduced again, stored to mip 7 at
// (x, y), and mirrored into intermediate storage for the following stages.
void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 quadrant[4];

    // i visits the 2x2 block in the order (0,0), (1,0), (0,1), (1,1).
    for (FfxInt32 i = 0; i < 4; i++)
    {
        FfxInt32x2 srcTexel = FfxInt32x2(x * 4 + (i % 2) * 2, y * 4 + (i / 2) * 2);
        FfxInt32x2 dstTexel = FfxInt32x2(x * 2 + (i % 2), y * 2 + (i / 2));

        quadrant[i] = SpdReduceLoad4H(srcTexel, slice);
        SpdStoreH(dstTexel, quadrant[i], 6, slice);
    }

    if (mips <= 7)
    {
        return;
    }

    // No barrier needed: the four inputs were all produced by this invocation.
    FfxFloat16x4 merged = SpdReduce4H(quadrant[0], quadrant[1], quadrant[2], quadrant[3]);
    SpdStoreH(FfxInt32x2(x, y), merged, 7, slice);
    SpdStoreIntermediateH(x, y, merged);
}
|
||||
|
||||
// Half-precision counterpart of SpdDownsampleNextFour: runs up to four
// consecutive reduction stages, writing destination mips baseMip .. baseMip + 3.
// A stage runs only while `mips` exceeds its mip index, and
// SpdWorkgroupShuffleBarrier() is issued before each stage that runs.
void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    if (mips > baseMip)
    {
        SpdWorkgroupShuffleBarrier();
        SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);

        if (mips > baseMip + 1)
        {
            SpdWorkgroupShuffleBarrier();
            SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);

            if (mips > baseMip + 2)
            {
                SpdWorkgroupShuffleBarrier();
                SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);

                if (mips > baseMip + 3)
                {
                    SpdWorkgroupShuffleBarrier();
                    SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice);
                }
            }
        }
    }
}
|
||||
|
||||
// Half-precision top-level driver of the downsample.
//
//   workGroupID           workgroup coordinate used to place this group's output
//   localInvocationIndex  flat index of the invocation inside the workgroup
//   mips                  number of mips to generate
//   numWorkGroups, slice  forwarded to SpdExitWorkgroup / the load-store callbacks
//
// Sequence: the mip 0/1 stage, then up to four stages starting at mip 2. If more
// than 6 mips are requested, SpdExitWorkgroup decides whether this workgroup
// carries on; the one that does resets the atomic counter and generates the
// remaining mips starting at mip 6.
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    // The low 6 bits of the index are remapped to a 2D position; bit 6 adds 8
    // to x and the bits above it add 8 each to y.
    FfxUInt32x2 remapped = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32 block = localInvocationIndex >> 6;
    FfxUInt32 x = remapped.x + 8 * (block % 2);
    FfxUInt32 y = remapped.y + 8 * (block / 2);

    SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
    SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    if (mips <= 6)
    {
        return;
    }

    // Kept as its own statement: SpdExitWorkgroup must only run when mips > 6.
    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
    {
        return;
    }

    SpdResetAtomicCounter(slice);

    // After mip 6 only a single workgroup is left; it downsamples the
    // remaining (up to 64x64) texels, so the workgroup ID is fixed at (0, 0).
    SpdDownsampleMips_6_7H(x, y, mips, slice);
    SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
|
||||
|
||||
// Overload that shifts the workgroup coordinate by `workGroupOffset` before
// running the regular SpdDownsampleH; all other arguments pass through unchanged.
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
    FfxUInt32x2 shiftedGroupID = workGroupID + workGroupOffset;
    SpdDownsampleH(shiftedGroupID, localInvocationIndex, mips, numWorkGroups, slice);
}
|
||||
|
||||
#endif // #if FFX_HALF
|
||||
#endif // #ifdef FFX_GPU
|
Reference in New Issue
Block a user