initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

View File

@@ -0,0 +1,429 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_COMMON_TYPES_H
#define FFX_COMMON_TYPES_H
#if defined(FFX_CPU)
#define FFX_PARAMETER_IN
#define FFX_PARAMETER_OUT
#define FFX_PARAMETER_INOUT
#elif defined(FFX_HLSL)
#define FFX_PARAMETER_IN in
#define FFX_PARAMETER_OUT out
#define FFX_PARAMETER_INOUT inout
#elif defined(FFX_GLSL)
#define FFX_PARAMETER_IN in
#define FFX_PARAMETER_OUT out
#define FFX_PARAMETER_INOUT inout
#endif // #if defined(FFX_CPU)
#if defined(FFX_CPU)
/// A typedef for a boolean value.
///
/// @ingroup CPU
typedef bool FfxBoolean;
/// A typedef for a unsigned 8bit integer.
///
/// @ingroup CPU
typedef uint8_t FfxUInt8;
/// A typedef for a unsigned 16bit integer.
///
/// @ingroup CPU
typedef uint16_t FfxUInt16;
/// A typedef for a unsigned 32bit integer.
///
/// @ingroup CPU
typedef uint32_t FfxUInt32;
/// A typedef for a unsigned 64bit integer.
///
/// @ingroup CPU
typedef uint64_t FfxUInt64;
/// A typedef for a signed 8bit integer.
///
/// @ingroup CPU
typedef int8_t FfxInt8;
/// A typedef for a signed 16bit integer.
///
/// @ingroup CPU
typedef int16_t FfxInt16;
/// A typedef for a signed 32bit integer.
///
/// @ingroup CPU
typedef int32_t FfxInt32;
/// A typedef for a signed 64bit integer.
///
/// @ingroup CPU
typedef int64_t FfxInt64;
/// A typedef for a floating point value.
///
/// @ingroup CPU
typedef float FfxFloat32;
/// A typedef for a 2-dimensional floating point value.
///
/// @ingroup CPU
typedef float FfxFloat32x2[2];
/// A typedef for a 3-dimensional floating point value.
///
/// @ingroup CPU
typedef float FfxFloat32x3[3];
/// A typedef for a 4-dimensional floating point value.
///
/// @ingroup CPU
typedef float FfxFloat32x4[4];
/// A typedef for a 2-dimensional 32bit unsigned integer.
///
/// @ingroup CPU
typedef uint32_t FfxUInt32x2[2];
/// A typedef for a 3-dimensional 32bit unsigned integer.
///
/// @ingroup CPU
typedef uint32_t FfxUInt32x3[3];
/// A typedef for a 4-dimensional 32bit unsigned integer.
///
/// @ingroup CPU
typedef uint32_t FfxUInt32x4[4];
#endif // #if defined(FFX_CPU)
#if defined(FFX_HLSL)
/// A typedef for a boolean value.
///
/// @ingroup GPU
typedef bool FfxBoolean;
#if FFX_HLSL_6_2
typedef float32_t FfxFloat32;
typedef float32_t2 FfxFloat32x2;
typedef float32_t3 FfxFloat32x3;
typedef float32_t4 FfxFloat32x4;
/// A typedef for a unsigned 32bit integer.
///
/// @ingroup GPU
typedef uint32_t FfxUInt32;
typedef uint32_t2 FfxUInt32x2;
typedef uint32_t3 FfxUInt32x3;
typedef uint32_t4 FfxUInt32x4;
typedef int32_t FfxInt32;
typedef int32_t2 FfxInt32x2;
typedef int32_t3 FfxInt32x3;
typedef int32_t4 FfxInt32x4;
#else
#define FfxFloat32 float
#define FfxFloat32x2 float2
#define FfxFloat32x3 float3
#define FfxFloat32x4 float4
/// A typedef for a unsigned 32bit integer.
///
/// @ingroup GPU
typedef uint FfxUInt32;
typedef uint2 FfxUInt32x2;
typedef uint3 FfxUInt32x3;
typedef uint4 FfxUInt32x4;
typedef int FfxInt32;
typedef int2 FfxInt32x2;
typedef int3 FfxInt32x3;
typedef int4 FfxInt32x4;
#endif // #if defined(FFX_HLSL_6_2)
#if FFX_HALF
#if FFX_HLSL_6_2
typedef float16_t FfxFloat16;
typedef float16_t2 FfxFloat16x2;
typedef float16_t3 FfxFloat16x3;
typedef float16_t4 FfxFloat16x4;
/// A typedef for an unsigned 16bit integer.
///
/// @ingroup GPU
typedef uint16_t FfxUInt16;
typedef uint16_t2 FfxUInt16x2;
typedef uint16_t3 FfxUInt16x3;
typedef uint16_t4 FfxUInt16x4;
/// A typedef for a signed 16bit integer.
///
/// @ingroup GPU
typedef int16_t FfxInt16;
typedef int16_t2 FfxInt16x2;
typedef int16_t3 FfxInt16x3;
typedef int16_t4 FfxInt16x4;
#else
typedef min16float FfxFloat16;
typedef min16float2 FfxFloat16x2;
typedef min16float3 FfxFloat16x3;
typedef min16float4 FfxFloat16x4;
/// A typedef for an unsigned 16bit integer.
///
/// @ingroup GPU
typedef min16uint FfxUInt16;
typedef min16uint2 FfxUInt16x2;
typedef min16uint3 FfxUInt16x3;
typedef min16uint4 FfxUInt16x4;
/// A typedef for a signed 16bit integer.
///
/// @ingroup GPU
typedef min16int FfxInt16;
typedef min16int2 FfxInt16x2;
typedef min16int3 FfxInt16x3;
typedef min16int4 FfxInt16x4;
#endif // FFX_HLSL_6_2
#endif // FFX_HALF
#endif // #if defined(FFX_HLSL)
#if defined(FFX_GLSL)
/// A typedef for a boolean value.
///
/// @ingroup GPU
#define FfxBoolean bool
#define FfxFloat32 float
#define FfxFloat32x2 vec2
#define FfxFloat32x3 vec3
#define FfxFloat32x4 vec4
#define FfxUInt32 uint
#define FfxUInt32x2 uvec2
#define FfxUInt32x3 uvec3
#define FfxUInt32x4 uvec4
#define FfxInt32 int
#define FfxInt32x2 ivec2
#define FfxInt32x3 ivec3
#define FfxInt32x4 ivec4
#if FFX_HALF
#define FfxFloat16 float16_t
#define FfxFloat16x2 f16vec2
#define FfxFloat16x3 f16vec3
#define FfxFloat16x4 f16vec4
#define FfxUInt16 uint16_t
#define FfxUInt16x2 u16vec2
#define FfxUInt16x3 u16vec3
#define FfxUInt16x4 u16vec4
#define FfxInt16 int16_t
#define FfxInt16x2 i16vec2
#define FfxInt16x3 i16vec3
#define FfxInt16x4 i16vec4
#endif // FFX_HALF
#endif // #if defined(FFX_GLSL)
// Global toggles:
// #define FFX_HALF (1)
// #define FFX_HLSL_6_2 (1)
#if FFX_HALF
#if FFX_HLSL_6_2
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
#else //FFX_HLSL_6_2
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName;
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<min16##BaseComponentType, COL> TypeName;
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType );
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
#endif //FFX_HLSL_6_2
#else //FFX_HALF
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
#endif //FFX_HALF
#if defined(FFX_GPU)
// Common typedefs:
#if defined(FFX_HLSL)
FFX_MIN16_SCALAR( FFX_MIN16_F , float );
FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 );
FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 );
FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 );
FFX_MIN16_SCALAR( FFX_MIN16_I, int );
FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 );
FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 );
FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 );
FFX_MIN16_SCALAR( FFX_MIN16_U, uint );
FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 );
FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 );
FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 );
FFX_16BIT_SCALAR( FFX_F16_t , float );
FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 );
FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 );
FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 );
FFX_16BIT_SCALAR( FFX_I16_t, int );
FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 );
FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 );
FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 );
FFX_16BIT_SCALAR( FFX_U16_t, uint );
FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 );
FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 );
FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 );
#define TYPEDEF_MIN16_TYPES(Prefix) \
typedef FFX_MIN16_F Prefix##_F; \
typedef FFX_MIN16_F2 Prefix##_F2; \
typedef FFX_MIN16_F3 Prefix##_F3; \
typedef FFX_MIN16_F4 Prefix##_F4; \
typedef FFX_MIN16_I Prefix##_I; \
typedef FFX_MIN16_I2 Prefix##_I2; \
typedef FFX_MIN16_I3 Prefix##_I3; \
typedef FFX_MIN16_I4 Prefix##_I4; \
typedef FFX_MIN16_U Prefix##_U; \
typedef FFX_MIN16_U2 Prefix##_U2; \
typedef FFX_MIN16_U3 Prefix##_U3; \
typedef FFX_MIN16_U4 Prefix##_U4;
#define TYPEDEF_16BIT_TYPES(Prefix) \
typedef FFX_16BIT_F Prefix##_F; \
typedef FFX_16BIT_F2 Prefix##_F2; \
typedef FFX_16BIT_F3 Prefix##_F3; \
typedef FFX_16BIT_F4 Prefix##_F4; \
typedef FFX_16BIT_I Prefix##_I; \
typedef FFX_16BIT_I2 Prefix##_I2; \
typedef FFX_16BIT_I3 Prefix##_I3; \
typedef FFX_16BIT_I4 Prefix##_I4; \
typedef FFX_16BIT_U Prefix##_U; \
typedef FFX_16BIT_U2 Prefix##_U2; \
typedef FFX_16BIT_U3 Prefix##_U3; \
typedef FFX_16BIT_U4 Prefix##_U4;
#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \
typedef FfxFloat32 Prefix##_F; \
typedef FfxFloat32x2 Prefix##_F2; \
typedef FfxFloat32x3 Prefix##_F3; \
typedef FfxFloat32x4 Prefix##_F4; \
typedef FfxInt32 Prefix##_I; \
typedef FfxInt32x2 Prefix##_I2; \
typedef FfxInt32x3 Prefix##_I3; \
typedef FfxInt32x4 Prefix##_I4; \
typedef FfxUInt32 Prefix##_U; \
typedef FfxUInt32x2 Prefix##_U2; \
typedef FfxUInt32x3 Prefix##_U3; \
typedef FfxUInt32x4 Prefix##_U4;
#endif // #if defined(FFX_HLSL)
#if defined(FFX_GLSL)
#if FFX_HALF
#define FFX_MIN16_F float16_t
#define FFX_MIN16_F2 f16vec2
#define FFX_MIN16_F3 f16vec3
#define FFX_MIN16_F4 f16vec4
#define FFX_MIN16_I int16_t
#define FFX_MIN16_I2 i16vec2
#define FFX_MIN16_I3 i16vec3
#define FFX_MIN16_I4 i16vec4
#define FFX_MIN16_U uint16_t
#define FFX_MIN16_U2 u16vec2
#define FFX_MIN16_U3 u16vec3
#define FFX_MIN16_U4 u16vec4
#define FFX_16BIT_F float16_t
#define FFX_16BIT_F2 f16vec2
#define FFX_16BIT_F3 f16vec3
#define FFX_16BIT_F4 f16vec4
#define FFX_16BIT_I int16_t
#define FFX_16BIT_I2 i16vec2
#define FFX_16BIT_I3 i16vec3
#define FFX_16BIT_I4 i16vec4
#define FFX_16BIT_U uint16_t
#define FFX_16BIT_U2 u16vec2
#define FFX_16BIT_U3 u16vec3
#define FFX_16BIT_U4 u16vec4
#else // FFX_HALF
#define FFX_MIN16_F float
#define FFX_MIN16_F2 vec2
#define FFX_MIN16_F3 vec3
#define FFX_MIN16_F4 vec4
#define FFX_MIN16_I int
#define FFX_MIN16_I2 ivec2
#define FFX_MIN16_I3 ivec3
#define FFX_MIN16_I4 ivec4
#define FFX_MIN16_U uint
#define FFX_MIN16_U2 uvec2
#define FFX_MIN16_U3 uvec3
#define FFX_MIN16_U4 uvec4
#define FFX_16BIT_F float
#define FFX_16BIT_F2 vec2
#define FFX_16BIT_F3 vec3
#define FFX_16BIT_F4 vec4
#define FFX_16BIT_I int
#define FFX_16BIT_I2 ivec2
#define FFX_16BIT_I3 ivec3
#define FFX_16BIT_I4 ivec4
#define FFX_16BIT_U uint
#define FFX_16BIT_U2 uvec2
#define FFX_16BIT_U3 uvec3
#define FFX_16BIT_U4 uvec4
#endif // FFX_HALF
#endif // #if defined(FFX_GLSL)
#endif // #if defined(FFX_GPU)
#endif // #ifndef FFX_COMMON_TYPES_H

52
thirdparty/amd-fsr2/shaders/ffx_core.h vendored Normal file
View File

@@ -0,0 +1,52 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/// @defgroup Core
/// @defgroup HLSL
/// @defgroup GLSL
/// @defgroup GPU
/// @defgroup CPU
/// @defgroup CAS
/// @defgroup FSR1
#if !defined(FFX_CORE_H)
#define FFX_CORE_H
#include "ffx_common_types.h"
#if defined(FFX_CPU)
#include "ffx_core_cpu.h"
#endif // #if defined(FFX_CPU)
#if defined(FFX_GLSL) && defined(FFX_GPU)
#include "ffx_core_glsl.h"
#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
#if defined(FFX_HLSL) && defined(FFX_GPU)
#include "ffx_core_hlsl.h"
#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
#if defined(FFX_GPU)
#include "ffx_core_gpu_common.h"
#include "ffx_core_gpu_common_half.h"
#include "ffx_core_portability.h"
#endif // #if defined(FFX_GPU)
#endif // #if !defined(FFX_CORE_H)

View File

@@ -0,0 +1,332 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/// A define for a true value in a boolean expression.
///
/// @ingroup CPU
#define FFX_TRUE (1)
/// A define for a false value in a boolean expression.
///
/// @ingroup CPU
#define FFX_FALSE (0)
#if !defined(FFX_STATIC)
/// A define to abstract declaration of static variables and functions.
///
/// @ingroup CPU
#define FFX_STATIC static
#endif // #if !defined(FFX_STATIC)
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-variable"
#endif
/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer.
///
/// @param [in] x A 32bit floating value.
///
/// @returns
/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
union
{
FfxFloat32 f;
FfxUInt32 u;
} bits;
bits.f = x;
return bits.u;
}
FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
{
return a[0] * b[0] + a[1] * b[1];
}
FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
{
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
{
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
return y * t + (-x * t + x);
}
/// Compute the reciprocal of a value.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
{
return 1.0f / a;
}
/// Compute the square root of a value.
///
/// @param [in] x The first value to compute the min of.
///
/// @returns
/// The the square root of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
{
return sqrt(x);
}
FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}
/// Compute the factional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
{
return a - floor(a);
}
/// Compute the reciprocal square root of a value.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
{
return ffxReciprocal(ffxSqrt(a));
}
FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
{
return x < y ? x : y;
}
FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
{
return x < y ? x : y;
}
FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
{
return x > y ? x : y;
}
FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
{
return x > y ? x : y;
}
/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
{
return ffxMin(1.0f, ffxMax(0.0f, a));
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d[0] = a[0] + b;
d[1] = a[1] + b;
d[2] = a[2] + b;
return;
}
FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d[0] = a[0];
d[1] = a[1];
d[2] = a[2];
return;
}
FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
d[2] = a[2] * b[2];
return;
}
FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d[0] = a[0] * b;
d[1] = a[1] * b;
d[2] = a[2] * b;
return;
}
FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d[0] = ffxReciprocal(a[0]);
d[1] = ffxReciprocal(a[1]);
d[2] = ffxReciprocal(a[2]);
return;
}
/// Convert FfxFloat32 to half (in lower 16-bits of output).
///
/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
///
/// The function supports denormals.
///
/// Some conversion rules are to make computations possibly "safer" on the GPU,
/// -INF & -NaN -> -65504
/// +INF & +NaN -> +65504
///
/// @param [in] f The 32bit floating point value to convert.
///
/// @returns
/// The closest 16bit floating point value to <c><i>f</i></c>.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
{
static FfxUInt16 base[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
};
static FfxUInt8 shift[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
};
union
{
FfxFloat32 f;
FfxUInt32 u;
} bits;
bits.f = f;
FfxUInt32 u = bits.u;
FfxUInt32 i = u >> 23;
return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
}
/// Pack 2x32-bit floating point values in a single 32bit value.
///
/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
/// 32bit unsigned integer respectively.
///
/// @param [in] value A 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A packed 32bit value containing 2 16bit floating point values.
///
/// @ingroup CPU
FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
{
return f32tof16(a[0]) + (f32tof16(a[1]) << 16);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,50 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d = a + ffxBroadcast3(b);
return d;
}
FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d = a;
return d;
}
FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
d = a * b;
return d;
}
FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d = a * ffxBroadcast3(b);
return d;
}
FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d = rcp(a);
return d;
}

1250
thirdparty/amd-fsr2/shaders/ffx_fsr1.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,295 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_ACCUMULATE_H
#define FFX_FSR2_ACCUMULATE_H
FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
{
return length(fMotionVector * DisplaySize());
}
#if FFX_HALF
FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
{
return length(fMotionVector * FFX_MIN16_F2(DisplaySize()));
}
#endif
void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
{
// Aviod invalid values when accumulation and upsampled weight is 0
fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
//YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation)
fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
#endif
const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation;
fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);
fHistoryColor = YCoCgToRGB(fHistoryColor);
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
fHistoryColor = InverseTonemap(fHistoryColor);
#endif
}
void RectifyHistory(
const AccumulationPassCommonParams params,
RectificationBox clippingBox,
FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
FfxFloat32 fLockContributionThisFrame,
FfxFloat32 fTemporalReactiveFactor,
FfxFloat32 fLumaInstabilityFactor)
{
FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));
const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor));
FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);
FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
FfxFloat32x3 boxCenter = clippingBox.boxCenter;
FfxFloat32 boxVecSize = length(clippingBox.boxVec);
boxMin = ffxMax(clippingBox.aabbMin, boxMin);
boxMax = ffxMin(clippingBox.aabbMax, boxMax);
if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) {
const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);
FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f);
fHistoryContribution *= fReactiveContribution;
// Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution));
// Scale accumulation using rectification info
const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f));
fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
}
}
void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
{
StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
}
void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
{
// we expect similar motion for next frame
// kill lock if that location is outside screen, avoid locks to be clamped to screen borders
FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
if (IsUvInside(fEstimatedUvNextFrame) == false) {
KillLock(fLockStatus);
}
else {
// Decrease lock lifetime
const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax);
fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
}
StoreLockStatus(params.iPxHrPos, fLockStatus);
}
FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
{
// Always assume max accumulation was reached
FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)))));
fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20))));
return fBaseAccumulation.xxx;
}
FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
{
const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;
const FfxInt32 N_MINUS_1 = 0;
const FfxInt32 N_MINUS_2 = 1;
const FfxInt32 N_MINUS_3 = 2;
const FfxInt32 N_MINUS_4 = 3;
FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
#endif
fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;
const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);
FfxFloat32 fLumaInstability = 0.0f;
FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
FfxFloat32 fMin = abs(fDiffs0);
if (fMin >= fUnormThreshold)
{
for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
if (sign(fDiffs0) == sign(fDiffs1)) {
// Scale difference to protect historically similar values
const FfxFloat32 fMinBias = 1.0f;
fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
}
}
const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);
fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
}
//shift history
fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory);
return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
}
FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
{
FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor);
fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity)));
fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor));
// Force reactive factor for new samples
fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;
if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) {
fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f;
}
return fNewFactor;
}
AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos)
{
AccumulationPassCommonParams params;
params.iPxHrPos = iPxHrPos;
const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
params.fHrUv = fHrUv;
const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);
params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));
const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
params.fAccumulationMask = fDilatedReactiveMasks.y;
params.bIsResetFrame = (0 == FrameIndex());
params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame);
return params;
}
void Accumulate(FfxInt32x2 iPxHrPos)
{
const AccumulationPassCommonParams params = InitParams(iPxHrPos);
FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
FfxFloat32x2 fLockStatus;
InitializeNewLockSample(fLockStatus);
FfxFloat32 fTemporalReactiveFactor = 0.0f;
FfxBoolean bInMotionLastFrame = FFX_FALSE;
LockState lockState = { FFX_FALSE , FFX_FALSE };
if (params.bIsExistingSample && !params.bIsResetFrame) {
ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
lockState = ReprojectHistoryLockStatus(params, fLockStatus);
}
FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
FfxFloat32 fLuminanceDiff = 0.0f;
FfxFloat32 fLockContributionThisFrame = 0.0f;
UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);
// Load upsampled input color
RectificationBox clippingBox;
FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);
FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
if (params.bIsNewSample) {
fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
}
else {
RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);
Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
}
fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);
// Get new temporal reactive factor
fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);
StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));
// Output final color when RCAS is disabled
#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
WriteUpscaledOutput(iPxHrPos, fHistoryColor);
#endif
StoreNewLocks(iPxHrPos, 0);
}
#endif // FFX_FSR2_ACCUMULATE_H

View File

@@ -0,0 +1,91 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
// Needed for rw_upscaled_output declaration
#extension GL_EXT_shader_image_load_formatted : require
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2
#else
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
#endif
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3
#define FSR2_BIND_SRV_LOCK_STATUS 4
//#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6
#define FSR2_BIND_SRV_LUMA_INSTABILITY 7
#define FSR2_BIND_SRV_LANCZOS_LUT 8
#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9
#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 10
#define FSR2_BIND_SRV_AUTO_EXPOSURE 11
#define FSR2_BIND_SRV_LUMA_HISTORY 12
#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13
#define FSR2_BIND_UAV_LOCK_STATUS 14
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15
#define FSR2_BIND_UAV_NEW_LOCKS 16
#define FSR2_BIND_UAV_LUMA_HISTORY 17
#define FSR2_BIND_CB_FSR2 18
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
#define FSR2_BIND_SRV_INPUT_DEPTH 5
#endif
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#include "ffx_fsr2_sample.h"
#include "ffx_fsr2_upsample.h"
#include "ffx_fsr2_postprocess_lock_status.h"
#include "ffx_fsr2_reproject.h"
#include "ffx_fsr2_accumulate.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
uvec2 uGroupId = gl_WorkGroupID.xy;
const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT;
uGroupId.y = GroupRows - uGroupId.y - 1;
uvec2 uDispatchThreadId = uGroupId * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
Accumulate(ivec2(uDispatchThreadId));
}

View File

@@ -0,0 +1,93 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
#define FSR2_BIND_SRV_INPUT_COLOR 1
#define FSR2_BIND_UAV_AUTOREACTIVE 2
#define FSR2_BIND_CB_REACTIVE 3
#define FSR2_BIND_CB_FSR2 4
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha;
// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha;
// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask;
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
#if defined(FSR2_BIND_CB_REACTIVE)
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
{
float scale;
float threshold;
float binaryValue;
uint flags;
} cbGenerateReactive;
#endif
FFX_FSR2_NUM_THREADS
void main()
{
FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy;
FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb;
FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb;
if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0)
{
ColorPreAlpha = Tonemap(ColorPreAlpha);
ColorPostAlpha = Tonemap(ColorPostAlpha);
}
if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0)
{
ColorPreAlpha = InverseTonemap(ColorPreAlpha);
ColorPostAlpha = InverseTonemap(ColorPostAlpha);
}
FfxFloat32 out_reactive_value = 0.f;
FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha);
out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta);
out_reactive_value *= cbGenerateReactive.scale;
out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 0 : cbGenerateReactive.binaryValue) : out_reactive_value;
imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value));
}

View File

@@ -0,0 +1,698 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "ffx_fsr2_resources.h"
#if defined(FFX_GPU)
#include "ffx_core.h"
#endif // #if defined(FFX_GPU)
#if defined(FFX_GPU)
#ifndef FFX_FSR2_PREFER_WAVE64
#define FFX_FSR2_PREFER_WAVE64
#endif // #if defined(FFX_GPU)
#if defined(FSR2_BIND_CB_FSR2)
layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t
{
FfxInt32x2 iRenderSize;
FfxInt32x2 iMaxRenderSize;
FfxInt32x2 iDisplaySize;
FfxInt32x2 iInputColorResourceDimensions;
FfxInt32x2 iLumaMipDimensions;
FfxInt32 iLumaMipLevelToUse;
FfxInt32 iFrameIndex;
FfxFloat32x4 fDeviceToViewDepth;
FfxFloat32x2 fJitter;
FfxFloat32x2 fMotionVectorScale;
FfxFloat32x2 fDownscaleFactor;
FfxFloat32x2 fMotionVectorJitterCancellation;
FfxFloat32 fPreExposure;
FfxFloat32 fPreviousFramePreExposure;
FfxFloat32 fTanHalfFOV;
FfxFloat32 fJitterSequenceLength;
FfxFloat32 fDeltaTime;
FfxFloat32 fDynamicResChangeFactor;
FfxFloat32 fViewSpaceToMetersFactor;
FfxFloat32 fPad;
mat4 mReprojectionMatrix;
} cbFSR2;
#endif
FfxInt32x2 RenderSize()
{
return cbFSR2.iRenderSize;
}
FfxInt32x2 MaxRenderSize()
{
return cbFSR2.iMaxRenderSize;
}
FfxInt32x2 DisplaySize()
{
return cbFSR2.iDisplaySize;
}
FfxInt32x2 InputColorResourceDimensions()
{
return cbFSR2.iInputColorResourceDimensions;
}
FfxInt32x2 LumaMipDimensions()
{
return cbFSR2.iLumaMipDimensions;
}
FfxInt32 LumaMipLevelToUse()
{
return cbFSR2.iLumaMipLevelToUse;
}
FfxInt32 FrameIndex()
{
return cbFSR2.iFrameIndex;
}
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
{
return cbFSR2.fDeviceToViewDepth;
}
FfxFloat32x2 Jitter()
{
return cbFSR2.fJitter;
}
FfxFloat32x2 MotionVectorScale()
{
return cbFSR2.fMotionVectorScale;
}
FfxFloat32x2 DownscaleFactor()
{
return cbFSR2.fDownscaleFactor;
}
FfxFloat32x2 MotionVectorJitterCancellation()
{
return cbFSR2.fMotionVectorJitterCancellation;
}
FfxFloat32 PreExposure()
{
return cbFSR2.fPreExposure;
}
FfxFloat32 PreviousFramePreExposure()
{
return cbFSR2.fPreviousFramePreExposure;
}
FfxFloat32 TanHalfFoV()
{
return cbFSR2.fTanHalfFOV;
}
FfxFloat32 JitterSequenceLength()
{
return cbFSR2.fJitterSequenceLength;
}
FfxFloat32 DeltaTime()
{
return cbFSR2.fDeltaTime;
}
FfxFloat32 DynamicResChangeFactor()
{
return cbFSR2.fDynamicResChangeFactor;
}
FfxFloat32 ViewSpaceToMetersFactor()
{
return cbFSR2.fViewSpaceToMetersFactor;
}
layout (set = 0, binding = 0) uniform sampler s_PointClamp;
layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
// SRVs
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
#endif
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
#endif
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
#endif
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure;
#endif
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
#endif
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
#endif
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
#endif
#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth;
#endif
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status;
#endif
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma;
#endif
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color;
#endif
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
#endif
#if defined(FSR2_BIND_SRV_RCAS_INPUT)
layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
#endif
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips;
#endif
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut;
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
#endif
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
#endif
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
#endif
// UAV
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
#endif
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
#endif
#if defined FSR2_BIND_UAV_DILATED_DEPTH
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth;
#endif
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
#endif
#if defined FSR2_BIND_UAV_LOCK_STATUS
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status;
#endif
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma;
#endif
#if defined FSR2_BIND_UAV_NEW_LOCKS
layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
#endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color;
#endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
#endif
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
#endif
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
#endif
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure;
#endif
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
#endif
#if defined FSR2_BIND_UAV_AUTOREACTIVE
layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
#endif
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
#endif
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
#endif
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel)
{
return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r;
}
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel)
{
return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
{
return texelFetch(r_input_depth, iPxPos, 0).r;
}
#endif
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
{
#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f);
#else
return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
#endif
}
#endif
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
{
return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
{
return texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb;
}
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos)
{
return texelFetch(r_prepared_input_color, iPxPos, 0).xyz;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
{
FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f)));
if (bInvalidMotionVector)
{
FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos);
FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
}
#endif
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
fUvMotionVector -= MotionVectorJitterCancellation();
#endif
return fUvMotionVector;
}
#endif
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
{
return texelFetch(r_internal_upscaled_color, iPxHistory, 0);
}
#endif
#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
{
imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory);
}
#endif
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f);
}
#endif
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
imageStore(rw_internal_upscaled_color, iPxHistory, fHistory);
}
#endif
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
{
imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight);
}
#endif
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
{
imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
}
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos)
{
FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg;
return fLockStatus;
}
#endif
#if defined(FSR2_BIND_UAV_LOCK_STATUS)
void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus)
{
imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f));
}
#endif
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos)
{
return texelFetch(r_lock_input_luma, iPxPos, 0).r;
}
#endif
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
{
imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0));
}
#endif
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
{
return texelFetch(r_new_locks, iPxPos, 0).r;
}
#endif
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
{
return imageLoad(rw_new_locks, iPxPos).r;
}
#endif
#if defined(FSR2_BIND_UAV_NEW_LOCKS)
void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
{
imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0));
}
#endif
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
{
imageStore(rw_prepared_input_color, iPxPos, fTonemapped);
}
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w;
}
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
{
FfxFloat32x2 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg;
return fLockStatus;
}
#endif
#if defined(FSR2_BIND_SRV_DEPTH)
FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput)
{
return texelFetch(r_input_depth, iPxInput, 0).r;
}
#endif
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
{
return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r);
}
#endif
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
{
FfxUInt32 uDepth = floatBitsToUint(fDepth);
#if FFX_FSR2_OPTION_INVERTED_DEPTH
imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth);
#else
imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth
#endif
}
#endif
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
{
imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0));
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
//FfxUInt32 uDepth = f32tof16(fDepth);
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f));
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
{
return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg;
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg;
}
#endif
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput)
{
return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg;
}
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy;
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
{
return texelFetch(r_dilatedDepth, iPxInput, 0).r;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
FfxFloat32 Exposure()
{
FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
if (exposure == 0.0f) {
exposure = 1.0f;
}
return exposure;
}
#endif
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
FfxFloat32 AutoExposure()
{
FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x;
if (exposure == 0.0f) {
exposure = 1.0f;
}
return exposure;
}
#endif
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x;
#else
return 0.f;
#endif
}
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
// Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r);
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg;
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
{
return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg;
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f));
}
#endif
#if defined(FFX_INTERNAL)
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
{
return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
}
#endif
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
}
#endif
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
}
#endif
#if defined(FSR2_BIND_UAV_AUTOREACTIVE)
#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION)
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f));
imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f));
}
#endif
#endif
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f));
}
#endif
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f));
}
#endif
#endif // #if defined(FFX_GPU)

View File

@@ -0,0 +1,799 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "ffx_fsr2_resources.h"
#if defined(FFX_GPU)
#ifdef __hlsl_dx_compiler
#pragma dxc diagnostic push
#pragma dxc diagnostic ignored "-Wambig-lit-shift"
#endif //__hlsl_dx_compiler
#include "ffx_core.h"
#ifdef __hlsl_dx_compiler
#pragma dxc diagnostic pop
#endif //__hlsl_dx_compiler
#endif // #if defined(FFX_GPU)
#if defined(FFX_GPU)
#ifndef FFX_FSR2_PREFER_WAVE64
#define FFX_FSR2_PREFER_WAVE64
#endif // #if defined(FFX_GPU)
#if defined(FFX_GPU)
#pragma warning(disable: 3205) // conversion from larger type to smaller
#endif // #if defined(FFX_GPU)
#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
#define DECLARE_CB_REGISTER(regIndex) b##regIndex
#define FFX_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
{
FfxInt32x2 iRenderSize;
FfxInt32x2 iMaxRenderSize;
FfxInt32x2 iDisplaySize;
FfxInt32x2 iInputColorResourceDimensions;
FfxInt32x2 iLumaMipDimensions;
FfxInt32 iLumaMipLevelToUse;
FfxInt32 iFrameIndex;
FfxFloat32x4 fDeviceToViewDepth;
FfxFloat32x2 fJitter;
FfxFloat32x2 fMotionVectorScale;
FfxFloat32x2 fDownscaleFactor;
FfxFloat32x2 fMotionVectorJitterCancellation;
FfxFloat32 fPreExposure;
FfxFloat32 fPreviousFramePreExposure;
FfxFloat32 fTanHalfFOV;
FfxFloat32 fJitterSequenceLength;
FfxFloat32 fDeltaTime;
FfxFloat32 fDynamicResChangeFactor;
FfxFloat32 fViewSpaceToMetersFactor;
};
#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
#endif
#if defined(FFX_GPU)
#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
#define FFX_FSR2_ROOTSIG_STR(p) #p
#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
"addressU = TEXTURE_ADDRESS_CLAMP, " \
"addressV = TEXTURE_ADDRESS_CLAMP, " \
"addressW = TEXTURE_ADDRESS_CLAMP, " \
"comparisonFunc = COMPARISON_NEVER, " \
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
"addressU = TEXTURE_ADDRESS_CLAMP, " \
"addressV = TEXTURE_ADDRESS_CLAMP, " \
"addressW = TEXTURE_ADDRESS_CLAMP, " \
"comparisonFunc = COMPARISON_NEVER, " \
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
"DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
"RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
"StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
"addressU = TEXTURE_ADDRESS_CLAMP, " \
"addressV = TEXTURE_ADDRESS_CLAMP, " \
"addressW = TEXTURE_ADDRESS_CLAMP, " \
"comparisonFunc = COMPARISON_NEVER, " \
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
"StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
"addressU = TEXTURE_ADDRESS_CLAMP, " \
"addressV = TEXTURE_ADDRESS_CLAMP, " \
"addressW = TEXTURE_ADDRESS_CLAMP, " \
"comparisonFunc = COMPARISON_NEVER, " \
"borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
#if defined(FFX_FSR2_EMBED_ROOTSIG)
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
#else
#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
#endif // #if FFX_FSR2_EMBED_ROOTSIG
#endif // #if defined(FFX_GPU)
/* Define getter functions in the order they are defined in the CB! */
FfxInt32x2 RenderSize()
{
return iRenderSize;
}
FfxInt32x2 MaxRenderSize()
{
return iMaxRenderSize;
}
FfxInt32x2 DisplaySize()
{
return iDisplaySize;
}
FfxInt32x2 InputColorResourceDimensions()
{
return iInputColorResourceDimensions;
}
FfxInt32x2 LumaMipDimensions()
{
return iLumaMipDimensions;
}
FfxInt32 LumaMipLevelToUse()
{
return iLumaMipLevelToUse;
}
FfxInt32 FrameIndex()
{
return iFrameIndex;
}
FfxFloat32x2 Jitter()
{
return fJitter;
}
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
{
return fDeviceToViewDepth;
}
FfxFloat32x2 MotionVectorScale()
{
return fMotionVectorScale;
}
FfxFloat32x2 DownscaleFactor()
{
return fDownscaleFactor;
}
FfxFloat32x2 MotionVectorJitterCancellation()
{
return fMotionVectorJitterCancellation;
}
FfxFloat32 PreExposure()
{
return fPreExposure;
}
FfxFloat32 PreviousFramePreExposure()
{
return fPreviousFramePreExposure;
}
FfxFloat32 TanHalfFoV()
{
return fTanHalfFOV;
}
FfxFloat32 JitterSequenceLength()
{
return fJitterSequenceLength;
}
FfxFloat32 DeltaTime()
{
return fDeltaTime;
}
FfxFloat32 DynamicResChangeFactor()
{
return fDynamicResChangeFactor;
}
FfxFloat32 ViewSpaceToMetersFactor()
{
return fViewSpaceToMetersFactor;
}
SamplerState s_PointClamp : register(s0);
SamplerState s_LinearClamp : register(s1);
// SRVs
#if defined(FFX_INTERNAL)
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
Texture2D<FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
// UAV declarations
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
RWTexture2D<FfxFloat32x4> rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
#else // #if defined(FFX_INTERNAL)
#if defined FSR2_BIND_SRV_INPUT_COLOR
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
#endif
#if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
Texture2D<FfxFloat32x4> r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
#endif
#if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS
Texture2D<FfxFloat32x4> r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
#endif
#if defined FSR2_BIND_SRV_INPUT_DEPTH
Texture2D<FfxFloat32> r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
#endif
#if defined FSR2_BIND_SRV_INPUT_EXPOSURE
Texture2D<FfxFloat32x2> r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
#endif
#if defined FSR2_BIND_SRV_AUTO_EXPOSURE
Texture2D<FfxFloat32x2> r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
#endif
#if defined FSR2_BIND_SRV_REACTIVE_MASK
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
#endif
#if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
#endif
#if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
#endif
#if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
#endif
#if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
Texture2D<FfxFloat32x2> r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
#endif
#if defined FSR2_BIND_SRV_DILATED_DEPTH
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
#endif
#if defined FSR2_BIND_SRV_INTERNAL_UPSCALED
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
#endif
#if defined FSR2_BIND_SRV_LOCK_STATUS
Texture2D<unorm FfxFloat32x2> r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
#endif
#if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA
Texture2D<FfxFloat32> r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
#endif
#if defined FSR2_BIND_SRV_NEW_LOCKS
Texture2D<unorm FfxFloat32> r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
#endif
#if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
#endif
#if defined FSR2_BIND_SRV_LUMA_HISTORY
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
#endif
#if defined FSR2_BIND_SRV_RCAS_INPUT
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
#endif
#if defined FSR2_BIND_SRV_LANCZOS_LUT
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
#endif
#if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
#endif
#if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
#endif
#if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
Texture2D<unorm FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
#endif
#if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR
Texture2D<float3> r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
#endif
#if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR
Texture2D<float3> r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
#endif
// UAV declarations
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
#endif
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
#endif
#if defined FSR2_BIND_UAV_DILATED_DEPTH
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
#endif
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
#endif
#if defined FSR2_BIND_UAV_LOCK_STATUS
RWTexture2D<unorm FfxFloat32x2> rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
#endif
#if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA
RWTexture2D<FfxFloat32> rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
#endif
#if defined FSR2_BIND_UAV_NEW_LOCKS
RWTexture2D<unorm FfxFloat32> rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
#endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
#endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY
RWTexture2D<FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
#endif
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
#endif
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
RWTexture2D<unorm FfxFloat32x2> rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
#endif
#if defined FSR2_BIND_UAV_EXPOSURE
RWTexture2D<FfxFloat32x2> rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
#endif
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
RWTexture2D<FfxFloat32x2> rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
#endif
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
globallycoherent RWTexture2D<FfxUInt32> rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
#endif
#if defined FSR2_BIND_UAV_AUTOREACTIVE
RWTexture2D<float> rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
#endif
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
RWTexture2D<float> rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION);
#endif
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
RWTexture2D<float3> rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR);
#endif
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
RWTexture2D<float3> rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
#endif
#endif // #if defined(FFX_INTERNAL)
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
{
return r_imgMips.mips[mipLevel][iPxPos];
}
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
{
return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
{
return r_input_depth[iPxPos];
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
{
return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
}
#endif
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
{
return r_reactive_mask[iPxPos];
}
#endif
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
{
return r_transparency_and_composition_mask[iPxPos];
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
{
return r_input_color_jittered[iPxPos].rgb;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
{
return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
}
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
{
return r_prepared_input_color[iPxPos].xyz;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
{
FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
fUvMotionVector -= MotionVectorJitterCancellation();
#endif
return fUvMotionVector;
}
#endif
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
{
return r_internal_upscaled_color[iPxHistory];
}
#endif
#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
{
rw_luma_history[iPxPos] = fLumaHistory;
}
#endif
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
{
return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
#if defined(FFX_INTERNAL)
FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
{
return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w;
}
#endif
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
rw_internal_upscaled_color[iPxHistory] = fHistory;
}
#endif
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
{
rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
}
#endif
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
{
rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
}
#endif
//LOCK_LIFETIME_REMAINING == 0
//Should make LockInitialLifetime() return a const 1.0f later
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
{
return r_lock_status[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
{
rw_lock_status[iPxPos] = fLockStatus;
}
#endif
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos)
{
return r_lock_input_luma[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma)
{
rw_lock_input_luma[iPxPos] = fLuma;
}
#endif
#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
{
return r_new_locks[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
{
return rw_new_locks[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
{
rw_new_locks[iPxPos] = newLock;
}
#endif
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
{
rw_prepared_input_color[iPxPos] = fTonemapped;
}
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
{
return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
}
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
{
FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
return fLockStatus;
}
#endif
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
{
return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]);
}
#endif
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
{
FfxUInt32 uDepth = asuint(fDepth);
#if FFX_FSR2_OPTION_INVERTED_DEPTH
InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth);
#else
InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth
#endif
}
#endif
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
{
rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
rw_dilatedDepth[iPxPos] = fDepth;
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
rw_dilated_motion_vectors[iPxPos] = fMotionVector;
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
{
return r_dilated_motion_vectors[iPxInput].xy;
}
#endif
#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
{
return r_previous_dilated_motion_vectors[iPxInput].xy;
}
FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
{
return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
{
return r_dilatedDepth[iPxInput];
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
FfxFloat32 Exposure()
{
FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x;
if (exposure == 0.0f) {
exposure = 1.0f;
}
return exposure;
}
#endif
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
FfxFloat32 AutoExposure()
{
FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;
if (exposure == 0.0f) {
exposure = 1.0f;
}
return exposure;
}
#endif
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0);
#else
return 0.f;
#endif
}
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
// Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
{
return r_dilated_reactive_masks[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
}
#endif
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_opaque_only[iPxPos].xyz;
}
#endif
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_prev_color_pre_alpha[iPxPos];
}
#endif
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_prev_color_post_alpha[iPxPos];
}
#endif
#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)
#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL)
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
rw_output_autoreactive[iPxPos] = fReactive.x;
rw_output_autocomposition[iPxPos] = fReactive.y;
}
#endif
#endif
#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
rw_output_prev_color_pre_alpha[iPxPos] = color;
}
#endif
#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
rw_output_prev_color_post_alpha[iPxPos] = color;
}
#endif
#endif // #if defined(FFX_GPU)

View File

@@ -0,0 +1,565 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#if !defined(FFX_FSR2_COMMON_H)
#define FFX_FSR2_COMMON_H
#if defined(FFX_CPU) || defined(FFX_GPU)
//Locks
#define LOCK_LIFETIME_REMAINING 0
#define LOCK_TEMPORAL_LUMA 1
#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
#if defined(FFX_GPU)
FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f;
FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f;
FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f;
FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX;
FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f;
FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f;
// treat vector truncation warnings as errors
#pragma warning(error: 3206)
// suppress warnings
#pragma warning(disable: 3205) // conversion from larger type to smaller
#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative
// Reconstructed depth usage
FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
// Accumulation
FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
// Auto exposure
FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
struct AccumulationPassCommonParams
{
FfxInt32x2 iPxHrPos;
FfxFloat32x2 fHrUv;
FfxFloat32x2 fLrUv_HwSampler;
FfxFloat32x2 fMotionVector;
FfxFloat32x2 fReprojectedHrUv;
FfxFloat32 fHrVelocity;
FfxFloat32 fDepthClipFactor;
FfxFloat32 fDilatedReactiveFactor;
FfxFloat32 fAccumulationMask;
FfxBoolean bIsResetFrame;
FfxBoolean bIsExistingSample;
FfxBoolean bIsNewSample;
};
struct LockState
{
FfxBoolean NewLock; //Set for both unique new and re-locked new
FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
};
void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus)
{
fLockStatus = FfxFloat32x2(0, 0);
}
#if FFX_HALF
void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus)
{
fLockStatus = FFX_MIN16_F2(0, 0);
}
#endif
void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus)
{
fLockStatus[LOCK_LIFETIME_REMAINING] = 0;
}
#if FFX_HALF
void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus)
{
fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0);
}
#endif
struct RectificationBox
{
FfxFloat32x3 boxCenter;
FfxFloat32x3 boxVec;
FfxFloat32x3 aabbMin;
FfxFloat32x3 aabbMax;
FfxFloat32 fBoxCenterWeight;
};
#if FFX_HALF
struct RectificationBoxMin16
{
FFX_MIN16_F3 boxCenter;
FFX_MIN16_F3 boxVec;
FFX_MIN16_F3 aabbMin;
FFX_MIN16_F3 aabbMax;
FFX_MIN16_F fBoxCenterWeight;
};
#endif
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
rectificationBox.fBoxCenterWeight = FfxFloat32(0);
rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0);
rectificationBox.boxVec = FfxFloat32x3(0, 0, 0);
rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
}
#if FFX_HALF
void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
{
rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0);
rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0);
rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
}
#endif
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
rectificationBox.aabbMin = colorSample;
rectificationBox.aabbMax = colorSample;
FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
rectificationBox.boxCenter = weightedSample;
rectificationBox.boxVec = colorSample * weightedSample;
rectificationBox.fBoxCenterWeight = fSampleWeight;
}
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
{
if (bInitialSample) {
RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
} else {
rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
rectificationBox.boxCenter += weightedSample;
rectificationBox.boxVec += colorSample * weightedSample;
rectificationBox.fBoxCenterWeight += fSampleWeight;
}
}
#if FFX_HALF
void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
{
rectificationBox.aabbMin = colorSample;
rectificationBox.aabbMax = colorSample;
FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
rectificationBox.boxCenter = weightedSample;
rectificationBox.boxVec = colorSample * weightedSample;
rectificationBox.fBoxCenterWeight = fSampleWeight;
}
void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
{
if (bInitialSample) {
RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
} else {
rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
rectificationBox.boxCenter += weightedSample;
rectificationBox.boxVec += colorSample * weightedSample;
rectificationBox.fBoxCenterWeight += fSampleWeight;
}
}
#endif
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
{
rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f));
rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
rectificationBox.boxVec = stdDev;
}
#if FFX_HALF
void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
{
rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f));
rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
rectificationBox.boxVec = stdDev;
}
#endif
FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
{
return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0);
}
#if FFX_HALF
FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v)
{
return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0);
}
#endif
FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
{
const FfxFloat32 m = ffxMax(v0, v1);
return m != 0 ? ffxMin(v0, v1) / m : 0;
}
#if FFX_HALF
FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
{
const FFX_MIN16_F m = ffxMax(v0, v1);
return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0);
}
#endif
FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
{
FfxFloat32x3 fRgb;
fRgb = FfxFloat32x3(
fYCoCg.x + fYCoCg.y - fYCoCg.z,
fYCoCg.x + fYCoCg.z,
fYCoCg.x - fYCoCg.y - fYCoCg.z);
return fRgb;
}
#if FFX_HALF
FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
{
FFX_MIN16_F3 fRgb;
fRgb = FFX_MIN16_F3(
fYCoCg.x + fYCoCg.y - fYCoCg.z,
fYCoCg.x + fYCoCg.z,
fYCoCg.x - fYCoCg.y - fYCoCg.z);
return fRgb;
}
#endif
FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
{
FfxFloat32x3 fYCoCg;
fYCoCg = FfxFloat32x3(
0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b,
0.5f * fRgb.r - 0.5f * fRgb.b,
-0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b);
return fYCoCg;
}
#if FFX_HALF
FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
{
FFX_MIN16_F3 fYCoCg;
fYCoCg = FFX_MIN16_F3(
0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,
0.5 * fRgb.r - 0.5 * fRgb.b,
-0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b);
return fYCoCg;
}
#endif
FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
{
return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f));
}
#if FFX_HALF
FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
{
return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f));
}
#endif
FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
{
FfxFloat32 fLuminance = RGBToLuma(fLinearRgb);
FfxFloat32 fPercievedLuminance = 0;
if (fLuminance <= 216.0f / 24389.0f) {
fPercievedLuminance = fLuminance * (24389.0f / 27.0f);
}
else {
fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f;
}
return fPercievedLuminance * 0.01f;
}
#if FFX_HALF
FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
{
FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb);
FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0);
if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) {
fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f);
}
else {
fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f);
}
return fPercievedLuminance * FFX_MIN16_F(0.01f);
}
#endif
FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
{
return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx;
}
FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
{
return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
}
#if FFX_HALF
FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
{
return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx;
}
FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
{
return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
}
#endif
FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
FfxInt32x2 result = iPxSample + iPxOffset;
result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x;
result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x;
result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y;
result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y;
return result;
// return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1));
}
#if FFX_HALF
FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
FFX_MIN16_I2 result = iPxSample + iPxOffset;
result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
return result;
// return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1));
}
#endif
FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
{
const FfxFloat32x2 fSampleLocation = fUv * iTextureSize;
const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f)));
const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize);
return fClampedUv;
}
FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
{
return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size)));
}
#if FFX_HALF
FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
{
return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size)));
}
#endif
FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
{
Lavg = exp(Lavg);
const FfxFloat32 S = 100.0f; //ISO arithmetic speed
const FfxFloat32 K = 12.5f;
FfxFloat32 ExposureISO100 = log2((Lavg * S) / K);
const FfxFloat32 q = 0.65f;
FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100);
return 1 / Lmax;
}
#if FFX_HALF
FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
{
Lavg = exp(Lavg);
const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed
const FFX_MIN16_F K = FFX_MIN16_F(12.5f);
const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K);
const FFX_MIN16_F q = FFX_MIN16_F(0.65f);
const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100);
return FFX_MIN16_F(1) / Lmax;
}
#endif
FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
{
FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize();
FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr));
return iPxHrPos;
}
#if FFX_HALF
FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
{
FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr));
return iPxHrPos;
}
#endif
FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
{
return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
}
FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
{
const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
// fDeviceToViewDepth details found in ffx_fsr2.cpp
return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0]));
}
FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
{
return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor();
}
FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
{
const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth);
const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize);
const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z;
const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z;
return FfxFloat32x3(X, Y, Z);
}
FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
{
return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor();
}
FfxFloat32 GetMaxDistanceInMeters()
{
#if FFX_FSR2_OPTION_INVERTED_DEPTH
return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor();
#else
return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor();
#endif
}
FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
{
fRgb /= fPreExposure;
fRgb *= fExposure;
fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX);
return fRgb;
}
FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
{
fRgb /= fExposure;
fRgb *= PreExposure();
return fRgb;
}
struct BilinearSamplingData
{
FfxInt32x2 iOffsets[4];
FfxFloat32 fWeights[4];
FfxInt32x2 iBasePos;
};
BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
{
BilinearSamplingData data;
FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
data.iBasePos = FfxInt32x2(floor(fPxSample));
FfxFloat32x2 fPxFrac = ffxFract(fPxSample);
data.iOffsets[0] = FfxInt32x2(0, 0);
data.iOffsets[1] = FfxInt32x2(1, 0);
data.iOffsets[2] = FfxInt32x2(0, 1);
data.iOffsets[3] = FfxInt32x2(1, 1);
data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y);
data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y);
data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y);
data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y);
return data;
}
struct PlaneData
{
FfxFloat32x3 fNormal;
FfxFloat32 fDistanceFromOrigin;
};
PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
{
PlaneData plane;
FfxFloat32x3 v0 = fP0 - fP1;
FfxFloat32x3 v1 = fP0 - fP2;
plane.fNormal = normalize(cross(v0, v1));
plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal);
return plane;
}
FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
{
return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin);
}
#endif // #if defined(FFX_GPU)
#endif //!defined(FFX_FSR2_COMMON_H)

View File

@@ -0,0 +1,189 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
FFX_GROUPSHARED FfxUInt32 spdCounter;
#ifndef SPD_PACKED_ONLY
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
{
FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize();
fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions());
FfxFloat32x3 fRgb = SampleInputColor(fUv);
fRgb /= PreExposure();
//compute log luma
const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)));
// Make sure out of screen pixels contribute no value to the end result
const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f;
return FfxFloat32x4(result, 0, 0, 0);
}
FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
{
return SPD_LoadMipmap5(tex);
}
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
{
if (index == LumaMipLevelToUse() || index == 5)
{
SPD_SetMipmap(pix, index, outValue.r);
}
if (index == MipCount() - 1) { //accumulate on 1x1 level
if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
{
FfxFloat32 prev = SPD_LoadExposureBuffer().y;
FfxFloat32 result = outValue.r;
if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
{
FfxFloat32 rate = 1.0f;
result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate));
}
FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result);
SPD_SetExposureBuffer(spdOutput);
}
}
}
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
SPD_IncreaseAtomicCounter(spdCounter);
}
FfxUInt32 SpdGetAtomicCounter()
{
return spdCounter;
}
void SpdResetAtomicCounter(FfxUInt32 slice)
{
SPD_ResetAtomicCounter();
}
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
return FfxFloat32x4(
spdIntermediateR[x][y],
spdIntermediateG[x][y],
spdIntermediateB[x][y],
spdIntermediateA[x][y]);
}
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
spdIntermediateR[x][y] = value.x;
spdIntermediateG[x][y] = value.y;
spdIntermediateB[x][y] = value.z;
spdIntermediateA[x][y] = value.w;
}
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
return (v0 + v1 + v2 + v3) * 0.25f;
}
#endif
// define fetch and store functions Packed
#if FFX_HALF
#error Callback must be implemented
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
{
return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);
}
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
{
return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);
}
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
if (index == LumaMipLevelToUse() || index == 5)
{
imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
return;
}
imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
}
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
}
FfxUInt32 SpdGetAtomicCounter()
{
return spdCounter;
}
void SpdResetAtomicCounter(FfxUInt32 slice)
{
rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
}
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
return FfxFloat16x4(
spdIntermediateRG[x][y].x,
spdIntermediateRG[x][y].y,
spdIntermediateBA[x][y].x,
spdIntermediateBA[x][y].y);
}
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
spdIntermediateRG[x][y] = value.xy;
spdIntermediateBA[x][y] = value.zw;
}
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
}
#endif
#include "ffx_spd.h"
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
#if FFX_HALF
SpdDownsampleH(
FfxUInt32x2(WorkGroupId.xy),
FfxUInt32(LocalThreadIndex),
FfxUInt32(MipCount()),
FfxUInt32(NumWorkGroups()),
FfxUInt32(WorkGroupId.z),
FfxUInt32x2(WorkGroupOffset()));
#else
SpdDownsample(
FfxUInt32x2(WorkGroupId.xy),
FfxUInt32(LocalThreadIndex),
FfxUInt32(MipCount()),
FfxUInt32(NumWorkGroups()),
FfxUInt32(WorkGroupId.z),
FfxUInt32x2(WorkGroupOffset()));
#endif
}

View File

@@ -0,0 +1,134 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_INPUT_COLOR 0
#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1
#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2
#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3
#define FSR2_BIND_UAV_AUTO_EXPOSURE 4
#define FSR2_BIND_CB_FSR2 5
#define FSR2_BIND_CB_SPD 6
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#if defined(FSR2_BIND_CB_SPD)
layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
{
uint mips;
uint numWorkGroups;
uvec2 workGroupOffset;
uvec2 renderSize;
} cbSPD;
uint MipCount()
{
return cbSPD.mips;
}
uint NumWorkGroups()
{
return cbSPD.numWorkGroups;
}
uvec2 WorkGroupOffset()
{
return cbSPD.workGroupOffset;
}
uvec2 SPD_RenderSize()
{
return cbSPD.renderSize;
}
#endif
vec2 SPD_LoadExposureBuffer()
{
return imageLoad(rw_auto_exposure, ivec2(0,0)).xy;
}
void SPD_SetExposureBuffer(vec2 value)
{
imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f));
}
vec4 SPD_LoadMipmap5(ivec2 iPxPos)
{
return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
}
void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
{
switch (slice)
{
case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
break;
case 5:
imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
break;
default:
// avoid flattened side effect
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
#endif
break;
}
}
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
void SPD_ResetAtomicCounter()
{
imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0));
}
#include "ffx_fsr2_compute_luminance_pyramid.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 256
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex);
}

View File

@@ -0,0 +1,258 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_DEPTH_CLIP_H
#define FFX_FSR2_DEPTH_CLIP_H
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
{
FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
FfxFloat32 fDilatedSum = 0.0f;
FfxFloat32 fDepth = 0.0f;
FfxFloat32 fWeightSum = 0.0f;
for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
if (IsOnScreen(iSamplePos, RenderSize())) {
const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
if (fDepthDiff > 0.0f) {
#if FFX_FSR2_OPTION_INVERTED_DEPTH
const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
#else
const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
#endif
const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
const FfxFloat32 Ksep = 1.37e-05f;
const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f)));
const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
fWeightSum += fWeight;
}
}
}
}
return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
}
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
FfxFloat32 minconvergence = 1.0f;
FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);
const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;
if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
for (FfxInt32 y = -1; y <= 1; ++y) {
for (FfxInt32 x = -1; x <= 1; ++x) {
FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize);
FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp);
FfxFloat32 fVelocityUv = length(fMotionVector);
fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
}
}
}
return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
}
FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
{
const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
FfxFloat32 fDepthMax = 0.0f;
FfxFloat32 fDepthMin = fMaxDistInMeters;
FfxInt32 iMaxDistFound = 0;
for (FfxInt32 y = -1; y < 2; y++) {
for (FfxInt32 x = -1; x < 2; x++) {
const FfxInt32x2 iOffset = FfxInt32x2(x, y);
const FfxInt32x2 iSamplePos = iPxPos + iOffset;
const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f;
FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor;
iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth);
fDepthMin = ffxMin(fDepthMin, fDepth);
fDepthMax = ffxMax(fDepthMax, fDepth);
}
}
return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f);
}
FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
{
const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();
FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize());
FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);
float fPxDistance = length(fMotionVector * DisplaySize());
return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0;
}
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
// Compensate for bilinear sampling in accumulation pass
FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);
float fMasksSum = 0.0f;
FfxFloat32x3 fColorSamples[9];
FfxFloat32 fReactiveSamples[9];
FfxFloat32 fTransparencyAndCompositionSamples[9];
FFX_UNROLL
for (FfxInt32 y = -1; y < 2; y++) {
FFX_UNROLL
for (FfxInt32 x = -1; x < 2; x++) {
const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);
fColorSamples[sampleIdx] = fColorSample;
fReactiveSamples[sampleIdx] = fReactiveSample;
fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;
fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
}
}
if (fMasksSum > 0)
{
for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
{
FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];
const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;
// Increase power for non-similar samples
const FfxFloat32 fPowerBiasMax = 6.0f;
const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);
fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
}
}
StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
}
FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
{
//We assume linear data. if non-linear input (sRGB, ...),
//then we should convert to linear first and back to sRGB on output.
FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
fRgb = PrepareRgb(fRgb, Exposure(), PreExposure());
const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb);
return fPreparedYCoCg;
}
FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
{
FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));
return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f)));
}
void DepthClip(FfxInt32x2 iPxPos)
{
FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
// Discard tiny mvs
fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);
const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos));
// Compute prepared input color and depth clip
FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));
// Compute dilated reactive mask
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
FfxInt32x2 iSamplePos = iPxPos;
#else
FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
#endif
FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));
PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}
#endif //!defined( FFX_FSR2_DEPTH_CLIPH )

View File

@@ -0,0 +1,66 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1
#define FSR2_BIND_SRV_DILATED_DEPTH 2
#define FSR2_BIND_SRV_REACTIVE_MASK 3
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4
#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7
#define FSR2_BIND_SRV_INPUT_COLOR 8
#define FSR2_BIND_SRV_INPUT_DEPTH 9
#define FSR2_BIND_SRV_INPUT_EXPOSURE 10
#define FSR2_BIND_UAV_DEPTH_CLIP 11
#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13
#define FSR2_BIND_CB_FSR2 14
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#include "ffx_fsr2_sample.h"
#include "ffx_fsr2_depth_clip.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
DepthClip(ivec2(gl_GlobalInvocationID.xy));
}

View File

@@ -0,0 +1 @@
// This file doesn't exist in this version of FSR.

View File

@@ -0,0 +1 @@
// This file doesn't exist in this version of FSR.

View File

@@ -0,0 +1,115 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_LOCK_H
#define FFX_FSR2_LOCK_H
void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
{
if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize()))))
{
#if FFX_FSR2_OPTION_INVERTED_DEPTH
const FfxUInt32 farZ = 0x0;
#else
const FfxUInt32 farZ = 0x3f800000;
#endif
SetReconstructedDepth(iPxHrPos, farZ);
}
}
FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
{
const FfxInt32 RADIUS = 1;
FfxFloat32 fNucleus = LoadLockInputLuma(pos);
FfxFloat32 similar_threshold = 1.05f;
FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
FfxFloat32 dissimilarLumaMax = 0;
/*
0 1 2
3 4 5
6 7 8
*/
#define SETBIT(x) (1U << x)
FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar
const FfxUInt32 uNumRejectionMasks = 4;
const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
};
FfxInt32 idx = 0;
FFX_UNROLL
for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
FFX_UNROLL
for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
if (x == 0 && y == 0) continue;
FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);
if (difference > 0 && (difference < similar_threshold)) {
mask |= SETBIT(idx);
} else {
dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
}
}
}
FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;
if (FFX_FALSE == isRidge) {
return false;
}
FFX_UNROLL
for (FfxInt32 i = 0; i < 4; i++) {
if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
return false;
}
}
return true;
}
void ComputeLock(FfxInt32x2 iPxLrPos)
{
if (ComputeThinFeatureConfidence(iPxLrPos))
{
StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
}
ClearResourcesForNextFrame(iPxLrPos);
}
#endif // FFX_FSR2_LOCK_H

View File

@@ -0,0 +1,56 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0
#define FSR2_BIND_UAV_NEW_LOCKS 1
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2
#define FSR2_BIND_CB_FSR2 3
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#include "ffx_fsr2_sample.h"
#include "ffx_fsr2_lock.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
uvec2 uDispatchThreadId = gl_WorkGroupID.xy * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
ComputeLock(ivec2(uDispatchThreadId));
}

View File

@@ -0,0 +1,106 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
{
return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
}
#if FFX_HALF
FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
{
return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
}
#endif
#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
#else
DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
#endif
DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples)
FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
{
FfxFloat32 fShadingChangeLuma = 0;
#if 0
fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
#else
const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse());
FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv);
fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions());
fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse())));
#endif
fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f);
return fShadingChangeLuma;
}
void UpdateLockStatus(AccumulationPassCommonParams params,
FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) {
const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
//init temporal shading change factor, init to -1 or so in reproject to know if "true new"?
fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA];
FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);
if (state.NewLock) {
fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
}
else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f);
}
else {
if (fLuminanceDiff > 0.1f) {
KillLock(fLockStatus);
}
}
fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
// Compute this frame lock contribution
const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution);
}
#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H )

View File

@@ -0,0 +1,67 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#define GROUP_SIZE 8
#define FSR_RCAS_DENOISE 1
void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
{
StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor);
}
#define FSR_RCAS_F
FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
{
FfxFloat32x4 fColor = LoadRCAS_Input(p);
fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure());
return fColor;
}
void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {}
#include "ffx_fsr1.h"
void CurrFilter(FFX_MIN16_U2 pos)
{
FfxFloat32x3 c;
FsrRcasF(c.r, c.g, c.b, pos, RCASConfig());
c = UnprepareRgb(c, Exposure());
WriteUpscaledOutput(pos, c);
}
void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
CurrFilter(FFX_MIN16_U2(gxy));
gxy.x += 8u;
CurrFilter(FFX_MIN16_U2(gxy));
gxy.y += 8u;
CurrFilter(FFX_MIN16_U2(gxy));
gxy.x -= 8u;
CurrFilter(FFX_MIN16_U2(gxy));
}

View File

@@ -0,0 +1,80 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
// Needed for rw_upscaled_output declaration
#extension GL_EXT_shader_image_load_formatted : require
#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
#define FSR2_BIND_SRV_RCAS_INPUT 1
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2
#define FSR2_BIND_CB_FSR2 3
#define FSR2_BIND_CB_RCAS 4
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
//Move to prototype shader!
#if defined(FSR2_BIND_CB_RCAS)
layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
{
uvec4 rcasConfig;
} cbRCAS;
uvec4 RCASConfig()
{
return cbRCAS.rcasConfig;
}
#else
uvec4 RCASConfig()
{
return uvec4(0);
}
#endif
vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
{
return texelFetch(r_rcas_input, iPxPos, 0);
}
#include "ffx_fsr2_rcas.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 64
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz);
}

View File

@@ -0,0 +1,145 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
{
fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
// Project current depth into previous frame locations.
// Push to all pixels having some contribution if reprojection is using bilinear logic.
for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
if (IsOnScreen(iStorePos, iPxDepthSize)) {
StoreReconstructedDepth(iStorePos, fDepth);
}
}
}
}
void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
{
const FfxInt32 iSampleCount = 9;
const FfxInt32x2 iSampleOffsets[iSampleCount] = {
FfxInt32x2(+0, +0),
FfxInt32x2(+1, +0),
FfxInt32x2(+0, +1),
FfxInt32x2(+0, -1),
FfxInt32x2(-1, +0),
FfxInt32x2(-1, +1),
FfxInt32x2(+1, +1),
FfxInt32x2(-1, -1),
FfxInt32x2(+1, -1),
};
// pull out the depth loads to allow SC to batch them
FfxFloat32 depth[9];
FfxInt32 iSampleIndex = 0;
FFX_UNROLL
for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
depth[iSampleIndex] = LoadInputDepth(iPos);
}
// find closest depth
fNearestDepthCoord = iPxPos;
fNearestDepth = depth[0];
FFX_UNROLL
for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
if (IsOnScreen(iPos, iPxSize)) {
FfxFloat32 fNdDepth = depth[iSampleIndex];
#if FFX_FSR2_OPTION_INVERTED_DEPTH
if (fNdDepth > fNearestDepth) {
#else
if (fNdDepth < fNearestDepth) {
#endif
fNearestDepthCoord = iPos;
fNearestDepth = fNdDepth;
}
}
}
}
FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
{
//We assume linear data. if non-linear input (sRGB, ...),
//then we should convert to linear first and back to sRGB on output.
FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
// Use internal auto exposure for locking logic
fRgb /= PreExposure();
fRgb *= Exposure();
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
fRgb = Tonemap(fRgb);
#endif
//compute luma used to lock pixels, if used elsewhere the ffxPow must be moved!
const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0));
return fLockInputLuma;
}
void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
{
FfxFloat32 fDilatedDepth;
FfxInt32x2 iNearestDepthCoord;
FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
FfxInt32x2 iSamplePos = iPxLrPos;
FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
#else
FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos);
FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
#endif
FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
StoreDilatedDepth(iPxLrPos, fDilatedDepth);
StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);
ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
StoreLockInputLuma(iPxLrPos, fLockInputLuma);
}
#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )

View File

@@ -0,0 +1,65 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0
#define FSR2_BIND_SRV_INPUT_DEPTH 1
#define FSR2_BIND_SRV_INPUT_COLOR 2
#define FSR2_BIND_SRV_INPUT_EXPOSURE 3
#define FSR2_BIND_SRV_LUMA_HISTORY 4
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5
#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6
#define FSR2_BIND_UAV_DILATED_DEPTH 7
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8
#define FSR2_BIND_UAV_LUMA_HISTORY 9
#define FSR2_BIND_UAV_LUMA_INSTABILITY 10
#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11
#define FSR2_BIND_CB_FSR2 12
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#include "ffx_fsr2_sample.h"
#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
ReconstructAndDilate(FFX_MIN16_I2(gl_GlobalInvocationID.xy));
}

View File

@@ -0,0 +1,136 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_REPROJECT_H
#define FFX_FSR2_REPROJECT_H
#ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
#endif
FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
{
return LoadHistory(iPxSample);
}
#if FFX_HALF
FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
{
return FFX_MIN16_F4(LoadHistory(iPxSample));
}
#endif
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
#else
DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
#endif
FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
{
FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
return fSample;
}
#if FFX_HALF
FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
{
FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
return fSample;
}
#endif
#if 1
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
#else
DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
#endif
#else
#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
#else
DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
#endif
#endif
FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
{
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize()));
#else
FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos);
#endif
return fDilatedMotionVector;
}
FfxBoolean IsUvInside(FfxFloat32x2 fUv)
{
return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f);
}
void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
{
fReprojectedHrUv = params.fHrUv + params.fMotionVector;
bIsExistingSample = IsUvInside(fReprojectedHrUv);
}
void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
{
FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure());
fHistoryColor = RGBToYCoCg(fHistoryColor);
//Compute temporal reactivity info
fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w));
bInMotionLastFrame = (fHistory.w < 0.0f);
}
LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
{
LockState state = { FFX_FALSE, FFX_FALSE };
const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0;
fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
state.WasLockedPrevFrame = true;
}
return state;
}
#endif //!defined( FFX_FSR2_REPROJECT_H )

View File

@@ -0,0 +1,105 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_RESOURCES_H
#define FFX_FSR2_RESOURCES_H
#if defined(FFX_CPU) || defined(FFX_GPU)
#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6
#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7
#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12
#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15
#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16
#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17
#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18
#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19
#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26
#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27
#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44
#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION 45
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51
#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 53
#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 54
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 55
#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 56
#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 57
// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE)
#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 58
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2
#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2
#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4
#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8
#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
#endif //!defined( FFX_FSR2_RESOURCES_H )

View File

@@ -0,0 +1,605 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_SAMPLE_H
#define FFX_FSR2_SAMPLE_H
// suppress warnings
#ifdef FFX_HLSL
#pragma warning(disable: 4008) // potentially divide by zero
#endif //FFX_HLSL
struct FetchedBilinearSamples {
FfxFloat32x4 fColor00;
FfxFloat32x4 fColor10;
FfxFloat32x4 fColor01;
FfxFloat32x4 fColor11;
};
struct FetchedBicubicSamples {
FfxFloat32x4 fColor00;
FfxFloat32x4 fColor10;
FfxFloat32x4 fColor20;
FfxFloat32x4 fColor30;
FfxFloat32x4 fColor01;
FfxFloat32x4 fColor11;
FfxFloat32x4 fColor21;
FfxFloat32x4 fColor31;
FfxFloat32x4 fColor02;
FfxFloat32x4 fColor12;
FfxFloat32x4 fColor22;
FfxFloat32x4 fColor32;
FfxFloat32x4 fColor03;
FfxFloat32x4 fColor13;
FfxFloat32x4 fColor23;
FfxFloat32x4 fColor33;
};
#if FFX_HALF
struct FetchedBilinearSamplesMin16 {
FFX_MIN16_F4 fColor00;
FFX_MIN16_F4 fColor10;
FFX_MIN16_F4 fColor01;
FFX_MIN16_F4 fColor11;
};
struct FetchedBicubicSamplesMin16 {
FFX_MIN16_F4 fColor00;
FFX_MIN16_F4 fColor10;
FFX_MIN16_F4 fColor20;
FFX_MIN16_F4 fColor30;
FFX_MIN16_F4 fColor01;
FFX_MIN16_F4 fColor11;
FFX_MIN16_F4 fColor21;
FFX_MIN16_F4 fColor31;
FFX_MIN16_F4 fColor02;
FFX_MIN16_F4 fColor12;
FFX_MIN16_F4 fColor22;
FFX_MIN16_F4 fColor32;
FFX_MIN16_F4 fColor03;
FFX_MIN16_F4 fColor13;
FFX_MIN16_F4 fColor23;
FFX_MIN16_F4 fColor33;
};
#else //FFX_HALF
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
#endif //FFX_HALF
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
return A + (B - A) * t;
}
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
return fColorXY;
}
#if FFX_HALF
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
{
return A + (B - A) * t;
}
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
{
FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
return fColorXY;
}
#endif
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x));
}
FfxFloat32 Lanczos2(FfxFloat32 x)
{
x = ffxMin(abs(x), 2.0f);
return Lanczos2NoClamp(x);
}
#if FFX_HALF
#if 0
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
{
const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
}
#endif
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
x = ffxMin(abs(x), FFX_MIN16_F(2.0f));
return FFX_MIN16_F(Lanczos2NoClamp(x));
}
#endif //FFX_HALF
// FSR1 lanczos approximation. Input is x*x and must be <= 4.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
FfxFloat32 a = (2.0f / 5.0f) * x2 - 1;
FfxFloat32 b = (1.0f / 4.0f) * x2 - 1;
return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b);
}
#if FFX_HALF
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
{
FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b);
}
#endif //FFX_HALF
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
x2 = ffxMin(x2, 4.0f);
return Lanczos2ApproxSqNoClamp(x2);
}
#if FFX_HALF
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
{
x2 = ffxMin(x2, FFX_MIN16_F(4.0f));
return Lanczos2ApproxSqNoClamp(x2);
}
#endif //FFX_HALF
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
return Lanczos2ApproxSqNoClamp(x * x);
}
#if FFX_HALF
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
{
return Lanczos2ApproxSqNoClamp(x * x);
}
#endif //FFX_HALF
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
return Lanczos2ApproxSq(x * x);
}
#if FFX_HALF
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
{
return Lanczos2ApproxSq(x * x);
}
#endif //FFX_HALF
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
return SampleLanczos2Weight(abs(x));
}
#if FFX_HALF
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
return FFX_MIN16_F(SampleLanczos2Weight(abs(x)));
}
#endif //FFX_HALF
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t);
FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t);
FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t);
FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#if FFX_HALF
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#endif
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
FfxFloat32 fWeight0 = Lanczos2(-1.f - t);
FfxFloat32 fWeight1 = Lanczos2(-0.f - t);
FfxFloat32 fWeight2 = Lanczos2(+1.f - t);
FfxFloat32 fWeight3 = Lanczos2(+2.f - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FfxFloat32x4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#if FFX_HALF
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FFX_MIN16_F4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
{
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#endif //FFX_HALF
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FfxFloat32x4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#if FFX_HALF
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FFX_MIN16_F4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
{
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#endif //FFX_HALF
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t);
FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t);
FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t);
FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#if FFX_HALF
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#endif //FFX_HALF
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FfxFloat32x4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
{
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#if FFX_HALF
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
// Deringing
// TODO: only use 4 by checking jitter
const FfxInt32 iDeringingSampleCount = 4;
const FFX_MIN16_F4 fDeringingSamples[4] = {
Samples.fColor11,
Samples.fColor21,
Samples.fColor12,
Samples.fColor22,
};
FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
{
fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
}
fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
return fColorXY;
}
#endif
// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
FfxInt32x2 result = iPxSample + iPxOffset;
result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x;
result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x;
result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y;
result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y;
return result;
}
#if FFX_HALF
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
FFX_MIN16_I2 result = iPxSample + iPxOffset;
result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
return result;
}
#endif //FFX_HALF
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
{ \
SampleType Samples; \
\
Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
\
Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
\
Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
\
Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
\
return Samples; \
}
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \
SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
{ \
SampleType Samples; \
Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
return Samples; \
}
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
// is common, so iPxSample can "jitter"
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
{ \
FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
/* Clamp base coords */ \
fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
/* */ \
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
return fColorXY; \
}
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
{ \
FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
/* Clamp base coords */ \
fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
/* */ \
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
return fColorXY; \
}
#define FFX_FSR2_CONCAT_ID(x, y) x ## y
#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
#endif //!defined( FFX_FSR2_SAMPLE_H )

View File

@@ -0,0 +1,250 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#define USE_YCOCG 1
#define fAutogenEpsilon 0.01f
// EXPERIMENTAL
FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
#if USE_YCOCG
colorPreAlpha = RGBToYCoCg(colorPreAlpha);
colorPostAlpha = RGBToYCoCg(colorPostAlpha);
colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif
FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha;
FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha;
bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
FfxFloat32x3 X = colorPreAlpha;
FfxFloat32x3 Y = colorPostAlpha;
FfxFloat32x3 Z = colorPrevPreAlpha;
FfxFloat32x3 W = colorPrevPostAlpha;
FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1))));
// cleanup very small values
retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);
return retVal;
}
// works ok: thin edges
FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
#if USE_YCOCG
colorPreAlpha = RGBToYCoCg(colorPreAlpha);
colorPostAlpha = RGBToYCoCg(colorPostAlpha);
colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif
FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha;
FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha =
FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha;
FfxFloat32x3 X = colorPrevPreAlpha;
FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;
FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha;
FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;
FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0);
FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );
// only pixels that have significantly changed in color shuold be considered
retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) );
return retVal;
}
// This function computes the TransparencyAndComposition mask:
// This mask indicates pixels that should discard locks and apply color clamping.
//
// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of
// the MVs can not be guaranteed (e.g. procedutal movement or vegetation that does not have MVs to reduce the cost during rasterization)
// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
//
// This function takes a opaque only and a final texture and uses internal copies of those textures from the last frame.
// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
//
// In the final step it stores the current textures in internal textures for the next frame
FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);
// [branch]
if (retVal > FFX_MIN16_F(0.01f))
{
retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
}
return retVal;
}
float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
float lum[9];
int i = 0;
for (int y = -1; y < 2; ++y)
{
for (int x = -1; x < 2; ++x)
{
FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb;
FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb;
lum[i++] = length(curCol - prevCol);
}
}
//float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
//float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
//return sqrt(gradX * gradX + gradY * gradY);
float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]);
float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]);
return sqrt(sqrt(gradX * gradY));
}
float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
float lum[9];
int i = 0;
for (int y = -1; y < 2; ++y)
{
for (int x = -1; x < 2; ++x)
{
FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb);
FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb);
lum[i++] = length(curCol - prevCol);
}
}
//float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
//float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
//return sqrt(gradX * gradX + gradY * gradY);
float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]);
float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]);
return sqrt(sqrt(gradX * gradY));
}
FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
FFX_MIN16_F retVal = FFX_MIN16_F(0.f);
FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId);
FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
#if USE_YCOCG
colorPreAlpha = RGBToYCoCg(colorPreAlpha);
colorPostAlpha = RGBToYCoCg(colorPostAlpha);
colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif
FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f);
FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f);
for (int y = -1; y < 2; ++y)
{
for (int x = -1; x < 2; ++x)
{
FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));
#if USE_YCOCG
W = RGBToYCoCg(W);
#endif
minPrev = min(minPrev, W);
maxPrev = max(maxPrev, W);
}
}
// instead of computing the overlap: simply count how many samples are outside
// set reactive based on that
FFX_MIN16_F count = FFX_MIN16_F(0.f);
for (int y = -1; y < 2; ++y)
{
for (int x = -1; x < 2; ++x)
{
FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));
#if USE_YCOCG
Y = RGBToYCoCg(Y);
#endif
count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
}
}
retVal = count / FFX_MIN16_F(27.f);
return retVal;
}
// This function computes the Reactive mask:
// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
// As a result history would not be trustworthy.
// On the other hand we don't want pixels marked where pre-alpha has a large differnce, since those would profit from accumulation
// For mirrors we may assume the pre-alpha is pretty uniform color.
//
// This works well generally, but also marks edge pixels
FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
// we only get here if alpha has a significant contribution and has changed since last frame.
FFX_MIN16_F retVal = FFX_MIN16_F(0.f);
// mark pixels with huge variance in alpha as reactive
FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));
retVal = ffxSaturate(alphaEdge - opaqueEdge);
// the above also marks edge pixels due to jitter, so we need to cancel those out
return retVal;
}

View File

@@ -0,0 +1,120 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//#version 450
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require
#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
#define FSR2_BIND_SRV_INPUT_COLOR 1
#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3
#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4
#define FSR2_BIND_SRV_REACTIVE_MASK 5
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6
#define FSR2_BIND_UAV_AUTOREACTIVE 7
#define FSR2_BIND_UAV_AUTOCOMPOSITION 8
#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9
#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10
#define FSR2_BIND_CB_FSR2 11
#define FSR2_BIND_CB_REACTIVE 12
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
#define FSR2_BIND_SRV_INPUT_DEPTH 13
#endif
#include "ffx_fsr2_callbacks_glsl.h"
#include "ffx_fsr2_common.h"
#ifdef FSR2_BIND_CB_REACTIVE
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
{
float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
float fTcScale;
float fReactiveScale;
float fReactiveMax;
} cbGenerateReactive;
float getTcThreshold()
{
return cbGenerateReactive.fTcThreshold;
}
#else
float getTcThreshold()
{
return 0.05f;
}
#endif
#include "ffx_fsr2_tcr_autogen.h"
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
#ifndef FFX_FSR2_NUM_THREADS
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #ifndef FFX_FSR2_NUM_THREADS
FFX_FSR2_NUM_THREADS
void main()
{
FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
// ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV
// fetch pre- and post-alpha color values
FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() );
FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) );
FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f);
FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) );
FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) );
FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f );
outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx);
if (outReactiveMask.y > 0.5f)
{
outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx);
outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale);
outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax );
}
outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale);
outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId)));
outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId)));
StoreAutoReactive(uDispatchThreadId, outReactiveMask);
StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha);
StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha);
}

View File

@@ -0,0 +1,194 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_FSR2_UPSAMPLE_H
#define FFX_FSR2_UPSAMPLE_H
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16;
void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
{
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
#if FFX_HALF
void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
{
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
#endif
#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
#endif
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
{
FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
return fSampleWeight;
}
#if FFX_HALF
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
// To Test: Save reciproqual sqrt compute
// FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
return fSampleWeight;
}
#endif
FfxFloat32 ComputeMaxKernelWeight() {
const FfxFloat32 fKernelSizeBias = 1.0f;
FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias);
return ffxMin(FfxFloat32(1.99f), fKernelWeight);
}
FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
{
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
#endif
// We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly)
FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position
FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position
FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors...
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif
FfxFloat32x3 fSamples[iLanczos2SampleCount];
FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0
FfxInt32x2 offsetTL;
offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);
//Load samples
// If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3].
// Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox.
// This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values.
const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;
FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);
FFX_UNROLL
for (FfxInt32 row = 0; row < 3; row++) {
FFX_UNROLL
for (FfxInt32 col = 0; col < 3; col++) {
FfxInt32 iSampleIndex = col + (row << 2);
FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;
const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));
fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
}
}
FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);
FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);
// Identify how much of each upsampled color to be used for this frame
const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);
const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);
const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));
FFX_UNROLL
for (FfxInt32 row = 0; row < 3; row++) {
FFX_UNROLL
for (FfxInt32 col = 0; col < 3; col++) {
FfxInt32 iSampleIndex = col + (row << 2);
const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;
const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize())));
FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);
// Update rectification box
{
const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);
const FfxBoolean bInitialSample = (row == 0) && (col == 0);
RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
}
}
}
RectificationBoxComputeVarianceBoxData(clippingBox);
fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON);
if (fColorAndWeight.w > FSR2_EPSILON) {
// Normalize for deringing (we need to compare colors)
fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
fColorAndWeight.w *= fUpsampleLanczosWeightScale;
Deringing(clippingBox, fColorAndWeight.xyz);
}
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif
return fColorAndWeight;
}
#endif //!defined( FFX_FSR2_UPSAMPLE_H )

936
thirdparty/amd-fsr2/shaders/ffx_spd.h vendored Normal file
View File

@@ -0,0 +1,936 @@
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifdef FFX_CPU
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant
FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant
FfxUInt32x4 rectInfo, // left, top, width, height
FfxInt32 mips) // optional: if -1, calculate based on rect width and height
{
workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left
workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top
FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width
FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height
dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0];
dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1];
numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);
if (mips >= 0)
{
numWorkGroupsAndMips[1] = FfxUInt32(mips);
}
else
{
// calculate based on rect width and height
FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]);
numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12))));
}
}
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy
FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant
FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant
FfxUInt32x4 rectInfo) // left, top, width, height
{
SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1);
}
#endif // #ifdef FFX_CPU
//==============================================================================================================================
// NON-PACKED VERSION
//==============================================================================================================================
#ifdef FFX_GPU
#ifdef SPD_PACKED_ONLY
// Avoid compiler error
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
{
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
{
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
}
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
}
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
#endif // #ifdef SPD_PACKED_ONLY
//_____________________________________________________________/\_______________________________________________________________
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
#extension GL_KHR_shader_subgroup_quad:require
#endif
void SpdWorkgroupShuffleBarrier()
{
#ifdef FFX_GLSL
barrier();
#endif
#ifdef FFX_HLSL
GroupMemoryBarrierWithGroupSync();
#endif
}
// Only last active workgroup should proceed
bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
{
// global atomic counter
if (localInvocationIndex == 0)
{
SpdIncreaseAtomicCounter(slice);
}
SpdWorkgroupShuffleBarrier();
return (SpdGetAtomicCounter() != (numWorkGroups - 1));
}
// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
FfxFloat32x4 v0 = v;
FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v);
FfxFloat32x4 v2 = subgroupQuadSwapVertical(v);
FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v);
return SpdReduce4(v0, v1, v2, v3);
#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
// requires SM6.0
FfxUInt32 quad = WaveGetLaneIndex() & (~0x3);
FfxFloat32x4 v0 = v;
FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1);
FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2);
FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3);
return SpdReduce4(v0, v1, v2, v3);
/*
// if SM6.0 is not available, you can use the AMD shader intrinsics
// the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
// https://gpuopen.com/amd-gpu-services-ags-library/
// works for DX11
FfxFloat32x4 v0 = v;
FfxFloat32x4 v1;
v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
FfxFloat32x4 v2;
v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
FfxFloat32x4 v3;
v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
return SpdReduce4(v0, v1, v2, v3);
*/
#endif
return v;
}
FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y);
FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y);
FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y);
FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y);
return SpdReduce4(v0, v1, v2, v3);
}
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice);
FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice);
FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice);
FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice);
return SpdReduce4(v0, v1, v2, v3);
}
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
{
return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
}
FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice);
FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice);
FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice);
FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice);
return SpdReduce4(v0, v1, v2, v3);
}
FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
return SpdLoadSourceImage(FfxInt32x2(base), slice);
#else
return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
#endif
}
void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
FfxFloat32x4 v[4];
FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
v[0] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[0], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
v[1] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[1], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
v[2] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[2], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
v[3] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[3], 0, slice);
if (mip <= 1)
return;
v[0] = SpdReduceQuad(v[0]);
v[1] = SpdReduceQuad(v[1]);
v[2] = SpdReduceQuad(v[2]);
v[3] = SpdReduceQuad(v[3]);
if ((localInvocationIndex % 4) == 0)
{
SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
SpdStoreIntermediate(x / 2, y / 2, v[0]);
SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]);
SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]);
SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]);
}
}
void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
FfxFloat32x4 v[4];
FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
v[0] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[0], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
v[1] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[1], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
v[2] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[2], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
v[3] = SpdReduceLoadSourceImage(tex, slice);
SpdStore(pix, v[3], 0, slice);
if (mip <= 1)
return;
for (FfxUInt32 i = 0; i < 4; i++)
{
SpdStoreIntermediate(x, y, v[i]);
SpdWorkgroupShuffleBarrier();
if (localInvocationIndex < 64)
{
v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
}
SpdWorkgroupShuffleBarrier();
}
if (localInvocationIndex < 64)
{
SpdStoreIntermediate(x + 0, y + 0, v[0]);
SpdStoreIntermediate(x + 8, y + 0, v[1]);
SpdStoreIntermediate(x + 0, y + 8, v[2]);
SpdStoreIntermediate(x + 8, y + 8, v[3]);
}
}
void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
#else
SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
#endif
}
void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 64)
{
FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS, try to reduce bank conflicts
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
// ...
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
SpdStoreIntermediate(x * 2 + y % 2, y * 2, v);
}
#else
FfxFloat32x4 v = SpdLoadIntermediate(x, y);
v = SpdReduceQuad(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediate(x + (y / 2) % 2, y, v);
}
#endif
}
void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 16)
{
// x 0 x 0
// 0 0 0 0
// 0 x 0 x
// 0 0 0 0
FfxFloat32x4 v =
SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS
// x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
// ...
// 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
// ...
// 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
// ...
SpdStoreIntermediate(x * 4 + y, y * 4, v);
}
#else
if (localInvocationIndex < 64)
{
FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2);
v = SpdReduceQuad(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediate(x * 2 + y / 2, y * 2, v);
}
}
#endif
}
void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 4)
{
// x 0 0 0 x 0 0 0
// ...
// 0 x 0 0 0 x 0 0
FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS
// x x x x 0 ...
// 0 ...
SpdStoreIntermediate(x + y * 2, 0, v);
}
#else
if (localInvocationIndex < 16)
{
FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4);
v = SpdReduceQuad(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediate(x / 2 + y, 0, v);
}
}
#endif
}
void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 1)
{
// x x x x 0 ...
// 0 ...
FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
}
#else
if (localInvocationIndex < 4)
{
FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0);
v = SpdReduceQuad(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
}
}
#endif
}
void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice);
SpdStore(pix, v0, 6, slice);
tex = FfxInt32x2(x * 4 + 2, y * 4 + 0);
pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice);
SpdStore(pix, v1, 6, slice);
tex = FfxInt32x2(x * 4 + 0, y * 4 + 2);
pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice);
SpdStore(pix, v2, 6, slice);
tex = FfxInt32x2(x * 4 + 2, y * 4 + 2);
pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice);
SpdStore(pix, v3, 6, slice);
if (mips <= 7)
return;
// no barrier needed, working on values only from the same thread
FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3);
SpdStore(FfxInt32x2(x, y), v, 7, slice);
SpdStoreIntermediate(x, y, v);
}
void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
if (mips <= baseMip)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);
if (mips <= baseMip + 1)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
if (mips <= baseMip + 2)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
if (mips <= baseMip + 3)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice);
}
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64);
FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2);
FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7));
SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
if (mips <= 6)
return;
if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
return;
SpdResetAtomicCounter(slice);
// After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
SpdDownsampleMips_6_7(x, y, mips, slice);
SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//==============================================================================================================================
// PACKED VERSION
//==============================================================================================================================
#if FFX_HALF
#ifdef FFX_GLSL
#extension GL_EXT_shader_subgroup_extended_types_float16:require
#endif
FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
FfxFloat16x4 v0 = v;
FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
return SpdReduce4H(v0, v1, v2, v3);
#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
// requires SM6.0
FfxUInt32 quad = WaveGetLaneIndex() & (~0x3);
FfxFloat16x4 v0 = v;
FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1);
FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2);
FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3);
return SpdReduce4H(v0, v1, v2, v3);
/*
// if SM6.0 is not available, you can use the AMD shader intrinsics
// the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
// https://gpuopen.com/amd-gpu-services-ags-library/
// works for DX11
FfxFloat16x4 v0 = v;
FfxFloat16x4 v1;
v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
FfxFloat16x4 v2;
v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
FfxFloat16x4 v3;
v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
return SpdReduce4H(v0, v1, v2, v3);
*/
#endif
return FfxFloat16x4(0.0, 0.0, 0.0, 0.0);
}
FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
FfxFloat16x4 v0 = SpdLoadIntermediateH(i0.x, i0.y);
FfxFloat16x4 v1 = SpdLoadIntermediateH(i1.x, i1.y);
FfxFloat16x4 v2 = SpdLoadIntermediateH(i2.x, i2.y);
FfxFloat16x4 v3 = SpdLoadIntermediateH(i3.x, i3.y);
return SpdReduce4H(v0, v1, v2, v3);
}
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
FfxFloat16x4 v0 = SpdLoadH(FfxInt32x2(i0), slice);
FfxFloat16x4 v1 = SpdLoadH(FfxInt32x2(i1), slice);
FfxFloat16x4 v2 = SpdLoadH(FfxInt32x2(i2), slice);
FfxFloat16x4 v3 = SpdLoadH(FfxInt32x2(i3), slice);
return SpdReduce4H(v0, v1, v2, v3);
}
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
{
return SpdReduceLoad4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
}
FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
FfxFloat16x4 v0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
FfxFloat16x4 v1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
FfxFloat16x4 v2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
FfxFloat16x4 v3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice);
return SpdReduce4H(v0, v1, v2, v3);
}
FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
return SpdLoadSourceImageH(FfxInt32x2(base), slice);
#else
return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
#endif
}
void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
FfxFloat16x4 v[4];
FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
v[0] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[0], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
v[1] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[1], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
v[2] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[2], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
v[3] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[3], 0, slice);
if (mips <= 1)
return;
v[0] = SpdReduceQuadH(v[0]);
v[1] = SpdReduceQuadH(v[1]);
v[2] = SpdReduceQuadH(v[2]);
v[3] = SpdReduceQuadH(v[3]);
if ((localInvocationIndex % 4) == 0)
{
SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
SpdStoreIntermediateH(x / 2, y / 2, v[0]);
SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]);
SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]);
SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]);
}
}
void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
FfxFloat16x4 v[4];
FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
v[0] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[0], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
v[1] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[1], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
v[2] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[2], 0, slice);
tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
v[3] = SpdReduceLoadSourceImageH(tex, slice);
SpdStoreH(pix, v[3], 0, slice);
if (mips <= 1)
return;
for (FfxInt32 i = 0; i < 4; i++)
{
SpdStoreIntermediateH(x, y, v[i]);
SpdWorkgroupShuffleBarrier();
if (localInvocationIndex < 64)
{
v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
}
SpdWorkgroupShuffleBarrier();
}
if (localInvocationIndex < 64)
{
SpdStoreIntermediateH(x + 0, y + 0, v[0]);
SpdStoreIntermediateH(x + 8, y + 0, v[1]);
SpdStoreIntermediateH(x + 0, y + 8, v[2]);
SpdStoreIntermediateH(x + 8, y + 8, v[3]);
}
}
void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
#else
SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
#endif
}
void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 64)
{
FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS, try to reduce bank conflicts
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
// ...
// x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v);
}
#else
FfxFloat16x4 v = SpdLoadIntermediateH(x, y);
v = SpdReduceQuadH(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediateH(x + (y / 2) % 2, y, v);
}
#endif
}
void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 16)
{
// x 0 x 0
// 0 0 0 0
// 0 x 0 x
// 0 0 0 0
FfxFloat16x4 v =
SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS
// x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
// 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
// ...
// 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
// ...
// 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
// ...
SpdStoreIntermediateH(x * 4 + y, y * 4, v);
}
#else
if (localInvocationIndex < 64)
{
FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2);
v = SpdReduceQuadH(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v);
}
}
#endif
}
void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 4)
{
// x 0 0 0 x 0 0 0
// ...
// 0 x 0 0 0 x 0 0
FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
// store to LDS
// x x x x 0 ...
// 0 ...
SpdStoreIntermediateH(x + y * 2, 0, v);
}
#else
if (localInvocationIndex < 16)
{
FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4);
v = SpdReduceQuadH(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
SpdStoreIntermediateH(x / 2 + y, 0, v);
}
}
#endif
}
void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
if (localInvocationIndex < 1)
{
// x x x x 0 ...
// 0 ...
FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
}
#else
if (localInvocationIndex < 4)
{
FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0);
v = SpdReduceQuadH(v);
// quad index 0 stores result
if (localInvocationIndex % 4 == 0)
{
SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
}
}
#endif
}
void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice);
SpdStoreH(pix, v0, 6, slice);
tex = FfxInt32x2(x * 4 + 2, y * 4 + 0);
pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice);
SpdStoreH(pix, v1, 6, slice);
tex = FfxInt32x2(x * 4 + 0, y * 4 + 2);
pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice);
SpdStoreH(pix, v2, 6, slice);
tex = FfxInt32x2(x * 4 + 2, y * 4 + 2);
pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice);
SpdStoreH(pix, v3, 6, slice);
if (mips < 8)
return;
// no barrier needed, working on values only from the same thread
FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3);
SpdStoreH(FfxInt32x2(x, y), v, 7, slice);
SpdStoreIntermediateH(x, y, v);
}
void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
if (mips <= baseMip)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);
if (mips <= baseMip + 1)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
if (mips <= baseMip + 2)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
if (mips <= baseMip + 3)
return;
SpdWorkgroupShuffleBarrier();
SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice);
}
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64);
FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2);
FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7));
SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
if (mips < 7)
return;
if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
return;
SpdResetAtomicCounter(slice);
// After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
SpdDownsampleMips_6_7H(x, y, mips, slice);
SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
}
#endif // #if FFX_HALF
#endif // #ifdef FFX_GPU