From 96fe5cd23f77d4dc47e37bf5daeb93587e538bf6 Mon Sep 17 00:00:00 2001 From: assiduous Date: Sat, 7 Mar 2026 13:29:05 -0800 Subject: [PATCH 01/14] Add super resolution interfaces and descriptors --- Graphics/CMakeLists.txt | 1 + Graphics/SuperResolution/CMakeLists.txt | 113 ++++++ .../interface/SuperResolution.h | 383 ++++++++++++++++++ .../interface/SuperResolutionFactory.h | 270 ++++++++++++ .../interface/SuperResolutionFactoryLoader.h | 84 ++++ Graphics/SuperResolution/readme.md | 2 + Graphics/SuperResolution/src/DLLMain.cpp | 53 +++ .../SuperResolution/src/SuperResolution.def | 2 + .../src/SuperResolutionFactory.cpp | 155 +++++++ .../src/SuperResolution_D3D12.cpp | 32 ++ .../SuperResolutionFactoryH_test.c | 36 ++ .../SuperResolutionFactoryH_test.cpp | 27 ++ .../SuperResolution/SuperResolutionH_test.c | 33 ++ .../SuperResolution/SuperResolutionH_test.cpp | 27 ++ 14 files changed, 1218 insertions(+) create mode 100644 Graphics/SuperResolution/CMakeLists.txt create mode 100644 Graphics/SuperResolution/interface/SuperResolution.h create mode 100644 Graphics/SuperResolution/interface/SuperResolutionFactory.h create mode 100644 Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h create mode 100644 Graphics/SuperResolution/readme.md create mode 100644 Graphics/SuperResolution/src/DLLMain.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolution.def create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactory.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolution_D3D12.cpp create mode 100644 Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c create mode 100644 Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.cpp create mode 100644 Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c create mode 100644 Tests/IncludeTest/SuperResolution/SuperResolutionH_test.cpp diff --git a/Graphics/CMakeLists.txt b/Graphics/CMakeLists.txt index 41cbdbd65b..3cebd8ad10 100644 --- a/Graphics/CMakeLists.txt +++ 
b/Graphics/CMakeLists.txt @@ -56,3 +56,4 @@ if(ARCHIVER_SUPPORTED) endif() add_subdirectory(GraphicsTools) +add_subdirectory(SuperResolution) diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt new file mode 100644 index 0000000000..6bae288a20 --- /dev/null +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -0,0 +1,113 @@ +cmake_minimum_required (VERSION 3.10) + +include(../../BuildTools/CMake/BuildUtils.cmake) + +project(Diligent-SuperResolution CXX) + +set(INCLUDE +) + +set(INTERFACE + interface/SuperResolution.h + interface/SuperResolutionFactory.h + interface/SuperResolutionFactoryLoader.h +) + +set(SOURCE + src/SuperResolutionFactory.cpp +) + +if(D3D12_SUPPORTED) + list(APPEND SOURCE src/SuperResolution_D3D12.cpp) +endif() + +set(DLL_SOURCE + src/DLLMain.cpp + src/SuperResolution.def +) + +add_library(Diligent-SuperResolutionInterface INTERFACE) +target_link_libraries (Diligent-SuperResolutionInterface INTERFACE Diligent-GraphicsEngineInterface) +target_include_directories(Diligent-SuperResolutionInterface INTERFACE interface) + +add_library(Diligent-SuperResolution-static STATIC + ${SOURCE} ${INTERFACE} ${INCLUDE} + readme.md +) +add_library(Diligent-SuperResolution-shared SHARED + readme.md +) + +if((PLATFORM_WIN32 OR PLATFORM_UNIVERSAL_WINDOWS) AND NOT MINGW_BUILD) + target_sources(Diligent-SuperResolution-shared PRIVATE ${DLL_SOURCE}) +endif() + +target_include_directories(Diligent-SuperResolution-static +PRIVATE + include +) + +target_compile_definitions(Diligent-SuperResolution-shared PUBLIC DILIGENT_SUPER_RESOLUTION_SHARED=1) + + +target_link_libraries(Diligent-SuperResolution-static +PUBLIC + Diligent-SuperResolutionInterface +PRIVATE + Diligent-BuildSettings + Diligent-Common +) + +if(D3D12_SUPPORTED) + target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D12-static) + target_include_directories(Diligent-SuperResolution-static PRIVATE ../GraphicsEngineD3D12/include) +endif() + 
+target_link_libraries(Diligent-SuperResolution-shared +PUBLIC + Diligent-SuperResolutionInterface +PRIVATE + Diligent-BuildSettings +) +target_link_whole_archive(Diligent-SuperResolution-shared Diligent-SuperResolution-static) + +if(PLATFORM_WIN32) + # Do not add 'lib' prefix when building with MinGW + set_target_properties(Diligent-SuperResolution-shared PROPERTIES PREFIX "") + + # Set output name to SuperResolution{32|64}{r|d} + set_dll_output_name(Diligent-SuperResolution-shared SuperResolution) +else() + set_target_properties(Diligent-SuperResolution-shared PROPERTIES + OUTPUT_NAME SuperResolution + ) +endif() + +if (MINGW_BUILD) + # Restrict export to GetSuperResolutionFactory. The version script is passed by absolute path because the working directory of the link step depends on the CMake generator. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/export.map + "{ global: *GetSuperResolutionFactory*; local: *; };" + ) + target_link_options(Diligent-SuperResolution-shared PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_BINARY_DIR}/export.map") +endif() + +source_group("src" FILES ${SOURCE}) +source_group("include" FILES ${INCLUDE}) +source_group("interface" FILES ${INTERFACE}) +source_group("dll" FILES ${DLL_SOURCE}) + +set_source_files_properties( + readme.md PROPERTIES HEADER_FILE_ONLY TRUE +) + +set_target_properties(Diligent-SuperResolution-static Diligent-SuperResolution-shared PROPERTIES + FOLDER DiligentCore/Graphics +) + +set_common_target_properties(Diligent-SuperResolution-static) +set_common_target_properties(Diligent-SuperResolution-shared) + +if(DILIGENT_INSTALL_CORE) + install_core_lib(Diligent-SuperResolution-shared) + install_core_lib(Diligent-SuperResolution-static) +endif() diff --git a/Graphics/SuperResolution/interface/SuperResolution.h b/Graphics/SuperResolution/interface/SuperResolution.h new file mode 100644 index 0000000000..355a3a02c8 --- /dev/null +++ b/Graphics/SuperResolution/interface/SuperResolution.h @@ -0,0 +1,383 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +/// \file +/// Defines Diligent::ISuperResolution interface and related data structures + +#include "../../GraphicsEngine/interface/DeviceObject.h" +#include "../../GraphicsEngine/interface/GraphicsTypes.h" +#include "../../GraphicsEngine/interface/TextureView.h" +#include "../../GraphicsEngine/interface/DeviceContext.h" + +DILIGENT_BEGIN_NAMESPACE(Diligent) + +// {A1B2C3D4-E5F6-7890-ABCD-EF1234567890} +static DILIGENT_CONSTEXPR INTERFACE_ID IID_SuperResolution = + {0xa1b2c3d4, 0xe5f6, 0x7890, {0xab, 0xcd, 0xef, 0x12, 0x34, 0x56, 0x78, 0x90}}; + +// clang-format off + +/// Super resolution flags. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_FLAGS, Uint32) +{ + SUPER_RESOLUTION_FLAG_NONE = 0u, + + /// When set, the upscaler automatically calculates exposure for each frame. 
+ /// The exposure texture in ExecuteSuperResolutionAttribs is ignored. + SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE = 1u << 0, + + /// When set, enables the sharpening pass in the upscaler. + /// The Sharpness field in ExecuteSuperResolutionAttribs controls the amount. + SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING = 1u << 1, + + SUPER_RESOLUTION_FLAG_LAST = SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING +}; +DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_FLAGS) + + +/// Super resolution optimization type. +/// Defines the quality/performance trade-off for super resolution upscaling. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_OPTIMIZATION_TYPE, Uint8) +{ + /// Maximum quality, lowest performance. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY = 0u, + + /// Favor quality over performance. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY, + + /// Balanced quality/performance trade-off. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED, + + /// Favor performance over quality. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE, + + /// Maximum performance, lowest quality. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE, + + SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT +}; + +/// This structure describes the super resolution upscaler object and is part of the creation +/// parameters given to ISuperResolutionFactory::CreateSuperResolution(). +struct SuperResolutionDesc DILIGENT_DERIVE(DeviceObjectAttribs) + + /// Unique identifier of the super resolution variant to create. + /// + /// Must match one of the VariantIds reported by ISuperResolutionFactory::EnumerateVariants(). + INTERFACE_ID VariantId DEFAULT_INITIALIZER({}); + + /// Input (render) width. Must be greater than zero and not exceed OutputWidth. + /// + /// Use ISuperResolutionFactory::GetSourceSettings() to obtain the + /// optimal input resolution for a given output resolution and optimization type. + Uint32 InputWidth DEFAULT_INITIALIZER(0); + + /// Input (render) height. 
Must be greater than zero and not exceed OutputHeight. + /// + /// Use ISuperResolutionFactory::GetSourceSettings() to obtain the + /// optimal input resolution for a given output resolution and optimization type. + Uint32 InputHeight DEFAULT_INITIALIZER(0); + + /// Target (output) texture width. + Uint32 OutputWidth DEFAULT_INITIALIZER(0); + + /// Target (output) texture height. + Uint32 OutputHeight DEFAULT_INITIALIZER(0); + + /// Output texture format. + TEXTURE_FORMAT OutputFormat DEFAULT_INITIALIZER(TEX_FORMAT_RGBA16_FLOAT); + + /// Color input texture format. + TEXTURE_FORMAT ColorFormat DEFAULT_INITIALIZER(TEX_FORMAT_RGBA16_FLOAT); + + /// Depth input texture format. + /// Required for temporal upscaling. + TEXTURE_FORMAT DepthFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Motion vectors texture format. + /// + /// Required for temporal upscaling. + TEXTURE_FORMAT MotionFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Reactive mask texture format. + /// + /// Optional. Used for temporal upscaling to guide the upscaler in areas with inaccurate motion information (e.g., alpha-blended objects). + TEXTURE_FORMAT ReactiveMaskFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Ignore history mask texture format. + /// + /// Optional. Used for temporal upscaling to indicate regions where temporal history + /// should be completely discarded (binary mask: 0 = use history, 1 = ignore history). + /// Unlike the reactive mask which provides proportional control, this is a binary decision. + TEXTURE_FORMAT IgnoreHistoryMaskFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Exposure scale texture format. + /// + /// Optional. When auto-exposure is disabled, specifies the format of the 1x1 exposure + /// texture provided in ExecuteSuperResolutionAttribs::pExposureTextureSRV. + TEXTURE_FORMAT ExposureFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Creation flags controlling the super resolution upscaler behavior. 
+ /// See SUPER_RESOLUTION_FLAGS. + SUPER_RESOLUTION_FLAGS Flags DEFAULT_INITIALIZER(SUPER_RESOLUTION_FLAG_NONE); +}; +typedef struct SuperResolutionDesc SuperResolutionDesc; + + +/// Attributes for querying the optimal source (input) settings for super resolution upscaling. +/// +/// This structure is used by ISuperResolutionFactory::GetSourceSettings(). +struct SuperResolutionSourceSettingsAttribs +{ + /// Unique identifier of the super resolution variant to query the source settings for. + /// + /// Must match one of the VariantIds reported by ISuperResolutionFactory::EnumerateVariants(). + INTERFACE_ID VariantId DEFAULT_INITIALIZER({}); + + /// Target (output) texture width. Must be greater than zero. + Uint32 OutputWidth DEFAULT_INITIALIZER(0); + + /// Target (output) texture height. Must be greater than zero. + Uint32 OutputHeight DEFAULT_INITIALIZER(0); + + /// Output texture format. + /// + /// Some backends (e.g. DirectSR) may return different optimal input resolutions + /// depending on the output format. When set to TEX_FORMAT_UNKNOWN, the backend will use a reasonable default. + TEXTURE_FORMAT OutputFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Flags controlling the super resolution behavior. + /// + /// These flags affect the optimal source resolution returned by the backend. + /// Must match the flags that will be used when creating the upscaler. + SUPER_RESOLUTION_FLAGS Flags DEFAULT_INITIALIZER(SUPER_RESOLUTION_FLAG_NONE); + + /// Optimization type controlling the quality/performance trade-off. + SUPER_RESOLUTION_OPTIMIZATION_TYPE OptimizationType DEFAULT_INITIALIZER(SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED); +}; +typedef struct SuperResolutionSourceSettingsAttribs SuperResolutionSourceSettingsAttribs; + + +/// Super resolution execute attributes. + +/// This structure is used by ISuperResolution::Execute(). +struct ExecuteSuperResolutionAttribs +{ + /// Device context to execute the super resolution on. 
+ IDeviceContext* pContext DEFAULT_INITIALIZER(nullptr); + + /// Low-resolution color texture (shader resource view). + /// + /// This is the input image to be upscaled. + ITextureView* pColorTextureSRV DEFAULT_INITIALIZER(nullptr); + + /// Depth buffer of the low-resolution render (shader resource view). + /// + /// Required for temporal upscaling (SUPER_RESOLUTION_TYPE_TEMPORAL). + ITextureView* pDepthTextureSRV DEFAULT_INITIALIZER(nullptr); + + /// Motion vectors texture (shader resource view). + /// + /// Required for temporal upscaling (SUPER_RESOLUTION_TYPE_TEMPORAL). + /// Expected to contain per-pixel 2D motion vectors in pixel space. + ITextureView* pMotionVectorsSRV DEFAULT_INITIALIZER(nullptr); + + /// Output (upscaled) texture (unordered access view or render target view). + /// + /// Must match SuperResolutionDesc::OutputWidth x OutputHeight. + ITextureView* pOutputTextureView DEFAULT_INITIALIZER(nullptr); + + /// Exposure texture (shader resource view). + /// + /// Optional. A 1x1 R16_FLOAT texture containing the exposure value. + /// The upscaler reads the R channel and uses it to multiply the input color. + /// Ignored when SuperResolutionDesc::Flags includes SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE. + ITextureView* pExposureTextureSRV DEFAULT_INITIALIZER(nullptr); + + /// Reactive mask texture (shader resource view). + /// + /// Optional. Per-pixel mask in [0, 1] range guiding temporal history usage: + /// 0.0 - normal temporal behavior + /// 1.0 - ignore temporal history (use current frame only) + /// Useful for alpha-blended objects or areas with inaccurate motion vectors. + /// Only used when SuperResolutionDesc::ReactiveMaskFormat != TEX_FORMAT_UNKNOWN. + ITextureView* pReactiveMaskTextureSRV DEFAULT_INITIALIZER(nullptr); + + /// Ignore history mask texture (shader resource view). + /// + /// Optional. Binary per-pixel mask where non-zero values indicate regions + /// where temporal history should be completely discarded. 
+ /// Unlike the reactive mask which provides proportional control, + /// this is a binary decision (discard or keep). + /// Only used when SuperResolutionDesc::IgnoreHistoryMaskFormat != TEX_FORMAT_UNKNOWN. + ITextureView* pIgnoreHistoryMaskTextureSRV DEFAULT_INITIALIZER(nullptr); + + /// Jitter offset X applied to the projection matrix (in pixels). + /// + /// Used for temporal upscaling. + float JitterX DEFAULT_INITIALIZER(0.0f); + + /// Jitter offset Y applied to the projection matrix (in pixels). + /// + /// Used for temporal upscaling. + float JitterY DEFAULT_INITIALIZER(0.0f); + + /// Pre-exposure value. + /// + /// If the input color texture is pre-multiplied by a fixed value, + /// set this to that value so the upscaler can divide by it. + /// Default is 1.0 (no pre-exposure adjustment). + float PreExposure DEFAULT_INITIALIZER(1.0f); + + /// Motion vector scale X. + /// + /// Multiplier applied to the X component of motion vectors. + /// Use this to convert motion vectors from their native space to pixel space. + /// Default is 1.0 (motion vectors are already in pixel space). + float MotionVectorScaleX DEFAULT_INITIALIZER(1.0f); + + /// Motion vector scale Y. + /// + /// Multiplier applied to the Y component of motion vectors. + /// Use this to convert motion vectors from their native space to pixel space. + /// Default is 1.0 (motion vectors are already in pixel space). + float MotionVectorScaleY DEFAULT_INITIALIZER(1.0f); + + /// Exposure scale value (scalar). + /// + /// A multiplier applied to the exposure. This is separate from PreExposure + /// and the exposure texture. Used by DirectSR-style upscalers. + /// Default is 1.0 (no additional scaling). + float ExposureScale DEFAULT_INITIALIZER(1.0f); + + /// Sharpness control. + /// + /// Controls the amount of sharpening applied during upscaling. + /// Range is typically [0.0, 1.0], where 0.0 means no sharpening + /// and 1.0 means maximum sharpening. 
+ /// Only used when the upscaler supports sharpness (see SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS). + /// Default is 0.0 (no sharpening). + float Sharpness DEFAULT_INITIALIZER(0.0f); + + /// Camera near plane distance. + /// + /// Used by some upscalers for depth reconstruction. + /// Default is 0.0 (not provided). + float CameraNear DEFAULT_INITIALIZER(0.0f); + + /// Camera far plane distance. + /// + /// Used by some upscalers for depth reconstruction. + /// Default is 0.0 (not provided). + float CameraFar DEFAULT_INITIALIZER(0.0f); + + /// Camera vertical field of view angle, in radians. + /// + /// Used by some upscalers for depth reconstruction. + /// Default is 0.0 (not provided). + float CameraFovAngleVert DEFAULT_INITIALIZER(0.0f); + + /// Time elapsed since the previous frame, in seconds. + /// + /// Used by some upscalers to adjust temporal accumulation behavior. + /// Default is 0.0. + float TimeDeltaInSeconds DEFAULT_INITIALIZER(0.0f); + + /// Set to true to reset temporal history (e.g., on camera cut). + /// + /// Default is False. + Bool ResetHistory DEFAULT_INITIALIZER(False); +}; +typedef struct ExecuteSuperResolutionAttribs ExecuteSuperResolutionAttribs; + + +#define DILIGENT_INTERFACE_NAME ISuperResolution +#include "../../../Primitives/interface/DefineInterfaceHelperMacros.h" + +#define ISuperResolutionInclusiveMethods \ + IDeviceObjectInclusiveMethods; \ + ISuperResolutionMethods SuperResolution + +/// Super resolution upscaler interface. +/// +/// The super resolution object encapsulates a hardware-accelerated or software-based super resolution +/// effect (e.g., MetalFX on Metal, DirectSR on D3D12). +/// It is created via ISuperResolutionFactory::CreateSuperResolution(). +DILIGENT_BEGIN_INTERFACE(ISuperResolution, IDeviceObject) +{ +#if DILIGENT_CPP_INTERFACE + /// Returns the super resolution description used to create the object. 
+ virtual const SuperResolutionDesc& METHOD(GetDesc)() const override = 0; +#endif + + /// Returns the optimal jitter offset for the given frame index. + + /// \param [in] Index - Frame index. The sequence wraps automatically. + /// \param [out] pJitterX - Jitter offset X in pixel space, typically in [-0.5, 0.5] range. + /// \param [out] pJitterY - Jitter offset Y in pixel space, typically in [-0.5, 0.5] range. + /// + /// For temporal upscaling, the upscaler provides a recommended jitter pattern + /// (e.g. Halton sequence) that should be applied to the projection matrix each frame. + /// For spatial upscaling, both values are set to zero. + VIRTUAL void METHOD(GetJitterOffset)(THIS_ + Uint32 Index, + float* pJitterX, + float* pJitterY) CONST PURE; + + + /// Executes the super resolution upscaler. + + /// \param [in] Attribs - Upscale operation attributes, see Diligent::ExecuteSuperResolutionAttribs. + /// + /// The command must be called outside of a render pass. + /// All input textures must be in the appropriate states or + /// TransitionMode should be set to RESOURCE_STATE_TRANSITION_MODE_TRANSITION. NOTE(review): ExecuteSuperResolutionAttribs declares no TransitionMode member - confirm the intended state-transition contract. + /// + /// \remarks Supported contexts: graphics. + VIRTUAL void METHOD(Execute)(THIS_ + const ExecuteSuperResolutionAttribs REF Attribs) PURE; +}; +DILIGENT_END_INTERFACE + +// clang-format on + +#include "../../../Primitives/interface/UndefInterfaceHelperMacros.h" + +#if DILIGENT_C_INTERFACE + +// clang-format off +# define ISuperResolution_GetDesc(This) (const struct SuperResolutionDesc*)IDeviceObject_GetDesc(This) + +# define ISuperResolution_GetJitterOffset(This, ...) CALL_IFACE_METHOD(SuperResolution, GetJitterOffset, This, __VA_ARGS__) +# define ISuperResolution_Execute(This, ...) 
CALL_IFACE_METHOD(SuperResolution, Execute, This, __VA_ARGS__) + +// clang-format on + +#endif + +DILIGENT_END_NAMESPACE // namespace Diligent diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactory.h b/Graphics/SuperResolution/interface/SuperResolutionFactory.h new file mode 100644 index 0000000000..1bc90a3782 --- /dev/null +++ b/Graphics/SuperResolution/interface/SuperResolutionFactory.h @@ -0,0 +1,270 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +/// \file +/// Defines Diligent::ISuperResolutionFactory interface and related structures. 
+ +#include "../../../Primitives/interface/Object.h" +#include "../../../Primitives/interface/DebugOutput.h" +#include "../../../Primitives/interface/MemoryAllocator.h" +#include "../../../Primitives/interface/FlagEnum.h" +#include "../../../Graphics/GraphicsEngine/interface/RenderDevice.h" + +#include "SuperResolution.h" + +DILIGENT_BEGIN_NAMESPACE(Diligent) + +// {79A904EC-EB17-4339-86BC-8A37632B0BD1} +static DILIGENT_CONSTEXPR INTERFACE_ID IID_SuperResolutionFactory = + {0x79a904ec, 0xeb17, 0x4339, {0x86, 0xbc, 0x8a, 0x37, 0x63, 0x2b, 0xb, 0xd1}}; + +// clang-format off + +/// Super resolution upscaler type. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_TYPE, Uint8) +{ + /// Spatial upscaling only (single frame, no motion vectors required). + SUPER_RESOLUTION_TYPE_SPATIAL = 0u, + + /// Temporal upscaling (uses motion vectors and history accumulation). + SUPER_RESOLUTION_TYPE_TEMPORAL +}; + +/// Capability flags for spatial super resolution upscaling. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_SPATIAL_CAP_FLAGS, Uint32) +{ + SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NONE = 0u, + + /// The upscaler is a native hardware-accelerated implementation (e.g. MetalFX, DirectSR) + /// as opposed to a custom software fallback. + SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NATIVE = 1u << 0, + + SUPER_RESOLUTION_SPATIAL_CAP_FLAG_LAST = SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NATIVE +}; +DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_SPATIAL_CAP_FLAGS) + + +/// Capability flags for temporal super resolution upscaling. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_TEMPORAL_CAP_FLAGS, Uint32) +{ + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NONE = 0u, + + /// The upscaler is a native hardware-accelerated implementation (e.g. MetalFX, DirectSR) + /// as opposed to a custom software fallback. + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NATIVE = 1u << 0, + + /// The upscaler supports exposure scale texture input. 
+ SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_EXPOSURE_SCALE_TEXTURE = 1u << 1, + + /// The upscaler supports ignore history mask texture input. + /// When set, the backend processes the pIgnoreHistoryMaskTextureSRV field + /// in ExecuteSuperResolutionAttribs. + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_IGNORE_HISTORY_MASK = 1u << 2, + + /// The upscaler supports reactive mask texture input. + /// When set, the backend processes the pReactiveMaskTextureSRV field + /// in ExecuteSuperResolutionAttribs. + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_REACTIVE_MASK = 1u << 3, + + /// The upscaler supports the sharpness control parameter. + /// When set, the Sharpness field in ExecuteSuperResolutionAttribs is used. + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS = 1u << 4, + + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_LAST = SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS +}; +DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_TEMPORAL_CAP_FLAGS) + + +/// Information about a supported super resolution variant +struct SuperResolutionInfo +{ + /// Human-readable name of the upscaler variant (e.g. "DLSS", "FSR", "MetalFX Spatial", "MetalFX Temporal"). + Char Name[128] DEFAULT_INITIALIZER({}); + + /// Unique identifier for this upscaler variant. + /// Use this identifier when creating the upscaler with ISuperResolutionFactory::CreateSuperResolution(). + INTERFACE_ID VariantId DEFAULT_INITIALIZER({}); + + /// Upscaler type. Determines which input textures and parameters are required. + SUPER_RESOLUTION_TYPE Type DEFAULT_INITIALIZER(SUPER_RESOLUTION_TYPE_SPATIAL); + +#if defined(DILIGENT_SHARP_GEN) + Uint32 CapFlags DEFAULT_INITIALIZER(0); +#else + union + { + /// Capability flags for SUPER_RESOLUTION_TYPE_SPATIAL. + SUPER_RESOLUTION_SPATIAL_CAP_FLAGS SpatialCapFlags DEFAULT_INITIALIZER(SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NONE); + + /// Capability flags for SUPER_RESOLUTION_TYPE_TEMPORAL. 
+ SUPER_RESOLUTION_TEMPORAL_CAP_FLAGS TemporalCapFlags; + }; +#endif + +#if DILIGENT_CPP_INTERFACE + constexpr Uint32 SpatialOrTemporalCapFlags() const + { +# if defined(DILIGENT_SHARP_GEN) + return CapFlags; +# else + return SpatialCapFlags; +# endif + } + + /// Comparison operator tests if two structures are equivalent + + /// \param [in] RHS - reference to the structure to perform comparison with + /// \return + /// - True if all members of the two structures are equal. + /// - False otherwise. + bool operator==(const SuperResolutionInfo& RHS) const noexcept + { + return (VariantId == RHS.VariantId && + Type == RHS.Type && + SpatialOrTemporalCapFlags() == RHS.SpatialOrTemporalCapFlags() && + memcmp(Name, RHS.Name, sizeof(Name)) == 0); + } +#endif +}; +typedef struct SuperResolutionInfo SuperResolutionInfo; + + +/// Optimal source (input) settings returned by ISuperResolutionFactory::GetSourceSettings(). +struct SuperResolutionSourceSettings +{ + /// Recommended input width for the given output resolution and optimization type. + Uint32 OptimalInputWidth DEFAULT_INITIALIZER(0); + + /// Recommended input height for the given output resolution and optimization type. + Uint32 OptimalInputHeight DEFAULT_INITIALIZER(0); +}; +typedef struct SuperResolutionSourceSettings SuperResolutionSourceSettings; + + +#define DILIGENT_INTERFACE_NAME ISuperResolutionFactory +#include "../../../Primitives/interface/DefineInterfaceHelperMacros.h" + +#define ISuperResolutionFactoryInclusiveMethods \ + IObjectInclusiveMethods; \ + ISuperResolutionFactoryMethods SuperResolutionFactory + +// clang-format off + +/// SuperResolution factory interface +DILIGENT_BEGIN_INTERFACE(ISuperResolutionFactory, IObject) +{ + /// Enumerates the supported super resolution variants for the given render device. + + /// \param [in] pDevice - Render device to query the supported super resolution variants for. + /// \param [in, out] NumVariants - Number of super resolution variants. 
If `Variants` is null, this + /// parameter is used to return the number of supported variants. + /// If `Variants` is not null, this parameter should contain the maximum number + /// of elements to be written to `Variants` array. It is overwritten with the actual + /// number of variants written to the array. + /// \param [out] Variants - Array to receive the supported super resolution variants. + /// Each variant is described by SuperResolutionInfo structure. + VIRTUAL void METHOD(EnumerateVariants)(THIS_ + IRenderDevice* pDevice, + Uint32 REF NumVariants, + SuperResolutionInfo* Variants) PURE; + + + /// Returns the optimal source (input) settings for super resolution upscaling. + + /// \param [in] pDevice - Render device to query the optimal source settings for. + /// \param [in] Attribs - Attributes, see Diligent::SuperResolutionSourceSettingsAttribs for details. + /// \param [out] Settings - On success, receives the optimal source settings, + /// see Diligent::SuperResolutionSourceSettings for details. + /// + /// \remarks On backends that don't support hardware upscaling, Settings will be zero-initialized. + /// Use this method to determine the optimal render resolution before creating + /// the upscaler object. + VIRTUAL void METHOD(GetSourceSettings)(THIS_ + IRenderDevice* pDevice, + const SuperResolutionSourceSettingsAttribs REF Attribs, + SuperResolutionSourceSettings REF Settings) CONST PURE; + + + /// Creates a new upscaler object. + + /// \param [in] pDevice - Render device to create the upscaler for. + /// \param [in] Desc - Super resolution upscaler description, see Diligent::SuperResolutionDesc for details. + /// \param [out] ppUpscaler - Address of the memory location where a pointer to the + /// super resolution upscaler interface will be written. + /// The function calls AddRef(), so that the new object will have + /// one reference. + /// + /// \remarks On backends that don't support hardware upscaling, the method will + /// return nullptr. 
+ VIRTUAL void METHOD(CreateSuperResolution)(THIS_ + IRenderDevice* pDevice, + const SuperResolutionDesc REF Desc, + ISuperResolution** ppUpscaler) PURE; + + /// Sets a user-provided debug message callback. + + /// \param [in] MessageCallback - Debug message callback function to use instead of the default one. + VIRTUAL void METHOD(SetMessageCallback)(THIS_ + DebugMessageCallbackType MessageCallback) CONST PURE; + + /// Sets whether to break program execution on assertion failure. + + /// \param [in] BreakOnError - Whether to break on assertion failure. + VIRTUAL void METHOD(SetBreakOnError)(THIS_ + bool BreakOnError) CONST PURE; + + /// Sets the memory allocator to be used by the SuperResolution. + + /// \param [in] pAllocator - Pointer to the memory allocator. + /// + /// The allocator is a global setting that applies to the entire execution unit + /// (executable or shared library that contains the SuperResolution implementation). + /// + /// The allocator should be set before any other factory method is called and + /// should not be changed afterwards. + /// The allocator object must remain valid until all objects created by the factory + /// are destroyed. + VIRTUAL void METHOD(SetMemoryAllocator)(THIS_ + IMemoryAllocator* pAllocator) CONST PURE; +}; +DILIGENT_END_INTERFACE + +#include "../../../Primitives/interface/UndefInterfaceHelperMacros.h" + +#if DILIGENT_C_INTERFACE + +# define ISuperResolutionFactory_EnumerateVariants(This, ...) CALL_IFACE_METHOD(SuperResolutionFactory, EnumerateVariants, This, __VA_ARGS__) +# define ISuperResolutionFactory_GetSourceSettings(This, ...) CALL_IFACE_METHOD(SuperResolutionFactory, GetSourceSettings, This, __VA_ARGS__) +# define ISuperResolutionFactory_CreateSuperResolution(This, ...) CALL_IFACE_METHOD(SuperResolutionFactory, CreateSuperResolution, This, __VA_ARGS__) +# define ISuperResolutionFactory_SetMessageCallback(This, ...) 
CALL_IFACE_METHOD(SuperResolutionFactory, SetMessageCallback, This, __VA_ARGS__) +# define ISuperResolutionFactory_SetBreakOnError(This, ...) CALL_IFACE_METHOD(SuperResolutionFactory, SetBreakOnError, This, __VA_ARGS__) +# define ISuperResolutionFactory_SetMemoryAllocator(This, ...) CALL_IFACE_METHOD(SuperResolutionFactory, SetMemoryAllocator, This, __VA_ARGS__) + +#endif + +DILIGENT_END_NAMESPACE // namespace Diligent diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h b/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h new file mode 100644 index 0000000000..2da0dff36f --- /dev/null +++ b/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h @@ -0,0 +1,84 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +#include "SuperResolutionFactory.h" + +#if PLATFORM_ANDROID || PLATFORM_LINUX || PLATFORM_MACOS || PLATFORM_IOS || PLATFORM_TVOS || PLATFORM_WEB || (PLATFORM_WIN32 && !defined(_MSC_VER)) +// https://gcc.gnu.org/wiki/Visibility +# define API_QUALIFIER __attribute__((visibility("default"))) +#elif PLATFORM_WIN32 || PLATFORM_UNIVERSAL_WINDOWS +# define API_QUALIFIER +#else +# error Unsupported platform +#endif + +#if DILIGENT_SUPER_RESOLUTION_SHARED && PLATFORM_WIN32 && defined(_MSC_VER) +# include "../../GraphicsEngine/interface/LoadEngineDll.h" +# define DILIGENT_SUPER_RESOLUTION_EXPLICIT_LOAD 1 +#endif + +DILIGENT_BEGIN_NAMESPACE(Diligent) + +typedef struct ISuperResolutionFactory* (*GetSuperResolutionFactoryType)(); + +#if DILIGENT_SUPER_RESOLUTION_EXPLICIT_LOAD + +inline GetSuperResolutionFactoryType DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)() +{ + static GetSuperResolutionFactoryType GetFactoryFunc = NULL; + if (GetFactoryFunc == NULL) + { + GetFactoryFunc = (GetSuperResolutionFactoryType)LoadEngineDll("SuperResolution", "GetSuperResolutionFactory"); + } + return GetFactoryFunc; +} + +#else + +API_QUALIFIER +struct ISuperResolutionFactory* DILIGENT_GLOBAL_FUNCTION(GetSuperResolutionFactory)(); + +#endif + +/// Loads the 
SuperResolution implementation DLL if necessary and returns the SuperResolution factory. +inline struct ISuperResolutionFactory* DILIGENT_GLOBAL_FUNCTION(LoadAndGetSuperResolutionFactory)() +{ + GetSuperResolutionFactoryType GetFactoryFunc = NULL; +#if DILIGENT_SUPER_RESOLUTION_EXPLICIT_LOAD + GetFactoryFunc = DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)(); + if (GetFactoryFunc == NULL) + { + return NULL; + } +#else + GetFactoryFunc = DILIGENT_GLOBAL_FUNCTION(GetSuperResolutionFactory); +#endif + return GetFactoryFunc(); +} + +DILIGENT_END_NAMESPACE // namespace Diligent diff --git a/Graphics/SuperResolution/readme.md b/Graphics/SuperResolution/readme.md new file mode 100644 index 0000000000..1435f75c7a --- /dev/null +++ b/Graphics/SuperResolution/readme.md @@ -0,0 +1,2 @@ +# Super Resolution + diff --git a/Graphics/SuperResolution/src/DLLMain.cpp b/Graphics/SuperResolution/src/DLLMain.cpp new file mode 100644 index 0000000000..bdf50328dc --- /dev/null +++ b/Graphics/SuperResolution/src/DLLMain.cpp @@ -0,0 +1,53 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include +#include + +BOOL APIENTRY DllMain(HANDLE hModule, + DWORD ul_reason_for_call, + LPVOID lpReserved) +{ + switch (ul_reason_for_call) + { + case DLL_PROCESS_ATTACH: +#if defined(_DEBUG) || defined(DEBUG) + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + break; + } + + return TRUE; +} diff --git a/Graphics/SuperResolution/src/SuperResolution.def b/Graphics/SuperResolution/src/SuperResolution.def new file mode 100644 index 0000000000..8395d1fb20 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolution.def @@ -0,0 +1,2 @@ +EXPORTS + GetSuperResolutionFactory=Diligent_GetSuperResolutionFactory \ No newline at end of file diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp new file mode 100644 index 0000000000..fa235cf722 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -0,0 +1,155 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "SuperResolutionFactory.h" +#include "SuperResolutionFactoryLoader.h" +#include "DummyReferenceCounters.hpp" +#include "EngineMemory.h" +#include "PlatformDebug.hpp" + +namespace Diligent +{ + +namespace +{ + +class SuperResolutionFactoryImpl final : public ISuperResolutionFactory +{ +public: + static SuperResolutionFactoryImpl* GetInstance() + { + static SuperResolutionFactoryImpl TheFactory; + return &TheFactory; + } + + SuperResolutionFactoryImpl() : + m_RefCounters{*this} + {} + + virtual void DILIGENT_CALL_TYPE QueryInterface(const INTERFACE_ID& IID, IObject** ppInterface) override final; + + virtual ReferenceCounterValueType DILIGENT_CALL_TYPE AddRef() override final + { + return m_RefCounters.AddStrongRef(); + } + + virtual ReferenceCounterValueType DILIGENT_CALL_TYPE Release() override final + { + return m_RefCounters.ReleaseStrongRef(); + } + + virtual IReferenceCounters* DILIGENT_CALL_TYPE GetReferenceCounters() const override final + { + return const_cast(static_cast(&m_RefCounters)); + } + + virtual void DILIGENT_CALL_TYPE EnumerateVariants(IRenderDevice* pDevice, Uint32& NumVariants, SuperResolutionInfo* Variants) override final; + + virtual void DILIGENT_CALL_TYPE GetSourceSettings(IRenderDevice* pDevice, + const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) const override final; + + virtual void DILIGENT_CALL_TYPE CreateSuperResolution(IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) override final; + + virtual void DILIGENT_CALL_TYPE + SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; + + virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; + + virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; + +private: + DummyReferenceCounters m_RefCounters; +}; + + +void SuperResolutionFactoryImpl::QueryInterface(const INTERFACE_ID& IID, 
IObject** ppInterface) +{ + if (ppInterface == nullptr) + return; + + *ppInterface = nullptr; + if (IID == IID_Unknown || IID == IID_SuperResolutionFactory) + { + *ppInterface = this; + (*ppInterface)->AddRef(); + } +} + +void SuperResolutionFactoryImpl::EnumerateVariants(IRenderDevice* pDevice, Uint32& NumVariants, SuperResolutionInfo* Variants) +{ + NumVariants = 0; +} + +void SuperResolutionFactoryImpl::GetSourceSettings(IRenderDevice* pDevice, + const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) const +{ + Settings = {}; +} + +void SuperResolutionFactoryImpl::CreateSuperResolution(IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) +{ +} + +void SuperResolutionFactoryImpl::SetMessageCallback(DebugMessageCallbackType MessageCallback) const +{ + SetDebugMessageCallback(MessageCallback); +} + +void SuperResolutionFactoryImpl::SetBreakOnError(bool BreakOnError) const +{ + PlatformDebug::SetBreakOnError(BreakOnError); +} + +void SuperResolutionFactoryImpl::SetMemoryAllocator(IMemoryAllocator* pAllocator) const +{ + SetRawAllocator(pAllocator); +} + +} // namespace + + +API_QUALIFIER +ISuperResolutionFactory* GetSuperResolutionFactory() +{ + return SuperResolutionFactoryImpl::GetInstance(); +} + +} // namespace Diligent + +extern "C" +{ + API_QUALIFIER + Diligent::ISuperResolutionFactory* Diligent_GetSuperResolutionFactory() + { + return Diligent::GetSuperResolutionFactory(); + } +} diff --git a/Graphics/SuperResolution/src/SuperResolution_D3D12.cpp b/Graphics/SuperResolution/src/SuperResolution_D3D12.cpp new file mode 100644 index 0000000000..92fcc323a6 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolution_D3D12.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolution.h" + +namespace Diligent +{ + +} // namespace Diligent diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c new file mode 100644 index 0000000000..cbb14786a0 --- /dev/null +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c @@ -0,0 +1,36 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "DiligentCore/Graphics/SuperResolution/interface/SuperResolutionFactory.h" + +void TestSuperResolutionFactory_CInterface(ISuperResolutionFactory* pSuperResolutionFactory) +{ + IRenderDevice* pDevice = NULL; + ISuperResolutionFactory_EnumerateVariants(pSuperResolutionFactory, pDevice, (Uint32*)NULL, (SuperResolutionInfo*)NULL); + ISuperResolutionFactory_GetSourceSettings(pSuperResolutionFactory, pDevice, (const SuperResolutionSourceSettingsAttribs*)NULL, (SuperResolutionSourceSettings*)NULL); + ISuperResolutionFactory_CreateSuperResolution(pSuperResolutionFactory, pDevice, (const SuperResolutionDesc*)NULL, (ISuperResolution**)NULL); + ISuperResolutionFactory_SetMessageCallback(pSuperResolutionFactory, (DebugMessageCallbackType)NULL); +} diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.cpp b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.cpp new file mode 100644 index 0000000000..473932d2c1 --- /dev/null +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.cpp @@ -0,0 +1,27 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "DiligentCore/Graphics/SuperResolution/interface/SuperResolutionFactory.h" diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c new file mode 100644 index 0000000000..963b72cf0a --- /dev/null +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c @@ -0,0 +1,33 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "DiligentCore/Graphics/SuperResolution/interface/SuperResolution.h" + +void TestSuperResolution_CInterface(ISuperResolution* pUpscaler) +{ + ISuperResolution_GetJitterOffset(pUpscaler, 0, (float*)NULL, (float*)NULL); + ISuperResolution_Execute(pUpscaler, (const ExecuteSuperResolutionAttribs*)NULL); +} diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.cpp b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.cpp new file mode 100644 index 0000000000..d629afdc83 --- /dev/null +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.cpp @@ -0,0 +1,27 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "DiligentCore/Graphics/SuperResolution/interface/SuperResolution.h" From 9c44db2ab9dc96564b7283a8882cbc862821e387 Mon Sep 17 00:00:00 2001 From: assiduous Date: Sat, 7 Mar 2026 13:50:57 -0800 Subject: [PATCH 02/14] Super resolution: move SuperResolutionSourceSettingsAttribs to SuperResolutionFactory.h --- .../interface/SuperResolution.h | 56 ------------------ .../interface/SuperResolutionFactory.h | 57 +++++++++++++++++++ 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/Graphics/SuperResolution/interface/SuperResolution.h b/Graphics/SuperResolution/interface/SuperResolution.h index 355a3a02c8..9cee8356bd 100644 --- a/Graphics/SuperResolution/interface/SuperResolution.h +++ b/Graphics/SuperResolution/interface/SuperResolution.h @@ -60,28 +60,6 @@ DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_FLAGS, Uint32) DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_FLAGS) -/// Super resolution optimization type. -/// Defines the quality/performance trade-off for super resolution upscaling. -DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_OPTIMIZATION_TYPE, Uint8) -{ - /// Maximum quality, lowest performance. - SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY = 0u, - - /// Favor quality over performance. 
- SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY, - - /// Balanced quality/performance trade-off. - SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED, - - /// Favor performance over quality. - SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE, - - /// Maximum performance, lowest quality. - SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE, - - SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT -}; - /// This structure describes the super resolution upscaler object and is part of the creation /// parameters given to ISuperResolutionFactory::CreateSuperResolution(). struct SuperResolutionDesc DILIGENT_DERIVE(DeviceObjectAttribs) @@ -149,40 +127,6 @@ struct SuperResolutionDesc DILIGENT_DERIVE(DeviceObjectAttribs) typedef struct SuperResolutionDesc SuperResolutionDesc; -/// Attributes for querying the optimal source (input) settings for super resolution upscaling. -/// -/// This structure is used by ISuperResolutionFactory::GetSourceSettings(). -struct SuperResolutionSourceSettingsAttribs -{ - /// Unique identifier of the super resolution variant to create. - /// - /// Must match one of the VariantIds reported by ISuperResolutionFactory::EnumerateVariants(). - INTERFACE_ID VariantId DEFAULT_INITIALIZER({}); - - /// Target (output) texture width. Must be greater than zero. - Uint32 OutputWidth DEFAULT_INITIALIZER(0); - - /// Target (output) texture height. Must be greater than zero. - Uint32 OutputHeight DEFAULT_INITIALIZER(0); - - /// Output texture format. - /// - /// Some backends (e.g. DirectSR) may return different optimal input resolutions - /// depending on the output format. When set to TEX_FORMAT_UNKNOWN, the backend will use a reasonable default. - TEXTURE_FORMAT OutputFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); - - /// Flags controlling the super resolution behavior. - /// - /// These flags affect the optimal source resolution returned by the backend. - /// Must match the flags that will be used when creating the upscaler. 
- SUPER_RESOLUTION_FLAGS Flags DEFAULT_INITIALIZER(SUPER_RESOLUTION_FLAG_NONE); - - /// Optimization type controlling the quality/performance trade-off. - SUPER_RESOLUTION_OPTIMIZATION_TYPE OptimizationType DEFAULT_INITIALIZER(SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED); -}; -typedef struct SuperResolutionSourceSettingsAttribs SuperResolutionSourceSettingsAttribs; - - /// Super resolution execute attributes /// This structure is used by ISuperResolution::Execute(). diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactory.h b/Graphics/SuperResolution/interface/SuperResolutionFactory.h index 1bc90a3782..4f3d1e56cc 100644 --- a/Graphics/SuperResolution/interface/SuperResolutionFactory.h +++ b/Graphics/SuperResolution/interface/SuperResolutionFactory.h @@ -166,6 +166,63 @@ struct SuperResolutionSourceSettings typedef struct SuperResolutionSourceSettings SuperResolutionSourceSettings; +/// Super resolution optimization type. +/// Defines the quality/performance trade-off for super resolution upscaling. +DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_OPTIMIZATION_TYPE, Uint8) +{ + /// Maximum quality, lowest performance. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY = 0u, + + /// Favor quality over performance. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY, + + /// Balanced quality/performance trade-off. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED, + + /// Favor performance over quality. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE, + + /// Maximum performance, lowest quality. + SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE, + + SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT +}; + + +/// Attributes for querying the optimal source (input) settings for super resolution upscaling. +/// +/// This structure is used by ISuperResolutionFactory::GetSourceSettings(). +struct SuperResolutionSourceSettingsAttribs +{ + /// Unique identifier of the super resolution variant to create. 
+ /// + /// Must match one of the VariantIds reported by ISuperResolutionFactory::EnumerateVariants(). + INTERFACE_ID VariantId DEFAULT_INITIALIZER({}); + + /// Target (output) texture width. Must be greater than zero. + Uint32 OutputWidth DEFAULT_INITIALIZER(0); + + /// Target (output) texture height. Must be greater than zero. + Uint32 OutputHeight DEFAULT_INITIALIZER(0); + + /// Output texture format. + /// + /// Some backends (e.g. DirectSR) may return different optimal input resolutions + /// depending on the output format. When set to TEX_FORMAT_UNKNOWN, the backend will use a reasonable default. + TEXTURE_FORMAT OutputFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); + + /// Flags controlling the super resolution behavior. + /// + /// These flags affect the optimal source resolution returned by the backend. + /// Must match the flags that will be used when creating the upscaler. + SUPER_RESOLUTION_FLAGS Flags DEFAULT_INITIALIZER(SUPER_RESOLUTION_FLAG_NONE); + + /// Optimization type controlling the quality/performance trade-off. 
+ SUPER_RESOLUTION_OPTIMIZATION_TYPE OptimizationType DEFAULT_INITIALIZER(SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED); +}; +typedef struct SuperResolutionSourceSettingsAttribs SuperResolutionSourceSettingsAttribs; + + #define DILIGENT_INTERFACE_NAME ISuperResolutionFactory #include "../../../Primitives/interface/DefineInterfaceHelperMacros.h" From 4b9cddf0d2285aec267eaff46bcf202b29d4057f Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Thu, 12 Mar 2026 11:20:55 +0600 Subject: [PATCH 03/14] Implement SuperResolution API and factory infrastructure --- CMakeLists.txt | 14 +- Graphics/CMakeLists.txt | 5 +- Graphics/SuperResolution/CMakeLists.txt | 13 +- .../include/SuperResolutionBase.hpp | 86 ++++++++ .../include/SuperResolutionInternal.hpp | 47 +++++ .../interface/SuperResolution.h | 39 ++-- .../interface/SuperResolutionFactory.h | 22 +- .../interface/SuperResolutionFactoryLoader.h | 34 ++-- .../SuperResolution/src/SuperResolution.def | 2 +- .../src/SuperResolutionFactory.cpp | 190 ++++++++++++------ Tests/DiligentCoreAPITest/CMakeLists.txt | 9 + .../src/c_interface/SuperResolution_C_Test.c | 85 ++++++++ .../SuperResolutionFactoryH_test.c | 7 +- .../SuperResolution/SuperResolutionH_test.c | 1 + 14 files changed, 439 insertions(+), 115 deletions(-) create mode 100644 Graphics/SuperResolution/include/SuperResolutionBase.hpp create mode 100644 Graphics/SuperResolution/include/SuperResolutionInternal.hpp create mode 100644 Tests/DiligentCoreAPITest/src/c_interface/SuperResolution_C_Test.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a06ee52baa..980206ca2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,7 @@ set(VULKAN_SUPPORTED FALSE CACHE INTERNAL "Vulkan is not supported") set(METAL_SUPPORTED FALSE CACHE INTERNAL "Metal is not supported") set(WEBGPU_SUPPORTED FALSE CACHE INTERNAL "WebGPU is not supported") set(ARCHIVER_SUPPORTED FALSE CACHE INTERNAL "Archiver is not supported") +set(SUPER_RESOLUTION_SUPPORTED FALSE CACHE INTERNAL "Super 
resolution is not supported") set(DILIGENT_CORE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "DiligentCore module source directory") @@ -180,10 +181,11 @@ if(MINGW) endif() if(PLATFORM_WIN32) - set(GL_SUPPORTED TRUE CACHE INTERNAL "OpenGL is supported on Win32 platform") - set(VULKAN_SUPPORTED TRUE CACHE INTERNAL "Vulkan is supported on Win32 platform") - set(WEBGPU_SUPPORTED TRUE CACHE INTERNAL "WebGPU is supported on Win32 platform") - set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on Win32 platform") + set(GL_SUPPORTED TRUE CACHE INTERNAL "OpenGL is supported on Win32 platform") + set(VULKAN_SUPPORTED TRUE CACHE INTERNAL "Vulkan is supported on Win32 platform") + set(WEBGPU_SUPPORTED TRUE CACHE INTERNAL "WebGPU is supported on Win32 platform") + set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on Win32 platform") + set(SUPER_RESOLUTION_SUPPORTED TRUE CACHE INTERNAL "Super resolution is supported on Win32 platform") target_compile_definitions(Diligent-PublicBuildSettings INTERFACE PLATFORM_WIN32=1) elseif(PLATFORM_UNIVERSAL_WINDOWS) set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on Universal Windows platform") @@ -303,6 +305,7 @@ else() option(DILIGENT_NO_WEBGPU "Disable WebGPU backend" ON) endif() option(DILIGENT_NO_ARCHIVER "Do not build archiver" OFF) +option(DILIGENT_NO_SUPER_RESOLUTION "Do not build super resolution" OFF) option(DILIGENT_EMSCRIPTEN_STRIP_DEBUG_INFO "Strip debug information from WebAsm binaries" OFF) @@ -329,6 +332,9 @@ endif() if(${DILIGENT_NO_ARCHIVER}) set(ARCHIVER_SUPPORTED FALSE CACHE INTERNAL "Archiver is forcibly disabled") endif() +if(${DILIGENT_NO_SUPER_RESOLUTION}) + set(SUPER_RESOLUTION_SUPPORTED FALSE CACHE INTERNAL "Super resolution is forcibly disabled") +endif() if(NOT (${D3D11_SUPPORTED} OR ${D3D12_SUPPORTED} OR ${GL_SUPPORTED} OR ${GLES_SUPPORTED} OR ${VULKAN_SUPPORTED} OR ${METAL_SUPPORTED} OR ${WEBGPU_SUPPORTED})) message(FATAL_ERROR "No rendering backends 
are select to build") diff --git a/Graphics/CMakeLists.txt b/Graphics/CMakeLists.txt index 3cebd8ad10..49e32b169d 100644 --- a/Graphics/CMakeLists.txt +++ b/Graphics/CMakeLists.txt @@ -56,4 +56,7 @@ if(ARCHIVER_SUPPORTED) endif() add_subdirectory(GraphicsTools) -add_subdirectory(SuperResolution) + +if(SUPER_RESOLUTION_SUPPORTED) + add_subdirectory(SuperResolution) +endif() diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 6bae288a20..8ad4e3314a 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -1,10 +1,12 @@ -cmake_minimum_required (VERSION 3.10) +cmake_minimum_required (VERSION 3.11) include(../../BuildTools/CMake/BuildUtils.cmake) project(Diligent-SuperResolution CXX) set(INCLUDE + include/SuperResolutionBase.hpp + include/SuperResolutionInternal.hpp ) set(INTERFACE @@ -29,6 +31,7 @@ set(DLL_SOURCE add_library(Diligent-SuperResolutionInterface INTERFACE) target_link_libraries (Diligent-SuperResolutionInterface INTERFACE Diligent-GraphicsEngineInterface) target_include_directories(Diligent-SuperResolutionInterface INTERFACE interface) +target_compile_definitions(Diligent-SuperResolutionInterface INTERFACE SUPER_RESOLUTION_SUPPORTED=1) add_library(Diligent-SuperResolution-static STATIC ${SOURCE} ${INTERFACE} ${INCLUDE} @@ -45,17 +48,21 @@ endif() target_include_directories(Diligent-SuperResolution-static PRIVATE include + ../GraphicsEngine/include + ../GraphicsEngineD3DBase/include + ../GraphicsEngineNextGenBase/include ) target_compile_definitions(Diligent-SuperResolution-shared PUBLIC DILIGENT_SUPER_RESOLUTION_SHARED=1) - target_link_libraries(Diligent-SuperResolution-static PUBLIC Diligent-SuperResolutionInterface PRIVATE Diligent-BuildSettings Diligent-Common + Diligent-GraphicsAccessories + Diligent-ShaderTools ) if(D3D12_SUPPORTED) @@ -86,7 +93,7 @@ endif() if (MINGW_BUILD) # Restrict export to GetSuperResolutionFactory file(WRITE 
${CMAKE_CURRENT_BINARY_DIR}/export.map - "{ global: *GetSuperResolutionFactory*; local: *; };" + "{ global: *CreateSuperResolutionFactory*; local: *; };" ) target_link_options(Diligent-SuperResolution-shared PRIVATE LINKER:--version-script=export.map) endif() diff --git a/Graphics/SuperResolution/include/SuperResolutionBase.hpp b/Graphics/SuperResolution/include/SuperResolutionBase.hpp new file mode 100644 index 0000000000..34ecec4327 --- /dev/null +++ b/Graphics/SuperResolution/include/SuperResolutionBase.hpp @@ -0,0 +1,86 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#pragma once + +#include "SuperResolution.h" +#include "ObjectBase.hpp" + +#include <string> +#include <vector> + +namespace Diligent +{ + +class SuperResolutionBase : public ObjectBase<ISuperResolution> +{ +public: + using TBase = ObjectBase<ISuperResolution>; + + SuperResolutionBase(IReferenceCounters* pRefCounters, + const SuperResolutionDesc& Desc) : + TBase{pRefCounters}, + m_Name{Desc.Name != nullptr ? Desc.Name : ""}, + m_Desc{Desc} + { + m_Desc.Name = m_Name.c_str(); // Point the stored description at the owned copy of the name + } + + IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolution, TBase) + + virtual const SuperResolutionDesc& DILIGENT_CALL_TYPE GetDesc() const override final + { + return m_Desc; + } + + virtual void DILIGENT_CALL_TYPE GetJitterOffset(Uint32 Index, float& JitterX, float& JitterY) const override final + { + if (!m_JitterPattern.empty()) + { + const Uint32 WrappedIndex = Index % static_cast<Uint32>(m_JitterPattern.size()); + JitterX = m_JitterPattern[WrappedIndex].X; + JitterY = m_JitterPattern[WrappedIndex].Y; + } + else + { + JitterX = 0.0f; + JitterY = 0.0f; + } + } + +protected: + struct JitterOffset + { + float X = 0.0f; + float Y = 0.0f; + }; + + const std::string m_Name; + SuperResolutionDesc m_Desc; + std::vector<JitterOffset> m_JitterPattern; // When empty, GetJitterOffset() reports (0, 0) +}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionInternal.hpp b/Graphics/SuperResolution/include/SuperResolutionInternal.hpp new file mode 100644 index 0000000000..8ce66908fa --- /dev/null +++ b/Graphics/SuperResolution/include/SuperResolutionInternal.hpp @@ -0,0 +1,47 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +#include "SuperResolutionFactory.h" + +#include <array> +#include <vector> + +namespace Diligent +{ + +enum SUPER_RESOLUTION_BACKEND : Uint8 +{ + SUPER_RESOLUTION_BACKEND_D3D12_DSR, + SUPER_RESOLUTION_BACKEND_METAL_FX, + SUPER_RESOLUTION_BACKEND_SOFTWARE, + SUPER_RESOLUTION_BACKEND_COUNT // Number of backends; also the size of SuperResolutionVariants +}; + +using SuperResolutionVariants = std::array<std::vector<SuperResolutionInfo>, SUPER_RESOLUTION_BACKEND_COUNT>; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/interface/SuperResolution.h b/Graphics/SuperResolution/interface/SuperResolution.h index 9cee8356bd..3e9e4cb006 100644 --- a/Graphics/SuperResolution/interface/SuperResolution.h +++ b/Graphics/SuperResolution/interface/SuperResolution.h @@ -29,7 +29,7 @@ /// \file /// Defines Diligent::ISuperResolution interface and related data structures -#include "../../GraphicsEngine/interface/DeviceObject.h" +#include "../../../Primitives/interface/Object.h" #include "../../GraphicsEngine/interface/GraphicsTypes.h" #include "../../GraphicsEngine/interface/TextureView.h" #include "../../GraphicsEngine/interface/DeviceContext.h" @@ -62,7 +62,10 @@ DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_FLAGS) /// This structure describes the 
super resolution upscaler object and is part of the creation /// parameters given to ISuperResolutionFactory::CreateSuperResolution(). -struct SuperResolutionDesc DILIGENT_DERIVE(DeviceObjectAttribs) +struct SuperResolutionDesc +{ + /// Object name. + const Char* Name DEFAULT_INITIALIZER(nullptr); /// Unique identifier of the super resolution variant to create. /// @@ -107,13 +110,6 @@ struct SuperResolutionDesc DILIGENT_DERIVE(DeviceObjectAttribs) /// Optional. Used for temporal upscaling to guide the denoiser for areas with inaccurate motion information (e.g., alpha-blended objects). TEXTURE_FORMAT ReactiveMaskFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); - /// Ignore history mask texture format. - /// - /// Optional. Used for temporal upscaling to indicate regions where temporal history - /// should be completely discarded (binary mask: 0 = use history, 1 = ignore history). - /// Unlike the reactive mask which provides proportional control, this is a binary decision. - TEXTURE_FORMAT IgnoreHistoryMaskFormat DEFAULT_INITIALIZER(TEX_FORMAT_UNKNOWN); - /// Exposure scale texture format. /// /// Optional. When auto-exposure is disabled, specifies the format of the 1x1 exposure @@ -178,7 +174,7 @@ struct ExecuteSuperResolutionAttribs /// where temporal history should be completely discarded. /// Unlike the reactive mask which provides proportional control, /// this is a binary decision (discard or keep). - /// Only used when SuperResolutionDesc::IgnoreHistoryMaskFormat != TEX_FORMAT_UNKNOWN. + /// Format must be TEX_FORMAT_R8_UINT. ITextureView* pIgnoreHistoryMaskTextureSRV DEFAULT_INITIALIZER(nullptr); /// Jitter offset X applied to the projection matrix (in pixels). 
@@ -263,8 +259,8 @@ typedef struct ExecuteSuperResolutionAttribs ExecuteSuperResolutionAttribs; #define DILIGENT_INTERFACE_NAME ISuperResolution #include "../../../Primitives/interface/DefineInterfaceHelperMacros.h" -#define ISuperResolutionInclusiveMethods \ - IDeviceObjectInclusiveMethods; \ +#define ISuperResolutionInclusiveMethods \ + IObjectInclusiveMethods; \ ISuperResolutionMethods SuperResolution /// Super resolution upscaler interface. @@ -272,26 +268,24 @@ typedef struct ExecuteSuperResolutionAttribs ExecuteSuperResolutionAttribs; /// The super resolution object encapsulates a hardware-accelerated or software-based super resolution /// effect (e.g., MetalFX on Metal, DirectSR on D3D12). /// It is created via ISuperResolutionFactory::CreateSuperResolution(). -DILIGENT_BEGIN_INTERFACE(ISuperResolution, IDeviceObject) +DILIGENT_BEGIN_INTERFACE(ISuperResolution, IObject) { -#if DILIGENT_CPP_INTERFACE /// Returns the super resolution description used to create the object. - virtual const SuperResolutionDesc& METHOD(GetDesc)() const override = 0; -#endif + VIRTUAL const SuperResolutionDesc REF METHOD(GetDesc)(THIS) CONST PURE; /// Returns the optimal jitter offset for the given frame index. - /// \param [in] Index - Frame index. The sequence wraps automatically. - /// \param [out] pJitterX - Jitter offset X in pixel space, typically in [-0.5, 0.5] range. - /// \param [out] pJitterY - Jitter offset Y in pixel space, typically in [-0.5, 0.5] range. + /// \param [in] Index - Frame index. The sequence wraps automatically. + /// \param [out] JitterX - Jitter offset X in pixel space, typically in [-0.5, 0.5] range. + /// \param [out] JitterY - Jitter offset Y in pixel space, typically in [-0.5, 0.5] range. /// /// For temporal upscaling, the upscaler provides a recommended jitter pattern /// (e.g. Halton sequence) that should be applied to the projection matrix each frame. /// For spatial upscaling, both values are set to zero. 
VIRTUAL void METHOD(GetJitterOffset)(THIS_ Uint32 Index, - float* pJitterX, - float* pJitterY) CONST PURE; + float REF JitterX, + float REF JitterY) CONST PURE; /// Executes the super resolution upscaler. @@ -315,8 +309,7 @@ DILIGENT_END_INTERFACE #if DILIGENT_C_INTERFACE // clang-format off -# define ISuperResolution_GetDesc(This) (const struct SuperResolutionDesc*)IDeviceObject_GetDesc(This) - +# define ISuperResolution_GetDesc(This) CALL_IFACE_METHOD(SuperResolution, GetDesc, This) # define ISuperResolution_GetJitterOffset(This, ...) CALL_IFACE_METHOD(SuperResolution, GetJitterOffset, This, __VA_ARGS__) # define ISuperResolution_Execute(This, ...) CALL_IFACE_METHOD(SuperResolution, Execute, This, __VA_ARGS__) diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactory.h b/Graphics/SuperResolution/interface/SuperResolutionFactory.h index 4f3d1e56cc..a79369a7dc 100644 --- a/Graphics/SuperResolution/interface/SuperResolutionFactory.h +++ b/Graphics/SuperResolution/interface/SuperResolutionFactory.h @@ -233,11 +233,14 @@ typedef struct SuperResolutionSourceSettingsAttribs SuperResolutionSourceSetting // clang-format off /// SuperResolution factory interface +/// +/// The factory is created per render device using CreateSuperResolutionFactory(). +/// It enumerates available super resolution backends, queries optimal settings, +/// and creates upscaler instances for the device it was created with. DILIGENT_BEGIN_INTERFACE(ISuperResolutionFactory, IObject) { - /// Enumerates the supported super resolution variants for the given render device. + /// Enumerates the supported super resolution variants. - /// \param [in] pDevice - Render device to query the supported super resolution variants for. /// \param [in, out] NumVariants - Number of super resolution variants. If `Variants` is null, this /// parameter is used to return the number of supported variants. 
/// If `Variants` is not null, this parameter should contain the maximum number @@ -246,14 +249,12 @@ DILIGENT_BEGIN_INTERFACE(ISuperResolutionFactory, IObject) /// \param [out] Variants - Array to receive the supported super resolution variants. /// Each variant is described by SuperResolutionInfo structure. VIRTUAL void METHOD(EnumerateVariants)(THIS_ - IRenderDevice* pDevice, Uint32 REF NumVariants, SuperResolutionInfo* Variants) PURE; /// Returns the optimal source (input) settings for super resolution upscaling. - /// \param [in] pDevice - Render device to query the optimal source settings for. /// \param [in] Attribs - Attributes, see Diligent::SuperResolutionSourceSettingsAttribs for details. /// \param [out] Settings - On success, receives the optimal source settings, /// see Diligent::SuperResolutionSourceSettings for details. @@ -262,14 +263,12 @@ DILIGENT_BEGIN_INTERFACE(ISuperResolutionFactory, IObject) /// Use this method to determine the optimal render resolution before creating /// the upscaler object. VIRTUAL void METHOD(GetSourceSettings)(THIS_ - IRenderDevice* pDevice, const SuperResolutionSourceSettingsAttribs REF Attribs, SuperResolutionSourceSettings REF Settings) CONST PURE; - /// Creates a new upscaler object. + /// Creates a new upscaler object. - /// \param [in] pDevice - Render device to create the upscaler for. /// \param [in] Desc - Super resolution upscaler description, see Diligent::SuperResolutionDesc for details. /// \param [out] ppUpscaler - Address of the memory location where a pointer to the /// super resolution upscaler interface will be written. @@ -279,7 +278,6 @@ DILIGENT_BEGIN_INTERFACE(ISuperResolutionFactory, IObject) /// \remarks On backends that don't support hardware upscaling, the method will /// return nullptr. 
VIRTUAL void METHOD(CreateSuperResolution)(THIS_ - IRenderDevice* pDevice, const SuperResolutionDesc REF Desc, ISuperResolution** ppUpscaler) PURE; @@ -324,4 +322,12 @@ DILIGENT_END_INTERFACE #endif +/// Creates a super resolution factory for the specified render device. + +/// \param [in] pDevice - Render device to create the factory for. +/// \param [out] ppFactory - Address of the memory location where a pointer to the +/// super resolution factory interface will be written. +void DILIGENT_GLOBAL_FUNCTION(CreateSuperResolutionFactory)(IRenderDevice* pDevice, + ISuperResolutionFactory** ppFactory); + DILIGENT_END_NAMESPACE // namespace Diligent diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h b/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h index 2da0dff36f..8c98f2c5ff 100644 --- a/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h +++ b/Graphics/SuperResolution/interface/SuperResolutionFactoryLoader.h @@ -44,41 +44,45 @@ DILIGENT_BEGIN_NAMESPACE(Diligent) -typedef struct ISuperResolutionFactory* (*GetSuperResolutionFactoryType)(); +typedef void (*CreateSuperResolutionFactoryType)(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); #if DILIGENT_SUPER_RESOLUTION_EXPLICIT_LOAD -inline GetSuperResolutionFactoryType DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)() +inline CreateSuperResolutionFactoryType DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)() { - static GetSuperResolutionFactoryType GetFactoryFunc = NULL; - if (GetFactoryFunc == NULL) + static CreateSuperResolutionFactoryType CreateFactoryFunc = NULL; + if (CreateFactoryFunc == NULL) { - GetFactoryFunc = (GetSuperResolutionFactoryType)LoadEngineDll("SuperResolution", "GetSuperResolutionFactory"); + CreateFactoryFunc = (CreateSuperResolutionFactoryType)LoadEngineDll("SuperResolution", "CreateSuperResolutionFactory"); } - return GetFactoryFunc; + return CreateFactoryFunc; } #else API_QUALIFIER -struct 
ISuperResolutionFactory* DILIGENT_GLOBAL_FUNCTION(GetSuperResolutionFactory)(); +void DILIGENT_GLOBAL_FUNCTION(CreateSuperResolutionFactory)(IRenderDevice* pDevice, + ISuperResolutionFactory** ppFactory); #endif -/// Loads the SuperResolution implementation DLL if necessary and returns the SuperResolution factory. -inline struct ISuperResolutionFactory* DILIGENT_GLOBAL_FUNCTION(LoadAndGetSuperResolutionFactory)() +/// Loads the SuperResolution implementation DLL if necessary and creates a SuperResolution factory +/// for the specified render device. +inline void DILIGENT_GLOBAL_FUNCTION(LoadAndCreateSuperResolutionFactory)(IRenderDevice* pDevice, + ISuperResolutionFactory** ppFactory) { - GetSuperResolutionFactoryType GetFactoryFunc = NULL; + CreateSuperResolutionFactoryType CreateFactoryFunc = NULL; #if DILIGENT_SUPER_RESOLUTION_EXPLICIT_LOAD - GetFactoryFunc = DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)(); - if (GetFactoryFunc == NULL) + CreateFactoryFunc = DILIGENT_GLOBAL_FUNCTION(LoadSuperResolutionFactory)(); + if (CreateFactoryFunc == NULL) { - return NULL; + if (ppFactory != NULL) *ppFactory = NULL; // Entry point could not be loaded; report "no factory" without dereferencing a null output pointer + return; } #else - GetFactoryFunc = DILIGENT_GLOBAL_FUNCTION(GetSuperResolutionFactory); + CreateFactoryFunc = DILIGENT_GLOBAL_FUNCTION(CreateSuperResolutionFactory); #endif - return GetFactoryFunc(); + CreateFactoryFunc(pDevice, ppFactory); } DILIGENT_END_NAMESPACE // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolution.def b/Graphics/SuperResolution/src/SuperResolution.def index 8395d1fb20..ffa1fdd26d 100644 --- a/Graphics/SuperResolution/src/SuperResolution.def +++ b/Graphics/SuperResolution/src/SuperResolution.def @@ -1,2 +1,2 @@ EXPORTS - GetSuperResolutionFactory=Diligent_GetSuperResolutionFactory \ No newline at end of file + CreateSuperResolutionFactory=Diligent_CreateSuperResolutionFactory diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp index fa235cf722..0c7c985b85 
100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -26,9 +26,12 @@ #include "SuperResolutionFactory.h" #include "SuperResolutionFactoryLoader.h" -#include "DummyReferenceCounters.hpp" +#include "ObjectBase.hpp" +#include "RefCntAutoPtr.hpp" #include "EngineMemory.h" #include "PlatformDebug.hpp" +#include "DebugUtilities.hpp" +#include "SuperResolutionInternal.hpp" namespace Diligent { @@ -36,87 +39,148 @@ namespace Diligent namespace { -class SuperResolutionFactoryImpl final : public ISuperResolutionFactory +class SuperResolutionFactoryImpl final : public ObjectBase<ISuperResolutionFactory> { public: - static SuperResolutionFactoryImpl* GetInstance() - { - static SuperResolutionFactoryImpl TheFactory; - return &TheFactory; - } - - SuperResolutionFactoryImpl() : - m_RefCounters{*this} - {} - - virtual void DILIGENT_CALL_TYPE QueryInterface(const INTERFACE_ID& IID, IObject** ppInterface) override final; - - virtual ReferenceCounterValueType DILIGENT_CALL_TYPE AddRef() override final - { - return m_RefCounters.AddStrongRef(); - } + using TBase = ObjectBase<ISuperResolutionFactory>; - virtual ReferenceCounterValueType DILIGENT_CALL_TYPE Release() override final - { - return m_RefCounters.ReleaseStrongRef(); - } + SuperResolutionFactoryImpl(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); - virtual IReferenceCounters* DILIGENT_CALL_TYPE GetReferenceCounters() const override final - { - return const_cast<IReferenceCounters*>(static_cast<const IReferenceCounters*>(&m_RefCounters)); - } + IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) - virtual void DILIGENT_CALL_TYPE EnumerateVariants(IRenderDevice* pDevice, Uint32& NumVariants, SuperResolutionInfo* Variants) override final; + virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final; - virtual void DILIGENT_CALL_TYPE GetSourceSettings(IRenderDevice* pDevice, - const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& 
Settings) const override final; + virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; - virtual void DILIGENT_CALL_TYPE CreateSuperResolution(IRenderDevice* pDevice, - const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) override final; + virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; - virtual void DILIGENT_CALL_TYPE - SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; + virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; private: - DummyReferenceCounters<SuperResolutionFactoryImpl> m_RefCounters; + void PopulateVariants(); + +private: + SUPER_RESOLUTION_BACKEND FindVariant(const INTERFACE_ID& VariantId) const; + + RefCntAutoPtr<IRenderDevice> m_pDevice; + SuperResolutionVariants m_Variants{}; }; -void SuperResolutionFactoryImpl::QueryInterface(const INTERFACE_ID& IID, IObject** ppInterface) +SuperResolutionFactoryImpl::SuperResolutionFactoryImpl(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + TBase{pRefCounters}, + m_pDevice{pDevice} { - if (ppInterface == nullptr) - return; + PopulateVariants(); +} - *ppInterface = nullptr; - if (IID == IID_Unknown || IID == IID_SuperResolutionFactory) +void SuperResolutionFactoryImpl::PopulateVariants() +{ +} + +SUPER_RESOLUTION_BACKEND SuperResolutionFactoryImpl::FindVariant(const INTERFACE_ID& VariantId) const +{ + for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) { - *ppInterface = this; - (*ppInterface)->AddRef(); + for (const SuperResolutionInfo& Info : m_Variants[BackendIdx]) + { + if (Info.VariantId == VariantId) + return 
static_cast<SUPER_RESOLUTION_BACKEND>(BackendIdx); + } } + return SUPER_RESOLUTION_BACKEND_COUNT; } -void SuperResolutionFactoryImpl::EnumerateVariants(IRenderDevice* pDevice, Uint32& NumVariants, SuperResolutionInfo* Variants) +void SuperResolutionFactoryImpl::EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) { - NumVariants = 0; + Uint32 Count = 0; + for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) + Count += static_cast<Uint32>(m_Variants[BackendIdx].size()); + + if (Variants == nullptr) + { + NumVariants = Count; + return; + } + + const Uint32 MaxVariants = NumVariants; // Caller-provided capacity of the Variants array + NumVariants = 0; + for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) + { + for (const SuperResolutionInfo& Info : m_Variants[BackendIdx]) + { + if (NumVariants >= MaxVariants) + break; + Variants[NumVariants++] = Info; + } + } } -void SuperResolutionFactoryImpl::GetSourceSettings(IRenderDevice* pDevice, - const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) const +void SuperResolutionFactoryImpl::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const { Settings = {}; + + const SUPER_RESOLUTION_BACKEND Backend = FindVariant(Attribs.VariantId); + if (Backend == SUPER_RESOLUTION_BACKEND_COUNT) + { + LOG_WARNING_MESSAGE("Super resolution variant not found for the specified VariantId"); + return; + } + + switch (Backend) + { + case SUPER_RESOLUTION_BACKEND_D3D12_DSR: + case SUPER_RESOLUTION_BACKEND_METAL_FX: + case SUPER_RESOLUTION_BACKEND_SOFTWARE: + LOG_WARNING_MESSAGE("Super resolution backend is not implemented"); + break; + + default: + LOG_WARNING_MESSAGE("Unknown super resolution backend"); + break; + } } -void SuperResolutionFactoryImpl::CreateSuperResolution(IRenderDevice* pDevice, - const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) +void SuperResolutionFactoryImpl::CreateSuperResolution(const SuperResolutionDesc& Desc, 
ISuperResolution** ppUpscaler) { + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + if (ppUpscaler == nullptr) + return; + + DEV_CHECK_ERR(*ppUpscaler == nullptr, "*ppUpscaler is not null. Overwriting reference to existing object may cause memory leaks"); + + *ppUpscaler = nullptr; + + const SUPER_RESOLUTION_BACKEND Backend = FindVariant(Desc.VariantId); + if (Backend == SUPER_RESOLUTION_BACKEND_COUNT) + { + LOG_ERROR_MESSAGE("Super resolution variant not found for the specified VariantId. Call EnumerateVariants() to get valid variant IDs."); + return; + } + + try + { + switch (Backend) + { + case SUPER_RESOLUTION_BACKEND_D3D12_DSR: + case SUPER_RESOLUTION_BACKEND_METAL_FX: + case SUPER_RESOLUTION_BACKEND_SOFTWARE: + LOG_WARNING_MESSAGE("Super resolution backend is not implemented"); + break; + + default: + LOG_ERROR_MESSAGE("Unknown super resolution backend"); + break; + } + } + catch (...) + { + LOG_ERROR("Failed to create super resolution upscaler '", (Desc.Name ? Desc.Name : ""), "'"); + } } void SuperResolutionFactoryImpl::SetMessageCallback(DebugMessageCallbackType MessageCallback) const @@ -137,10 +201,23 @@ void SuperResolutionFactoryImpl::SetMemoryAllocator(IMemoryAllocator* pAllocator } // namespace -API_QUALIFIER -ISuperResolutionFactory* GetSuperResolutionFactory() +API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) { - return SuperResolutionFactoryImpl::GetInstance(); + DEV_CHECK_ERR(ppFactory != nullptr, "ppFactory must not be null"); + if (ppFactory == nullptr) + return; + + *ppFactory = nullptr; + + try + { + SuperResolutionFactoryImpl* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryImpl instance", SuperResolutionFactoryImpl)(pDevice); + pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast<IObject**>(ppFactory)); + } + catch (...) 
+ { + LOG_ERROR("Failed to create super resolution factory"); + } } } // namespace Diligent @@ -148,8 +225,9 @@ ISuperResolutionFactory* GetSuperResolutionFactory() extern "C" { API_QUALIFIER - Diligent::ISuperResolutionFactory* Diligent_GetSuperResolutionFactory() + void Diligent_CreateSuperResolutionFactory(Diligent::IRenderDevice* pDevice, + Diligent::ISuperResolutionFactory** ppFactory) { - return Diligent::GetSuperResolutionFactory(); + Diligent::CreateSuperResolutionFactory(pDevice, ppFactory); } } diff --git a/Tests/DiligentCoreAPITest/CMakeLists.txt b/Tests/DiligentCoreAPITest/CMakeLists.txt index ef5c7fe764..89b20f6dda 100644 --- a/Tests/DiligentCoreAPITest/CMakeLists.txt +++ b/Tests/DiligentCoreAPITest/CMakeLists.txt @@ -35,6 +35,11 @@ if(NOT ARCHIVER_SUPPORTED) list(REMOVE_ITEM SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/RenderStateCacheTest.cpp) endif() +if(NOT SUPER_RESOLUTION_SUPPORTED) + list(REMOVE_ITEM SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/SuperResolutionTest.cpp) + list(REMOVE_ITEM SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/c_interface/SuperResolution_C_Test.c) +endif() + if(D3D11_SUPPORTED) file(GLOB D3D11_SOURCE LIST_DIRECTORIES false src/D3D11/*) file(GLOB D3D11_INCLUDE LIST_DIRECTORIES false include/D3D11/*) @@ -114,6 +119,10 @@ PRIVATE Diligent-ShaderTools ) +if(SUPER_RESOLUTION_SUPPORTED) + target_link_libraries(DiligentCoreAPITest PRIVATE Diligent-SuperResolution-static) +endif() + if(TARGET Diligent-HLSL2GLSLConverterLib) target_link_libraries(DiligentCoreAPITest PRIVATE Diligent-HLSL2GLSLConverterLib) endif() diff --git a/Tests/DiligentCoreAPITest/src/c_interface/SuperResolution_C_Test.c b/Tests/DiligentCoreAPITest/src/c_interface/SuperResolution_C_Test.c new file mode 100644 index 0000000000..b7a5c40fd0 --- /dev/null +++ b/Tests/DiligentCoreAPITest/src/c_interface/SuperResolution_C_Test.c @@ -0,0 +1,85 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "SuperResolution.h" +#include "SuperResolutionFactory.h" + +int TestObjectCInterface(struct IObject* pObject); + +int TestSuperResolutionCInterface(struct ISuperResolution* pUpscaler) +{ + IObject* pUnknown = NULL; + ReferenceCounterValueType RefCnt1 = 0, RefCnt2 = 0; + + const SuperResolutionDesc* pUpscalerDesc = NULL; + float JitterX = 0.0f; + float JitterY = 0.0f; + + int num_errors = + TestObjectCInterface((struct IObject*)pUpscaler); + + IObject_QueryInterface(pUpscaler, &IID_Unknown, &pUnknown); + if (pUnknown != NULL) + IObject_Release(pUnknown); + else + ++num_errors; + + RefCnt1 = IObject_AddRef(pUpscaler); + if (RefCnt1 <= 1) + ++num_errors; + RefCnt2 = IObject_Release(pUpscaler); + if (RefCnt2 <= 0) + ++num_errors; + if (RefCnt2 != RefCnt1 - 1) + ++num_errors; + + pUpscalerDesc = ISuperResolution_GetDesc(pUpscaler); + if (pUpscalerDesc == NULL) + ++num_errors; + if (pUpscalerDesc != NULL && pUpscalerDesc->Name == NULL) + ++num_errors; + if (pUpscalerDesc != NULL && pUpscalerDesc->InputWidth == 0) + ++num_errors; + if (pUpscalerDesc != NULL && pUpscalerDesc->InputHeight == 0) + ++num_errors; + + ISuperResolution_GetJitterOffset(pUpscaler, 0, &JitterX, &JitterY); + (void)JitterX; + (void)JitterY; + + return num_errors; +} + +int TestSuperResolutionFactoryCInterface(struct ISuperResolutionFactory* pFactory) +{ + int num_errors = 0; + Uint32 NumVariants = 0; + + ISuperResolutionFactory_EnumerateVariants(pFactory, &NumVariants, NULL); + (void)NumVariants; + + return num_errors; +} diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c index cbb14786a0..27b6a6a7d2 100644 --- a/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionFactoryH_test.c @@ -28,9 +28,8 @@ void TestSuperResolutionFactory_CInterface(ISuperResolutionFactory* pSuperResolutionFactory) { - IRenderDevice* pDevice = NULL; - ISuperResolutionFactory_EnumerateVariants(pSuperResolutionFactory, 
pDevice, (Uint32*)NULL, (SuperResolutionInfo*)NULL); - ISuperResolutionFactory_GetSourceSettings(pSuperResolutionFactory, pDevice, (const SuperResolutionSourceSettingsAttribs*)NULL, (SuperResolutionSourceSettings*)NULL); - ISuperResolutionFactory_CreateSuperResolution(pSuperResolutionFactory, pDevice, (const SuperResolutionDesc*)NULL, (ISuperResolution**)NULL); + ISuperResolutionFactory_EnumerateVariants(pSuperResolutionFactory, (Uint32*)NULL, (SuperResolutionInfo*)NULL); + ISuperResolutionFactory_GetSourceSettings(pSuperResolutionFactory, (const SuperResolutionSourceSettingsAttribs*)NULL, (SuperResolutionSourceSettings*)NULL); + ISuperResolutionFactory_CreateSuperResolution(pSuperResolutionFactory, (const SuperResolutionDesc*)NULL, (ISuperResolution**)NULL); ISuperResolutionFactory_SetMessageCallback(pSuperResolutionFactory, (DebugMessageCallbackType)NULL); } diff --git a/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c index 963b72cf0a..60abe3091a 100644 --- a/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c +++ b/Tests/IncludeTest/SuperResolution/SuperResolutionH_test.c @@ -28,6 +28,7 @@ void TestSuperResolution_CInterface(ISuperResolution* pUpscaler) { + ISuperResolution_GetDesc(pUpscaler); ISuperResolution_GetJitterOffset(pUpscaler, 0, (float*)NULL, (float*)NULL); ISuperResolution_Execute(pUpscaler, (const ExecuteSuperResolutionAttribs*)NULL); } From 40f9051e0b635bcca0eab388df6c761bf81ffaed Mon Sep 17 00:00:00 2001 From: assiduous Date: Sun, 15 Mar 2026 15:52:29 -0700 Subject: [PATCH 04/14] Refactor super-resolution factory to use one subclass per device type --- Graphics/SuperResolution/CMakeLists.txt | 10 +- ...nal.hpp => SuperResolutionFactoryBase.hpp} | 29 ++- .../src/SuperResolutionFactory.cpp | 233 ------------------ .../src/SuperResolutionFactoryBase.cpp | 122 +++++++++ .../src/SuperResolutionFactoryD3D12.cpp | 84 +++++++ 5 files changed, 234 insertions(+), 244 deletions(-) 
rename Graphics/SuperResolution/include/{SuperResolutionInternal.hpp => SuperResolutionFactoryBase.hpp} (61%) delete mode 100644 Graphics/SuperResolution/src/SuperResolutionFactory.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 8ad4e3314a..0d7d2e41e7 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -6,7 +6,7 @@ project(Diligent-SuperResolution CXX) set(INCLUDE include/SuperResolutionBase.hpp - include/SuperResolutionInternal.hpp + include/SuperResolutionFactoryBase.hpp ) set(INTERFACE @@ -16,11 +16,15 @@ set(INTERFACE ) set(SOURCE - src/SuperResolutionFactory.cpp + src/SuperResolutionFactoryBase.cpp ) if(D3D12_SUPPORTED) - list(APPEND SOURCE src/SuperResolution_D3D12.cpp) + list(APPEND + SOURCE + src/SuperResolution_D3D12.cpp + src/SuperResolutionFactoryD3D12.cpp + ) endif() set(DLL_SOURCE diff --git a/Graphics/SuperResolution/include/SuperResolutionInternal.hpp b/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp similarity index 61% rename from Graphics/SuperResolution/include/SuperResolutionInternal.hpp rename to Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp index 8ce66908fa..69cb1a7f39 100644 --- a/Graphics/SuperResolution/include/SuperResolutionInternal.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp @@ -27,21 +27,34 @@ #pragma once #include "SuperResolutionFactory.h" +#include "ObjectBase.hpp" +#include "RefCntAutoPtr.hpp" -#include #include namespace Diligent { -enum SUPER_RESOLUTION_BACKEND : Uint8 +class SuperResolutionFactoryBase : public ObjectBase { - SUPER_RESOLUTION_BACKEND_D3D12_DSR, - SUPER_RESOLUTION_BACKEND_METAL_FX, - SUPER_RESOLUTION_BACKEND_SOFTWARE, - SUPER_RESOLUTION_BACKEND_COUNT -}; +public: + using TBase = ObjectBase; 
+ + SuperResolutionFactoryBase(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); + + IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) + + virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final; -using SuperResolutionVariants = std::array, SUPER_RESOLUTION_BACKEND_COUNT>; + virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; + + virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; + + virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; + +private: + RefCntAutoPtr m_pDevice; + std::vector m_Variants{}; +}; } // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp deleted file mode 100644 index 0c7c985b85..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#include "SuperResolutionFactory.h" -#include "SuperResolutionFactoryLoader.h" -#include "ObjectBase.hpp" -#include "RefCntAutoPtr.hpp" -#include "EngineMemory.h" -#include "PlatformDebug.hpp" -#include "DebugUtilities.hpp" -#include "SuperResolutionInternal.hpp" - -namespace Diligent -{ - -namespace -{ - -class SuperResolutionFactoryImpl final : public ObjectBase -{ -public: - using TBase = ObjectBase; - - SuperResolutionFactoryImpl(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); - - IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) - - virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final; - - virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; - - virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; - - virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; - - virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; - - virtual void DILIGENT_CALL_TYPE 
SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; - -private: - void PopulateVariants(); - -private: - SUPER_RESOLUTION_BACKEND FindVariant(const INTERFACE_ID& VariantId) const; - - RefCntAutoPtr m_pDevice; - SuperResolutionVariants m_Variants{}; -}; - - -SuperResolutionFactoryImpl::SuperResolutionFactoryImpl(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - TBase{pRefCounters}, - m_pDevice{pDevice} -{ - PopulateVariants(); -} - -void SuperResolutionFactoryImpl::PopulateVariants() -{ -} - -SUPER_RESOLUTION_BACKEND SuperResolutionFactoryImpl::FindVariant(const INTERFACE_ID& VariantId) const -{ - for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) - { - for (const SuperResolutionInfo& Info : m_Variants[BackendIdx]) - { - if (Info.VariantId == VariantId) - return static_cast(BackendIdx); - } - } - return SUPER_RESOLUTION_BACKEND_COUNT; -} - -void SuperResolutionFactoryImpl::EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) -{ - Uint32 Count = 0; - for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) - Count += static_cast(m_Variants[BackendIdx].size()); - - if (Variants == nullptr) - { - NumVariants = Count; - return; - } - - const Uint32 MaxVariants = NumVariants; - NumVariants = 0; - for (Uint32 BackendIdx = 0; BackendIdx < SUPER_RESOLUTION_BACKEND_COUNT; ++BackendIdx) - { - for (const SuperResolutionInfo& Info : m_Variants[BackendIdx]) - { - if (NumVariants >= MaxVariants) - break; - Variants[NumVariants++] = Info; - } - } -} - -void SuperResolutionFactoryImpl::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const -{ - Settings = {}; - - const SUPER_RESOLUTION_BACKEND Backend = FindVariant(Attribs.VariantId); - if (Backend == SUPER_RESOLUTION_BACKEND_COUNT) - { - LOG_WARNING_MESSAGE("Super resolution variant not found for the specified VariantId"); - return; - } - - switch (Backend) 
- { - case SUPER_RESOLUTION_BACKEND_D3D12_DSR: - case SUPER_RESOLUTION_BACKEND_METAL_FX: - case SUPER_RESOLUTION_BACKEND_SOFTWARE: - LOG_WARNING_MESSAGE("Unknown super resolution backend"); - break; - - default: - LOG_WARNING_MESSAGE("Unknown super resolution backend"); - break; - } -} - -void SuperResolutionFactoryImpl::CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); - if (ppUpscaler == nullptr) - return; - - DEV_CHECK_ERR(*ppUpscaler == nullptr, "*ppUpscaler is not null. Overwriting reference to existing object may cause memory leaks"); - - *ppUpscaler = nullptr; - - const SUPER_RESOLUTION_BACKEND Backend = FindVariant(Desc.VariantId); - if (Backend == SUPER_RESOLUTION_BACKEND_COUNT) - { - LOG_ERROR_MESSAGE("Super resolution variant not found for the specified VariantId. Call EnumerateVariants() to get valid variant IDs."); - return; - } - - try - { - switch (Backend) - { - case SUPER_RESOLUTION_BACKEND_D3D12_DSR: - case SUPER_RESOLUTION_BACKEND_METAL_FX: - case SUPER_RESOLUTION_BACKEND_SOFTWARE: - LOG_WARNING_MESSAGE("Unknown super resolution backend"); - break; - - default: - LOG_ERROR_MESSAGE("Unknown super resolution backend"); - break; - } - } - catch (...) - { - LOG_ERROR("Failed to create super resolution upscaler '", (Desc.Name ? 
Desc.Name : ""), "'"); - } -} - -void SuperResolutionFactoryImpl::SetMessageCallback(DebugMessageCallbackType MessageCallback) const -{ - SetDebugMessageCallback(MessageCallback); -} - -void SuperResolutionFactoryImpl::SetBreakOnError(bool BreakOnError) const -{ - PlatformDebug::SetBreakOnError(BreakOnError); -} - -void SuperResolutionFactoryImpl::SetMemoryAllocator(IMemoryAllocator* pAllocator) const -{ - SetRawAllocator(pAllocator); -} - -} // namespace - - -API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) -{ - DEV_CHECK_ERR(ppFactory != nullptr, "ppFactory must not be null"); - if (ppFactory == nullptr) - return; - - *ppFactory = nullptr; - - try - { - SuperResolutionFactoryImpl* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryImpl instance", SuperResolutionFactoryImpl)(pDevice); - pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); - } - catch (...) - { - LOG_ERROR("Failed to create super resolution factory"); - } -} - -} // namespace Diligent - -extern "C" -{ - API_QUALIFIER - void Diligent_CreateSuperResolutionFactory(Diligent::IRenderDevice* pDevice, - Diligent::ISuperResolutionFactory** ppFactory) - { - Diligent::CreateSuperResolutionFactory(pDevice, ppFactory); - } -} diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp new file mode 100644 index 0000000000..ec52f53d9d --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp @@ -0,0 +1,122 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "SuperResolutionFactoryBase.hpp" + +#include "SuperResolutionFactoryLoader.h" + +#include "PlatformDebug.hpp" +#include "EngineMemory.h" + +namespace Diligent +{ + +#if D3D12_SUPPORTED +void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +#endif + +SuperResolutionFactoryBase::SuperResolutionFactoryBase(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + TBase{pRefCounters}, + m_pDevice{pDevice} +{ +} + +void SuperResolutionFactoryBase::EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) +{ + if (Variants == nullptr) + { + NumVariants = static_cast(m_Variants.size()); + return; + } + + NumVariants = std::min(NumVariants, static_cast(m_Variants.size())); + memcpy(Variants, m_Variants.data(), NumVariants * sizeof(SuperResolutionInfo)); +} + +void SuperResolutionFactoryBase::SetMessageCallback(DebugMessageCallbackType MessageCallback) const +{ + SetDebugMessageCallback(MessageCallback); +} + +void SuperResolutionFactoryBase::SetBreakOnError(bool BreakOnError) const +{ + PlatformDebug::SetBreakOnError(BreakOnError); +} + +void SuperResolutionFactoryBase::SetMemoryAllocator(IMemoryAllocator* pAllocator) const +{ + SetRawAllocator(pAllocator); +} + + +API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +{ + if (ppFactory == nullptr) + { + DEV_ERROR("ppFactory must not be null"); + return; + } + DEV_CHECK_ERR(*ppFactory == nullptr, "ppFactory is not null. 
Overwriting it may cause memory leak"); + + *ppFactory = nullptr; + if (pDevice == nullptr) + { + DEV_ERROR("pDevice must not be null"); + return; + } + + RENDER_DEVICE_TYPE DeviceType = pDevice->GetDeviceInfo().Type; + try + { + switch (DeviceType) + { + case RENDER_DEVICE_TYPE_D3D12: +#if D3D12_SUPPORTED + CreateSuperResolutionFactoryD3D12(pDevice, ppFactory); +#endif + break; + + default: + LOG_ERROR_MESSAGE("Super resolution is not supported on this device type: ", DeviceType); + } + } + catch (...) + { + LOG_ERROR("Failed to create super resolution factory"); + } +} + +} // namespace Diligent + +extern "C" +{ + API_QUALIFIER + void Diligent_CreateSuperResolutionFactory(Diligent::IRenderDevice* pDevice, + Diligent::ISuperResolutionFactory** ppFactory) + { + Diligent::CreateSuperResolutionFactory(pDevice, ppFactory); + } +} diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp new file mode 100644 index 0000000000..6e9eaa3d0b --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp @@ -0,0 +1,84 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolutionFactoryBase.hpp" + +#include "EngineMemory.h" +#include "DebugUtilities.hpp" + +namespace Diligent +{ + +namespace +{ + +class SuperResolutionFactoryD3D12 final : public SuperResolutionFactoryBase +{ +public: + using TBase = SuperResolutionFactoryBase; + + SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); + + virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; + + virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; + +private: + void PopulateVariants(); +}; + + +SuperResolutionFactoryD3D12::SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + TBase{pRefCounters, pDevice} +{ + PopulateVariants(); +} + +void SuperResolutionFactoryD3D12::PopulateVariants() +{ +} + +void SuperResolutionFactoryD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const +{ + Settings = {}; +} + +void SuperResolutionFactoryD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) +{ +} + 
+} // namespace + +void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +{ + VERIFY(pDevice != nullptr, "pDevice must not be null"); + VERIFY(pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_D3D12, "Expected a D3D12 device"); + + SuperResolutionFactoryD3D12* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D12 instance", SuperResolutionFactoryD3D12)(pDevice); + pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); +} + +} // namespace Diligent From 43e4555019fe228db872b485e6da1c708a4b2da3 Mon Sep 17 00:00:00 2001 From: assiduous Date: Sun, 15 Mar 2026 16:31:59 -0700 Subject: [PATCH 05/14] Create super-resolution factory for Metal device --- CMakeLists.txt | 10 ++++++---- .../SuperResolution/src/SuperResolutionFactoryBase.cpp | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 980206ca2e..39ab3cc9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -201,12 +201,14 @@ elseif(PLATFORM_LINUX) set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on Linux platform") target_compile_definitions(Diligent-PublicBuildSettings INTERFACE PLATFORM_LINUX=1) elseif(PLATFORM_MACOS) - set(GL_SUPPORTED TRUE CACHE INTERNAL "OpenGL is supported on MacOS platform") - set(VULKAN_SUPPORTED TRUE CACHE INTERNAL "Vulkan is enabled through MoltenVK on MacOS platform") - set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on MacOS platform") + set(GL_SUPPORTED TRUE CACHE INTERNAL "OpenGL is supported on MacOS platform") + set(VULKAN_SUPPORTED TRUE CACHE INTERNAL "Vulkan is enabled through MoltenVK on MacOS platform") + set(ARCHIVER_SUPPORTED TRUE CACHE INTERNAL "Archiver is supported on MacOS platform") + set(SUPER_RESOLUTION_SUPPORTED TRUE CACHE INTERNAL "Super resolution is supported on MacOS platform") target_compile_definitions(Diligent-PublicBuildSettings INTERFACE PLATFORM_MACOS=1 PLATFORM_APPLE=1) 
elseif(PLATFORM_IOS) - set(GLES_SUPPORTED TRUE CACHE INTERNAL "OpenGLES is supported on iOS platform") + set(GLES_SUPPORTED TRUE CACHE INTERNAL "OpenGLES is supported on iOS platform") + set(SUPER_RESOLUTION_SUPPORTED TRUE CACHE INTERNAL "Super resolution is supported on iOS platform") target_compile_definitions(Diligent-PublicBuildSettings INTERFACE PLATFORM_IOS=1 PLATFORM_APPLE=1) elseif(PLATFORM_TVOS) target_compile_definitions(Diligent-PublicBuildSettings INTERFACE PLATFORM_TVOS=1 PLATFORM_APPLE=1) diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp index ec52f53d9d..f578c46345 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp @@ -38,6 +38,10 @@ namespace Diligent void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); #endif +#if METAL_SUPPORTED +void CreateSuperResolutionFactoryMtl(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +#endif + SuperResolutionFactoryBase::SuperResolutionFactoryBase(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : TBase{pRefCounters}, m_pDevice{pDevice} @@ -99,6 +103,12 @@ API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperRe #endif break; + case RENDER_DEVICE_TYPE_METAL: +#if METAL_SUPPORTED + CreateSuperResolutionFactoryMtl(pDevice, ppFactory); +#endif + break; + default: LOG_ERROR_MESSAGE("Super resolution is not supported on this device type: ", DeviceType); } From baab22df6fc3cea49d0eae794ab9a5b7e1416bc1 Mon Sep 17 00:00:00 2001 From: assiduous Date: Sun, 15 Mar 2026 16:39:39 -0700 Subject: [PATCH 06/14] Rename SuperResolution_D3D12.cpp to SuperResolutionD3D12.cpp --- Graphics/SuperResolution/CMakeLists.txt | 2 +- .../src/{SuperResolution_D3D12.cpp => SuperResolutionD3D12.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename 
Graphics/SuperResolution/src/{SuperResolution_D3D12.cpp => SuperResolutionD3D12.cpp} (100%) diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 0d7d2e41e7..0468fe7bd6 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -22,7 +22,7 @@ set(SOURCE if(D3D12_SUPPORTED) list(APPEND SOURCE - src/SuperResolution_D3D12.cpp + src/SuperResolutionD3D12.cpp src/SuperResolutionFactoryD3D12.cpp ) endif() diff --git a/Graphics/SuperResolution/src/SuperResolution_D3D12.cpp b/Graphics/SuperResolution/src/SuperResolutionD3D12.cpp similarity index 100% rename from Graphics/SuperResolution/src/SuperResolution_D3D12.cpp rename to Graphics/SuperResolution/src/SuperResolutionD3D12.cpp From e5bfc095588cde52cc402a03ef4288b16ef42520 Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Thu, 19 Mar 2026 15:10:46 +0600 Subject: [PATCH 07/14] Add super-resolution support: DLSS (D3D11/D3D12/Vulkan), DirectSR (D3D12), MetalFX (Metal) --- .github/workflows/msvc_analysis.yml | 2 +- Graphics/SuperResolution/CMakeLists.txt | 96 +++- .../include/DLSSProviderD3D11.hpp | 58 +++ .../include/DLSSProviderD3D12.hpp | 58 +++ .../include/DLSSProviderVk.hpp | 58 +++ .../include/DSRProviderD3D12.hpp | 60 +++ .../include/SuperResolutionBase.hpp | 56 ++- .../include/SuperResolutionDLSS.hpp | 61 +++ .../include/SuperResolutionFactoryBase.hpp | 65 ++- .../include/SuperResolutionVariants.hpp | 49 ++ .../SuperResolution/src/DLSSProviderD3D11.cpp | 198 ++++++++ .../SuperResolution/src/DLSSProviderD3D12.cpp | 212 +++++++++ .../SuperResolution/src/DLSSProviderVk.cpp | 252 +++++++++++ .../SuperResolution/src/DSRProviderD3D12.cpp | 393 ++++++++++++++++ .../src/SuperResolutionBase.cpp | 218 +++++++++ .../src/SuperResolutionDLSS.cpp | 149 ++++++ .../src/SuperResolutionFactory.cpp | 86 ++++ .../src/SuperResolutionFactoryBase.cpp | 128 +++--- .../src/SuperResolutionFactoryD3D11.cpp | 50 +++ .../src/SuperResolutionFactoryD3D12.cpp 
| 50 +-- ...3D12.cpp => SuperResolutionFactoryMtl.cpp} | 11 +- .../src/SuperResolutionFactoryVk.cpp | 50 +++ Tests/DiligentCoreAPITest/CMakeLists.txt | 4 + .../src/SuperResolutionTest.cpp | 423 ++++++++++++++++++ 24 files changed, 2658 insertions(+), 129 deletions(-) create mode 100644 Graphics/SuperResolution/include/DLSSProviderD3D11.hpp create mode 100644 Graphics/SuperResolution/include/DLSSProviderD3D12.hpp create mode 100644 Graphics/SuperResolution/include/DLSSProviderVk.hpp create mode 100644 Graphics/SuperResolution/include/DSRProviderD3D12.hpp create mode 100644 Graphics/SuperResolution/include/SuperResolutionDLSS.hpp create mode 100644 Graphics/SuperResolution/include/SuperResolutionVariants.hpp create mode 100644 Graphics/SuperResolution/src/DLSSProviderD3D11.cpp create mode 100644 Graphics/SuperResolution/src/DLSSProviderD3D12.cpp create mode 100644 Graphics/SuperResolution/src/DLSSProviderVk.cpp create mode 100644 Graphics/SuperResolution/src/DSRProviderD3D12.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionBase.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionDLSS.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactory.cpp create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp rename Graphics/SuperResolution/src/{SuperResolutionD3D12.cpp => SuperResolutionFactoryMtl.cpp} (87%) create mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp create mode 100644 Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp diff --git a/.github/workflows/msvc_analysis.yml b/.github/workflows/msvc_analysis.yml index 1ed71e73a6..069a66650a 100644 --- a/.github/workflows/msvc_analysis.yml +++ b/.github/workflows/msvc_analysis.yml @@ -11,7 +11,7 @@ jobs: platform: "Win32" toolset: "x64" build_type: "Debug" - cmake_args: "-DDILIGENT_NO_GLSLANG=ON -DDILIGENT_NO_HLSL=ON" + cmake_args: "-DDILIGENT_NO_GLSLANG=ON -DDILIGENT_NO_HLSL=ON -DDILIGENT_NO_DLSS=ON -DDILIGENT_NO_DSR=ON" 
cmake_generator: "Visual Studio 17 2022" runs-on: windows-2022 diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 0468fe7bd6..d1b31ee818 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -4,9 +4,50 @@ include(../../BuildTools/CMake/BuildUtils.cmake) project(Diligent-SuperResolution CXX) +set(DILIGENT_DLSS_SUPPORTED FALSE CACHE INTERNAL "DLSS is not supported") +set(DILIGENT_DSR_SUPPORTED FALSE CACHE INTERNAL "DirectSR is not supported") + +if(PLATFORM_WIN32) + set(DILIGENT_DLSS_SUPPORTED TRUE CACHE INTERNAL "DLSS is supported on Win32 platform") + set(DILIGENT_DSR_SUPPORTED TRUE CACHE INTERNAL "DirectSR is supported on Win32 platform") +endif() + +if(${DILIGENT_NO_DLSS}) + set(DILIGENT_DLSS_SUPPORTED FALSE CACHE INTERNAL "DLSS is forcibly disabled") +endif() +if(${DILIGENT_NO_DSR}) + set(DILIGENT_DSR_SUPPORTED FALSE CACHE INTERNAL "DirectSR is forcibly disabled") +endif() + +if(DILIGENT_DSR_SUPPORTED) + # Fetch DirectSR headers + FetchContent_DeclareShallowGit(DirectSR-Headers + GIT_REPOSITORY https://github.com/MikhailGorobets/DirectSR-Headers.git + GIT_TAG master + ) + FetchContent_MakeAvailable(DirectSR-Headers) + if(TARGET DirectSR-AgilitySDK) + set_target_properties(DirectSR-AgilitySDK PROPERTIES FOLDER DiligentCore/ThirdParty) + endif() +endif() + +if(DILIGENT_DLSS_SUPPORTED) + # Fetch NVIDIA DLSS SDK headers + FetchContent_DeclareShallowGit(DLSS-Headers + GIT_REPOSITORY https://github.com/NVIDIA/DLSS.git + GIT_TAG main + ) + FetchContent_MakeAvailable(DLSS-Headers) +endif() + set(INCLUDE include/SuperResolutionBase.hpp include/SuperResolutionFactoryBase.hpp + include/SuperResolutionVariants.hpp + include/DLSSProviderD3D12.hpp + include/DLSSProviderD3D11.hpp + include/DLSSProviderVk.hpp + include/DSRProviderD3D12.hpp ) set(INTERFACE @@ -16,15 +57,22 @@ set(INTERFACE ) set(SOURCE + src/SuperResolutionBase.cpp src/SuperResolutionFactoryBase.cpp + 
src/SuperResolutionFactory.cpp + src/SuperResolutionFactoryD3D12.cpp + src/SuperResolutionFactoryD3D11.cpp + src/SuperResolutionFactoryVk.cpp + src/SuperResolutionFactoryMtl.cpp + src/DLSSProviderD3D12.cpp + src/DLSSProviderD3D11.cpp + src/DLSSProviderVk.cpp + src/DSRProviderD3D12.cpp ) -if(D3D12_SUPPORTED) - list(APPEND - SOURCE - src/SuperResolutionD3D12.cpp - src/SuperResolutionFactoryD3D12.cpp - ) +if(DILIGENT_DLSS_SUPPORTED) + list(APPEND INCLUDE include/SuperResolutionDLSS.hpp) + list(APPEND SOURCE src/SuperResolutionDLSS.cpp) endif() set(DLL_SOURCE @@ -57,6 +105,14 @@ PRIVATE ../GraphicsEngineNextGenBase/include ) +if(DILIGENT_DLSS_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DLSS_SUPPORTED=1) +endif() + +if(DILIGENT_DSR_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DSR_SUPPORTED=1) +endif() + target_compile_definitions(Diligent-SuperResolution-shared PUBLIC DILIGENT_SUPER_RESOLUTION_SHARED=1) target_link_libraries(Diligent-SuperResolution-static @@ -69,11 +125,35 @@ PRIVATE Diligent-ShaderTools ) -if(D3D12_SUPPORTED) - target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D12-static) +if(DILIGENT_DSR_SUPPORTED) + target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D12-static DirectSR-Headers) target_include_directories(Diligent-SuperResolution-static PRIVATE ../GraphicsEngineD3D12/include) endif() +if(DILIGENT_DLSS_SUPPORTED) + set(DLSS_SDK_DIR ${FETCHCONTENT_BASE_DIR}/dlss-headers-src) + target_include_directories(Diligent-SuperResolution-static PRIVATE ${DLSS_SDK_DIR}/include) + + # Link NGX static library (dynamic CRT /MD variant) + target_link_libraries(Diligent-SuperResolution-static PRIVATE + debug ${DLSS_SDK_DIR}/lib/Windows_x86_64/x64/nvsdk_ngx_d_dbg.lib + optimized ${DLSS_SDK_DIR}/lib/Windows_x86_64/x64/nvsdk_ngx_d.lib + ) + + if(D3D12_SUPPORTED) + 
target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D12-static) + target_include_directories(Diligent-SuperResolution-static PRIVATE ../GraphicsEngineD3D12/include) + endif() + if(D3D11_SUPPORTED) + target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D11-static) + target_include_directories(Diligent-SuperResolution-static PRIVATE ../GraphicsEngineD3D11/include) + endif() + if(VULKAN_SUPPORTED) + target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineVk-static Vulkan::Headers) + target_include_directories(Diligent-SuperResolution-static PRIVATE ../GraphicsEngineVulkan/include) + endif() +endif() + target_link_libraries(Diligent-SuperResolution-shared PUBLIC Diligent-SuperResolutionInterface diff --git a/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp b/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp new file mode 100644 index 0000000000..63a88a8a75 --- /dev/null +++ b/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp @@ -0,0 +1,58 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +#include "SuperResolutionFactory.h" +#include "SuperResolution.h" +#include "RefCntAutoPtr.hpp" + +#include + +struct NVSDK_NGX_Parameter; + +namespace Diligent +{ + +class DLSSProviderD3D11 final +{ +public: + DLSSProviderD3D11(IRenderDevice* pDevice); + + ~DLSSProviderD3D11(); + + void EnumerateVariants(std::vector& Variants); + + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); + + void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); + +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp b/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp new file mode 100644 index 0000000000..45c3618bd6 --- /dev/null +++ b/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp @@ -0,0 +1,58 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#pragma once + +#include "SuperResolutionFactory.h" +#include "SuperResolution.h" +#include "RefCntAutoPtr.hpp" + +#include + +struct NVSDK_NGX_Parameter; + +namespace Diligent +{ + +class DLSSProviderD3D12 final +{ +public: + DLSSProviderD3D12(IRenderDevice* pDevice); + + ~DLSSProviderD3D12(); + + void EnumerateVariants(std::vector& Variants); + + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); + + void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); + +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DLSSProviderVk.hpp b/Graphics/SuperResolution/include/DLSSProviderVk.hpp new file mode 100644 index 0000000000..bfe690cb1c --- /dev/null +++ b/Graphics/SuperResolution/include/DLSSProviderVk.hpp @@ -0,0 +1,58 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +#include "SuperResolutionFactory.h" +#include "SuperResolution.h" +#include "RefCntAutoPtr.hpp" + +#include + +struct NVSDK_NGX_Parameter; + +namespace Diligent +{ + +class DLSSProviderVk final +{ +public: + DLSSProviderVk(IRenderDevice* pDevice); + + ~DLSSProviderVk(); + + void EnumerateVariants(std::vector& Variants); + + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); + + void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); + +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DSRProviderD3D12.hpp b/Graphics/SuperResolution/include/DSRProviderD3D12.hpp new file mode 100644 index 0000000000..3ef038f0f5 --- /dev/null +++ b/Graphics/SuperResolution/include/DSRProviderD3D12.hpp @@ -0,0 +1,60 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#pragma once + +#include "SuperResolutionFactory.h" +#include "SuperResolution.h" +#include "RefCntAutoPtr.hpp" + +#include + +struct IDSRDevice; + +namespace Diligent +{ + +class DSRProviderD3D12 final +{ +public: + DSRProviderD3D12(IRenderDevice* pDevice); + + ~DSRProviderD3D12(); + + void EnumerateVariants(std::vector& Variants); + + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings); + + void CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler); + +private: + RefCntAutoPtr m_pDevice; + IDSRDevice* m_pDSRDevice = nullptr; +}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionBase.hpp b/Graphics/SuperResolution/include/SuperResolutionBase.hpp index 34ecec4327..24c0d5c66c 100644 --- a/Graphics/SuperResolution/include/SuperResolutionBase.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionBase.hpp @@ -26,8 +26,10 @@ #pragma once -#include "SuperResolution.h" #include "ObjectBase.hpp" +#include "SuperResolution.h" +#include "SuperResolutionFactory.h" +#include "GraphicsAccessories.hpp" #include #include @@ -35,18 +37,55 @@ namespace Diligent { +#define LOG_SUPER_RESOLUTION_ERROR_AND_THROW(Name, ...) LOG_ERROR_AND_THROW("Super resolution upscaler '", ((Name) != nullptr ? (Name) : ""), "': ", ##__VA_ARGS__) + +#define VERIFY_SUPER_RESOLUTION(Name, Expr, ...) \ + do \ + { \ + if (!(Expr)) \ + { \ + LOG_SUPER_RESOLUTION_ERROR_AND_THROW(Name, __VA_ARGS__); \ + } \ + } while (false) + +/// Validates super resolution description and throws an exception in case of an error. +void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); + +/// Validates super resolution description for temporal upscaling and throws an exception in case of an error. 
+void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); + +/// Validates super resolution source settings attributes and throws an exception in case of an error. +void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs) noexcept(false); + +/// Validates execute super resolution attributes and throws an exception in case of an error. +void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const ExecuteSuperResolutionAttribs& Attribs) noexcept(false); + +/// Validates execute super resolution attributes for temporal upscaling and throws an exception in case of an error. +void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const ExecuteSuperResolutionAttribs& Attribs) noexcept(false); + class SuperResolutionBase : public ObjectBase { public: using TBase = ObjectBase; + struct JitterOffset + { + float X = 0.0f; + float Y = 0.0f; + }; + SuperResolutionBase(IReferenceCounters* pRefCounters, const SuperResolutionDesc& Desc) : TBase{pRefCounters}, - m_Name{Desc.Name != nullptr ? Desc.Name : ""}, m_Desc{Desc} { - m_Desc.Name = m_Name.c_str(); + if (Desc.Name != nullptr) + { + m_Name = Desc.Name; + m_Desc.Name = m_Name.c_str(); + } } IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolution, TBase) @@ -72,15 +111,12 @@ class SuperResolutionBase : public ObjectBase } protected: - struct JitterOffset - { - float X = 0.0f; - float Y = 0.0f; - }; - - const std::string m_Name; SuperResolutionDesc m_Desc; + std::string m_Name; std::vector m_JitterPattern; }; +/// Populates a Halton(2,3) jitter pattern centered at origin. 
+void PopulateHaltonJitterPattern(std::vector& JitterPattern, Uint32 PatternSize); + } // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp b/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp new file mode 100644 index 0000000000..2534a30f22 --- /dev/null +++ b/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp @@ -0,0 +1,61 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#pragma once + +/// \file +/// Shared DLSS utilities used by per-API DLSS backend implementations. 
+ +#include + +#include + +#include "SuperResolutionFactory.h" +#include "SuperResolution.h" + +struct NVSDK_NGX_Parameter; + +namespace Diligent +{ + +extern const char* DLSSProjectId; +extern const wchar_t* DLSSAppDataPath; + +/// Maps Diligent optimization type to NGX performance/quality preset. +NVSDK_NGX_PerfQuality_Value OptimizationTypeToNGXPerfQuality(SUPER_RESOLUTION_OPTIMIZATION_TYPE Type); + +/// Maps Diligent super resolution flags to DLSS feature flags. +Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags); + +/// Populates DLSS variant info using NGX capability parameters. +void EnumerateDLSSVariants(NVSDK_NGX_Parameter* pNGXParams, std::vector& Variants); + +/// Queries DLSS optimal source settings using NGX capability parameters. +void GetDLSSSourceSettings(NVSDK_NGX_Parameter* pNGXParams, + const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings); + +} // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp b/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp index 69cb1a7f39..6964975d22 100644 --- a/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp @@ -27,34 +27,91 @@ #pragma once #include "SuperResolutionFactory.h" +#include "SuperResolution.h" #include "ObjectBase.hpp" -#include "RefCntAutoPtr.hpp" #include +#include namespace Diligent { +struct BackendEntry +{ + struct IHolder + { + virtual ~IHolder() = default; + + virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) = 0; + + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) = 0; + }; + + template + struct Holder final : IHolder + { + T Instance; + template + explicit Holder(Args&&... args) : + Instance(std::forward(args)...) 
{} + + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) override { Instance.GetSourceSettings(Attribs, Settings); } + + void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override { Instance.CreateSuperResolution(Desc, ppUpscaler); } + }; + + std::unique_ptr pBackend; + std::vector Variants; +}; + class SuperResolutionFactoryBase : public ObjectBase { public: using TBase = ObjectBase; - SuperResolutionFactoryBase(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); + SuperResolutionFactoryBase(IReferenceCounters* pRefCounters); IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final; + virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; + + virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; + virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; + template + void AddBackend(Args&&... args); + private: - RefCntAutoPtr m_pDevice; - std::vector m_Variants{}; + BackendEntry* FindBackend(const INTERFACE_ID& VariantId) const; + + std::vector m_Backends; }; +template +void SuperResolutionFactoryBase::AddBackend(Args&&... 
args) +{ + try + { + auto pHolder = std::make_unique>(std::forward(args)...); + + BackendEntry Entry; + pHolder->Instance.EnumerateVariants(Entry.Variants); + if (Entry.Variants.empty()) + return; + + Entry.pBackend = std::move(pHolder); + m_Backends.push_back(std::move(Entry)); + } + catch (...) + { + } +} + } // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionVariants.hpp b/Graphics/SuperResolution/include/SuperResolutionVariants.hpp new file mode 100644 index 0000000000..1499aea97f --- /dev/null +++ b/Graphics/SuperResolution/include/SuperResolutionVariants.hpp @@ -0,0 +1,49 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#pragma once + +/// \file +/// Super resolution upscaler variant IDs + +#include "InterfaceID.h" + +namespace Diligent +{ + +// {7B3A8D2E-1F4C-4E9A-B5D0-6C8E2F1A3B5D} +static constexpr INTERFACE_ID VariantId_DLSS = + {0x7b3a8d2e, 0x1f4c, 0x4e9a, {0xb5, 0xd0, 0x6c, 0x8e, 0x2f, 0x1a, 0x3b, 0x5d}}; + +// {C4D70001-A1B2-4C3D-8E9F-0A1B2C3D4E5F} +static constexpr INTERFACE_ID VariantId_MetalFXSpatial = + {0xc4d70001, 0xa1b2, 0x4c3d, {0x8e, 0x9f, 0x0a, 0x1b, 0x2c, 0x3d, 0x4e, 0x5f}}; + +// {C4D70002-A1B2-4C3D-8E9F-0A1B2C3D4E5F} +static constexpr INTERFACE_ID VariantId_MetalFXTemporal = + {0xc4d70002, 0xa1b2, 0x4c3d, {0x8e, 0x9f, 0x0a, 0x1b, 0x2c, 0x3d, 0x4e, 0x5f}}; + +} // namespace Diligent diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp new file mode 100644 index 0000000000..cb211ecf5a --- /dev/null +++ b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp @@ -0,0 +1,198 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "DLSSProviderD3D11.hpp" + +#if D3D11_SUPPORTED && DILIGENT_DLSS_SUPPORTED + +# include "SuperResolutionDLSS.hpp" +# include "SuperResolutionBase.hpp" +# include "SuperResolutionVariants.hpp" + +# include + +# include "../../GraphicsEngineD3D11/include/pch.h" +# include "RenderDeviceD3D11Impl.hpp" +# include "DeviceContextD3D11Impl.hpp" +# include "TextureBaseD3D11.hpp" + +namespace Diligent +{ + +namespace +{ + +class SuperResolutionD3D11_DLSS final : public SuperResolutionBase +{ +public: + SuperResolutionD3D11_DLSS(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + NVSDK_NGX_Parameter* pNGXParams) : + SuperResolutionBase{pRefCounters, Desc}, + m_pDevice{pDevice}, + m_pNGXParams{pNGXParams} + { + ValidateTemporalSuperResolutionDesc(m_Desc); + PopulateHaltonJitterPattern(m_JitterPattern, 64); + } + + ~SuperResolutionD3D11_DLSS() + { + if (m_pDLSSFeature != nullptr) + NVSDK_NGX_D3D11_ReleaseFeature(m_pDLSSFeature); + } + + virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final + { + ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + + if (m_pDLSSFeature == nullptr) + CreateFeature(Attribs); + + DeviceContextD3D11Impl* pCtxImpl = 
ClassPtrCast(Attribs.pContext); + + auto GetD3D11Resource = [](ITextureView* pView) -> ID3D11Resource* { + if (pView != nullptr) + return ClassPtrCast(pView->GetTexture())->GetD3D11Texture(); + return nullptr; + }; + + ID3D11DeviceContext* pd3d11DeviceContext = pCtxImpl->GetD3D11DeviceContext(); + + NVSDK_NGX_D3D11_DLSS_Eval_Params EvalParams = {}; + EvalParams.Feature.pInColor = GetD3D11Resource(Attribs.pColorTextureSRV); + EvalParams.Feature.pInOutput = GetD3D11Resource(Attribs.pOutputTextureView); + EvalParams.pInDepth = GetD3D11Resource(Attribs.pDepthTextureSRV); + EvalParams.pInMotionVectors = GetD3D11Resource(Attribs.pMotionVectorsSRV); + EvalParams.pInExposureTexture = GetD3D11Resource(Attribs.pExposureTextureSRV); + EvalParams.pInTransparencyMask = GetD3D11Resource(Attribs.pReactiveMaskTextureSRV); + EvalParams.pInBiasCurrentColorMask = GetD3D11Resource(Attribs.pIgnoreHistoryMaskTextureSRV); + EvalParams.Feature.InSharpness = Attribs.Sharpness; + EvalParams.InJitterOffsetX = Attribs.JitterX; + EvalParams.InJitterOffsetY = Attribs.JitterY; + EvalParams.InReset = Attribs.ResetHistory ? 1 : 0; + EvalParams.InMVScaleX = Attribs.MotionVectorScaleX; + EvalParams.InMVScaleY = Attribs.MotionVectorScaleY; + EvalParams.InRenderSubrectDimensions.Width = m_Desc.InputWidth; + EvalParams.InRenderSubrectDimensions.Height = m_Desc.InputHeight; + EvalParams.InPreExposure = Attribs.PreExposure; + EvalParams.InExposureScale = Attribs.ExposureScale; + + NVSDK_NGX_Result Result = NGX_D3D11_EVALUATE_DLSS_EXT(pd3d11DeviceContext, m_pDLSSFeature, m_pNGXParams, &EvalParams); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_MESSAGE("DLSS D3D11 evaluation failed. 
NGX Result: ", static_cast(Result)); + } + +private: + void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + { + Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); + if (Attribs.CameraNear > Attribs.CameraFar) + DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + + NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; + DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; + DLSSCreateParams.Feature.InHeight = m_Desc.InputHeight; + DLSSCreateParams.Feature.InTargetWidth = m_Desc.OutputWidth; + DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; + DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; + + ID3D11DeviceContext* pd3d11Ctx = ClassPtrCast(Attribs.pContext)->GetD3D11DeviceContext(); + NVSDK_NGX_Result Result = NGX_D3D11_CREATE_DLSS_EXT(pd3d11Ctx, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("Failed to create DLSS D3D11 feature. NGX Result: ", static_cast(Result)); + } + + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // anonymous namespace + + +DLSSProviderD3D11::DLSSProviderD3D11(IRenderDevice* pDevice) : + m_pDevice{pDevice} +{ + ID3D11Device* pd3d11Device = ClassPtrCast(pDevice)->GetD3D11Device(); + NVSDK_NGX_Result Result = NVSDK_NGX_D3D11_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d11Device); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX D3D11 initialization failed. Result: ", static_cast(Result)); + + Result = NVSDK_NGX_D3D11_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX D3D11 capability parameters. 
Result: ", static_cast(Result)); +} + +DLSSProviderD3D11::~DLSSProviderD3D11() +{ + if (m_pNGXParams != nullptr) + NVSDK_NGX_D3D11_DestroyParameters(m_pNGXParams); + NVSDK_NGX_D3D11_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D11Device()); +} + +void DLSSProviderD3D11::EnumerateVariants(std::vector& Variants) +{ + EnumerateDLSSVariants(m_pNGXParams, Variants); +} + +void DLSSProviderD3D11::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) +{ + GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); +} + +void DLSSProviderD3D11::CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) +{ + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + + SuperResolutionD3D11_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D11_DLSS instance", SuperResolutionD3D11_DLSS)(m_pDevice, Desc, m_pNGXParams); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); +} + +} // namespace Diligent + +#else + +namespace Diligent +{ + +DLSSProviderD3D11::DLSSProviderD3D11(IRenderDevice*) +{ + LOG_INFO_MESSAGE("DLSS is not supported on this platform for D3D11 backend"); +} +DLSSProviderD3D11::~DLSSProviderD3D11() {} +void DLSSProviderD3D11::EnumerateVariants(std::vector&) {} +void DLSSProviderD3D11::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} +void DLSSProviderD3D11::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} + +} // namespace Diligent + +#endif diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp new file mode 100644 index 0000000000..5596ab5e54 --- /dev/null +++ b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp @@ -0,0 +1,212 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "DLSSProviderD3D12.hpp" + +#if D3D12_SUPPORTED && DILIGENT_DLSS_SUPPORTED + +# include "SuperResolutionDLSS.hpp" +# include "SuperResolutionBase.hpp" +# include "SuperResolutionVariants.hpp" + +# include + +# include "../../GraphicsEngineD3D12/include/pch.h" +# include "RenderDeviceD3D12Impl.hpp" +# include "DeviceContextD3D12Impl.hpp" +# include "TextureD3D12Impl.hpp" + +namespace Diligent +{ + +namespace +{ + +class SuperResolutionD3D12_DLSS final : public SuperResolutionBase +{ +public: + SuperResolutionD3D12_DLSS(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + NVSDK_NGX_Parameter* pNGXParams) : + SuperResolutionBase{pRefCounters, Desc}, + m_pDevice{pDevice}, + m_pNGXParams{pNGXParams} + { + ValidateTemporalSuperResolutionDesc(m_Desc); + PopulateHaltonJitterPattern(m_JitterPattern, 64); + } + + ~SuperResolutionD3D12_DLSS() + { + if (m_pDLSSFeature != nullptr) + NVSDK_NGX_D3D12_ReleaseFeature(m_pDLSSFeature); + } + + virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final + { + ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + + if (m_pDLSSFeature == nullptr) + CreateFeature(Attribs); + + DeviceContextD3D12Impl* pCtxImpl = ClassPtrCast(Attribs.pContext); + + auto GetD3D12Resource = [](ITextureView* pView) -> ID3D12Resource* { + if (pView != nullptr) + return ClassPtrCast(pView->GetTexture())->GetD3D12Resource(); + return nullptr; + }; + + pCtxImpl->TransitionTextureState(Attribs.pColorTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pCtxImpl->TransitionTextureState(Attribs.pDepthTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pCtxImpl->TransitionTextureState(Attribs.pMotionVectorsSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pCtxImpl->TransitionTextureState(Attribs.pOutputTextureView->GetTexture(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + if 
(Attribs.pExposureTextureSRV) + pCtxImpl->TransitionTextureState(Attribs.pExposureTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + if (Attribs.pReactiveMaskTextureSRV) + pCtxImpl->TransitionTextureState(Attribs.pReactiveMaskTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + if (Attribs.pIgnoreHistoryMaskTextureSRV) + pCtxImpl->TransitionTextureState(Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + ID3D12GraphicsCommandList* pCmdList = pCtxImpl->GetD3D12CommandList(); + + NVSDK_NGX_D3D12_DLSS_Eval_Params EvalParams = {}; + EvalParams.Feature.pInColor = GetD3D12Resource(Attribs.pColorTextureSRV); + EvalParams.Feature.pInOutput = GetD3D12Resource(Attribs.pOutputTextureView); + EvalParams.pInDepth = GetD3D12Resource(Attribs.pDepthTextureSRV); + EvalParams.pInMotionVectors = GetD3D12Resource(Attribs.pMotionVectorsSRV); + EvalParams.pInExposureTexture = GetD3D12Resource(Attribs.pExposureTextureSRV); + EvalParams.pInTransparencyMask = GetD3D12Resource(Attribs.pReactiveMaskTextureSRV); + EvalParams.pInBiasCurrentColorMask = GetD3D12Resource(Attribs.pIgnoreHistoryMaskTextureSRV); + EvalParams.Feature.InSharpness = Attribs.Sharpness; + EvalParams.InJitterOffsetX = Attribs.JitterX; + EvalParams.InJitterOffsetY = Attribs.JitterY; + EvalParams.InReset = Attribs.ResetHistory ? 1 : 0; + EvalParams.InMVScaleX = Attribs.MotionVectorScaleX; + EvalParams.InMVScaleY = Attribs.MotionVectorScaleY; + EvalParams.InRenderSubrectDimensions.Width = m_Desc.InputWidth; + EvalParams.InRenderSubrectDimensions.Height = m_Desc.InputHeight; + EvalParams.InPreExposure = Attribs.PreExposure; + EvalParams.InExposureScale = Attribs.ExposureScale; + + NVSDK_NGX_Result Result = NGX_D3D12_EVALUATE_DLSS_EXT(pCmdList, m_pDLSSFeature, m_pNGXParams, &EvalParams); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_MESSAGE("DLSS D3D12 evaluation failed. 
NGX Result: ", static_cast(Result)); + + pCtxImpl->TransitionTextureState(Attribs.pOutputTextureView->GetTexture(), D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE); + pCtxImpl->Flush(); + } + +private: + void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + { + Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); + if (Attribs.CameraNear > Attribs.CameraFar) + DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + + NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; + DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; + DLSSCreateParams.Feature.InHeight = m_Desc.InputHeight; + DLSSCreateParams.Feature.InTargetWidth = m_Desc.OutputWidth; + DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; + DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; + + ID3D12GraphicsCommandList* pCmdList = ClassPtrCast(Attribs.pContext)->GetD3D12CommandList(); + NVSDK_NGX_Result Result = NGX_D3D12_CREATE_DLSS_EXT(pCmdList, 1, 1, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("Failed to create DLSS D3D12 feature. NGX Result: ", static_cast(Result)); + } + + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // anonymous namespace + + +DLSSProviderD3D12::DLSSProviderD3D12(IRenderDevice* pDevice) : + m_pDevice{pDevice} +{ + ID3D12Device* pd3d12Device = ClassPtrCast(pDevice)->GetD3D12Device(); + NVSDK_NGX_Result Result = NVSDK_NGX_D3D12_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d12Device); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX D3D12 initialization failed. Result: ", static_cast(Result)); + + Result = NVSDK_NGX_D3D12_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX D3D12 capability parameters. 
Result: ", static_cast(Result)); +} + +DLSSProviderD3D12::~DLSSProviderD3D12() +{ + if (m_pNGXParams != nullptr) + NVSDK_NGX_D3D12_DestroyParameters(m_pNGXParams); + NVSDK_NGX_D3D12_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D12Device()); +} + +void DLSSProviderD3D12::EnumerateVariants(std::vector& Variants) +{ + EnumerateDLSSVariants(m_pNGXParams, Variants); +} + +void DLSSProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) +{ + GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); +} + +void DLSSProviderD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) +{ + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + + SuperResolutionD3D12_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DLSS instance", SuperResolutionD3D12_DLSS)(m_pDevice, Desc, m_pNGXParams); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); +} + +} // namespace Diligent + +#else + +namespace Diligent +{ + +DLSSProviderD3D12::DLSSProviderD3D12(IRenderDevice*) +{ + LOG_INFO_MESSAGE("DLSS is not supported on this platform for D3D12 backend"); +} +DLSSProviderD3D12::~DLSSProviderD3D12() {} +void DLSSProviderD3D12::EnumerateVariants(std::vector&) {} +void DLSSProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} +void DLSSProviderD3D12::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} + +} // namespace Diligent + +#endif diff --git a/Graphics/SuperResolution/src/DLSSProviderVk.cpp b/Graphics/SuperResolution/src/DLSSProviderVk.cpp new file mode 100644 index 0000000000..ab813a9f37 --- /dev/null +++ b/Graphics/SuperResolution/src/DLSSProviderVk.cpp @@ -0,0 +1,252 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "DLSSProviderVk.hpp" + +#if VULKAN_SUPPORTED && DILIGENT_DLSS_SUPPORTED + +# include "SuperResolutionDLSS.hpp" +# include "SuperResolutionBase.hpp" +# include "SuperResolutionVariants.hpp" + +# include "../../GraphicsEngineVulkan/include/pch.h" +# include +# include "RenderDeviceVkImpl.hpp" +# include "DeviceContextVkImpl.hpp" +# include "TextureVkImpl.hpp" +# include "TextureViewVkImpl.hpp" +# include "VulkanTypeConversions.hpp" + +namespace Diligent +{ + +namespace +{ + +class SuperResolutionVk_DLSS final : public SuperResolutionBase +{ +public: + SuperResolutionVk_DLSS(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + NVSDK_NGX_Parameter* pNGXParams) : + SuperResolutionBase{pRefCounters, Desc}, + m_pDevice{pDevice}, + m_pNGXParams{pNGXParams} + { + ValidateTemporalSuperResolutionDesc(m_Desc); + PopulateHaltonJitterPattern(m_JitterPattern, 64); + } + + ~SuperResolutionVk_DLSS() + { + if (m_pDLSSFeature != nullptr) + NVSDK_NGX_VULKAN_ReleaseFeature(m_pDLSSFeature); + } + + virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final + { + ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + + if (m_pDLSSFeature == nullptr) + CreateFeature(Attribs); + + DeviceContextVkImpl* pCtxImpl = ClassPtrCast(Attribs.pContext); + + auto CreateNGXResourceVK = [](ITextureView* pView, VkImageAspectFlags AspectMask, bool bReadWrite) -> NVSDK_NGX_Resource_VK { + TextureVkImpl* pTexVk = ClassPtrCast(pView->GetTexture()); + TextureViewVkImpl* pViewVk = ClassPtrCast(pView); + const TextureDesc& TexDesc = pTexVk->GetDesc(); + + VkImageSubresourceRange SubresourceRange = {}; + SubresourceRange.aspectMask = AspectMask; + SubresourceRange.baseMipLevel = 0; + SubresourceRange.levelCount = 1; + SubresourceRange.baseArrayLayer = 0; + SubresourceRange.layerCount = 1; + + return NVSDK_NGX_Create_ImageView_Resource_VK(pViewVk->GetVulkanImageView(), pTexVk->GetVkImage(), 
SubresourceRange, TexFormatToVkFormat(TexDesc.Format), TexDesc.Width, TexDesc.Height, bReadWrite); + }; + + VkCommandBuffer vkCmdBuffer = pCtxImpl->GetVkCommandBuffer(); + + NVSDK_NGX_Resource_VK ColorResource = CreateNGXResourceVK(Attribs.pColorTextureSRV, VK_IMAGE_ASPECT_COLOR_BIT, false); + NVSDK_NGX_Resource_VK OutputResource = CreateNGXResourceVK(Attribs.pOutputTextureView, VK_IMAGE_ASPECT_COLOR_BIT, true); + NVSDK_NGX_Resource_VK DepthResource = CreateNGXResourceVK(Attribs.pDepthTextureSRV, VK_IMAGE_ASPECT_DEPTH_BIT, false); + NVSDK_NGX_Resource_VK MotionResource = CreateNGXResourceVK(Attribs.pMotionVectorsSRV, VK_IMAGE_ASPECT_COLOR_BIT, false); + + NVSDK_NGX_Resource_VK ExposureResource = {}; + if (Attribs.pExposureTextureSRV) + ExposureResource = CreateNGXResourceVK(Attribs.pExposureTextureSRV, VK_IMAGE_ASPECT_COLOR_BIT, false); + + NVSDK_NGX_Resource_VK TransparencyMaskResource = {}; + if (Attribs.pReactiveMaskTextureSRV) + TransparencyMaskResource = CreateNGXResourceVK(Attribs.pReactiveMaskTextureSRV, VK_IMAGE_ASPECT_COLOR_BIT, false); + + NVSDK_NGX_Resource_VK BiasCurrentColorMaskResource = {}; + if (Attribs.pIgnoreHistoryMaskTextureSRV) + BiasCurrentColorMaskResource = CreateNGXResourceVK(Attribs.pIgnoreHistoryMaskTextureSRV, VK_IMAGE_ASPECT_COLOR_BIT, false); + + NVSDK_NGX_VK_DLSS_Eval_Params EvalParams = {}; + EvalParams.Feature.pInColor = &ColorResource; + EvalParams.Feature.pInOutput = &OutputResource; + EvalParams.pInDepth = &DepthResource; + EvalParams.pInMotionVectors = &MotionResource; + EvalParams.pInExposureTexture = Attribs.pExposureTextureSRV ? &ExposureResource : nullptr; + EvalParams.pInTransparencyMask = Attribs.pReactiveMaskTextureSRV ? &TransparencyMaskResource : nullptr; + EvalParams.pInBiasCurrentColorMask = Attribs.pIgnoreHistoryMaskTextureSRV ? 
&BiasCurrentColorMaskResource : nullptr; + EvalParams.Feature.InSharpness = Attribs.Sharpness; + EvalParams.InJitterOffsetX = Attribs.JitterX; + EvalParams.InJitterOffsetY = Attribs.JitterY; + EvalParams.InReset = Attribs.ResetHistory ? 1 : 0; + EvalParams.InMVScaleX = Attribs.MotionVectorScaleX; + EvalParams.InMVScaleY = Attribs.MotionVectorScaleY; + EvalParams.InRenderSubrectDimensions.Width = m_Desc.InputWidth; + EvalParams.InRenderSubrectDimensions.Height = m_Desc.InputHeight; + EvalParams.InPreExposure = Attribs.PreExposure; + EvalParams.InExposureScale = Attribs.ExposureScale; + + NVSDK_NGX_Result Result = NGX_VULKAN_EVALUATE_DLSS_EXT(vkCmdBuffer, m_pDLSSFeature, m_pNGXParams, &EvalParams); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_MESSAGE("DLSS Vulkan evaluation failed. NGX Result: ", static_cast(Result)); + } + +private: + void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + { + Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); + if (Attribs.CameraNear > Attribs.CameraFar) + DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + + NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; + DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; + DLSSCreateParams.Feature.InHeight = m_Desc.InputHeight; + DLSSCreateParams.Feature.InTargetWidth = m_Desc.OutputWidth; + DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; + DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; + + VkCommandBuffer vkCmdBuffer = ClassPtrCast(Attribs.pContext)->GetVkCommandBuffer(); + NVSDK_NGX_Result Result = NGX_VULKAN_CREATE_DLSS_EXT(vkCmdBuffer, 1, 1, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("Failed to create DLSS Vulkan feature. 
NGX Result: ", static_cast(Result)); + } + + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; + +} // anonymous namespace + + +DLSSProviderVk::DLSSProviderVk(IRenderDevice* pDevice) : + m_pDevice{pDevice} +{ + RenderDeviceVkImpl* pDeviceVk = ClassPtrCast(pDevice); + VkInstance vkInstance = pDeviceVk->GetVkInstance(); + VkPhysicalDevice vkPhysDevice = pDeviceVk->GetVkPhysicalDevice(); + VkDevice vkDevice = pDeviceVk->GetVkDevice(); + + NVSDK_NGX_Result Result = NVSDK_NGX_VULKAN_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, vkInstance, vkPhysDevice, vkDevice); + + { + Uint32 ExtCount = 0; + VkExtensionProperties* pExtensions = nullptr; + NVSDK_NGX_FeatureDiscoveryInfo FeatureInfo = {}; + NVSDK_NGX_Result ExtResult = NVSDK_NGX_VULKAN_GetFeatureDeviceExtensionRequirements(vkInstance, vkPhysDevice, &FeatureInfo, &ExtCount, &pExtensions); + if (NVSDK_NGX_SUCCEED(ExtResult) && ExtCount > 0 && pExtensions != nullptr) + { + /* TODO: Need to implement IsExtensionEnabled in VulkanUtilities::LogicalDevice + const VulkanUtilities::LogicalDevice& LogicDevice = pDeviceVk->GetLogicalDevice(); + for (Uint32 ExtensionIdx = 0; ExtensionIdx < ExtCount; ++ExtensionIdx) + { + if (!LogicDevice.IsExtensionEnabled(pExtensions[ExtensionIdx].extensionName)) + { + LOG_ERROR_AND_THROW("DLSS requires Vulkan device extension '", pExtensions[ExtensionIdx].extensionName, + "' which is not supported by the physical device. " + "Enable it via EngineVkCreateInfo::ppDeviceExtensionNames."); + } + } + */ + } + } + + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX Vulkan initialization failed. Result: ", static_cast(Result)); + + Result = NVSDK_NGX_VULKAN_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX Vulkan capability parameters. 
Result: ", static_cast(Result)); +} + +DLSSProviderVk::~DLSSProviderVk() +{ + if (m_pNGXParams != nullptr) + NVSDK_NGX_VULKAN_DestroyParameters(m_pNGXParams); + NVSDK_NGX_VULKAN_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetVkDevice()); +} + +void DLSSProviderVk::EnumerateVariants(std::vector& Variants) +{ + EnumerateDLSSVariants(m_pNGXParams, Variants); +} + +void DLSSProviderVk::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) +{ + GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); +} + +void DLSSProviderVk::CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) +{ + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + + SuperResolutionVk_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionVk_DLSS instance", SuperResolutionVk_DLSS)(m_pDevice, Desc, m_pNGXParams); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); +} + +} // namespace Diligent + +#else + +namespace Diligent +{ + +DLSSProviderVk::DLSSProviderVk(IRenderDevice*) +{ + LOG_INFO_MESSAGE("DLSS is not supported on this platform for Vulkan backend"); +} +DLSSProviderVk::~DLSSProviderVk() {} +void DLSSProviderVk::EnumerateVariants(std::vector&) {} +void DLSSProviderVk::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} +void DLSSProviderVk::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} + +} // namespace Diligent + +#endif diff --git a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp new file mode 100644 index 0000000000..b801fad01e --- /dev/null +++ b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp @@ -0,0 +1,393 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "DSRProviderD3D12.hpp" + +#if DILIGENT_DSR_SUPPORTED + +# include "SuperResolutionBase.hpp" +# include "../../GraphicsEngineD3D12/include/pch.h" + +# include + +# include "RenderDeviceD3D12Impl.hpp" +# include "DeviceContextD3D12Impl.hpp" +# include "DXGITypeConversions.hpp" + +namespace Diligent +{ + +namespace +{ + +CComPtr CreateDSRDevice(IRenderDevice* pDevice) +{ + HMODULE hD3D12 = GetModuleHandleA("d3d12.dll"); + if (!hD3D12) + { + LOG_WARNING_MESSAGE("d3d12.dll is not loaded. 
DirectSR features will be disabled."); + return {}; + } + + using D3D12GetInterfaceProcType = HRESULT(WINAPI*)(REFCLSID, REFIID, void**); + D3D12GetInterfaceProcType pfnD3D12GetInterface = reinterpret_cast(GetProcAddress(hD3D12, "D3D12GetInterface")); + if (!pfnD3D12GetInterface) + { + LOG_WARNING_MESSAGE("D3D12GetInterface is not available. DirectSR features will be disabled."); + return {}; + } + + CComPtr pDSRFactory; + if (HRESULT hr = pfnD3D12GetInterface(CLSID_D3D12DSRDeviceFactory, IID_PPV_ARGS(&pDSRFactory)); FAILED(hr)) + { + LOG_WARNING_MESSAGE("Failed to create DirectSR device factory. HRESULT: ", hr); + return {}; + } + + ID3D12Device* pd3d12Device = ClassPtrCast(pDevice)->GetD3D12Device(); + + CComPtr pDSRDevice; + if (HRESULT hr = pDSRFactory->CreateDSRDevice(pd3d12Device, 0, IID_PPV_ARGS(&pDSRDevice)); FAILED(hr)) + { + LOG_WARNING_MESSAGE("Failed to create DirectSR device. HRESULT: ", hr); + return {}; + } + + LOG_INFO_MESSAGE("DirectSR device initialized successfully. ", pDSRDevice->GetNumSuperResVariants(), " upscaler variant(s) found."); + return pDSRDevice; +} + +DSR_SUPERRES_CREATE_ENGINE_FLAGS SuperResolutionFlagsToDSRFlags(SUPER_RESOLUTION_FLAGS Flags) +{ + DSR_SUPERRES_CREATE_ENGINE_FLAGS DSRFlags = DSR_SUPERRES_CREATE_ENGINE_FLAG_NONE; + + if (Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) + DSRFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_AUTO_EXPOSURE; + if (Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) + DSRFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_ENABLE_SHARPENING; + + return DSRFlags; +} + +class SuperResolutionD3D12_DSR final : public SuperResolutionBase +{ +public: + SuperResolutionD3D12_DSR(IReferenceCounters* pRefCounters, + RenderDeviceD3D12Impl* pDevice, + const SuperResolutionDesc& Desc, + IDSRDevice* pDSRDevice); + + ~SuperResolutionD3D12_DSR(); + + virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final; + +private: + RefCntAutoPtr m_pDevice; + CComPtr m_pDSREngine; + std::vector> 
m_DSRUpscalers; +}; + +SuperResolutionD3D12_DSR::SuperResolutionD3D12_DSR(IReferenceCounters* pRefCounters, + RenderDeviceD3D12Impl* pDevice, + const SuperResolutionDesc& Desc, + IDSRDevice* pDSRDevice) : + SuperResolutionBase{pRefCounters, Desc}, + m_pDevice{pDevice}, + m_DSRUpscalers(pDevice->GetCommandQueueCount()) +{ + + ValidateTemporalSuperResolutionDesc(m_Desc); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, Desc.MotionFormat == TEX_FORMAT_RG16_FLOAT, "MotionFormat must be TEX_FORMAT_RG16_FLOAT. Got: ", GetTextureFormatAttribs(Desc.MotionFormat).Name); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Desc.ExposureFormat != TEX_FORMAT_UNKNOWN, + "ExposureFormat must not be TEX_FORMAT_UNKNOWN when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set. " + "Either enable auto-exposure or specify a valid ExposureFormat (e.g. TEX_FORMAT_R32_FLOAT)."); + + VERIFY_SUPER_RESOLUTION(m_Desc.Name, pDSRDevice != nullptr, "DirectSR device is not available"); + + DSR_SUPERRES_CREATE_ENGINE_PARAMETERS CreateInfo = {}; + CreateInfo.VariantId = reinterpret_cast(Desc.VariantId); + CreateInfo.TargetFormat = TexFormatToDXGI_Format(Desc.OutputFormat); + CreateInfo.SourceColorFormat = TexFormatToDXGI_Format(Desc.ColorFormat); + CreateInfo.SourceDepthFormat = TexFormatToDXGI_Format(Desc.DepthFormat); + CreateInfo.ExposureScaleFormat = TexFormatToDXGI_Format(Desc.ExposureFormat); + CreateInfo.Flags = SuperResolutionFlagsToDSRFlags(Desc.Flags); + CreateInfo.MaxSourceSize = {Desc.InputWidth, Desc.InputHeight}; + CreateInfo.TargetSize = {Desc.OutputWidth, Desc.OutputHeight}; + + if (HRESULT hr = pDSRDevice->CreateSuperResEngine(&CreateInfo, IID_PPV_ARGS(&m_pDSREngine)); FAILED(hr)) + LOG_ERROR_AND_THROW("Failed to create DirectSR super resolution engine. 
HRESULT: ", hr); + + // Cache the optimal jitter pattern + { + DSR_SIZE SourceSize = {Desc.InputWidth, Desc.InputHeight}; + DSR_SIZE TargetSize = {Desc.OutputWidth, Desc.OutputHeight}; + Uint32 PatternSize = 0; + + if (HRESULT hr = m_pDSREngine->GetOptimalJitterPattern(SourceSize, TargetSize, &PatternSize, nullptr); SUCCEEDED(hr) && PatternSize > 0) + { + std::vector DSRPattern(PatternSize); + if (hr = m_pDSREngine->GetOptimalJitterPattern(SourceSize, TargetSize, &PatternSize, DSRPattern.data()); SUCCEEDED(hr)) + { + m_JitterPattern.resize(PatternSize); + for (Uint32 i = 0; i < PatternSize; ++i) + { + m_JitterPattern[i].X = DSRPattern[i].X; + m_JitterPattern[i].Y = DSRPattern[i].Y; + } + } + } + else + { + PopulateHaltonJitterPattern(m_JitterPattern, 64); + LOG_WARNING_MESSAGE("Failed to get optimal jitter pattern from DirectSR engine. HRESULT: ", hr); + } + } +} + +SuperResolutionD3D12_DSR::~SuperResolutionD3D12_DSR() = default; + +void DILIGENT_CALL_TYPE SuperResolutionD3D12_DSR::Execute(const ExecuteSuperResolutionAttribs& Attribs) +{ + ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraNear > 0, "CameraNear must be greater than zero for temporal upscaling"); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFar > 0, "CameraFar must be greater than zero for temporal upscaling."); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFovAngleVert > 0, "CameraFovAngleVert must be greater than zero for temporal upscaling."); + VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.TimeDeltaInSeconds >= 0, "TimeDeltaInSeconds must be non-negative."); + + DeviceContextD3D12Impl* pCtx = ClassPtrCast(Attribs.pContext); + + const SoftwareQueueIndex QueueId = pCtx->GetCommandQueueId(); + VERIFY_EXPR(static_cast(QueueId) < m_DSRUpscalers.size()); + + // Lazily create an upscaler for this queue on first use. 
+ CComPtr& pDSRUpscaler = m_DSRUpscalers[static_cast(QueueId)]; + if (!pDSRUpscaler) + { + m_pDevice->LockCmdQueueAndRun(QueueId, [&](ICommandQueueD3D12* pCmdQueue) { + if (HRESULT hr = m_pDSREngine->CreateUpscaler(pCmdQueue->GetD3D12CommandQueue(), IID_PPV_ARGS(&pDSRUpscaler)); FAILED(hr)) + LOG_ERROR_AND_THROW("Failed to create DirectSR upscaler for queue ", static_cast(QueueId), ". HRESULT: ", hr); + }); + } + + auto GetD3D12Resource = [](ITextureView* pView) -> ID3D12Resource* { + if (pView != nullptr) + { + TextureD3D12Impl* pTexD3D12 = ClassPtrCast(pView->GetTexture()); + return pTexD3D12->GetD3D12Resource(); + } + return nullptr; + }; + + DSR_SUPERRES_UPSCALER_EXECUTE_PARAMETERS ExecuteParams = {}; + + ExecuteParams.pTargetTexture = GetD3D12Resource(Attribs.pOutputTextureView); + ExecuteParams.TargetRegion = {0, 0, static_cast(m_Desc.OutputWidth), static_cast(m_Desc.OutputHeight)}; + ExecuteParams.pSourceColorTexture = GetD3D12Resource(Attribs.pColorTextureSRV); + ExecuteParams.SourceColorRegion = {0, 0, static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight)}; + ExecuteParams.pSourceDepthTexture = GetD3D12Resource(Attribs.pDepthTextureSRV); + ExecuteParams.SourceDepthRegion = {0, 0, static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight)}; + ExecuteParams.pMotionVectorsTexture = GetD3D12Resource(Attribs.pMotionVectorsSRV); + ExecuteParams.MotionVectorsRegion = {0, 0, static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight)}; + ExecuteParams.MotionVectorScale = {Attribs.MotionVectorScaleX, Attribs.MotionVectorScaleY}; + ExecuteParams.CameraJitter = {Attribs.JitterX, Attribs.JitterY}; + ExecuteParams.ExposureScale = Attribs.ExposureScale; + ExecuteParams.PreExposure = Attribs.PreExposure; + ExecuteParams.Sharpness = Attribs.Sharpness; + ExecuteParams.CameraNear = Attribs.CameraNear; + ExecuteParams.CameraFar = Attribs.CameraFar; + ExecuteParams.CameraFovAngleVert = Attribs.CameraFovAngleVert; + ExecuteParams.pExposureScaleTexture = 
GetD3D12Resource(Attribs.pExposureTextureSRV); + ExecuteParams.pIgnoreHistoryMaskTexture = GetD3D12Resource(Attribs.pIgnoreHistoryMaskTextureSRV); + ExecuteParams.IgnoreHistoryMaskRegion = {0, 0, static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight)}; + ExecuteParams.pReactiveMaskTexture = GetD3D12Resource(Attribs.pReactiveMaskTextureSRV); + ExecuteParams.ReactiveMaskRegion = {0, 0, static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight)}; + + DSR_SUPERRES_UPSCALER_EXECUTE_FLAGS Flags = DSR_SUPERRES_UPSCALER_EXECUTE_FLAG_NONE; + if (Attribs.ResetHistory) + Flags |= DSR_SUPERRES_UPSCALER_EXECUTE_FLAG_RESET_HISTORY; + + // Transition all textures to the states expected by DirectSR and flush the context. + // DirectSR submits its own command list(s) to the command queue, so all rendering work must be submitted before DirectSR reads the inputs. + // Input textures must be in D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, output must be in D3D12_RESOURCE_STATE_UNORDERED_ACCESS. 
+ DeviceContextD3D12Impl* pDeviceCtx = ClassPtrCast(Attribs.pContext); + pDeviceCtx->TransitionTextureState(Attribs.pColorTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pDeviceCtx->TransitionTextureState(Attribs.pDepthTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pDeviceCtx->TransitionTextureState(Attribs.pMotionVectorsSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pDeviceCtx->TransitionTextureState(Attribs.pOutputTextureView->GetTexture(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + if (Attribs.pExposureTextureSRV) + pDeviceCtx->TransitionTextureState(Attribs.pExposureTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + if (Attribs.pReactiveMaskTextureSRV) + pDeviceCtx->TransitionTextureState(Attribs.pReactiveMaskTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + if (Attribs.pIgnoreHistoryMaskTextureSRV) + pDeviceCtx->TransitionTextureState(Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + pDeviceCtx->Flush(); + + if (HRESULT hr = pDSRUpscaler->Execute(&ExecuteParams, Attribs.TimeDeltaInSeconds, Flags); FAILED(hr)) + LOG_ERROR_MESSAGE("DirectSR Execute failed. 
HRESULT: ", hr); + + pDeviceCtx->TransitionTextureState(Attribs.pOutputTextureView->GetTexture(), D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE); +} + +} // anonymous namespace + +DSRProviderD3D12::DSRProviderD3D12(IRenderDevice* pDevice) : + m_pDevice{pDevice}, + m_pDSRDevice{CreateDSRDevice(pDevice).Detach()} +{ +} + +DSRProviderD3D12::~DSRProviderD3D12() +{ + if (m_pDSRDevice) + m_pDSRDevice->Release(); +} + +void DSRProviderD3D12::EnumerateVariants(std::vector& Variants) +{ + if (!m_pDSRDevice) + return; + + static_assert(sizeof(SuperResolutionInfo::VariantId) == sizeof(DSR_SUPERRES_VARIANT_DESC::VariantId), "GUID/INTERFACE_ID size mismatch"); + + const Uint32 DSRNumVariants = m_pDSRDevice->GetNumSuperResVariants(); + for (Uint32 Idx = 0; Idx < DSRNumVariants; ++Idx) + { + DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; + if (FAILED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) + continue; + + SuperResolutionInfo Info{}; + Info.Type = SUPER_RESOLUTION_TYPE_TEMPORAL; + + Info.TemporalCapFlags = SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NATIVE; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_EXPOSURE_SCALE_TEXTURE) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_EXPOSURE_SCALE_TEXTURE; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_IGNORE_HISTORY_MASK) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_IGNORE_HISTORY_MASK; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_REACTIVE_MASK) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_REACTIVE_MASK; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_SHARPNESS) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS; + + snprintf(Info.Name, sizeof(Info.Name), "DSR: %s", VariantDesc.VariantName); + memcpy(&Info.VariantId, &VariantDesc.VariantId, sizeof(Info.VariantId)); + + Variants.push_back(Info); + } +} + +void DSRProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + 
SuperResolutionSourceSettings& Settings) +{ + Settings = {}; + + DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); + ValidateSourceSettingsAttribs(Attribs); + + DSR_OPTIMIZATION_TYPE DSROptType = DSR_OPTIMIZATION_TYPE_BALANCED; + switch (Attribs.OptimizationType) + { + // clang-format off + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY: DSROptType = DSR_OPTIMIZATION_TYPE_MAX_QUALITY; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY: DSROptType = DSR_OPTIMIZATION_TYPE_HIGH_QUALITY; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED: DSROptType = DSR_OPTIMIZATION_TYPE_BALANCED; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE: DSROptType = DSR_OPTIMIZATION_TYPE_HIGH_PERFORMANCE; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE: DSROptType = DSR_OPTIMIZATION_TYPE_MAX_PERFORMANCE; break; + default: break; + // clang-format on + } + + const Uint32 NumVariants = m_pDSRDevice->GetNumSuperResVariants(); + Uint32 VariantIndex = UINT32_MAX; + for (Uint32 Idx = 0; Idx < NumVariants; ++Idx) + { + DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; + if (SUCCEEDED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) + { + if (memcmp(&VariantDesc.VariantId, &Attribs.VariantId, sizeof(GUID)) == 0) + { + VariantIndex = Idx; + break; + } + } + } + + if (VariantIndex == UINT32_MAX) + { + LOG_WARNING_MESSAGE("DirectSR variant not found for the specified VariantId"); + return; + } + + DSR_SIZE TargetSize = {Attribs.OutputWidth, Attribs.OutputHeight}; + + DSR_SUPERRES_CREATE_ENGINE_FLAGS DSRCreateFlags = DSR_SUPERRES_CREATE_ENGINE_FLAG_NONE; + if (Attribs.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) + DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_AUTO_EXPOSURE; + if (Attribs.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) + DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_ENABLE_SHARPENING; + + DSR_SUPERRES_SOURCE_SETTINGS SourceSettings = {}; + if (HRESULT hr = 
m_pDSRDevice->QuerySuperResSourceSettings(VariantIndex, TargetSize, TexFormatToDXGI_Format(Attribs.OutputFormat), DSROptType, DSRCreateFlags, &SourceSettings); SUCCEEDED(hr)) + { + Settings.OptimalInputWidth = SourceSettings.OptimalSize.Width; + Settings.OptimalInputHeight = SourceSettings.OptimalSize.Height; + } + else + { + LOG_WARNING_MESSAGE("DirectSR QuerySuperResSourceSettings failed. HRESULT: ", hr); + } +} + +void DSRProviderD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) +{ + DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + + RenderDeviceD3D12Impl* pDeviceD3D12 = ClassPtrCast(m_pDevice.RawPtr()); + SuperResolutionD3D12_DSR* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DSR instance", SuperResolutionD3D12_DSR)(pDeviceD3D12, Desc, m_pDSRDevice); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); +} + +} // namespace Diligent + +#else + +namespace Diligent +{ + +DSRProviderD3D12::DSRProviderD3D12(IRenderDevice*) +{ + LOG_INFO_MESSAGE("DirectSR is not supported on this platform"); +} +DSRProviderD3D12::~DSRProviderD3D12() {} +void DSRProviderD3D12::EnumerateVariants(std::vector&) {} +void DSRProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} +void DSRProviderD3D12::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} + +} // namespace Diligent + +#endif diff --git a/Graphics/SuperResolution/src/SuperResolutionBase.cpp b/Graphics/SuperResolution/src/SuperResolutionBase.cpp new file mode 100644 index 0000000000..4e0c2bcba3 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionBase.cpp @@ -0,0 +1,218 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "SuperResolutionBase.hpp" + +namespace Diligent +{ + +namespace +{ + +float HaltonSequence(Uint32 Base, Uint32 Index) +{ + float Result = 0.0f; + float Frac = 1.0f / static_cast(Base); + Uint32 Idx = Index + 1; + while (Idx > 0) + { + Result += Frac * static_cast(Idx % Base); + Idx /= Base; + Frac /= static_cast(Base); + } + return Result; +} + +} // namespace + +void PopulateHaltonJitterPattern(std::vector& JitterPattern, Uint32 PatternSize) +{ + JitterPattern.resize(PatternSize); + for (Uint32 Idx = 0; Idx < PatternSize; ++Idx) + { + JitterPattern[Idx].X = HaltonSequence(2, Idx) - 0.5f; + JitterPattern[Idx].Y = HaltonSequence(3, Idx) - 0.5f; + } +} + +void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs) noexcept(false) +{ + if (Attribs.OutputWidth == 0 || Attribs.OutputHeight == 0) + LOG_ERROR_AND_THROW("Output resolution must be greater than zero"); + if (Attribs.OptimizationType >= SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT) + LOG_ERROR_AND_THROW("Invalid optimization type"); +} + +void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false) +{ + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.OutputWidth > 0 && Desc.OutputHeight > 0, "Output resolution must be greater than zero"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.OutputFormat != TEX_FORMAT_UNKNOWN, "OutputFormat must not be TEX_FORMAT_UNKNOWN"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ColorFormat != TEX_FORMAT_UNKNOWN, "ColorFormat must not be TEX_FORMAT_UNKNOWN"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.InputWidth > 0 && Desc.InputHeight > 0, "InputWidth and InputHeight must be greater than zero"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.InputWidth <= Desc.OutputWidth && Desc.InputHeight <= Desc.OutputHeight, + "Input resolution must not exceed output resolution"); +} + +void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false) +{ + ValidateSuperResolutionDesc(Desc); + 
VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.DepthFormat != TEX_FORMAT_UNKNOWN, "DepthFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.MotionFormat != TEX_FORMAT_UNKNOWN, "MotionFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); +} +/* Validates the per-call arguments shared by all upscalers. The device context, color SRV and output view must be non-null. The color view must be a shader resource view of ColorFormat on a texture at least InputWidth x InputHeight. The output view must be a render target or unordered access view of OutputFormat on a texture of exactly OutputWidth x OutputHeight. */ +void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const ExecuteSuperResolutionAttribs& Attribs) noexcept(false) +{ + VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pContext != nullptr, "Device context must not be null"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pColorTextureSRV != nullptr, "Color texture SRV must not be null"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pOutputTextureView != nullptr, "Output texture view must not be null"); + + // Validate color texture + if (Attribs.pColorTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pColorTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pColorTextureSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Color texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Color texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ColorFormat, + "Color texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ColorFormat (", GetTextureFormatAttribs(Desc.ColorFormat).Name, ")"); + } + + // Validate output texture + if (Attribs.pOutputTextureView != nullptr) + { + const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pOutputTextureView->GetDesc();
+ VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_RENDER_TARGET || ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, + "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_RENDER_TARGET or TEXTURE_VIEW_UNORDERED_ACCESS"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == Desc.OutputWidth && TexDesc.Height == Desc.OutputHeight, + "Output texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must match the upscaler output resolution (", Desc.OutputWidth, "x", Desc.OutputHeight, ")"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.OutputFormat, + "Output texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected OutputFormat (", GetTextureFormatAttribs(Desc.OutputFormat).Name, ")"); + } +} +/* Runs ValidateExecuteSuperResolutionAttribs and then the temporal-only checks. Depth and motion vector SRVs must be non-null, an exposure SRV is required unless SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is set in Desc.Flags, and the output view must be an unordered access view. Each provided depth, motion, exposure, reactive mask and ignore history mask view is then validated in turn. */ +void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const ExecuteSuperResolutionAttribs& Attribs) noexcept(false) +{ + ValidateExecuteSuperResolutionAttribs(Desc, Attribs); + + VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pDepthTextureSRV != nullptr, "Depth texture SRV must not be null for temporal upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pMotionVectorsSRV != nullptr, "Motion vectors SRV must not be null for temporal upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Attribs.pExposureTextureSRV != nullptr, + "Exposure texture SRV must not be null when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set"); + + // Validate output texture view type (DirectSR requires UAV) + if (Attribs.pOutputTextureView != nullptr) + { + const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pOutputTextureView->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, + "Output texture view '", TexDesc.Name, "' must be
TEXTURE_VIEW_UNORDERED_ACCESS"); + } + + // Validate depth texture + if (Attribs.pDepthTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pDepthTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pDepthTextureSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Depth texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Depth texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.DepthFormat, + "Depth texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected DepthFormat (", GetTextureFormatAttribs(Desc.DepthFormat).Name, ")"); + } + + // Validate motion vectors texture + if (Attribs.pMotionVectorsSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pMotionVectorsSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pMotionVectorsSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Motion vectors view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Motion vectors texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.MotionFormat, + "Motion vectors view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected MotionFormat (", 
GetTextureFormatAttribs(Desc.MotionFormat).Name, ")"); + } + + // Validate exposure texture + if (Attribs.pExposureTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pExposureTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pExposureTextureSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Exposure texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == 1 && TexDesc.Height == 1, + "Exposure texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be 1x1"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ExposureFormat, + "Exposure texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ExposureFormat (", GetTextureFormatAttribs(Desc.ExposureFormat).Name, ")"); + } + + // Validate reactive mask texture + if (Attribs.pReactiveMaskTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pReactiveMaskTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pReactiveMaskTextureSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Reactive mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat != TEX_FORMAT_UNKNOWN, + "Reactive mask texture '", TexDesc.Name, "' provided but ReactiveMaskFormat was not set in SuperResolutionDesc"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Reactive mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ReactiveMaskFormat, + "Reactive 
mask view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ReactiveMaskFormat (", GetTextureFormatAttribs(Desc.ReactiveMaskFormat).Name, ")"); + } + + // Validate ignore history mask texture + if (Attribs.pIgnoreHistoryMaskTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetDesc(); + VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Ignore history mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Ignore history mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + } +} + +} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp b/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp new file mode 100644 index 0000000000..e2a7b9cce7 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp @@ -0,0 +1,149 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolutionDLSS.hpp" +#include "SuperResolutionBase.hpp" +#include "SuperResolutionVariants.hpp" +#include "DebugUtilities.hpp" + +#include +#include +#include +/* NOTE(review): the targets of the three includes above are missing in this copy of the patch; restore them from the repository. */ +namespace Diligent +{ +/* DLSS project id string and application data path. Neither is referenced in the code visible here; presumably consumed by the DLSS providers during NGX initialization, confirm. */ +const char* DLSSProjectId = "750fed3a-efba-42ba-801b-22d4cbad9148"; +const wchar_t* DLSSAppDataPath = L"."; +/* Maps an engine optimization type to the NGX performance/quality preset. Values without an explicit case fall back to the Balanced preset. */ +NVSDK_NGX_PerfQuality_Value OptimizationTypeToNGXPerfQuality(SUPER_RESOLUTION_OPTIMIZATION_TYPE Type) +{ + switch (Type) + { + // clang-format off + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY: return NVSDK_NGX_PerfQuality_Value_UltraQuality; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY: return NVSDK_NGX_PerfQuality_Value_MaxQuality; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED: return NVSDK_NGX_PerfQuality_Value_Balanced; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE: return NVSDK_NGX_PerfQuality_Value_MaxPerf; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE: return NVSDK_NGX_PerfQuality_Value_UltraPerformance; + default: return NVSDK_NGX_PerfQuality_Value_Balanced; + // clang-format on + } +} +/* Translates SUPER_RESOLUTION_FLAGS into NVSDK_NGX_DLSS_Feature_Flags bits. AUTO_EXPOSURE and ENABLE_SHARPENING are forwarded when present; the MVLowRes and IsHDR bits are set unconditionally. */ +Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags) +{ + Int32 DLSSFlags = NVSDK_NGX_DLSS_Feature_Flags_None; + + if (Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) + DLSSFlags |=
NVSDK_NGX_DLSS_Feature_Flags_AutoExposure; + if (Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) + DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_DoSharpening; + + DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_MVLowRes; /* always set, independent of Flags */ + DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_IsHDR; /* always set, independent of Flags */ + + return DLSSFlags; +} +/* Appends a SuperResolutionInfo entry for DLSS to Variants when NGX reports the feature as available. Logs a warning if the driver needs updating or the availability query fails, and an info message otherwise. Variants is left unchanged when pNGXParams is null or the query fails. */ +void EnumerateDLSSVariants(NVSDK_NGX_Parameter* pNGXParams, std::vector& Variants) +{ + DEV_CHECK_ERR(pNGXParams != nullptr, "NGX parameters must not be null"); + if (pNGXParams == nullptr) return; /* same explicit bail-out that CreateSuperResolutionFactory performs after DEV_CHECK_ERR, so the NGX queries below never receive a null pointer */ + Int32 NeedsUpdatedDriver = 0; + NVSDK_NGX_Parameter_GetI(pNGXParams, NVSDK_NGX_Parameter_SuperSampling_NeedsUpdatedDriver, &NeedsUpdatedDriver); + if (NeedsUpdatedDriver) + LOG_WARNING_MESSAGE("NVIDIA DLSS requires an updated driver."); + + Int32 DLSSAvailable = 0; + NVSDK_NGX_Result Result = NVSDK_NGX_Parameter_GetI(pNGXParams, NVSDK_NGX_Parameter_SuperSampling_Available, &DLSSAvailable); + if (NVSDK_NGX_FAILED(Result)) + { + LOG_WARNING_MESSAGE("Failed to query DLSS availability. Result: ", static_cast(Result)); + return; + } + + if (DLSSAvailable) + { + SuperResolutionInfo Info{}; + Info.Type = SUPER_RESOLUTION_TYPE_TEMPORAL; + Info.TemporalCapFlags = SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NATIVE | + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_EXPOSURE_SCALE_TEXTURE | + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_IGNORE_HISTORY_MASK | + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_REACTIVE_MASK | + SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS; + + snprintf(Info.Name, sizeof(Info.Name), "NGX: DLSS"); + Info.VariantId = VariantId_DLSS; + + Variants.push_back(Info); + LOG_INFO_MESSAGE("NVIDIA DLSS is available: ", Info.Name); + } + else + { + LOG_INFO_MESSAGE("NVIDIA DLSS is not available on this hardware."); + } +} +/* Resets Settings, validates Attribs (ValidateSourceSettingsAttribs is declared noexcept(false) and may throw) and asks NGX for the optimal input resolution for the requested output size and optimization type. */ +void GetDLSSSourceSettings(NVSDK_NGX_Parameter* pNGXParams, + const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) +{ + Settings = {}; + + ValidateSourceSettingsAttribs(Attribs); + + NVSDK_NGX_PerfQuality_Value PerfQuality = OptimizationTypeToNGXPerfQuality(Attribs.OptimizationType); + +
Uint32 OptimalWidth = 0; + Uint32 OptimalHeight = 0; + Uint32 MaxWidth = 0; + Uint32 MaxHeight = 0; + Uint32 MinWidth = 0; + Uint32 MinHeight = 0; + float Sharpness = 0.0f; + + NVSDK_NGX_Result Result = NGX_DLSS_GET_OPTIMAL_SETTINGS( + pNGXParams, + Attribs.OutputWidth, Attribs.OutputHeight, + PerfQuality, + &OptimalWidth, &OptimalHeight, + &MaxWidth, &MaxHeight, + &MinWidth, &MinHeight, + &Sharpness); + + if (NVSDK_NGX_SUCCEED(Result) && OptimalWidth > 0 && OptimalHeight > 0) + { + Settings.OptimalInputWidth = OptimalWidth; + Settings.OptimalInputHeight = OptimalHeight; + } + else + { + LOG_WARNING_MESSAGE("Failed to get DLSS optimal settings. Result: ", static_cast(Result)); + } +} + +} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp new file mode 100644 index 0000000000..790c9899b0 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -0,0 +1,86 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolutionFactoryLoader.h" +#include "DebugUtilities.hpp" + +namespace Diligent +{ +/* Per-backend factory creators; they are only declared here and are defined in other source files. */ +void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +void CreateSuperResolutionFactoryD3D11(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +void CreateSuperResolutionFactoryVk(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +void CreateSuperResolutionFactoryMtl(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +/* Creates the super resolution factory matching the render device type. Returns immediately when ppFactory is null; otherwise the output pointer is reset to null first and stays null when pDevice is null or the device type has no case in the switch. Exceptions thrown by a backend creator are caught and logged. */ +API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +{ + DEV_CHECK_ERR(ppFactory != nullptr, "ppFactory must not be null"); + if (ppFactory == nullptr) + return; + + *ppFactory = nullptr; + + DEV_CHECK_ERR(pDevice != nullptr, "pDevice must not be null"); + if (pDevice == nullptr) + return; + + try + { + switch (pDevice->GetDeviceInfo().Type) + { + case RENDER_DEVICE_TYPE_D3D12: + CreateSuperResolutionFactoryD3D12(pDevice, ppFactory); + break; + case RENDER_DEVICE_TYPE_D3D11: + CreateSuperResolutionFactoryD3D11(pDevice, ppFactory); + break; + case RENDER_DEVICE_TYPE_VULKAN: + CreateSuperResolutionFactoryVk(pDevice, ppFactory); + break; + case RENDER_DEVICE_TYPE_METAL: +
CreateSuperResolutionFactoryMtl(pDevice, ppFactory); + break; + default: /* no backend for this device type: the factory pointer keeps the null value assigned earlier in this function */ + break; + } + } + catch (...) + { + LOG_ERROR("Failed to create super resolution factory"); + } +} + +} // namespace Diligent +/* C-linkage entry point that forwards to Diligent::CreateSuperResolutionFactory. */ +extern "C" +{ + API_QUALIFIER + void Diligent_CreateSuperResolutionFactory(Diligent::IRenderDevice* pDevice, + Diligent::ISuperResolutionFactory** ppFactory) + { + Diligent::CreateSuperResolutionFactory(pDevice, ppFactory); + } +} diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp index f578c46345..df41689d70 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp @@ -25,108 +25,108 @@ */ #include "SuperResolutionFactoryBase.hpp" - -#include "SuperResolutionFactoryLoader.h" - -#include "PlatformDebug.hpp" #include "EngineMemory.h" +#include "PlatformDebug.hpp" +#include "DebugUtilities.hpp" namespace Diligent { -#if D3D12_SUPPORTED -void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); -#endif - -#if METAL_SUPPORTED -void CreateSuperResolutionFactoryMtl(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); -#endif +SuperResolutionFactoryBase::SuperResolutionFactoryBase(IReferenceCounters* pRefCounters) : + TBase{pRefCounters} +{ +} -SuperResolutionFactoryBase::SuperResolutionFactoryBase(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - TBase{pRefCounters}, - m_pDevice{pDevice} +BackendEntry* SuperResolutionFactoryBase::FindBackend(const INTERFACE_ID& VariantId) const { + for (const BackendEntry& Entry : m_Backends) + { + for (const SuperResolutionInfo& Info : Entry.Variants) + { + if (Info.VariantId == VariantId) + return const_cast(&Entry); /* first backend whose variant list contains VariantId; the cast removes the const added by iterating m_Backends by const reference. NOTE(review): the cast target type is missing in this copy, presumably a pointer to BackendEntry; confirm */ + } + } + return nullptr; /* no registered backend lists VariantId */ } void SuperResolutionFactoryBase::EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) { + Uint32 Count = 0; /* total number of variants across all backends */ + for (const BackendEntry& Entry :
m_Backends) + Count += static_cast(Entry.Variants.size()); + if (Variants == nullptr) { - NumVariants = static_cast(m_Variants.size()); + NumVariants = Count; return; } - NumVariants = std::min(NumVariants, static_cast(m_Variants.size())); - memcpy(Variants, m_Variants.data(), NumVariants * sizeof(SuperResolutionInfo)); + const Uint32 MaxVariants = NumVariants; + NumVariants = 0; + for (const BackendEntry& Entry : m_Backends) + { + for (const SuperResolutionInfo& Info : Entry.Variants) + { + if (NumVariants >= MaxVariants) + return; + Variants[NumVariants++] = Info; + } + } } -void SuperResolutionFactoryBase::SetMessageCallback(DebugMessageCallbackType MessageCallback) const +void SuperResolutionFactoryBase::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const { - SetDebugMessageCallback(MessageCallback); -} + Settings = {}; -void SuperResolutionFactoryBase::SetBreakOnError(bool BreakOnError) const -{ - PlatformDebug::SetBreakOnError(BreakOnError); -} + BackendEntry* pEntry = FindBackend(Attribs.VariantId); + if (pEntry == nullptr) + { + LOG_WARNING_MESSAGE("Super resolution variant not found for the specified VariantId"); + return; + } -void SuperResolutionFactoryBase::SetMemoryAllocator(IMemoryAllocator* pAllocator) const -{ - SetRawAllocator(pAllocator); + pEntry->pBackend->GetSourceSettings(Attribs, Settings); } - -API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +void SuperResolutionFactoryBase::CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) { - if (ppFactory == nullptr) - { - DEV_ERROR("ppFactory must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + if (ppUpscaler == nullptr) return; - } - DEV_CHECK_ERR(*ppFactory == nullptr, "ppFactory is not null. 
Overwriting it may cause memory leak"); - *ppFactory = nullptr; - if (pDevice == nullptr) + *ppUpscaler = nullptr; + + BackendEntry* pEntry = FindBackend(Desc.VariantId); + if (pEntry == nullptr) { - DEV_ERROR("pDevice must not be null"); + LOG_ERROR_MESSAGE("Super resolution variant not found for the specified VariantId. Call EnumerateVariants() to get valid variant IDs."); return; } - RENDER_DEVICE_TYPE DeviceType = pDevice->GetDeviceInfo().Type; try { - switch (DeviceType) - { - case RENDER_DEVICE_TYPE_D3D12: -#if D3D12_SUPPORTED - CreateSuperResolutionFactoryD3D12(pDevice, ppFactory); -#endif - break; - - case RENDER_DEVICE_TYPE_METAL: -#if METAL_SUPPORTED - CreateSuperResolutionFactoryMtl(pDevice, ppFactory); -#endif - break; - - default: - LOG_ERROR_MESSAGE("Super resolution is not supported on this device type: ", DeviceType); - } + pEntry->pBackend->CreateSuperResolution(Desc, ppUpscaler); } catch (...) { - LOG_ERROR("Failed to create super resolution factory"); + LOG_ERROR("Failed to create super resolution upscaler '", (Desc.Name ? 
Desc.Name : ""), "'"); } } -} // namespace Diligent +void SuperResolutionFactoryBase::SetMessageCallback(DebugMessageCallbackType MessageCallback) const +{ + SetDebugMessageCallback(MessageCallback); +} -extern "C" +void SuperResolutionFactoryBase::SetBreakOnError(bool BreakOnError) const { - API_QUALIFIER - void Diligent_CreateSuperResolutionFactory(Diligent::IRenderDevice* pDevice, - Diligent::ISuperResolutionFactory** ppFactory) - { - Diligent::CreateSuperResolutionFactory(pDevice, ppFactory); - } + PlatformDebug::SetBreakOnError(BreakOnError); +} + +void SuperResolutionFactoryBase::SetMemoryAllocator(IMemoryAllocator* pAllocator) const +{ + SetRawAllocator(pAllocator); } + +} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp new file mode 100644 index 0000000000..585a4d1f87 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp @@ -0,0 +1,50 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolutionFactoryBase.hpp" +#include "DLSSProviderD3D11.hpp" +#include "EngineMemory.h" + +namespace Diligent +{ +/* Direct3D11 factory: all behaviour comes from SuperResolutionFactoryBase; the constructor only registers a backend for the device. */ +class SuperResolutionFactoryD3D11 final : public SuperResolutionFactoryBase +{ +public: + SuperResolutionFactoryD3D11(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + SuperResolutionFactoryBase(pRefCounters) + { + AddBackend(pDevice); /* NOTE(review): presumably AddBackend is a template whose provider type argument was lost in this copy of the patch; the included DLSSProviderD3D11.hpp suggests the DLSS provider, confirm */ + } +}; +/* Allocates a SuperResolutionFactoryD3D11 with NEW_RC_OBJ on the raw allocator and returns it through ppFactory by querying IID_SuperResolutionFactory. Neither argument is checked here. */ +void CreateSuperResolutionFactoryD3D11(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +{ + auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D11 instance", SuperResolutionFactoryD3D11)(pDevice); + pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); +} + +} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp index 6e9eaa3d0b..7b83a8a90c 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp @@ -25,59 +25,27 @@ */ #include "SuperResolutionFactoryBase.hpp" - +#include "DLSSProviderD3D12.hpp" +#include "DSRProviderD3D12.hpp" #include "EngineMemory.h" -#include "DebugUtilities.hpp" namespace Diligent { -namespace -{ -
class SuperResolutionFactoryD3D12 final : public SuperResolutionFactoryBase { public: - using TBase = SuperResolutionFactoryBase; - - SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice); - - virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; - - virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; - -private: - void PopulateVariants(); + SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + SuperResolutionFactoryBase(pRefCounters) + { + AddBackend(pDevice); /* NOTE(review): this call and the next are textually identical here; presumably AddBackend is a template and the provider type arguments, one per provider header included by this file (DLSS and DSR), were lost in this copy of the patch. Confirm against the repository. */ + AddBackend(pDevice); + } }; - -SuperResolutionFactoryD3D12::SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - TBase{pRefCounters, pDevice} -{ - PopulateVariants(); -} - -void SuperResolutionFactoryD3D12::PopulateVariants() -{ -} - -void SuperResolutionFactoryD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const -{ - Settings = {}; -} - -void SuperResolutionFactoryD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) -{ -} - -} // namespace - void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) { - VERIFY(pDevice != nullptr, "pDevice must not be null"); - VERIFY(pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_D3D12, "Expected a D3D12 device"); - - SuperResolutionFactoryD3D12* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D12 instance", SuperResolutionFactoryD3D12)(pDevice); + auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D12 instance", SuperResolutionFactoryD3D12)(pDevice); /* allocated on the raw allocator and handed to the caller by the QueryInterface call that follows; the argument checks of the previous version are removed by this patch */ pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); } diff --git
a/Graphics/SuperResolution/src/SuperResolutionD3D12.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp similarity index 87% rename from Graphics/SuperResolution/src/SuperResolutionD3D12.cpp rename to Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp index 92fcc323a6..ce618ffb40 100644 --- a/Graphics/SuperResolution/src/SuperResolutionD3D12.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp @@ -24,9 +24,18 @@ * of the possibility of such damages. */ -#include "SuperResolution.h" +#include "SuperResolutionFactory.h" +/* Fallback definition of the Metal factory creator, compiled only when METAL_SUPPORTED is zero or undefined. */ +#if !METAL_SUPPORTED namespace Diligent { +void CreateSuperResolutionFactoryMtl(IRenderDevice* /*pDevice*/, ISuperResolutionFactory** ppFactory) +{ + *ppFactory = nullptr; /* reports that no factory is available; ppFactory is dereferenced without a null check, so callers must pass a valid pointer */ +} + } // namespace Diligent + +#endif diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp new file mode 100644 index 0000000000..76bbe2db35 --- /dev/null +++ b/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp @@ -0,0 +1,50 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. + */ + +#include "SuperResolutionFactoryBase.hpp" +#include "DLSSProviderVk.hpp" +#include "EngineMemory.h" + +namespace Diligent +{ +/* Vulkan factory: all behaviour comes from SuperResolutionFactoryBase; the constructor only registers a backend for the device. */ +class SuperResolutionFactoryVk final : public SuperResolutionFactoryBase +{ +public: + SuperResolutionFactoryVk(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + SuperResolutionFactoryBase(pRefCounters) + { + AddBackend(pDevice); /* NOTE(review): presumably AddBackend is a template whose provider type argument was lost in this copy of the patch; the included DLSSProviderVk.hpp suggests the DLSS provider, confirm */ + } +}; +/* Allocates a SuperResolutionFactoryVk with NEW_RC_OBJ on the raw allocator and returns it through ppFactory by querying IID_SuperResolutionFactory. Neither argument is checked here. */ +void CreateSuperResolutionFactoryVk(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) +{ + auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryVk instance", SuperResolutionFactoryVk)(pDevice); + pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); +} + +} // namespace Diligent diff --git a/Tests/DiligentCoreAPITest/CMakeLists.txt b/Tests/DiligentCoreAPITest/CMakeLists.txt index 89b20f6dda..582162170b 100644 --- a/Tests/DiligentCoreAPITest/CMakeLists.txt +++ b/Tests/DiligentCoreAPITest/CMakeLists.txt @@ -121,6 +121,10 @@ PRIVATE if(SUPER_RESOLUTION_SUPPORTED) target_link_libraries(DiligentCoreAPITest PRIVATE Diligent-SuperResolution-static) + if(DILIGENT_DSR_SUPPORTED) + target_link_libraries(DiligentCoreAPITest PRIVATE DirectSR-AgilitySDK) + copy_directsr_dlls(DiligentCoreAPITest) + endif() endif() if(TARGET
Diligent-HLSL2GLSLConverterLib) diff --git a/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp b/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp new file mode 100644 index 0000000000..d017aee3fc --- /dev/null +++ b/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp @@ -0,0 +1,423 @@ +/* + * Copyright 2026 Diligent Graphics LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * In no event and under no legal theory, whether in tort (including negligence), + * contract, or otherwise, unless required by applicable law (such as deliberate + * and grossly negligent acts) or agreed to in writing, shall any Contributor be + * liable for any damages, including any direct, indirect, special, incidental, + * or consequential damages of any character arising as a result of this License or + * out of the use or inability to use the software (including but not limited to damages + * for loss of goodwill, work stoppage, computer failure or malfunction, or any and + * all other commercial damages or losses), even if such Contributor has been advised + * of the possibility of such damages. 
+ */ + +#include "GPUTestingEnvironment.hpp" +#include "GraphicsAccessories.hpp" +#include "SuperResolutionFactory.h" +#include "SuperResolutionFactoryLoader.h" + +#include "gtest/gtest.h" + +using namespace Diligent; +using namespace Diligent::Testing; + +extern "C" +{ + int TestSuperResolutionCInterface(void* pUpscaler); + int TestSuperResolutionFactoryCInterface(void* pFactory); +} + +namespace +{ + +static ISuperResolutionFactory* GetFactory() +{ + auto* pDevice = GPUTestingEnvironment::GetInstance()->GetDevice(); + static RefCntAutoPtr pFactory; + if (!pFactory) + LoadAndCreateSuperResolutionFactory(pDevice, &pFactory); + return pFactory; +} + +static const SuperResolutionInfo* FindVariantByType(const SuperResolutionInfo* pVariants, Uint32 NumVariants, SUPER_RESOLUTION_TYPE Type) +{ + for (Uint32 VariantIdx = 0; VariantIdx < NumVariants; ++VariantIdx) + { + if (pVariants[VariantIdx].Type == Type) + return &pVariants[VariantIdx]; + } + return nullptr; +} + +TEST(SuperResolutionTest, EnumerateVariants) +{ + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + + Uint32 NumVariants = 0; + pFactory->EnumerateVariants(NumVariants, nullptr); + if (NumVariants == 0) + { + GTEST_SKIP() << "No super resolution variants available on this device"; + } + + std::vector Variants(NumVariants); + pFactory->EnumerateVariants(NumVariants, Variants.data()); + + for (Uint32 VariantIdx = 0; VariantIdx < NumVariants; ++VariantIdx) + { + EXPECT_NE(Variants[VariantIdx].Name[0], '\0') << "Variant " << VariantIdx << " has empty name"; + EXPECT_NE(Variants[VariantIdx].VariantId, IID_Unknown) << "Variant " << VariantIdx << " has unknown UID"; + } +} + +TEST(SuperResolutionTest, QuerySourceSettings) +{ + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + + Uint32 NumVariants = 0; + pFactory->EnumerateVariants(NumVariants, nullptr); + if (NumVariants == 0) + { + GTEST_SKIP() << "No super resolution variants available on this device"; + } + + std::vector 
Variants(NumVariants); + pFactory->EnumerateVariants(NumVariants, Variants.data()); + + for (Uint32 VariantIdx = 0; VariantIdx < NumVariants; ++VariantIdx) + { + SuperResolutionSourceSettingsAttribs Attribs; + Attribs.VariantId = Variants[VariantIdx].VariantId; + Attribs.OutputWidth = 1920; + Attribs.OutputHeight = 1080; + Attribs.OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED; + Attribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; + Attribs.Flags = SUPER_RESOLUTION_FLAG_NONE; + + SuperResolutionSourceSettings Settings; + pFactory->GetSourceSettings(Attribs, Settings); + + EXPECT_GT(Settings.OptimalInputWidth, 0u) << "Variant " << Variants[VariantIdx].Name; + EXPECT_GT(Settings.OptimalInputHeight, 0u) << "Variant " << Variants[VariantIdx].Name; + EXPECT_LE(Settings.OptimalInputWidth, 1920u) << "Variant " << Variants[VariantIdx].Name; + EXPECT_LE(Settings.OptimalInputHeight, 1080u) << "Variant " << Variants[VariantIdx].Name; + } + + // Test all optimization types produce monotonically decreasing input resolution + // (enum is ordered from MAX_QUALITY=0 to MAX_PERFORMANCE) + { + const auto& Variant = Variants[0]; + + Uint32 PrevWidth = 0; + for (Uint8 OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY; OptimizationType < SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE; ++OptimizationType) + { + SuperResolutionSourceSettingsAttribs Attribs; + Attribs.VariantId = Variant.VariantId; + Attribs.OutputWidth = 1920; + Attribs.OutputHeight = 1080; + Attribs.OptimizationType = static_cast(OptimizationType); + Attribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; + Attribs.Flags = SUPER_RESOLUTION_FLAG_NONE; + + SuperResolutionSourceSettings Settings; + pFactory->GetSourceSettings(Attribs, Settings); + + // First iteration: just record. Subsequent: input should decrease or stay same. 
+ if (PrevWidth > 0) + EXPECT_LE(Settings.OptimalInputWidth, PrevWidth) << "OptimizationType " << OptimizationType; + PrevWidth = Settings.OptimalInputWidth; + } + } +} + +TEST(SuperResolutionTest, CreateTemporalUpscaler) +{ + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + + Uint32 NumVariants = 0; + pFactory->EnumerateVariants(NumVariants, nullptr); + if (NumVariants == 0) + { + GTEST_SKIP() << "No super resolution variants available on this device"; + } + + std::vector Variants(NumVariants); + pFactory->EnumerateVariants(NumVariants, Variants.data()); + + const auto* pTemporalInfo = FindVariantByType(Variants.data(), NumVariants, SUPER_RESOLUTION_TYPE_TEMPORAL); + if (pTemporalInfo == nullptr) + { + GTEST_SKIP() << "Temporal super resolution is not supported by this device"; + } + + GPUTestingEnvironment::ScopedReset EnvironmentAutoReset; + + // Query optimal input resolution + SuperResolutionSourceSettingsAttribs QueryAttribs; + QueryAttribs.VariantId = pTemporalInfo->VariantId; + QueryAttribs.OutputWidth = 1920; + QueryAttribs.OutputHeight = 1080; + QueryAttribs.OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED; + QueryAttribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; + QueryAttribs.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + + SuperResolutionSourceSettings SourceSettings; + pFactory->GetSourceSettings(QueryAttribs, SourceSettings); + ASSERT_GT(SourceSettings.OptimalInputWidth, 0u); + ASSERT_GT(SourceSettings.OptimalInputHeight, 0u); + + SuperResolutionDesc Desc; + Desc.Name = "Test Temporal Upscaler"; + Desc.VariantId = pTemporalInfo->VariantId; + Desc.OutputWidth = QueryAttribs.OutputWidth; + Desc.OutputHeight = QueryAttribs.OutputHeight; + Desc.OutputFormat = QueryAttribs.OutputFormat; + Desc.InputWidth = SourceSettings.OptimalInputWidth; + Desc.InputHeight = SourceSettings.OptimalInputHeight; + Desc.ColorFormat = TEX_FORMAT_RGBA16_FLOAT; + Desc.DepthFormat = TEX_FORMAT_R32_FLOAT; + Desc.MotionFormat = TEX_FORMAT_RG16_FLOAT; + 
Desc.Flags = QueryAttribs.Flags; + + RefCntAutoPtr pUpscaler; + pFactory->CreateSuperResolution(Desc, &pUpscaler); + ASSERT_NE(pUpscaler, nullptr) << "Failed to create temporal super resolution upscaler"; + + const auto& RetDesc = pUpscaler->GetDesc(); + EXPECT_EQ(RetDesc.VariantId, pTemporalInfo->VariantId); + EXPECT_EQ(RetDesc.OutputWidth, 1920u); + EXPECT_EQ(RetDesc.OutputHeight, 1080u); + EXPECT_EQ(RetDesc.InputWidth, SourceSettings.OptimalInputWidth); + EXPECT_EQ(RetDesc.InputHeight, SourceSettings.OptimalInputHeight); + + // Temporal upscaler should return non-trivial jitter pattern (Halton sequence) + float JitterX = 0.0f, JitterY = 0.0f; + pUpscaler->GetJitterOffset(0, JitterX, JitterY); + EXPECT_TRUE(JitterX != 0.0f || JitterY != 0.0f); + + // Verify a few frames produce different jitter values + float PrevJitterX = JitterX, PrevJitterY = JitterY; + pUpscaler->GetJitterOffset(1, JitterX, JitterY); + EXPECT_TRUE(JitterX != PrevJitterX || JitterY != PrevJitterY); +} + +TEST(SuperResolutionTest, ExecuteTemporalUpscaler) +{ + auto* pEnv = GPUTestingEnvironment::GetInstance(); + auto* pDevice = pEnv->GetDevice(); + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + + Uint32 NumVariants = 0; + pFactory->EnumerateVariants(NumVariants, nullptr); + if (NumVariants == 0) + { + GTEST_SKIP() << "No super resolution variants available on this device"; + } + + std::vector Variants(NumVariants); + pFactory->EnumerateVariants(NumVariants, Variants.data()); + + const auto* pTemporalInfo = FindVariantByType(Variants.data(), NumVariants, SUPER_RESOLUTION_TYPE_TEMPORAL); + if (pTemporalInfo == nullptr) + { + GTEST_SKIP() << "Temporal super resolution is not supported by this device"; + } + + GPUTestingEnvironment::ScopedReset EnvironmentAutoReset; + + + // Query optimal input resolution + SuperResolutionSourceSettingsAttribs QueryAttribs{}; + QueryAttribs.VariantId = pTemporalInfo->VariantId; + QueryAttribs.OutputWidth = 1920; + QueryAttribs.OutputHeight = 1080; 
+ QueryAttribs.OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED; + QueryAttribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; + QueryAttribs.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + + SuperResolutionSourceSettings SourceSettings{}; + pFactory->GetSourceSettings(QueryAttribs, SourceSettings); + ASSERT_GT(SourceSettings.OptimalInputWidth, 0u); + ASSERT_GT(SourceSettings.OptimalInputHeight, 0u); + + // Create upscaler + SuperResolutionDesc UpscalerDesc{}; + UpscalerDesc.Name = "Test Temporal Execute Upscaler"; + UpscalerDesc.VariantId = pTemporalInfo->VariantId; + UpscalerDesc.OutputWidth = QueryAttribs.OutputWidth; + UpscalerDesc.OutputHeight = QueryAttribs.OutputHeight; + UpscalerDesc.OutputFormat = QueryAttribs.OutputFormat; + UpscalerDesc.InputWidth = SourceSettings.OptimalInputWidth; + UpscalerDesc.InputHeight = SourceSettings.OptimalInputHeight; + UpscalerDesc.ColorFormat = TEX_FORMAT_RGBA16_FLOAT; + UpscalerDesc.DepthFormat = TEX_FORMAT_R32_FLOAT; + UpscalerDesc.MotionFormat = TEX_FORMAT_RG16_FLOAT; + UpscalerDesc.Flags = QueryAttribs.Flags; + + RefCntAutoPtr pUpscaler; + pFactory->CreateSuperResolution(UpscalerDesc, &pUpscaler); + ASSERT_NE(pUpscaler, nullptr); + + + // Create input color texture + TextureDesc ColorTexDesc; + ColorTexDesc.Name = "SR Color Input"; + ColorTexDesc.Type = RESOURCE_DIM_TEX_2D; + ColorTexDesc.Width = SourceSettings.OptimalInputWidth; + ColorTexDesc.Height = SourceSettings.OptimalInputHeight; + ColorTexDesc.Format = TEX_FORMAT_RGBA16_FLOAT; + ColorTexDesc.BindFlags = BIND_SHADER_RESOURCE | BIND_RENDER_TARGET; + ColorTexDesc.Usage = USAGE_DEFAULT; + + RefCntAutoPtr pColorTex; + pDevice->CreateTexture(ColorTexDesc, nullptr, &pColorTex); + ASSERT_NE(pColorTex, nullptr); + + // Create depth texture + TextureDesc DepthTexDesc; + DepthTexDesc.Name = "SR Depth Input"; + DepthTexDesc.Type = RESOURCE_DIM_TEX_2D; + DepthTexDesc.Width = SourceSettings.OptimalInputWidth; + DepthTexDesc.Height = SourceSettings.OptimalInputHeight; + 
DepthTexDesc.Format = TEX_FORMAT_D32_FLOAT; + DepthTexDesc.BindFlags = BIND_SHADER_RESOURCE | BIND_DEPTH_STENCIL; + DepthTexDesc.Usage = USAGE_DEFAULT; + + RefCntAutoPtr pDepthTex; + pDevice->CreateTexture(DepthTexDesc, nullptr, &pDepthTex); + ASSERT_NE(pDepthTex, nullptr); + + // Create motion vectors texture + TextureDesc MotionTexDesc; + MotionTexDesc.Name = "SR Motion Vectors"; + MotionTexDesc.Type = RESOURCE_DIM_TEX_2D; + MotionTexDesc.Width = SourceSettings.OptimalInputWidth; + MotionTexDesc.Height = SourceSettings.OptimalInputHeight; + MotionTexDesc.Format = TEX_FORMAT_RG16_FLOAT; + MotionTexDesc.BindFlags = BIND_SHADER_RESOURCE | BIND_RENDER_TARGET; + MotionTexDesc.Usage = USAGE_DEFAULT; + + RefCntAutoPtr pMotionTex; + pDevice->CreateTexture(MotionTexDesc, nullptr, &pMotionTex); + ASSERT_NE(pMotionTex, nullptr); + + // Create output texture + TextureDesc OutputTexDesc{}; + OutputTexDesc.Name = "SR Output"; + OutputTexDesc.Type = RESOURCE_DIM_TEX_2D; + OutputTexDesc.Width = QueryAttribs.OutputWidth; + OutputTexDesc.Height = QueryAttribs.OutputHeight; + OutputTexDesc.Format = TEX_FORMAT_RGBA16_FLOAT; + OutputTexDesc.BindFlags = BIND_SHADER_RESOURCE | BIND_UNORDERED_ACCESS; + OutputTexDesc.Usage = USAGE_DEFAULT; + + RefCntAutoPtr pOutputTex; + pDevice->CreateTexture(OutputTexDesc, nullptr, &pOutputTex); + ASSERT_NE(pOutputTex, nullptr); + + // Execute temporal upscaling with reset + auto* pContext = pEnv->GetDeviceContext(); + + ExecuteSuperResolutionAttribs Attribs; + Attribs.pContext = pContext; + Attribs.pColorTextureSRV = pColorTex->GetDefaultView(TEXTURE_VIEW_SHADER_RESOURCE); + Attribs.pDepthTextureSRV = pDepthTex->GetDefaultView(TEXTURE_VIEW_SHADER_RESOURCE); + Attribs.pMotionVectorsSRV = pMotionTex->GetDefaultView(TEXTURE_VIEW_SHADER_RESOURCE); + Attribs.pOutputTextureView = pOutputTex->GetDefaultView(TEXTURE_VIEW_UNORDERED_ACCESS); + Attribs.JitterX = 0.0f; + Attribs.JitterY = 0.0f; + Attribs.MotionVectorScaleX = 1.0f; + Attribs.MotionVectorScaleY = 
1.0f; + Attribs.ExposureScale = 1.0f; + Attribs.Sharpness = 0.5f; + Attribs.CameraNear = 0.1f; + Attribs.CameraFar = 1000.0f; + Attribs.CameraFovAngleVert = 1.0472f; // ~60 degrees + + Attribs.TimeDeltaInSeconds = 0.016f; + Attribs.ResetHistory = true; + pUpscaler->Execute(Attribs); + + // Execute a second frame without reset + Attribs.JitterX = -0.25f; + Attribs.JitterY = 0.25f; + Attribs.ResetHistory = False; + pUpscaler->Execute(Attribs); + + pContext->Flush(); + pContext->WaitForIdle(); +} + +TEST(SuperResolution_CInterface, Factory) +{ + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + EXPECT_EQ(TestSuperResolutionFactoryCInterface(pFactory), 0); +} + +TEST(SuperResolution_CInterface, SuperResolution) +{ + auto* pFactory = GetFactory(); + ASSERT_NE(pFactory, nullptr); + + Uint32 NumVariants = 0; + pFactory->EnumerateVariants(NumVariants, nullptr); + if (NumVariants == 0) + { + GTEST_SKIP() << "No super resolution variants available on this device"; + } + + std::vector Variants(NumVariants); + pFactory->EnumerateVariants(NumVariants, Variants.data()); + + GPUTestingEnvironment::ScopedReset EnvironmentAutoReset; + + SuperResolutionSourceSettingsAttribs QueryAttribs; + QueryAttribs.VariantId = Variants[0].VariantId; + QueryAttribs.OutputWidth = 1920; + QueryAttribs.OutputHeight = 1080; + QueryAttribs.OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED; + QueryAttribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; + QueryAttribs.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + + SuperResolutionSourceSettings SourceSettings; + pFactory->GetSourceSettings(QueryAttribs, SourceSettings); + + SuperResolutionDesc Desc; + Desc.Name = "C Interface Test Upscaler"; + Desc.VariantId = Variants[0].VariantId; + Desc.OutputWidth = QueryAttribs.OutputWidth; + Desc.OutputHeight = QueryAttribs.OutputHeight; + Desc.OutputFormat = QueryAttribs.OutputFormat; + Desc.InputWidth = SourceSettings.OptimalInputWidth; + Desc.InputHeight = SourceSettings.OptimalInputHeight; 
+ Desc.ColorFormat = TEX_FORMAT_RGBA16_FLOAT; + Desc.DepthFormat = TEX_FORMAT_R32_FLOAT; + Desc.MotionFormat = TEX_FORMAT_RG16_FLOAT; + Desc.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + + RefCntAutoPtr pUpscaler; + pFactory->CreateSuperResolution(Desc, &pUpscaler); + ASSERT_NE(pUpscaler, nullptr); + + EXPECT_EQ(TestSuperResolutionCInterface(pUpscaler), 0); +} + +} // namespace From 18b5abb086080e08ab01aca97ed84c4344cfdbd3 Mon Sep 17 00:00:00 2001 From: assiduous Date: Thu, 19 Mar 2026 23:11:32 -0700 Subject: [PATCH 08/14] Simplify super-resolution initialization --- BuildTools/.NET/dotnet-build-package.py | 2 +- Graphics/SuperResolution/CMakeLists.txt | 54 +++-- .../include/DLSSProviderD3D11.hpp | 58 ----- .../include/DLSSProviderD3D12.hpp | 58 ----- .../include/DSRProviderD3D12.hpp | 60 ----- .../include/SuperResolutionFactoryBase.hpp | 117 --------- ...iderVk.hpp => SuperResolutionProvider.hpp} | 22 +- .../SuperResolution/src/DLSSProviderD3D11.cpp | 112 ++++----- .../SuperResolution/src/DLSSProviderD3D12.cpp | 111 ++++----- .../SuperResolution/src/DLSSProviderVk.cpp | 138 +++++------ .../SuperResolution/src/DSRProviderD3D12.cpp | 225 +++++++++--------- .../src/SuperResolutionFactory.cpp | 200 ++++++++++++++-- .../src/SuperResolutionFactoryBase.cpp | 132 ---------- .../src/SuperResolutionFactoryD3D11.cpp | 50 ---- .../src/SuperResolutionFactoryD3D12.cpp | 52 ---- .../src/SuperResolutionFactoryMtl.cpp | 41 ---- .../src/SuperResolutionFactoryVk.cpp | 50 ---- 17 files changed, 502 insertions(+), 980 deletions(-) delete mode 100644 Graphics/SuperResolution/include/DLSSProviderD3D11.hpp delete mode 100644 Graphics/SuperResolution/include/DLSSProviderD3D12.hpp delete mode 100644 Graphics/SuperResolution/include/DSRProviderD3D12.hpp delete mode 100644 Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp rename Graphics/SuperResolution/include/{DLSSProviderVk.hpp => SuperResolutionProvider.hpp} (72%) delete mode 100644 
Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp delete mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp delete mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp delete mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp delete mode 100644 Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp diff --git a/BuildTools/.NET/dotnet-build-package.py b/BuildTools/.NET/dotnet-build-package.py index b6626c9ee9..382a682fb1 100644 --- a/BuildTools/.NET/dotnet-build-package.py +++ b/BuildTools/.NET/dotnet-build-package.py @@ -148,7 +148,7 @@ def cmake_build_project(config, settings): subprocess.run(f"cmake -S . -B {settings['native-build-folder']} \ -D CMAKE_BUILD_TYPE={config} \ -D CMAKE_INSTALL_PREFIX={settings['native-build-folder']}/install -A {settings['cmake-generator-attribute']} \ - -D DILIGENT_BUILD_CORE_TESTS=ON", check=True) + -D DILIGENT_BUILD_CORE_TESTS=ON -D DILIGENT_NO_DLSS=ON -D DILIGENT_NO_DSR=ON", check=True) subprocess.run(f"cmake --build {settings['native-build-folder']} --target install --config {config}", check=True) native_dll_path = f"{project_paths['dotnet-build']}/{project_paths['dotnet-proj']}/native/{settings['nuget-name-folder']}" diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index d1b31ee818..a830da3cb9 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -7,9 +7,13 @@ project(Diligent-SuperResolution CXX) set(DILIGENT_DLSS_SUPPORTED FALSE CACHE INTERNAL "DLSS is not supported") set(DILIGENT_DSR_SUPPORTED FALSE CACHE INTERNAL "DirectSR is not supported") -if(PLATFORM_WIN32) - set(DILIGENT_DLSS_SUPPORTED TRUE CACHE INTERNAL "DLSS is supported on Win32 platform") - set(DILIGENT_DSR_SUPPORTED TRUE CACHE INTERNAL "DirectSR is supported on Win32 platform") +if(PLATFORM_WIN32 AND NOT MINGW_BUILD) + if (D3D11_SUPPORTED OR D3D12_SUPPORTED OR VULKAN_SUPPORTED) + 
set(DILIGENT_DLSS_SUPPORTED TRUE CACHE INTERNAL "DLSS is supported on Win32 platform") + endif() + if (D3D12_SUPPORTED) + set(DILIGENT_DSR_SUPPORTED TRUE CACHE INTERNAL "DirectSR is supported on Win32 platform") + endif() endif() if(${DILIGENT_NO_DLSS}) @@ -42,12 +46,8 @@ endif() set(INCLUDE include/SuperResolutionBase.hpp - include/SuperResolutionFactoryBase.hpp include/SuperResolutionVariants.hpp - include/DLSSProviderD3D12.hpp - include/DLSSProviderD3D11.hpp - include/DLSSProviderVk.hpp - include/DSRProviderD3D12.hpp + include/SuperResolutionProvider.hpp ) set(INTERFACE @@ -58,21 +58,27 @@ set(INTERFACE set(SOURCE src/SuperResolutionBase.cpp - src/SuperResolutionFactoryBase.cpp src/SuperResolutionFactory.cpp - src/SuperResolutionFactoryD3D12.cpp - src/SuperResolutionFactoryD3D11.cpp - src/SuperResolutionFactoryVk.cpp - src/SuperResolutionFactoryMtl.cpp - src/DLSSProviderD3D12.cpp - src/DLSSProviderD3D11.cpp - src/DLSSProviderVk.cpp - src/DSRProviderD3D12.cpp ) if(DILIGENT_DLSS_SUPPORTED) list(APPEND INCLUDE include/SuperResolutionDLSS.hpp) list(APPEND SOURCE src/SuperResolutionDLSS.cpp) + if (D3D11_SUPPORTED) + list(APPEND SOURCE src/DLSSProviderD3D11.cpp) + endif() + if (D3D12_SUPPORTED) + list(APPEND SOURCE src/DLSSProviderD3D12.cpp) + endif() + if (VULKAN_SUPPORTED) + list(APPEND SOURCE src/DLSSProviderVk.cpp) + endif() +endif() + +if (DILIGENT_DSR_SUPPORTED) + if (D3D12_SUPPORTED) + list(APPEND SOURCE src/DSRProviderD3D12.cpp) + endif() endif() set(DLL_SOURCE @@ -106,11 +112,21 @@ PRIVATE ) if(DILIGENT_DLSS_SUPPORTED) - target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DLSS_SUPPORTED=1) + if (D3D11_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DLSS_D3D11_SUPPORTED=1) + endif() + if (D3D12_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DLSS_D3D12_SUPPORTED=1) + endif() + if (VULKAN_SUPPORTED) + 
target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DLSS_VK_SUPPORTED=1) + endif() endif() if(DILIGENT_DSR_SUPPORTED) - target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DSR_SUPPORTED=1) + if (D3D12_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_DSR_D3D12_SUPPORTED=1) + endif() endif() target_compile_definitions(Diligent-SuperResolution-shared PUBLIC DILIGENT_SUPER_RESOLUTION_SHARED=1) diff --git a/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp b/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp deleted file mode 100644 index 63a88a8a75..0000000000 --- a/Graphics/SuperResolution/include/DLSSProviderD3D11.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#pragma once - -#include "SuperResolutionFactory.h" -#include "SuperResolution.h" -#include "RefCntAutoPtr.hpp" - -#include - -struct NVSDK_NGX_Parameter; - -namespace Diligent -{ - -class DLSSProviderD3D11 final -{ -public: - DLSSProviderD3D11(IRenderDevice* pDevice); - - ~DLSSProviderD3D11(); - - void EnumerateVariants(std::vector& Variants); - - void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); - - void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); - -private: - RefCntAutoPtr m_pDevice; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; -}; - -} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp b/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp deleted file mode 100644 index 45c3618bd6..0000000000 --- a/Graphics/SuperResolution/include/DLSSProviderD3D12.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. 
- */ - -#pragma once - -#include "SuperResolutionFactory.h" -#include "SuperResolution.h" -#include "RefCntAutoPtr.hpp" - -#include - -struct NVSDK_NGX_Parameter; - -namespace Diligent -{ - -class DLSSProviderD3D12 final -{ -public: - DLSSProviderD3D12(IRenderDevice* pDevice); - - ~DLSSProviderD3D12(); - - void EnumerateVariants(std::vector& Variants); - - void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); - - void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); - -private: - RefCntAutoPtr m_pDevice; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; -}; - -} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DSRProviderD3D12.hpp b/Graphics/SuperResolution/include/DSRProviderD3D12.hpp deleted file mode 100644 index 3ef038f0f5..0000000000 --- a/Graphics/SuperResolution/include/DSRProviderD3D12.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#pragma once - -#include "SuperResolutionFactory.h" -#include "SuperResolution.h" -#include "RefCntAutoPtr.hpp" - -#include - -struct IDSRDevice; - -namespace Diligent -{ - -class DSRProviderD3D12 final -{ -public: - DSRProviderD3D12(IRenderDevice* pDevice); - - ~DSRProviderD3D12(); - - void EnumerateVariants(std::vector& Variants); - - void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings); - - void CreateSuperResolution(const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler); - -private: - RefCntAutoPtr m_pDevice; - IDSRDevice* m_pDSRDevice = nullptr; -}; - -} // namespace Diligent diff --git a/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp b/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp deleted file mode 100644 index 6964975d22..0000000000 --- a/Graphics/SuperResolution/include/SuperResolutionFactoryBase.hpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#pragma once - -#include "SuperResolutionFactory.h" -#include "SuperResolution.h" -#include "ObjectBase.hpp" - -#include -#include - -namespace Diligent -{ - -struct BackendEntry -{ - struct IHolder - { - virtual ~IHolder() = default; - - virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) = 0; - - virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) = 0; - }; - - template - struct Holder final : IHolder - { - T Instance; - template - explicit Holder(Args&&... args) : - Instance(std::forward(args)...) 
{} - - void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) override { Instance.GetSourceSettings(Attribs, Settings); } - - void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override { Instance.CreateSuperResolution(Desc, ppUpscaler); } - }; - - std::unique_ptr pBackend; - std::vector Variants; -}; - -class SuperResolutionFactoryBase : public ObjectBase -{ -public: - using TBase = ObjectBase; - - SuperResolutionFactoryBase(IReferenceCounters* pRefCounters); - - IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) - - virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final; - - virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final; - - virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final; - - virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final; - - virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final; - - virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final; - - template - void AddBackend(Args&&... args); - -private: - BackendEntry* FindBackend(const INTERFACE_ID& VariantId) const; - - std::vector m_Backends; -}; - -template -void SuperResolutionFactoryBase::AddBackend(Args&&... args) -{ - try - { - auto pHolder = std::make_unique>(std::forward(args)...); - - BackendEntry Entry; - pHolder->Instance.EnumerateVariants(Entry.Variants); - if (Entry.Variants.empty()) - return; - - Entry.pBackend = std::move(pHolder); - m_Backends.push_back(std::move(Entry)); - } - catch (...) 
- { - } -} - -} // namespace Diligent diff --git a/Graphics/SuperResolution/include/DLSSProviderVk.hpp b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp similarity index 72% rename from Graphics/SuperResolution/include/DLSSProviderVk.hpp rename to Graphics/SuperResolution/include/SuperResolutionProvider.hpp index bfe690cb1c..24a9304bc7 100644 --- a/Graphics/SuperResolution/include/DLSSProviderVk.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp @@ -28,31 +28,25 @@ #include "SuperResolutionFactory.h" #include "SuperResolution.h" -#include "RefCntAutoPtr.hpp" #include -struct NVSDK_NGX_Parameter; - namespace Diligent { -class DLSSProviderVk final +class SuperResolutionProvider { public: - DLSSProviderVk(IRenderDevice* pDevice); - - ~DLSSProviderVk(); - - void EnumerateVariants(std::vector& Variants); + virtual ~SuperResolutionProvider() + {} - void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings); + virtual void EnumerateVariants(std::vector& Variants) = 0; - void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler); + virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) = 0; -private: - RefCntAutoPtr m_pDevice; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) = 0; }; } // namespace Diligent diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp index cb211ecf5a..39bf79d9c3 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp @@ -24,20 +24,18 @@ * of the possibility of such damages. 
*/ -#include "DLSSProviderD3D11.hpp" +#include "SuperResolutionProvider.hpp" -#if D3D11_SUPPORTED && DILIGENT_DLSS_SUPPORTED +#include "SuperResolutionDLSS.hpp" +#include "SuperResolutionBase.hpp" +#include "SuperResolutionVariants.hpp" -# include "SuperResolutionDLSS.hpp" -# include "SuperResolutionBase.hpp" -# include "SuperResolutionVariants.hpp" +#include -# include - -# include "../../GraphicsEngineD3D11/include/pch.h" -# include "RenderDeviceD3D11Impl.hpp" -# include "DeviceContextD3D11Impl.hpp" -# include "TextureBaseD3D11.hpp" +#include "../../GraphicsEngineD3D11/include/pch.h" +#include "RenderDeviceD3D11Impl.hpp" +#include "DeviceContextD3D11Impl.hpp" +#include "TextureBaseD3D11.hpp" namespace Diligent { @@ -133,66 +131,60 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase NVSDK_NGX_Parameter* m_pNGXParams = nullptr; }; -} // anonymous namespace - - -DLSSProviderD3D11::DLSSProviderD3D11(IRenderDevice* pDevice) : - m_pDevice{pDevice} +class DLSSProviderD3D11 final : public SuperResolutionProvider { - ID3D11Device* pd3d11Device = ClassPtrCast(pDevice)->GetD3D11Device(); - NVSDK_NGX_Result Result = NVSDK_NGX_D3D11_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d11Device); - if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX D3D11 initialization failed. Result: ", static_cast(Result)); - - Result = NVSDK_NGX_D3D11_GetCapabilityParameters(&m_pNGXParams); - if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX D3D11 capability parameters. 
Result: ", static_cast(Result)); -} +public: + DLSSProviderD3D11(IRenderDevice* pDevice) : + m_pDevice{pDevice} + { + ID3D11Device* pd3d11Device = ClassPtrCast(pDevice)->GetD3D11Device(); + NVSDK_NGX_Result Result = NVSDK_NGX_D3D11_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d11Device); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX D3D11 initialization failed. Result: ", static_cast(Result)); -DLSSProviderD3D11::~DLSSProviderD3D11() -{ - if (m_pNGXParams != nullptr) - NVSDK_NGX_D3D11_DestroyParameters(m_pNGXParams); - NVSDK_NGX_D3D11_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D11Device()); -} + Result = NVSDK_NGX_D3D11_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX D3D11 capability parameters. Result: ", static_cast(Result)); + } -void DLSSProviderD3D11::EnumerateVariants(std::vector& Variants) -{ - EnumerateDLSSVariants(m_pNGXParams, Variants); -} + ~DLSSProviderD3D11() + { + if (m_pNGXParams != nullptr) + NVSDK_NGX_D3D11_DestroyParameters(m_pNGXParams); + NVSDK_NGX_D3D11_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D11Device()); + } -void DLSSProviderD3D11::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) -{ - GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); -} + virtual void EnumerateVariants(std::vector& Variants) override final + { + EnumerateDLSSVariants(m_pNGXParams, Variants); + } -void DLSSProviderD3D11::CreateSuperResolution(const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); - DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) override final + { + GetDLSSSourceSettings(m_pNGXParams, 
Attribs, Settings); + } - SuperResolutionD3D11_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D11_DLSS instance", SuperResolutionD3D11_DLSS)(m_pDevice, Desc, m_pNGXParams); - pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); -} + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final + { + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); -} // namespace Diligent + SuperResolutionD3D11_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D11_DLSS instance", SuperResolutionD3D11_DLSS)(m_pDevice, Desc, m_pNGXParams); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); + } -#else +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; -namespace Diligent -{ +} // anonymous namespace -DLSSProviderD3D11::DLSSProviderD3D11(IRenderDevice*) +std::unique_ptr CreateDLSSProviderD3D11(IRenderDevice* pDevice) { - LOG_INFO_MESSAGE("DLSS is not supported on this platform for D3D11 backend"); + return pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_D3D11 ? + std::make_unique(pDevice) : + nullptr; } -DLSSProviderD3D11::~DLSSProviderD3D11() {} -void DLSSProviderD3D11::EnumerateVariants(std::vector&) {} -void DLSSProviderD3D11::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} -void DLSSProviderD3D11::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} } // namespace Diligent - -#endif diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp index 5596ab5e54..083cc1f251 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp @@ -24,20 +24,18 @@ * of the possibility of such damages. 
*/ -#include "DLSSProviderD3D12.hpp" +#include "SuperResolutionProvider.hpp" -#if D3D12_SUPPORTED && DILIGENT_DLSS_SUPPORTED +#include "SuperResolutionDLSS.hpp" +#include "SuperResolutionBase.hpp" +#include "SuperResolutionVariants.hpp" -# include "SuperResolutionDLSS.hpp" -# include "SuperResolutionBase.hpp" -# include "SuperResolutionVariants.hpp" +#include -# include - -# include "../../GraphicsEngineD3D12/include/pch.h" -# include "RenderDeviceD3D12Impl.hpp" -# include "DeviceContextD3D12Impl.hpp" -# include "TextureD3D12Impl.hpp" +#include "../../GraphicsEngineD3D12/include/pch.h" +#include "RenderDeviceD3D12Impl.hpp" +#include "DeviceContextD3D12Impl.hpp" +#include "TextureD3D12Impl.hpp" namespace Diligent { @@ -147,66 +145,61 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase NVSDK_NGX_Parameter* m_pNGXParams = nullptr; }; -} // anonymous namespace - -DLSSProviderD3D12::DLSSProviderD3D12(IRenderDevice* pDevice) : - m_pDevice{pDevice} +class DLSSProviderD3D12 final : public SuperResolutionProvider { - ID3D12Device* pd3d12Device = ClassPtrCast(pDevice)->GetD3D12Device(); - NVSDK_NGX_Result Result = NVSDK_NGX_D3D12_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d12Device); - if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX D3D12 initialization failed. Result: ", static_cast(Result)); - - Result = NVSDK_NGX_D3D12_GetCapabilityParameters(&m_pNGXParams); - if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX D3D12 capability parameters. 
Result: ", static_cast(Result)); -} +public: + DLSSProviderD3D12(IRenderDevice* pDevice) : + m_pDevice{pDevice} + { + ID3D12Device* pd3d12Device = ClassPtrCast(pDevice)->GetD3D12Device(); + NVSDK_NGX_Result Result = NVSDK_NGX_D3D12_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d12Device); + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX D3D12 initialization failed. Result: ", static_cast(Result)); -DLSSProviderD3D12::~DLSSProviderD3D12() -{ - if (m_pNGXParams != nullptr) - NVSDK_NGX_D3D12_DestroyParameters(m_pNGXParams); - NVSDK_NGX_D3D12_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D12Device()); -} + Result = NVSDK_NGX_D3D12_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX D3D12 capability parameters. Result: ", static_cast(Result)); + } -void DLSSProviderD3D12::EnumerateVariants(std::vector& Variants) -{ - EnumerateDLSSVariants(m_pNGXParams, Variants); -} + ~DLSSProviderD3D12() + { + if (m_pNGXParams != nullptr) + NVSDK_NGX_D3D12_DestroyParameters(m_pNGXParams); + NVSDK_NGX_D3D12_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D12Device()); + } -void DLSSProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) -{ - GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); -} + virtual void EnumerateVariants(std::vector& Variants) override final + { + EnumerateDLSSVariants(m_pNGXParams, Variants); + } -void DLSSProviderD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); - DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) override final + { + GetDLSSSourceSettings(m_pNGXParams, 
Attribs, Settings); + } - SuperResolutionD3D12_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DLSS instance", SuperResolutionD3D12_DLSS)(m_pDevice, Desc, m_pNGXParams); - pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); -} + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final + { + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); -} // namespace Diligent + SuperResolutionD3D12_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DLSS instance", SuperResolutionD3D12_DLSS)(m_pDevice, Desc, m_pNGXParams); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); + } -#else +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; -namespace Diligent -{ +} // anonymous namespace -DLSSProviderD3D12::DLSSProviderD3D12(IRenderDevice*) +std::unique_ptr CreateDLSSProviderD3D12(IRenderDevice* pDevice) { - LOG_INFO_MESSAGE("DLSS is not supported on this platform for D3D12 backend"); + return pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_D3D12 ? + std::make_unique(pDevice) : + nullptr; } -DLSSProviderD3D12::~DLSSProviderD3D12() {} -void DLSSProviderD3D12::EnumerateVariants(std::vector&) {} -void DLSSProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} -void DLSSProviderD3D12::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} } // namespace Diligent - -#endif diff --git a/Graphics/SuperResolution/src/DLSSProviderVk.cpp b/Graphics/SuperResolution/src/DLSSProviderVk.cpp index ab813a9f37..f1c3ef9bc9 100644 --- a/Graphics/SuperResolution/src/DLSSProviderVk.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderVk.cpp @@ -24,21 +24,19 @@ * of the possibility of such damages. 
*/ -#include "DLSSProviderVk.hpp" +#include "SuperResolutionProvider.hpp" -#if VULKAN_SUPPORTED && DILIGENT_DLSS_SUPPORTED +#include "SuperResolutionDLSS.hpp" +#include "SuperResolutionBase.hpp" +#include "SuperResolutionVariants.hpp" -# include "SuperResolutionDLSS.hpp" -# include "SuperResolutionBase.hpp" -# include "SuperResolutionVariants.hpp" - -# include "../../GraphicsEngineVulkan/include/pch.h" -# include -# include "RenderDeviceVkImpl.hpp" -# include "DeviceContextVkImpl.hpp" -# include "TextureVkImpl.hpp" -# include "TextureViewVkImpl.hpp" -# include "VulkanTypeConversions.hpp" +#include "../../GraphicsEngineVulkan/include/pch.h" +#include +#include "RenderDeviceVkImpl.hpp" +#include "DeviceContextVkImpl.hpp" +#include "TextureVkImpl.hpp" +#include "TextureViewVkImpl.hpp" +#include "VulkanTypeConversions.hpp" namespace Diligent { @@ -160,27 +158,28 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase NVSDK_NGX_Parameter* m_pNGXParams = nullptr; }; -} // anonymous namespace - -DLSSProviderVk::DLSSProviderVk(IRenderDevice* pDevice) : - m_pDevice{pDevice} +class DLSSProviderVk final : public SuperResolutionProvider { - RenderDeviceVkImpl* pDeviceVk = ClassPtrCast(pDevice); - VkInstance vkInstance = pDeviceVk->GetVkInstance(); - VkPhysicalDevice vkPhysDevice = pDeviceVk->GetVkPhysicalDevice(); - VkDevice vkDevice = pDeviceVk->GetVkDevice(); +public: + DLSSProviderVk(IRenderDevice* pDevice) : + m_pDevice{pDevice} + { + RenderDeviceVkImpl* pDeviceVk = ClassPtrCast(pDevice); + VkInstance vkInstance = pDeviceVk->GetVkInstance(); + VkPhysicalDevice vkPhysDevice = pDeviceVk->GetVkPhysicalDevice(); + VkDevice vkDevice = pDeviceVk->GetVkDevice(); - NVSDK_NGX_Result Result = NVSDK_NGX_VULKAN_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, vkInstance, vkPhysDevice, vkDevice); + NVSDK_NGX_Result Result = NVSDK_NGX_VULKAN_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, vkInstance, 
vkPhysDevice, vkDevice); - { - Uint32 ExtCount = 0; - VkExtensionProperties* pExtensions = nullptr; - NVSDK_NGX_FeatureDiscoveryInfo FeatureInfo = {}; - NVSDK_NGX_Result ExtResult = NVSDK_NGX_VULKAN_GetFeatureDeviceExtensionRequirements(vkInstance, vkPhysDevice, &FeatureInfo, &ExtCount, &pExtensions); - if (NVSDK_NGX_SUCCEED(ExtResult) && ExtCount > 0 && pExtensions != nullptr) { - /* TODO: Need to implement IsExtensionEnabled in VulkanUtilities::LogicalDevice + Uint32 ExtCount = 0; + VkExtensionProperties* pExtensions = nullptr; + NVSDK_NGX_FeatureDiscoveryInfo FeatureInfo = {}; + NVSDK_NGX_Result ExtResult = NVSDK_NGX_VULKAN_GetFeatureDeviceExtensionRequirements(vkInstance, vkPhysDevice, &FeatureInfo, &ExtCount, &pExtensions); + if (NVSDK_NGX_SUCCEED(ExtResult) && ExtCount > 0 && pExtensions != nullptr) + { + /* TODO: Need to implement IsExtensionEnabled in VulkanUtilities::LogicalDevice const VulkanUtilities::LogicalDevice& LogicDevice = pDeviceVk->GetLogicalDevice(); for (Uint32 ExtensionIdx = 0; ExtensionIdx < ExtCount; ++ExtensionIdx) { @@ -192,61 +191,54 @@ DLSSProviderVk::DLSSProviderVk(IRenderDevice* pDevice) : } } */ + } } - } - - if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX Vulkan initialization failed. Result: ", static_cast(Result)); - Result = NVSDK_NGX_VULKAN_GetCapabilityParameters(&m_pNGXParams); - if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX Vulkan capability parameters. Result: ", static_cast(Result)); -} - -DLSSProviderVk::~DLSSProviderVk() -{ - if (m_pNGXParams != nullptr) - NVSDK_NGX_VULKAN_DestroyParameters(m_pNGXParams); - NVSDK_NGX_VULKAN_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetVkDevice()); -} + if (NVSDK_NGX_FAILED(Result)) + LOG_ERROR_AND_THROW("NVIDIA NGX Vulkan initialization failed. 
Result: ", static_cast(Result)); -void DLSSProviderVk::EnumerateVariants(std::vector& Variants) -{ - EnumerateDLSSVariants(m_pNGXParams, Variants); -} + Result = NVSDK_NGX_VULKAN_GetCapabilityParameters(&m_pNGXParams); + if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) + LOG_ERROR_AND_THROW("Failed to get NGX Vulkan capability parameters. Result: ", static_cast(Result)); + } -void DLSSProviderVk::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) -{ - GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); -} + ~DLSSProviderVk() + { + if (m_pNGXParams != nullptr) + NVSDK_NGX_VULKAN_DestroyParameters(m_pNGXParams); + NVSDK_NGX_VULKAN_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetVkDevice()); + } -void DLSSProviderVk::CreateSuperResolution(const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); - DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + void EnumerateVariants(std::vector& Variants) + { + EnumerateDLSSVariants(m_pNGXParams, Variants); + } + void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) + { + GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); + } - SuperResolutionVk_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionVk_DLSS instance", SuperResolutionVk_DLSS)(m_pDevice, Desc, m_pNGXParams); - pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); -} + void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) + { + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); -} // namespace Diligent + SuperResolutionVk_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionVk_DLSS instance", SuperResolutionVk_DLSS)(m_pDevice, Desc, m_pNGXParams); + 
pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); + } -#else +private: + RefCntAutoPtr m_pDevice; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; +}; -namespace Diligent -{ +} // anonymous namespace -DLSSProviderVk::DLSSProviderVk(IRenderDevice*) +std::unique_ptr CreateDLSSProviderVk(IRenderDevice* pDevice) { - LOG_INFO_MESSAGE("DLSS is not supported on this platform for Vulkan backend"); + return pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_VULKAN ? + std::make_unique(pDevice) : + nullptr; } -DLSSProviderVk::~DLSSProviderVk() {} -void DLSSProviderVk::EnumerateVariants(std::vector&) {} -void DLSSProviderVk::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} -void DLSSProviderVk::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} } // namespace Diligent - -#endif diff --git a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp index b801fad01e..2f0b0447cc 100644 --- a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp @@ -24,18 +24,16 @@ * of the possibility of such damages. 
*/ -#include "DSRProviderD3D12.hpp" +#include "SuperResolutionProvider.hpp" -#if DILIGENT_DSR_SUPPORTED +#include "SuperResolutionBase.hpp" +#include "../../GraphicsEngineD3D12/include/pch.h" -# include "SuperResolutionBase.hpp" -# include "../../GraphicsEngineD3D12/include/pch.h" +#include -# include - -# include "RenderDeviceD3D12Impl.hpp" -# include "DeviceContextD3D12Impl.hpp" -# include "DXGITypeConversions.hpp" +#include "RenderDeviceD3D12Impl.hpp" +#include "DeviceContextD3D12Impl.hpp" +#include "DXGITypeConversions.hpp" namespace Diligent { @@ -251,143 +249,140 @@ void DILIGENT_CALL_TYPE SuperResolutionD3D12_DSR::Execute(const ExecuteSuperReso pDeviceCtx->TransitionTextureState(Attribs.pOutputTextureView->GetTexture(), D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE); } -} // anonymous namespace - -DSRProviderD3D12::DSRProviderD3D12(IRenderDevice* pDevice) : - m_pDevice{pDevice}, - m_pDSRDevice{CreateDSRDevice(pDevice).Detach()} +class DSRProviderD3D12 final : public SuperResolutionProvider { -} +public: + DSRProviderD3D12(IRenderDevice* pDevice) : + m_pDevice{pDevice}, + m_pDSRDevice{CreateDSRDevice(pDevice).Detach()} + { + } -DSRProviderD3D12::~DSRProviderD3D12() -{ - if (m_pDSRDevice) - m_pDSRDevice->Release(); -} + ~DSRProviderD3D12() + { + if (m_pDSRDevice) + m_pDSRDevice->Release(); + } -void DSRProviderD3D12::EnumerateVariants(std::vector& Variants) -{ - if (!m_pDSRDevice) - return; + virtual void EnumerateVariants(std::vector& Variants) override final + { + if (!m_pDSRDevice) + return; - static_assert(sizeof(SuperResolutionInfo::VariantId) == sizeof(DSR_SUPERRES_VARIANT_DESC::VariantId), "GUID/INTERFACE_ID size mismatch"); + static_assert(sizeof(SuperResolutionInfo::VariantId) == sizeof(DSR_SUPERRES_VARIANT_DESC::VariantId), "GUID/INTERFACE_ID size mismatch"); - const Uint32 DSRNumVariants = m_pDSRDevice->GetNumSuperResVariants(); - for (Uint32 Idx = 0; Idx < DSRNumVariants; ++Idx) - { - DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; - if 
(FAILED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) - continue; - - SuperResolutionInfo Info{}; - Info.Type = SUPER_RESOLUTION_TYPE_TEMPORAL; - - Info.TemporalCapFlags = SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NATIVE; - if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_EXPOSURE_SCALE_TEXTURE) - Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_EXPOSURE_SCALE_TEXTURE; - if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_IGNORE_HISTORY_MASK) - Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_IGNORE_HISTORY_MASK; - if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_REACTIVE_MASK) - Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_REACTIVE_MASK; - if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_SHARPNESS) - Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS; - - snprintf(Info.Name, sizeof(Info.Name), "DSR: %s", VariantDesc.VariantName); - memcpy(&Info.VariantId, &VariantDesc.VariantId, sizeof(Info.VariantId)); - - Variants.push_back(Info); + const Uint32 DSRNumVariants = m_pDSRDevice->GetNumSuperResVariants(); + for (Uint32 Idx = 0; Idx < DSRNumVariants; ++Idx) + { + DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; + if (FAILED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) + continue; + + SuperResolutionInfo Info{}; + Info.Type = SUPER_RESOLUTION_TYPE_TEMPORAL; + + Info.TemporalCapFlags = SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_NATIVE; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_EXPOSURE_SCALE_TEXTURE) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_EXPOSURE_SCALE_TEXTURE; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_IGNORE_HISTORY_MASK) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_IGNORE_HISTORY_MASK; + if (VariantDesc.Flags & DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_REACTIVE_MASK) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_REACTIVE_MASK; + if (VariantDesc.Flags & 
DSR_SUPERRES_VARIANT_FLAG_SUPPORTS_SHARPNESS) + Info.TemporalCapFlags |= SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS; + + snprintf(Info.Name, sizeof(Info.Name), "DSR: %s", VariantDesc.VariantName); + memcpy(&Info.VariantId, &VariantDesc.VariantId, sizeof(Info.VariantId)); + + Variants.push_back(Info); + } } -} -void DSRProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) -{ - Settings = {}; + virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) override final + { + Settings = {}; - DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); - ValidateSourceSettingsAttribs(Attribs); + DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); + ValidateSourceSettingsAttribs(Attribs); - DSR_OPTIMIZATION_TYPE DSROptType = DSR_OPTIMIZATION_TYPE_BALANCED; - switch (Attribs.OptimizationType) - { - // clang-format off + DSR_OPTIMIZATION_TYPE DSROptType = DSR_OPTIMIZATION_TYPE_BALANCED; + switch (Attribs.OptimizationType) + { + // clang-format off case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY: DSROptType = DSR_OPTIMIZATION_TYPE_MAX_QUALITY; break; case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY: DSROptType = DSR_OPTIMIZATION_TYPE_HIGH_QUALITY; break; case SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED: DSROptType = DSR_OPTIMIZATION_TYPE_BALANCED; break; case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE: DSROptType = DSR_OPTIMIZATION_TYPE_HIGH_PERFORMANCE; break; case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE: DSROptType = DSR_OPTIMIZATION_TYPE_MAX_PERFORMANCE; break; default: break; - // clang-format on - } + // clang-format on + } - const Uint32 NumVariants = m_pDSRDevice->GetNumSuperResVariants(); - Uint32 VariantIndex = UINT32_MAX; - for (Uint32 Idx = 0; Idx < NumVariants; ++Idx) - { - DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; - if 
(SUCCEEDED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) + const Uint32 NumVariants = m_pDSRDevice->GetNumSuperResVariants(); + Uint32 VariantIndex = UINT32_MAX; + for (Uint32 Idx = 0; Idx < NumVariants; ++Idx) { - if (memcmp(&VariantDesc.VariantId, &Attribs.VariantId, sizeof(GUID)) == 0) + DSR_SUPERRES_VARIANT_DESC VariantDesc = {}; + if (SUCCEEDED(m_pDSRDevice->GetSuperResVariantDesc(Idx, &VariantDesc))) { - VariantIndex = Idx; - break; + if (memcmp(&VariantDesc.VariantId, &Attribs.VariantId, sizeof(GUID)) == 0) + { + VariantIndex = Idx; + break; + } } } - } - if (VariantIndex == UINT32_MAX) - { - LOG_WARNING_MESSAGE("DirectSR variant not found for the specified VariantId"); - return; - } + if (VariantIndex == UINT32_MAX) + { + LOG_WARNING_MESSAGE("DirectSR variant not found for the specified VariantId"); + return; + } - DSR_SIZE TargetSize = {Attribs.OutputWidth, Attribs.OutputHeight}; + DSR_SIZE TargetSize = {Attribs.OutputWidth, Attribs.OutputHeight}; - DSR_SUPERRES_CREATE_ENGINE_FLAGS DSRCreateFlags = DSR_SUPERRES_CREATE_ENGINE_FLAG_NONE; - if (Attribs.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) - DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_AUTO_EXPOSURE; - if (Attribs.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) - DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_ENABLE_SHARPENING; + DSR_SUPERRES_CREATE_ENGINE_FLAGS DSRCreateFlags = DSR_SUPERRES_CREATE_ENGINE_FLAG_NONE; + if (Attribs.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) + DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_AUTO_EXPOSURE; + if (Attribs.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) + DSRCreateFlags |= DSR_SUPERRES_CREATE_ENGINE_FLAG_ENABLE_SHARPENING; - DSR_SUPERRES_SOURCE_SETTINGS SourceSettings = {}; - if (HRESULT hr = m_pDSRDevice->QuerySuperResSourceSettings(VariantIndex, TargetSize, TexFormatToDXGI_Format(Attribs.OutputFormat), DSROptType, DSRCreateFlags, &SourceSettings); SUCCEEDED(hr)) - { - Settings.OptimalInputWidth = SourceSettings.OptimalSize.Width; - 
Settings.OptimalInputHeight = SourceSettings.OptimalSize.Height; - } - else - { - LOG_WARNING_MESSAGE("DirectSR QuerySuperResSourceSettings failed. HRESULT: ", hr); + DSR_SUPERRES_SOURCE_SETTINGS SourceSettings = {}; + if (HRESULT hr = m_pDSRDevice->QuerySuperResSourceSettings(VariantIndex, TargetSize, TexFormatToDXGI_Format(Attribs.OutputFormat), DSROptType, DSRCreateFlags, &SourceSettings); SUCCEEDED(hr)) + { + Settings.OptimalInputWidth = SourceSettings.OptimalSize.Width; + Settings.OptimalInputHeight = SourceSettings.OptimalSize.Height; + } + else + { + LOG_WARNING_MESSAGE("DirectSR QuerySuperResSourceSettings failed. HRESULT: ", hr); + } } -} - -void DSRProviderD3D12::CreateSuperResolution(const SuperResolutionDesc& Desc, - ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); - DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); - RenderDeviceD3D12Impl* pDeviceD3D12 = ClassPtrCast(m_pDevice.RawPtr()); - SuperResolutionD3D12_DSR* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DSR instance", SuperResolutionD3D12_DSR)(pDeviceD3D12, Desc, m_pDSRDevice); - pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); -} + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, + ISuperResolution** ppUpscaler) override final + { + DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); + DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); -} // namespace Diligent + RenderDeviceD3D12Impl* pDeviceD3D12 = ClassPtrCast(m_pDevice.RawPtr()); + SuperResolutionD3D12_DSR* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DSR instance", SuperResolutionD3D12_DSR)(pDeviceD3D12, Desc, m_pDSRDevice); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); + } -#else +private: + RefCntAutoPtr m_pDevice; + IDSRDevice* m_pDSRDevice = nullptr; +}; -namespace Diligent -{ +} // anonymous 
namespace -DSRProviderD3D12::DSRProviderD3D12(IRenderDevice*) +std::unique_ptr CreateDSRProviderD3D12(IRenderDevice* pDevice) { - LOG_INFO_MESSAGE("DirectSR is not supported on this platform"); + return pDevice->GetDeviceInfo().Type == RENDER_DEVICE_TYPE_D3D12 ? + std::make_unique(pDevice) : + nullptr; } -DSRProviderD3D12::~DSRProviderD3D12() {} -void DSRProviderD3D12::EnumerateVariants(std::vector&) {} -void DSRProviderD3D12::GetSourceSettings(const SuperResolutionSourceSettingsAttribs&, SuperResolutionSourceSettings&) {} -void DSRProviderD3D12::CreateSuperResolution(const SuperResolutionDesc&, ISuperResolution**) {} } // namespace Diligent - -#endif diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp index 790c9899b0..c0600c4c16 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -25,15 +25,188 @@ */ #include "SuperResolutionFactoryLoader.h" +#include "SuperResolutionProvider.hpp" #include "DebugUtilities.hpp" +#include "ObjectBase.hpp" +#include "EngineMemory.h" +#include "PlatformDebug.hpp" namespace Diligent { -void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); -void CreateSuperResolutionFactoryD3D11(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); -void CreateSuperResolutionFactoryVk(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); -void CreateSuperResolutionFactoryMtl(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory); +#if DILIGENT_DLSS_D3D11_SUPPORTED +std::unique_ptr CreateDLSSProviderD3D11(IRenderDevice* pDevice); +#endif + +#if DILIGENT_DLSS_D3D12_SUPPORTED +std::unique_ptr CreateDLSSProviderD3D12(IRenderDevice* pDevice); +#endif + +#if DILIGENT_DLSS_VK_SUPPORTED +std::unique_ptr CreateDLSSProviderVk(IRenderDevice* pDevice); +#endif + +#if DILIGENT_DSR_D3D12_SUPPORTED +std::unique_ptr 
CreateDSRProviderD3D12(IRenderDevice* pDevice); +#endif + +#if DILIGENT_METALFX_SUPPORTED +std::unique_ptr CreateMetalFXProvider(IRenderDevice* pDevice); +#endif + +namespace +{ + +class SuperResolutionFactory : public ObjectBase +{ +public: + using TBase = ObjectBase; + + SuperResolutionFactory(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : + TBase{pRefCounters} + { + auto AddProvider = [this](IRenderDevice* pDevice, + std::unique_ptr CreateProvider(IRenderDevice*), + const char* ProviderName) { + try + { + ProviderInfo ProvInfo; + ProvInfo.Provider = CreateProvider(pDevice); + if (ProvInfo.Provider) + { + ProvInfo.Provider->EnumerateVariants(ProvInfo.Variants); + if (!ProvInfo.Variants.empty()) + { + m_TotalVariants += static_cast(ProvInfo.Variants.size()); + m_Providers.push_back(std::move(ProvInfo)); + } + } + } + catch (...) + { + LOG_ERROR_MESSAGE("Failed to create super resolution provider '", ProviderName, "'"); + } + }; + +#ifdef DILIGENT_DLSS_D3D11_SUPPORTED + AddProvider(pDevice, CreateDLSSProviderD3D11, "DLSS D3D11"); +#endif +#ifdef DILIGENT_DLSS_D3D12_SUPPORTED + AddProvider(pDevice, CreateDLSSProviderD3D12, "DLSS D3D12"); +#endif +#ifdef DILIGENT_DLSS_VK_SUPPORTED + AddProvider(pDevice, CreateDLSSProviderVk, "DLSS Vulkan"); +#endif +#ifdef DILIGENT_DSR_D3D12_SUPPORTED + AddProvider(pDevice, CreateDSRProviderD3D12, "DirectSR D3D12"); +#endif +#ifdef DILIGENT_METALFX_SUPPORTED + AddProvider(pDevice, CreateMetalFXProvider, "MetalFX"); +#endif + } + + IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) + + virtual void DILIGENT_CALL_TYPE EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) override final + { + if (Variants == nullptr) + { + NumVariants = m_TotalVariants; + return; + } + + NumVariants = 0; + for (const ProviderInfo& Entry : m_Providers) + { + for (const SuperResolutionInfo& Info : Entry.Variants) + { + if (NumVariants >= m_TotalVariants) + return; + Variants[NumVariants++] = Info; + } + } + 
} + + virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, + SuperResolutionSourceSettings& Settings) const override final + { + Settings = {}; + if (const ProviderInfo* pEntry = FindProvider(Attribs.VariantId)) + { + pEntry->Provider->GetSourceSettings(Attribs, Settings); + } + else + { + LOG_WARNING_MESSAGE("Super resolution variant not found for the specified VariantId"); + } + } + + virtual void DILIGENT_CALL_TYPE CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final + { + DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); + if (ppUpscaler == nullptr) + return; + + *ppUpscaler = nullptr; + + const ProviderInfo* pEntry = FindProvider(Desc.VariantId); + if (pEntry == nullptr) + { + LOG_ERROR_MESSAGE("Super resolution variant not found for the specified VariantId. Call EnumerateVariants() to get valid variant IDs."); + return; + } + + try + { + pEntry->Provider->CreateSuperResolution(Desc, ppUpscaler); + } + catch (...) + { + LOG_ERROR("Failed to create super resolution upscaler '", (Desc.Name ? 
Desc.Name : ""), "'"); + } + } + + virtual void DILIGENT_CALL_TYPE SetMessageCallback(DebugMessageCallbackType MessageCallback) const override final + { + SetDebugMessageCallback(MessageCallback); + } + + virtual void DILIGENT_CALL_TYPE SetBreakOnError(bool BreakOnError) const override final + { + PlatformDebug::SetBreakOnError(BreakOnError); + } + + virtual void DILIGENT_CALL_TYPE SetMemoryAllocator(IMemoryAllocator* pAllocator) const override final + { + SetRawAllocator(pAllocator); + } + +private: + struct ProviderInfo + { + std::unique_ptr Provider; + std::vector Variants; + }; + + const ProviderInfo* FindProvider(const INTERFACE_ID& VariantId) const + { + for (const ProviderInfo& ProvInfo : m_Providers) + { + for (const SuperResolutionInfo& SRInfo : ProvInfo.Variants) + { + if (SRInfo.VariantId == VariantId) + return &ProvInfo; + } + } + return nullptr; + } + +private: + std::vector m_Providers; + Uint32 m_TotalVariants = 0; +}; + +} // namespace API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) { @@ -49,23 +222,8 @@ API_QUALIFIER void CreateSuperResolutionFactory(IRenderDevice* pDevice, ISuperRe try { - switch (pDevice->GetDeviceInfo().Type) - { - case RENDER_DEVICE_TYPE_D3D12: - CreateSuperResolutionFactoryD3D12(pDevice, ppFactory); - break; - case RENDER_DEVICE_TYPE_D3D11: - CreateSuperResolutionFactoryD3D11(pDevice, ppFactory); - break; - case RENDER_DEVICE_TYPE_VULKAN: - CreateSuperResolutionFactoryVk(pDevice, ppFactory); - break; - case RENDER_DEVICE_TYPE_METAL: - CreateSuperResolutionFactoryMtl(pDevice, ppFactory); - break; - default: - break; - } + SuperResolutionFactory* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactory instance", SuperResolutionFactory)(pDevice); + pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); } catch (...) 
{ diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp deleted file mode 100644 index df41689d70..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryBase.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. 
- */ - -#include "SuperResolutionFactoryBase.hpp" -#include "EngineMemory.h" -#include "PlatformDebug.hpp" -#include "DebugUtilities.hpp" - -namespace Diligent -{ - -SuperResolutionFactoryBase::SuperResolutionFactoryBase(IReferenceCounters* pRefCounters) : - TBase{pRefCounters} -{ -} - -BackendEntry* SuperResolutionFactoryBase::FindBackend(const INTERFACE_ID& VariantId) const -{ - for (const BackendEntry& Entry : m_Backends) - { - for (const SuperResolutionInfo& Info : Entry.Variants) - { - if (Info.VariantId == VariantId) - return const_cast(&Entry); - } - } - return nullptr; -} - -void SuperResolutionFactoryBase::EnumerateVariants(Uint32& NumVariants, SuperResolutionInfo* Variants) -{ - Uint32 Count = 0; - for (const BackendEntry& Entry : m_Backends) - Count += static_cast(Entry.Variants.size()); - - if (Variants == nullptr) - { - NumVariants = Count; - return; - } - - const Uint32 MaxVariants = NumVariants; - NumVariants = 0; - for (const BackendEntry& Entry : m_Backends) - { - for (const SuperResolutionInfo& Info : Entry.Variants) - { - if (NumVariants >= MaxVariants) - return; - Variants[NumVariants++] = Info; - } - } -} - -void SuperResolutionFactoryBase::GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const -{ - Settings = {}; - - BackendEntry* pEntry = FindBackend(Attribs.VariantId); - if (pEntry == nullptr) - { - LOG_WARNING_MESSAGE("Super resolution variant not found for the specified VariantId"); - return; - } - - pEntry->pBackend->GetSourceSettings(Attribs, Settings); -} - -void SuperResolutionFactoryBase::CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) -{ - DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); - if (ppUpscaler == nullptr) - return; - - *ppUpscaler = nullptr; - - BackendEntry* pEntry = FindBackend(Desc.VariantId); - if (pEntry == nullptr) - { - LOG_ERROR_MESSAGE("Super resolution variant not found for the specified 
VariantId. Call EnumerateVariants() to get valid variant IDs."); - return; - } - - try - { - pEntry->pBackend->CreateSuperResolution(Desc, ppUpscaler); - } - catch (...) - { - LOG_ERROR("Failed to create super resolution upscaler '", (Desc.Name ? Desc.Name : ""), "'"); - } -} - -void SuperResolutionFactoryBase::SetMessageCallback(DebugMessageCallbackType MessageCallback) const -{ - SetDebugMessageCallback(MessageCallback); -} - -void SuperResolutionFactoryBase::SetBreakOnError(bool BreakOnError) const -{ - PlatformDebug::SetBreakOnError(BreakOnError); -} - -void SuperResolutionFactoryBase::SetMemoryAllocator(IMemoryAllocator* pAllocator) const -{ - SetRawAllocator(pAllocator); -} - -} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp deleted file mode 100644 index 585a4d1f87..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D11.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#include "SuperResolutionFactoryBase.hpp" -#include "DLSSProviderD3D11.hpp" -#include "EngineMemory.h" - -namespace Diligent -{ - -class SuperResolutionFactoryD3D11 final : public SuperResolutionFactoryBase -{ -public: - SuperResolutionFactoryD3D11(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - SuperResolutionFactoryBase(pRefCounters) - { - AddBackend(pDevice); - } -}; - -void CreateSuperResolutionFactoryD3D11(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) -{ - auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D11 instance", SuperResolutionFactoryD3D11)(pDevice); - pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); -} - -} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp deleted file mode 100644 index 7b83a8a90c..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryD3D12.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. 
- */ - -#include "SuperResolutionFactoryBase.hpp" -#include "DLSSProviderD3D12.hpp" -#include "DSRProviderD3D12.hpp" -#include "EngineMemory.h" - -namespace Diligent -{ - -class SuperResolutionFactoryD3D12 final : public SuperResolutionFactoryBase -{ -public: - SuperResolutionFactoryD3D12(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - SuperResolutionFactoryBase(pRefCounters) - { - AddBackend(pDevice); - AddBackend(pDevice); - } -}; - -void CreateSuperResolutionFactoryD3D12(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) -{ - auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryD3D12 instance", SuperResolutionFactoryD3D12)(pDevice); - pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); -} - -} // namespace Diligent diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp deleted file mode 100644 index ce618ffb40..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryMtl.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#include "SuperResolutionFactory.h" - -#if !METAL_SUPPORTED - -namespace Diligent -{ - -void CreateSuperResolutionFactoryMtl(IRenderDevice* /*pDevice*/, ISuperResolutionFactory** ppFactory) -{ - *ppFactory = nullptr; -} - -} // namespace Diligent - -#endif diff --git a/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp b/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp deleted file mode 100644 index 76bbe2db35..0000000000 --- a/Graphics/SuperResolution/src/SuperResolutionFactoryVk.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2026 Diligent Graphics LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * In no event and under no legal theory, whether in tort (including negligence), - * contract, or otherwise, unless required by applicable law (such as deliberate - * and grossly negligent acts) or agreed to in writing, shall any Contributor be - * liable for any damages, including any direct, indirect, special, incidental, - * or consequential damages of any character arising as a result of this License or - * out of the use or inability to use the software (including but not limited to damages - * for loss of goodwill, work stoppage, computer failure or malfunction, or any and - * all other commercial damages or losses), even if such Contributor has been advised - * of the possibility of such damages. - */ - -#include "SuperResolutionFactoryBase.hpp" -#include "DLSSProviderVk.hpp" -#include "EngineMemory.h" - -namespace Diligent -{ - -class SuperResolutionFactoryVk final : public SuperResolutionFactoryBase -{ -public: - SuperResolutionFactoryVk(IReferenceCounters* pRefCounters, IRenderDevice* pDevice) : - SuperResolutionFactoryBase(pRefCounters) - { - AddBackend(pDevice); - } -}; - -void CreateSuperResolutionFactoryVk(IRenderDevice* pDevice, ISuperResolutionFactory** ppFactory) -{ - auto* pFactory = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFactoryVk instance", SuperResolutionFactoryVk)(pDevice); - pFactory->QueryInterface(IID_SuperResolutionFactory, reinterpret_cast(ppFactory)); -} - -} // namespace Diligent From 308c2da133316c0d126bf8880451a71e70ae1a39 Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Fri, 20 Mar 2026 14:20:53 +0600 Subject: [PATCH 09/14] Rework super-resolution validation --- .../include/SuperResolutionBase.hpp | 16 +- .../SuperResolution/src/DSRProviderD3D12.cpp | 8 +- .../src/SuperResolutionBase.cpp | 146 +++++++++--------- 3 files changed, 89 insertions(+), 81 deletions(-) diff --git a/Graphics/SuperResolution/include/SuperResolutionBase.hpp b/Graphics/SuperResolution/include/SuperResolutionBase.hpp index 24c0d5c66c..f665342d56 
100644 --- a/Graphics/SuperResolution/include/SuperResolutionBase.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionBase.hpp @@ -39,6 +39,8 @@ namespace Diligent #define LOG_SUPER_RESOLUTION_ERROR_AND_THROW(Name, ...) LOG_ERROR_AND_THROW("Super resolution upscaler '", ((Name) != nullptr ? (Name) : ""), "': ", ##__VA_ARGS__) +#define DEV_CHECK_SUPER_RESOLUTION(Name, Expr, ...) DEV_CHECK_ERR(Expr, "Super resolution upscaler '", ((Name) != nullptr ? (Name) : ""), "': ", ##__VA_ARGS__) + #define VERIFY_SUPER_RESOLUTION(Name, Expr, ...) \ do \ { \ @@ -48,22 +50,24 @@ namespace Diligent } \ } while (false) + + /// Validates super resolution description and throws an exception in case of an error. void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); /// Validates super resolution description for temporal upscaling and throws an exception in case of an error. void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); -/// Validates super resolution source settings attributes and throws an exception in case of an error. -void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs) noexcept(false); +/// Validates super resolution source settings attributes using DEV checks. +void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs); -/// Validates execute super resolution attributes and throws an exception in case of an error. +/// Validates execute super resolution attributes using DEV checks. void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs) noexcept(false); + const ExecuteSuperResolutionAttribs& Attribs); -/// Validates execute super resolution attributes for temporal upscaling and throws an exception in case of an error. +/// Validates execute super resolution attributes for temporal upscaling using DEV checks. 
void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs) noexcept(false); + const ExecuteSuperResolutionAttribs& Attribs); class SuperResolutionBase : public ObjectBase { diff --git a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp index 2f0b0447cc..b3f75bd779 100644 --- a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp @@ -170,10 +170,10 @@ SuperResolutionD3D12_DSR::~SuperResolutionD3D12_DSR() = default; void DILIGENT_CALL_TYPE SuperResolutionD3D12_DSR::Execute(const ExecuteSuperResolutionAttribs& Attribs) { ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); - VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraNear > 0, "CameraNear must be greater than zero for temporal upscaling"); - VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFar > 0, "CameraFar must be greater than zero for temporal upscaling."); - VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFovAngleVert > 0, "CameraFovAngleVert must be greater than zero for temporal upscaling."); - VERIFY_SUPER_RESOLUTION(m_Desc.Name, Attribs.TimeDeltaInSeconds >= 0, "TimeDeltaInSeconds must be non-negative."); + DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraNear > 0, "CameraNear must be greater than zero for temporal upscaling"); + DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFar > 0, "CameraFar must be greater than zero for temporal upscaling."); + DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFovAngleVert > 0, "CameraFovAngleVert must be greater than zero for temporal upscaling."); + DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.TimeDeltaInSeconds >= 0, "TimeDeltaInSeconds must be non-negative."); DeviceContextD3D12Impl* pCtx = ClassPtrCast(Attribs.pContext); diff --git a/Graphics/SuperResolution/src/SuperResolutionBase.cpp b/Graphics/SuperResolution/src/SuperResolutionBase.cpp index 
4e0c2bcba3..384ce5e584 100644 --- a/Graphics/SuperResolution/src/SuperResolutionBase.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionBase.cpp @@ -58,12 +58,12 @@ void PopulateHaltonJitterPattern(std::vector& } } -void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs) noexcept(false) +void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs) { - if (Attribs.OutputWidth == 0 || Attribs.OutputHeight == 0) - LOG_ERROR_AND_THROW("Output resolution must be greater than zero"); - if (Attribs.OptimizationType >= SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT) - LOG_ERROR_AND_THROW("Invalid optimization type"); +#ifdef DILIGENT_DEVELOPMENT + DEV_CHECK_ERR(Attribs.OutputWidth > 0 && Attribs.OutputHeight > 0, "Output resolution must be greater than zero"); + DEV_CHECK_ERR(Attribs.OptimizationType < SUPER_RESOLUTION_OPTIMIZATION_TYPE_COUNT, "Invalid optimization type"); +#endif } void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false) @@ -84,25 +84,26 @@ void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexce } void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs) noexcept(false) + const ExecuteSuperResolutionAttribs& Attribs) { - VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pContext != nullptr, "Device context must not be null"); - VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pColorTextureSRV != nullptr, "Color texture SRV must not be null"); - VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pOutputTextureView != nullptr, "Output texture view must not be null"); +#ifdef DILIGENT_DEVELOPMENT + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pContext != nullptr, "Device context must not be null"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pColorTextureSRV != nullptr, "Color texture SRV must not be null"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pOutputTextureView != nullptr, "Output texture 
view must not be null"); // Validate color texture if (Attribs.pColorTextureSRV != nullptr) { const TextureDesc& TexDesc = Attribs.pColorTextureSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pColorTextureSRV->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Color texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Color texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ColorFormat, - "Color texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected ColorFormat (", GetTextureFormatAttribs(Desc.ColorFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Color texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Color texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ColorFormat, + "Color texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ColorFormat (", GetTextureFormatAttribs(Desc.ColorFormat).Name, ")"); } // Validate output texture @@ -110,34 +111,36 @@ void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = 
Attribs.pOutputTextureView->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_RENDER_TARGET || ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, - "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_RENDER_TARGET or TEXTURE_VIEW_UNORDERED_ACCESS"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == Desc.OutputWidth && TexDesc.Height == Desc.OutputHeight, - "Output texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must match the upscaler output resolution (", Desc.OutputWidth, "x", Desc.OutputHeight, ")"); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.OutputFormat, - "Output texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected OutputFormat (", GetTextureFormatAttribs(Desc.OutputFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_RENDER_TARGET || ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, + "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_RENDER_TARGET or TEXTURE_VIEW_UNORDERED_ACCESS"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == Desc.OutputWidth && TexDesc.Height == Desc.OutputHeight, + "Output texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must match the upscaler output resolution (", Desc.OutputWidth, "x", Desc.OutputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.OutputFormat, + "Output texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected OutputFormat (", GetTextureFormatAttribs(Desc.OutputFormat).Name, ")"); } +#endif } void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs) noexcept(false) + const ExecuteSuperResolutionAttribs& Attribs) { +#ifdef DILIGENT_DEVELOPMENT 
ValidateExecuteSuperResolutionAttribs(Desc, Attribs); - VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pDepthTextureSRV != nullptr, "Depth texture SRV must not be null for temporal upscaling"); - VERIFY_SUPER_RESOLUTION(Desc.Name, Attribs.pMotionVectorsSRV != nullptr, "Motion vectors SRV must not be null for temporal upscaling"); - VERIFY_SUPER_RESOLUTION(Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Attribs.pExposureTextureSRV != nullptr, - "Exposure texture SRV must not be null when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pDepthTextureSRV != nullptr, "Depth texture SRV must not be null for temporal upscaling"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pMotionVectorsSRV != nullptr, "Motion vectors SRV must not be null for temporal upscaling"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Attribs.pExposureTextureSRV != nullptr, + "Exposure texture SRV must not be null when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set"); // Validate output texture view type (DirectSR requires UAV) if (Attribs.pOutputTextureView != nullptr) { const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pOutputTextureView->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, - "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_UNORDERED_ACCESS"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, + "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_UNORDERED_ACCESS"); } // Validate depth texture @@ -145,14 +148,14 @@ void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pDepthTextureSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pDepthTextureSRV->GetDesc(); - 
VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Depth texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Depth texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.DepthFormat, - "Depth texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected DepthFormat (", GetTextureFormatAttribs(Desc.DepthFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Depth texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Depth texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.DepthFormat, + "Depth texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected DepthFormat (", GetTextureFormatAttribs(Desc.DepthFormat).Name, ")"); } // Validate motion vectors texture @@ -160,14 +163,14 @@ void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pMotionVectorsSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pMotionVectorsSRV->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Motion vectors view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - 
VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Motion vectors texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.MotionFormat, - "Motion vectors view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected MotionFormat (", GetTextureFormatAttribs(Desc.MotionFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Motion vectors view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Motion vectors texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.MotionFormat, + "Motion vectors view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected MotionFormat (", GetTextureFormatAttribs(Desc.MotionFormat).Name, ")"); } // Validate exposure texture @@ -175,14 +178,14 @@ void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pExposureTextureSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pExposureTextureSRV->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Exposure texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == 1 && TexDesc.Height == 1, - "Exposure texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - 
") must be 1x1"); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ExposureFormat, - "Exposure texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected ExposureFormat (", GetTextureFormatAttribs(Desc.ExposureFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Exposure texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == 1 && TexDesc.Height == 1, + "Exposure texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be 1x1"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ExposureFormat, + "Exposure texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ExposureFormat (", GetTextureFormatAttribs(Desc.ExposureFormat).Name, ")"); } // Validate reactive mask texture @@ -190,16 +193,16 @@ void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pReactiveMaskTextureSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pReactiveMaskTextureSRV->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Reactive mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat != TEX_FORMAT_UNKNOWN, - "Reactive mask texture '", TexDesc.Name, "' provided but ReactiveMaskFormat was not set in SuperResolutionDesc"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Reactive mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - VERIFY_SUPER_RESOLUTION(Desc.Name, 
ViewDesc.Format == Desc.ReactiveMaskFormat, - "Reactive mask view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected ReactiveMaskFormat (", GetTextureFormatAttribs(Desc.ReactiveMaskFormat).Name, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Reactive mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat != TEX_FORMAT_UNKNOWN, + "Reactive mask texture '", TexDesc.Name, "' provided but ReactiveMaskFormat was not set in SuperResolutionDesc"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Reactive mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ReactiveMaskFormat, + "Reactive mask view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ReactiveMaskFormat (", GetTextureFormatAttribs(Desc.ReactiveMaskFormat).Name, ")"); } // Validate ignore history mask texture @@ -207,12 +210,13 @@ void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& { const TextureDesc& TexDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture()->GetDesc(); const TextureViewDesc& ViewDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetDesc(); - VERIFY_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Ignore history mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - VERIFY_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Ignore history mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler 
input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Ignore history mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Ignore history mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); } +#endif } } // namespace Diligent From 8242d2ad30f05aedb1bce1f1acb84a4d58ab3db5 Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Fri, 20 Mar 2026 14:32:38 +0600 Subject: [PATCH 10/14] Switch DLSS SDK to custom DLSS-Headers repository --- Graphics/SuperResolution/CMakeLists.txt | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index a830da3cb9..6124bf7556 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -38,8 +38,8 @@ endif() if(DILIGENT_DLSS_SUPPORTED) # Fetch NVIDIA DLSS SDK headers FetchContent_DeclareShallowGit(DLSS-Headers - GIT_REPOSITORY https://github.com/NVIDIA/DLSS.git - GIT_TAG main + GIT_REPOSITORY https://github.com/MikhailGorobets/DLSS-Headers.git + GIT_TAG master ) FetchContent_MakeAvailable(DLSS-Headers) endif() @@ -147,14 +147,7 @@ if(DILIGENT_DSR_SUPPORTED) endif() if(DILIGENT_DLSS_SUPPORTED) - set(DLSS_SDK_DIR ${FETCHCONTENT_BASE_DIR}/dlss-headers-src) - target_include_directories(Diligent-SuperResolution-static PRIVATE ${DLSS_SDK_DIR}/include) - - # Link NGX static library (dynamic CRT /MD variant) - target_link_libraries(Diligent-SuperResolution-static PRIVATE - debug ${DLSS_SDK_DIR}/lib/Windows_x86_64/x64/nvsdk_ngx_d_dbg.lib - optimized ${DLSS_SDK_DIR}/lib/Windows_x86_64/x64/nvsdk_ngx_d.lib - ) + 
target_link_libraries(Diligent-SuperResolution-static PRIVATE DLSS-Headers DLSS-NGX) if(D3D12_SUPPORTED) target_link_libraries(Diligent-SuperResolution-static PRIVATE Diligent-GraphicsEngineD3D12-static) From 6ee53c6a4bdb90826258cd90b857a5499938e190 Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Fri, 20 Mar 2026 14:59:24 +0600 Subject: [PATCH 11/14] Disable DLSS and DSR on 32-bit Windows; Fix unused provider warning --- Graphics/SuperResolution/CMakeLists.txt | 2 +- Graphics/SuperResolution/src/SuperResolutionFactory.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 6124bf7556..4d1a124324 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -7,7 +7,7 @@ project(Diligent-SuperResolution CXX) set(DILIGENT_DLSS_SUPPORTED FALSE CACHE INTERNAL "DLSS is not supported") set(DILIGENT_DSR_SUPPORTED FALSE CACHE INTERNAL "DirectSR is not supported") -if(PLATFORM_WIN32 AND NOT MINGW_BUILD) +if(PLATFORM_WIN32 AND NOT MINGW_BUILD AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (D3D11_SUPPORTED OR D3D12_SUPPORTED OR VULKAN_SUPPORTED) set(DILIGENT_DLSS_SUPPORTED TRUE CACHE INTERNAL "DLSS is supported on Win32 platform") endif() diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp index c0600c4c16..fa1ff59a25 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -31,6 +31,8 @@ #include "EngineMemory.h" #include "PlatformDebug.hpp" +#include + namespace Diligent { @@ -103,6 +105,7 @@ class SuperResolutionFactory : public ObjectBase #ifdef DILIGENT_METALFX_SUPPORTED AddProvider(pDevice, CreateMetalFXProvider, "MetalFX"); #endif + (void)AddProvider; } IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolutionFactory, TBase) From 23f1898cf65583749b67ed023c50a5f3f126d3ef Mon Sep 17 
00:00:00 2001 From: MikhailGorobets Date: Fri, 20 Mar 2026 17:06:56 +0600 Subject: [PATCH 12/14] Improve DLSS provider initialization and feature lifecycle --- .../include/SuperResolutionDLSS.hpp | 4 +- .../SuperResolution/src/DLSSProviderD3D11.cpp | 52 +++++++++++++----- .../SuperResolution/src/DLSSProviderD3D12.cpp | 52 +++++++++++++----- .../SuperResolution/src/DLSSProviderVk.cpp | 54 +++++++++++++------ .../src/SuperResolutionDLSS.cpp | 7 ++- 5 files changed, 122 insertions(+), 47 deletions(-) diff --git a/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp b/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp index 2534a30f22..d386f18e8e 100644 --- a/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionDLSS.hpp @@ -47,8 +47,8 @@ extern const wchar_t* DLSSAppDataPath; /// Maps Diligent optimization type to NGX performance/quality preset. NVSDK_NGX_PerfQuality_Value OptimizationTypeToNGXPerfQuality(SUPER_RESOLUTION_OPTIMIZATION_TYPE Type); -/// Maps Diligent super resolution flags to DLSS feature flags. -Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags); +/// Computes the full set of DLSS feature flags from the description and execution attributes. +Int32 ComputeDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags, const ExecuteSuperResolutionAttribs& Attribs); /// Populates DLSS variant info using NGX capability parameters. 
void EnumerateDLSSVariants(NVSDK_NGX_Parameter* pNGXParams, std::vector& Variants); diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp index 39bf79d9c3..b3d202dbb0 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp @@ -68,8 +68,9 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase { ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); - if (m_pDLSSFeature == nullptr) - CreateFeature(Attribs); + NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); + if (pDLSSFeature == nullptr) + return; DeviceContextD3D11Impl* pCtxImpl = ClassPtrCast(Attribs.pContext); @@ -100,17 +101,24 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase EvalParams.InPreExposure = Attribs.PreExposure; EvalParams.InExposureScale = Attribs.ExposureScale; - NVSDK_NGX_Result Result = NGX_D3D11_EVALUATE_DLSS_EXT(pd3d11DeviceContext, m_pDLSSFeature, m_pNGXParams, &EvalParams); + NVSDK_NGX_Result Result = NGX_D3D11_EVALUATE_DLSS_EXT(pd3d11DeviceContext, pDLSSFeature, m_pNGXParams, &EvalParams); if (NVSDK_NGX_FAILED(Result)) LOG_ERROR_MESSAGE("DLSS D3D11 evaluation failed. 
NGX Result: ", static_cast(Result)); } private: - void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + NVSDK_NGX_Handle* AcquireFeature(const ExecuteSuperResolutionAttribs& Attribs) { - Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); - if (Attribs.CameraNear > Attribs.CameraFar) - DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + const Int32 DLSSCreateFeatureFlags = ComputeDLSSFeatureFlags(m_Desc.Flags, Attribs); + if (m_pDLSSFeature != nullptr && m_DLSSFeatureFlags == DLSSCreateFeatureFlags) + return m_pDLSSFeature; + + if (m_pDLSSFeature != nullptr) + { + NVSDK_NGX_D3D11_ReleaseFeature(m_pDLSSFeature); + m_pDLSSFeature = nullptr; + } + m_DLSSFeatureFlags = DLSSCreateFeatureFlags; NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; @@ -119,16 +127,23 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; + NVSDK_NGX_Handle* pFeature = nullptr; ID3D11DeviceContext* pd3d11Ctx = ClassPtrCast(Attribs.pContext)->GetD3D11DeviceContext(); - NVSDK_NGX_Result Result = NGX_D3D11_CREATE_DLSS_EXT(pd3d11Ctx, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + NVSDK_NGX_Result Result = NGX_D3D11_CREATE_DLSS_EXT(pd3d11Ctx, &pFeature, m_pNGXParams, &DLSSCreateParams); if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("Failed to create DLSS D3D11 feature. NGX Result: ", static_cast(Result)); + { + LOG_ERROR_MESSAGE("Failed to create DLSS D3D11 feature. 
NGX Result: ", static_cast(Result)); + return nullptr; + } + m_pDLSSFeature = pFeature; + return m_pDLSSFeature; } RefCntAutoPtr m_pDevice; - NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + Int32 m_DLSSFeatureFlags = 0; }; class DLSSProviderD3D11 final : public SuperResolutionProvider @@ -140,18 +155,27 @@ class DLSSProviderD3D11 final : public SuperResolutionProvider ID3D11Device* pd3d11Device = ClassPtrCast(pDevice)->GetD3D11Device(); NVSDK_NGX_Result Result = NVSDK_NGX_D3D11_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d11Device); if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX D3D11 initialization failed. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("NVIDIA NGX D3D11 initialization failed. DLSS will not be available. Result: ", static_cast(Result)); + return; + } Result = NVSDK_NGX_D3D11_GetCapabilityParameters(&m_pNGXParams); if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX D3D11 capability parameters. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("Failed to get NGX D3D11 capability parameters. DLSS will not be available. 
Result: ", static_cast(Result)); + m_pNGXParams = nullptr; + NVSDK_NGX_D3D11_Shutdown1(pd3d11Device); + } } ~DLSSProviderD3D11() { if (m_pNGXParams != nullptr) + { NVSDK_NGX_D3D11_DestroyParameters(m_pNGXParams); - NVSDK_NGX_D3D11_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D11Device()); + NVSDK_NGX_D3D11_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D11Device()); + } } virtual void EnumerateVariants(std::vector& Variants) override final diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp index 083cc1f251..1ce0dc2ef9 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp @@ -68,8 +68,9 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase { ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); - if (m_pDLSSFeature == nullptr) - CreateFeature(Attribs); + NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); + if (pDLSSFeature == nullptr) + return; DeviceContextD3D12Impl* pCtxImpl = ClassPtrCast(Attribs.pContext); @@ -111,7 +112,7 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase EvalParams.InPreExposure = Attribs.PreExposure; EvalParams.InExposureScale = Attribs.ExposureScale; - NVSDK_NGX_Result Result = NGX_D3D12_EVALUATE_DLSS_EXT(pCmdList, m_pDLSSFeature, m_pNGXParams, &EvalParams); + NVSDK_NGX_Result Result = NGX_D3D12_EVALUATE_DLSS_EXT(pCmdList, pDLSSFeature, m_pNGXParams, &EvalParams); if (NVSDK_NGX_FAILED(Result)) LOG_ERROR_MESSAGE("DLSS D3D12 evaluation failed. 
NGX Result: ", static_cast(Result)); @@ -120,11 +121,18 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase } private: - void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + NVSDK_NGX_Handle* AcquireFeature(const ExecuteSuperResolutionAttribs& Attribs) { - Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); - if (Attribs.CameraNear > Attribs.CameraFar) - DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + const Int32 DLSSCreateFeatureFlags = ComputeDLSSFeatureFlags(m_Desc.Flags, Attribs); + if (m_pDLSSFeature != nullptr && m_DLSSFeatureFlags == DLSSCreateFeatureFlags) + return m_pDLSSFeature; + + if (m_pDLSSFeature != nullptr) + { + NVSDK_NGX_D3D12_ReleaseFeature(m_pDLSSFeature); + m_pDLSSFeature = nullptr; + } + m_DLSSFeatureFlags = DLSSCreateFeatureFlags; NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; @@ -133,16 +141,23 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; + NVSDK_NGX_Handle* pFeature = nullptr; ID3D12GraphicsCommandList* pCmdList = ClassPtrCast(Attribs.pContext)->GetD3D12CommandList(); - NVSDK_NGX_Result Result = NGX_D3D12_CREATE_DLSS_EXT(pCmdList, 1, 1, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + NVSDK_NGX_Result Result = NGX_D3D12_CREATE_DLSS_EXT(pCmdList, 1, 1, &pFeature, m_pNGXParams, &DLSSCreateParams); if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("Failed to create DLSS D3D12 feature. NGX Result: ", static_cast(Result)); + { + LOG_ERROR_MESSAGE("Failed to create DLSS D3D12 feature. 
NGX Result: ", static_cast(Result)); + return nullptr; + } + m_pDLSSFeature = pFeature; + return m_pDLSSFeature; } RefCntAutoPtr m_pDevice; - NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + Int32 m_DLSSFeatureFlags = 0; }; @@ -155,18 +170,27 @@ class DLSSProviderD3D12 final : public SuperResolutionProvider ID3D12Device* pd3d12Device = ClassPtrCast(pDevice)->GetD3D12Device(); NVSDK_NGX_Result Result = NVSDK_NGX_D3D12_Init_with_ProjectID(DLSSProjectId, NVSDK_NGX_ENGINE_TYPE_CUSTOM, "0", DLSSAppDataPath, pd3d12Device); if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX D3D12 initialization failed. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("NVIDIA NGX D3D12 initialization failed. DLSS will not be available. Result: ", static_cast(Result)); + return; + } Result = NVSDK_NGX_D3D12_GetCapabilityParameters(&m_pNGXParams); if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX D3D12 capability parameters. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("Failed to get NGX D3D12 capability parameters. DLSS will not be available. 
Result: ", static_cast(Result)); + m_pNGXParams = nullptr; + NVSDK_NGX_D3D12_Shutdown1(pd3d12Device); + } } ~DLSSProviderD3D12() { if (m_pNGXParams != nullptr) + { NVSDK_NGX_D3D12_DestroyParameters(m_pNGXParams); - NVSDK_NGX_D3D12_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D12Device()); + NVSDK_NGX_D3D12_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetD3D12Device()); + } } virtual void EnumerateVariants(std::vector& Variants) override final diff --git a/Graphics/SuperResolution/src/DLSSProviderVk.cpp b/Graphics/SuperResolution/src/DLSSProviderVk.cpp index f1c3ef9bc9..b96e34d4fb 100644 --- a/Graphics/SuperResolution/src/DLSSProviderVk.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderVk.cpp @@ -69,8 +69,9 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase { ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); - if (m_pDLSSFeature == nullptr) - CreateFeature(Attribs); + NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); + if (pDLSSFeature == nullptr) + return; DeviceContextVkImpl* pCtxImpl = ClassPtrCast(Attribs.pContext); @@ -127,17 +128,24 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase EvalParams.InPreExposure = Attribs.PreExposure; EvalParams.InExposureScale = Attribs.ExposureScale; - NVSDK_NGX_Result Result = NGX_VULKAN_EVALUATE_DLSS_EXT(vkCmdBuffer, m_pDLSSFeature, m_pNGXParams, &EvalParams); + NVSDK_NGX_Result Result = NGX_VULKAN_EVALUATE_DLSS_EXT(vkCmdBuffer, pDLSSFeature, m_pNGXParams, &EvalParams); if (NVSDK_NGX_FAILED(Result)) LOG_ERROR_MESSAGE("DLSS Vulkan evaluation failed. 
NGX Result: ", static_cast(Result)); } private: - void CreateFeature(const ExecuteSuperResolutionAttribs& Attribs) + NVSDK_NGX_Handle* AcquireFeature(const ExecuteSuperResolutionAttribs& Attribs) { - Int32 DLSSCreateFeatureFlags = SuperResolutionFlagsToDLSSFeatureFlags(m_Desc.Flags); - if (Attribs.CameraNear > Attribs.CameraFar) - DLSSCreateFeatureFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; + const Int32 DLSSCreateFeatureFlags = ComputeDLSSFeatureFlags(m_Desc.Flags, Attribs); + if (m_pDLSSFeature != nullptr && m_DLSSFeatureFlags == DLSSCreateFeatureFlags) + return m_pDLSSFeature; + + if (m_pDLSSFeature != nullptr) + { + NVSDK_NGX_VULKAN_ReleaseFeature(m_pDLSSFeature); + m_pDLSSFeature = nullptr; + } + m_DLSSFeatureFlags = DLSSCreateFeatureFlags; NVSDK_NGX_DLSS_Create_Params DLSSCreateParams = {}; DLSSCreateParams.Feature.InWidth = m_Desc.InputWidth; @@ -146,16 +154,23 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase DLSSCreateParams.Feature.InTargetHeight = m_Desc.OutputHeight; DLSSCreateParams.InFeatureCreateFlags = DLSSCreateFeatureFlags; - VkCommandBuffer vkCmdBuffer = ClassPtrCast(Attribs.pContext)->GetVkCommandBuffer(); - NVSDK_NGX_Result Result = NGX_VULKAN_CREATE_DLSS_EXT(vkCmdBuffer, 1, 1, &m_pDLSSFeature, m_pNGXParams, &DLSSCreateParams); + NVSDK_NGX_Handle* pFeature = nullptr; + VkCommandBuffer vkCmdBuffer = ClassPtrCast(Attribs.pContext)->GetVkCommandBuffer(); + NVSDK_NGX_Result Result = NGX_VULKAN_CREATE_DLSS_EXT(vkCmdBuffer, 1, 1, &pFeature, m_pNGXParams, &DLSSCreateParams); if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("Failed to create DLSS Vulkan feature. NGX Result: ", static_cast(Result)); + { + LOG_ERROR_MESSAGE("Failed to create DLSS Vulkan feature. 
NGX Result: ", static_cast(Result)); + return nullptr; + } + m_pDLSSFeature = pFeature; + return m_pDLSSFeature; } RefCntAutoPtr m_pDevice; - NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; - NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + NVSDK_NGX_Handle* m_pDLSSFeature = nullptr; + NVSDK_NGX_Parameter* m_pNGXParams = nullptr; + Int32 m_DLSSFeatureFlags = 0; }; @@ -195,18 +210,27 @@ class DLSSProviderVk final : public SuperResolutionProvider } if (NVSDK_NGX_FAILED(Result)) - LOG_ERROR_AND_THROW("NVIDIA NGX Vulkan initialization failed. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("NVIDIA NGX Vulkan initialization failed. DLSS will not be available. Result: ", static_cast(Result)); + return; + } Result = NVSDK_NGX_VULKAN_GetCapabilityParameters(&m_pNGXParams); if (NVSDK_NGX_FAILED(Result) || m_pNGXParams == nullptr) - LOG_ERROR_AND_THROW("Failed to get NGX Vulkan capability parameters. Result: ", static_cast(Result)); + { + LOG_WARNING_MESSAGE("Failed to get NGX Vulkan capability parameters. DLSS will not be available. 
Result: ", static_cast(Result)); + m_pNGXParams = nullptr; + NVSDK_NGX_VULKAN_Shutdown1(vkDevice); + } } ~DLSSProviderVk() { if (m_pNGXParams != nullptr) + { NVSDK_NGX_VULKAN_DestroyParameters(m_pNGXParams); - NVSDK_NGX_VULKAN_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetVkDevice()); + NVSDK_NGX_VULKAN_Shutdown1(ClassPtrCast(m_pDevice.RawPtr())->GetVkDevice()); + } } void EnumerateVariants(std::vector& Variants) diff --git a/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp b/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp index e2a7b9cce7..7d92b7dc30 100644 --- a/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionDLSS.cpp @@ -54,7 +54,7 @@ NVSDK_NGX_PerfQuality_Value OptimizationTypeToNGXPerfQuality(SUPER_RESOLUTION_OP } } -Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags) +Int32 ComputeDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags, const ExecuteSuperResolutionAttribs& Attribs) { Int32 DLSSFlags = NVSDK_NGX_DLSS_Feature_Flags_None; @@ -62,6 +62,8 @@ Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags) DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_AutoExposure; if (Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_DoSharpening; + if (Attribs.CameraNear > Attribs.CameraFar) + DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_DepthInverted; DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_MVLowRes; DLSSFlags |= NVSDK_NGX_DLSS_Feature_Flags_IsHDR; @@ -71,7 +73,8 @@ Int32 SuperResolutionFlagsToDLSSFeatureFlags(SUPER_RESOLUTION_FLAGS Flags) void EnumerateDLSSVariants(NVSDK_NGX_Parameter* pNGXParams, std::vector& Variants) { - DEV_CHECK_ERR(pNGXParams != nullptr, "NGX parameters must not be null"); + if (pNGXParams == nullptr) + return; Int32 NeedsUpdatedDriver = 0; NVSDK_NGX_Parameter_GetI(pNGXParams, NVSDK_NGX_Parameter_SuperSampling_NeedsUpdatedDriver, &NeedsUpdatedDriver); From 8fc4f968942703ee633879404822a72d34abd138 Mon Sep 17 00:00:00 
2001 From: MikhailGorobets Date: Sat, 21 Mar 2026 00:07:18 +0600 Subject: [PATCH 13/14] Improve super resolution validation and provider API --- .../include/SuperResolutionBase.hpp | 24 +- .../include/SuperResolutionProvider.hpp | 26 +- .../interface/SuperResolutionFactory.h | 6 +- .../SuperResolution/src/DLSSProviderD3D11.cpp | 10 +- .../SuperResolution/src/DLSSProviderD3D12.cpp | 10 +- .../SuperResolution/src/DLSSProviderVk.cpp | 10 +- .../SuperResolution/src/DSRProviderD3D12.cpp | 10 +- .../src/SuperResolutionBase.cpp | 227 ++++++++++-------- .../src/SuperResolutionFactory.cpp | 22 +- 9 files changed, 203 insertions(+), 142 deletions(-) diff --git a/Graphics/SuperResolution/include/SuperResolutionBase.hpp b/Graphics/SuperResolution/include/SuperResolutionBase.hpp index f665342d56..7497eca577 100644 --- a/Graphics/SuperResolution/include/SuperResolutionBase.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionBase.hpp @@ -50,25 +50,17 @@ namespace Diligent } \ } while (false) - - -/// Validates super resolution description and throws an exception in case of an error. -void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); - -/// Validates super resolution description for temporal upscaling and throws an exception in case of an error. -void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false); - /// Validates super resolution source settings attributes using DEV checks. void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& Attribs); +/// Validates super resolution description and throws an exception in case of an error. +void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc, const SuperResolutionInfo& Info) noexcept(false); + /// Validates execute super resolution attributes using DEV checks. 
void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, const ExecuteSuperResolutionAttribs& Attribs); -/// Validates execute super resolution attributes for temporal upscaling using DEV checks. -void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs); - class SuperResolutionBase : public ObjectBase { public: @@ -81,15 +73,18 @@ class SuperResolutionBase : public ObjectBase }; SuperResolutionBase(IReferenceCounters* pRefCounters, - const SuperResolutionDesc& Desc) : + const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info) : TBase{pRefCounters}, - m_Desc{Desc} + m_Desc{Desc}, + m_Info{Info} { if (Desc.Name != nullptr) { m_Name = Desc.Name; m_Desc.Name = m_Name.c_str(); } + ValidateSuperResolutionDesc(m_Desc, m_Info); } IMPLEMENT_QUERY_INTERFACE_IN_PLACE(IID_SuperResolution, TBase) @@ -116,6 +111,7 @@ class SuperResolutionBase : public ObjectBase protected: SuperResolutionDesc m_Desc; + SuperResolutionInfo m_Info; std::string m_Name; std::vector m_JitterPattern; }; diff --git a/Graphics/SuperResolution/include/SuperResolutionProvider.hpp b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp index 24a9304bc7..8457ae4609 100644 --- a/Graphics/SuperResolution/include/SuperResolutionProvider.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp @@ -28,7 +28,9 @@ #include "SuperResolutionFactory.h" #include "SuperResolution.h" +#include "SuperResolutionBase.hpp" +#include #include namespace Diligent @@ -43,9 +45,31 @@ class SuperResolutionProvider virtual void EnumerateVariants(std::vector& Variants) = 0; virtual void GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, - SuperResolutionSourceSettings& Settings) = 0; + SuperResolutionSourceSettings& Settings) + { + Settings = {}; + + ValidateSourceSettingsAttribs(Attribs); + + float ScaleFactor = 1.0f; + switch (Attribs.OptimizationType) 
+ { + // clang-format off + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_QUALITY: ScaleFactor = 1.0f / 1.3f; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_QUALITY: ScaleFactor = 1.0f / 1.5f; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED: ScaleFactor = 1.0f / 1.7f; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE: ScaleFactor = 0.5f; break; + case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE: ScaleFactor = 1.0f / 3.0f; break; + default: ScaleFactor = 1.0f / 1.7f; break; + // clang-format on + } + + Settings.OptimalInputWidth = std::max(1u, static_cast(Attribs.OutputWidth * ScaleFactor)); + Settings.OptimalInputHeight = std::max(1u, static_cast(Attribs.OutputHeight * ScaleFactor)); + } virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) = 0; }; diff --git a/Graphics/SuperResolution/interface/SuperResolutionFactory.h b/Graphics/SuperResolution/interface/SuperResolutionFactory.h index a79369a7dc..03d51ed0e1 100644 --- a/Graphics/SuperResolution/interface/SuperResolutionFactory.h +++ b/Graphics/SuperResolution/interface/SuperResolutionFactory.h @@ -64,7 +64,11 @@ DILIGENT_TYPED_ENUM(SUPER_RESOLUTION_SPATIAL_CAP_FLAGS, Uint32) /// as opposed to a custom software fallback. SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NATIVE = 1u << 0, - SUPER_RESOLUTION_SPATIAL_CAP_FLAG_LAST = SUPER_RESOLUTION_SPATIAL_CAP_FLAG_NATIVE + /// The upscaler supports the sharpness control parameter. + /// When set, the Sharpness field in ExecuteSuperResolutionAttribs is used. 
+ SUPER_RESOLUTION_SPATIAL_CAP_FLAG_SHARPNESS = 1u << 1, + + SUPER_RESOLUTION_SPATIAL_CAP_FLAG_LAST = SUPER_RESOLUTION_SPATIAL_CAP_FLAG_SHARPNESS }; DEFINE_FLAG_ENUM_OPERATORS(SUPER_RESOLUTION_SPATIAL_CAP_FLAGS) diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp index b3d202dbb0..514ee596f4 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D11.cpp @@ -49,12 +49,12 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase SuperResolutionD3D11_DLSS(IReferenceCounters* pRefCounters, IRenderDevice* pDevice, const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, NVSDK_NGX_Parameter* pNGXParams) : - SuperResolutionBase{pRefCounters, Desc}, + SuperResolutionBase{pRefCounters, Desc, Info}, m_pDevice{pDevice}, m_pNGXParams{pNGXParams} { - ValidateTemporalSuperResolutionDesc(m_Desc); PopulateHaltonJitterPattern(m_JitterPattern, 64); } @@ -66,7 +66,7 @@ class SuperResolutionD3D11_DLSS final : public SuperResolutionBase virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final { - ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + ValidateExecuteSuperResolutionAttribs(m_Desc, m_Info, Attribs); NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); if (pDLSSFeature == nullptr) @@ -188,12 +188,12 @@ class DLSSProviderD3D11 final : public SuperResolutionProvider GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); } - virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) override final { DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); - SuperResolutionD3D11_DLSS* pUpscaler = 
NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D11_DLSS instance", SuperResolutionD3D11_DLSS)(m_pDevice, Desc, m_pNGXParams); + SuperResolutionD3D11_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D11_DLSS instance", SuperResolutionD3D11_DLSS)(m_pDevice, Desc, Info, m_pNGXParams); pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); } diff --git a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp index 1ce0dc2ef9..6368010cf8 100644 --- a/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderD3D12.cpp @@ -49,12 +49,12 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase SuperResolutionD3D12_DLSS(IReferenceCounters* pRefCounters, IRenderDevice* pDevice, const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, NVSDK_NGX_Parameter* pNGXParams) : - SuperResolutionBase{pRefCounters, Desc}, + SuperResolutionBase{pRefCounters, Desc, Info}, m_pDevice{pDevice}, m_pNGXParams{pNGXParams} { - ValidateTemporalSuperResolutionDesc(m_Desc); PopulateHaltonJitterPattern(m_JitterPattern, 64); } @@ -66,7 +66,7 @@ class SuperResolutionD3D12_DLSS final : public SuperResolutionBase virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final { - ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + ValidateExecuteSuperResolutionAttribs(m_Desc, m_Info, Attribs); NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); if (pDLSSFeature == nullptr) @@ -203,12 +203,12 @@ class DLSSProviderD3D12 final : public SuperResolutionProvider GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); } - virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) override final + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) override final { 
DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); - SuperResolutionD3D12_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DLSS instance", SuperResolutionD3D12_DLSS)(m_pDevice, Desc, m_pNGXParams); + SuperResolutionD3D12_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DLSS instance", SuperResolutionD3D12_DLSS)(m_pDevice, Desc, Info, m_pNGXParams); pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); } diff --git a/Graphics/SuperResolution/src/DLSSProviderVk.cpp b/Graphics/SuperResolution/src/DLSSProviderVk.cpp index b96e34d4fb..e03e56fd3a 100644 --- a/Graphics/SuperResolution/src/DLSSProviderVk.cpp +++ b/Graphics/SuperResolution/src/DLSSProviderVk.cpp @@ -50,12 +50,12 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase SuperResolutionVk_DLSS(IReferenceCounters* pRefCounters, IRenderDevice* pDevice, const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, NVSDK_NGX_Parameter* pNGXParams) : - SuperResolutionBase{pRefCounters, Desc}, + SuperResolutionBase{pRefCounters, Desc, Info}, m_pDevice{pDevice}, m_pNGXParams{pNGXParams} { - ValidateTemporalSuperResolutionDesc(m_Desc); PopulateHaltonJitterPattern(m_JitterPattern, 64); } @@ -67,7 +67,7 @@ class SuperResolutionVk_DLSS final : public SuperResolutionBase virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final { - ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + ValidateExecuteSuperResolutionAttribs(m_Desc, m_Info, Attribs); NVSDK_NGX_Handle* pDLSSFeature = AcquireFeature(Attribs); if (pDLSSFeature == nullptr) @@ -242,12 +242,12 @@ class DLSSProviderVk final : public SuperResolutionProvider GetDLSSSourceSettings(m_pNGXParams, Attribs, Settings); } - void CreateSuperResolution(const SuperResolutionDesc& Desc, ISuperResolution** ppUpscaler) + void CreateSuperResolution(const 
SuperResolutionDesc& Desc, const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) { DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); DEV_CHECK_ERR(ppUpscaler != nullptr, "ppUpscaler must not be null"); - SuperResolutionVk_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionVk_DLSS instance", SuperResolutionVk_DLSS)(m_pDevice, Desc, m_pNGXParams); + SuperResolutionVk_DLSS* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionVk_DLSS instance", SuperResolutionVk_DLSS)(m_pDevice, Desc, Info, m_pNGXParams); pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); } diff --git a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp index b3f75bd779..5a1c0cca26 100644 --- a/Graphics/SuperResolution/src/DSRProviderD3D12.cpp +++ b/Graphics/SuperResolution/src/DSRProviderD3D12.cpp @@ -96,6 +96,7 @@ class SuperResolutionD3D12_DSR final : public SuperResolutionBase SuperResolutionD3D12_DSR(IReferenceCounters* pRefCounters, RenderDeviceD3D12Impl* pDevice, const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, IDSRDevice* pDSRDevice); ~SuperResolutionD3D12_DSR(); @@ -111,13 +112,13 @@ class SuperResolutionD3D12_DSR final : public SuperResolutionBase SuperResolutionD3D12_DSR::SuperResolutionD3D12_DSR(IReferenceCounters* pRefCounters, RenderDeviceD3D12Impl* pDevice, const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, IDSRDevice* pDSRDevice) : - SuperResolutionBase{pRefCounters, Desc}, + SuperResolutionBase{pRefCounters, Desc, Info}, m_pDevice{pDevice}, m_DSRUpscalers(pDevice->GetCommandQueueCount()) { - ValidateTemporalSuperResolutionDesc(m_Desc); VERIFY_SUPER_RESOLUTION(m_Desc.Name, Desc.MotionFormat == TEX_FORMAT_RG16_FLOAT, "MotionFormat must be TEX_FORMAT_RG16_FLOAT. 
Got: ", GetTextureFormatAttribs(Desc.MotionFormat).Name); VERIFY_SUPER_RESOLUTION(m_Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Desc.ExposureFormat != TEX_FORMAT_UNKNOWN, "ExposureFormat must not be TEX_FORMAT_UNKNOWN when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set. " @@ -169,7 +170,7 @@ SuperResolutionD3D12_DSR::~SuperResolutionD3D12_DSR() = default; void DILIGENT_CALL_TYPE SuperResolutionD3D12_DSR::Execute(const ExecuteSuperResolutionAttribs& Attribs) { - ValidateTemporalExecuteSuperResolutionAttribs(m_Desc, Attribs); + ValidateExecuteSuperResolutionAttribs(m_Desc, m_Info, Attribs); DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraNear > 0, "CameraNear must be greater than zero for temporal upscaling"); DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFar > 0, "CameraFar must be greater than zero for temporal upscaling."); DEV_CHECK_SUPER_RESOLUTION(m_Desc.Name, Attribs.CameraFovAngleVert > 0, "CameraFovAngleVert must be greater than zero for temporal upscaling."); @@ -361,13 +362,14 @@ class DSRProviderD3D12 final : public SuperResolutionProvider } virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) override final { DEV_CHECK_ERR(m_pDSRDevice != nullptr, "DirectSR device must not be null"); DEV_CHECK_ERR(m_pDevice != nullptr, "Render device must not be null"); RenderDeviceD3D12Impl* pDeviceD3D12 = ClassPtrCast(m_pDevice.RawPtr()); - SuperResolutionD3D12_DSR* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DSR instance", SuperResolutionD3D12_DSR)(pDeviceD3D12, Desc, m_pDSRDevice); + SuperResolutionD3D12_DSR* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionD3D12_DSR instance", SuperResolutionD3D12_DSR)(pDeviceD3D12, Desc, Info, m_pDSRDevice); pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); } diff --git a/Graphics/SuperResolution/src/SuperResolutionBase.cpp 
b/Graphics/SuperResolution/src/SuperResolutionBase.cpp index 384ce5e584..ee0c78e6a6 100644 --- a/Graphics/SuperResolution/src/SuperResolutionBase.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionBase.cpp @@ -66,7 +66,7 @@ void ValidateSourceSettingsAttribs(const SuperResolutionSourceSettingsAttribs& A #endif } -void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false) +void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc, const SuperResolutionInfo& Info) noexcept(false) { VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.OutputWidth > 0 && Desc.OutputHeight > 0, "Output resolution must be greater than zero"); VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.OutputFormat != TEX_FORMAT_UNKNOWN, "OutputFormat must not be TEX_FORMAT_UNKNOWN"); @@ -74,16 +74,48 @@ void ValidateSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.InputWidth > 0 && Desc.InputHeight > 0, "InputWidth and InputHeight must be greater than zero"); VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.InputWidth <= Desc.OutputWidth && Desc.InputHeight <= Desc.OutputHeight, "Input resolution must not exceed output resolution"); -} -void ValidateTemporalSuperResolutionDesc(const SuperResolutionDesc& Desc) noexcept(false) -{ - ValidateSuperResolutionDesc(Desc); - VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.DepthFormat != TEX_FORMAT_UNKNOWN, "DepthFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); - VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.MotionFormat != TEX_FORMAT_UNKNOWN, "MotionFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); + if (Desc.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) + { + const bool SharpnessSupported = Info.Type == SUPER_RESOLUTION_TYPE_SPATIAL ? 
(Info.SpatialCapFlags & SUPER_RESOLUTION_SPATIAL_CAP_FLAG_SHARPNESS) != 0 : (Info.TemporalCapFlags & SUPER_RESOLUTION_TEMPORAL_CAP_FLAG_SHARPNESS) != 0; + + VERIFY_SUPER_RESOLUTION(Desc.Name, SharpnessSupported, + "SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING is set, but the '", Info.Name, + "' variant does not support sharpness. Check the variant's capability flags."); + } + + if (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) + { + VERIFY_SUPER_RESOLUTION(Desc.Name, Info.Type == SUPER_RESOLUTION_TYPE_TEMPORAL, + "SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is only supported for temporal upscaling."); + } + + if (Info.Type == SUPER_RESOLUTION_TYPE_TEMPORAL) + { + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.DepthFormat != TEX_FORMAT_UNKNOWN, "DepthFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.MotionFormat != TEX_FORMAT_UNKNOWN, "MotionFormat must not be TEX_FORMAT_UNKNOWN for temporal upscaling"); + + if (!(Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE)) + { + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ExposureFormat != TEX_FORMAT_UNKNOWN, + "ExposureFormat must not be TEX_FORMAT_UNKNOWN when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set for temporal upscaling"); + } + } + else + { + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.DepthFormat == TEX_FORMAT_UNKNOWN, + "DepthFormat must be TEX_FORMAT_UNKNOWN for spatial upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.MotionFormat == TEX_FORMAT_UNKNOWN, + "MotionFormat must be TEX_FORMAT_UNKNOWN for spatial upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat == TEX_FORMAT_UNKNOWN, + "ReactiveMaskFormat must be TEX_FORMAT_UNKNOWN for spatial upscaling"); + VERIFY_SUPER_RESOLUTION(Desc.Name, Desc.ExposureFormat == TEX_FORMAT_UNKNOWN, + "ExposureFormat must be TEX_FORMAT_UNKNOWN for spatial upscaling"); + } } void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, const ExecuteSuperResolutionAttribs& 
Attribs) { #ifdef DILIGENT_DEVELOPMENT @@ -120,101 +152,96 @@ void ValidateExecuteSuperResolutionAttribs(const SuperResolutionDesc& "Output texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, ") does not match the expected OutputFormat (", GetTextureFormatAttribs(Desc.OutputFormat).Name, ")"); } -#endif -} - -void ValidateTemporalExecuteSuperResolutionAttribs(const SuperResolutionDesc& Desc, - const ExecuteSuperResolutionAttribs& Attribs) -{ -#ifdef DILIGENT_DEVELOPMENT - ValidateExecuteSuperResolutionAttribs(Desc, Attribs); - - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pDepthTextureSRV != nullptr, "Depth texture SRV must not be null for temporal upscaling"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pMotionVectorsSRV != nullptr, "Motion vectors SRV must not be null for temporal upscaling"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Attribs.pExposureTextureSRV != nullptr, - "Exposure texture SRV must not be null when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set"); - - // Validate output texture view type (DirectSR requires UAV) - if (Attribs.pOutputTextureView != nullptr) - { - const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); - const TextureViewDesc& ViewDesc = Attribs.pOutputTextureView->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, - "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_UNORDERED_ACCESS"); - } - // Validate depth texture - if (Attribs.pDepthTextureSRV != nullptr) + if (Info.Type == SUPER_RESOLUTION_TYPE_TEMPORAL) { - const TextureDesc& TexDesc = Attribs.pDepthTextureSRV->GetTexture()->GetDesc(); - const TextureViewDesc& ViewDesc = Attribs.pDepthTextureSRV->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Depth texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - 
DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Depth texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.DepthFormat, - "Depth texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected DepthFormat (", GetTextureFormatAttribs(Desc.DepthFormat).Name, ")"); - } - - // Validate motion vectors texture - if (Attribs.pMotionVectorsSRV != nullptr) - { - const TextureDesc& TexDesc = Attribs.pMotionVectorsSRV->GetTexture()->GetDesc(); - const TextureViewDesc& ViewDesc = Attribs.pMotionVectorsSRV->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Motion vectors view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Motion vectors texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.MotionFormat, - "Motion vectors view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected MotionFormat (", GetTextureFormatAttribs(Desc.MotionFormat).Name, ")"); - } - - // Validate exposure texture - if (Attribs.pExposureTextureSRV != nullptr) - { - const TextureDesc& TexDesc = Attribs.pExposureTextureSRV->GetTexture()->GetDesc(); - const TextureViewDesc& ViewDesc = Attribs.pExposureTextureSRV->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Exposure texture view '", TexDesc.Name, "' must be 
TEXTURE_VIEW_SHADER_RESOURCE"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == 1 && TexDesc.Height == 1, - "Exposure texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be 1x1"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ExposureFormat, - "Exposure texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected ExposureFormat (", GetTextureFormatAttribs(Desc.ExposureFormat).Name, ")"); - } - - // Validate reactive mask texture - if (Attribs.pReactiveMaskTextureSRV != nullptr) - { - const TextureDesc& TexDesc = Attribs.pReactiveMaskTextureSRV->GetTexture()->GetDesc(); - const TextureViewDesc& ViewDesc = Attribs.pReactiveMaskTextureSRV->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Reactive mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat != TEX_FORMAT_UNKNOWN, - "Reactive mask texture '", TexDesc.Name, "' provided but ReactiveMaskFormat was not set in SuperResolutionDesc"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Reactive mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ReactiveMaskFormat, - "Reactive mask view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, - ") does not match the expected ReactiveMaskFormat (", GetTextureFormatAttribs(Desc.ReactiveMaskFormat).Name, ")"); - } - - // Validate ignore history mask texture - if (Attribs.pIgnoreHistoryMaskTextureSRV != nullptr) - { - const TextureDesc& TexDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture()->GetDesc(); - const 
TextureViewDesc& ViewDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetDesc(); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, - "Ignore history mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); - DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, - "Ignore history mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, - ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pDepthTextureSRV != nullptr, "Depth texture SRV must not be null for temporal upscaling"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Attribs.pMotionVectorsSRV != nullptr, "Motion vectors SRV must not be null for temporal upscaling"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, (Desc.Flags & SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE) != 0 || Attribs.pExposureTextureSRV != nullptr, + "Exposure texture SRV must not be null when SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE is not set"); + + // Validate output texture view type (DirectSR requires UAV) + if (Attribs.pOutputTextureView != nullptr) + { + const TextureDesc& TexDesc = Attribs.pOutputTextureView->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pOutputTextureView->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_UNORDERED_ACCESS, + "Output texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_UNORDERED_ACCESS"); + } + + // Validate depth texture + if (Attribs.pDepthTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pDepthTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pDepthTextureSRV->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Depth texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, 
TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Depth texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.DepthFormat, + "Depth texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected DepthFormat (", GetTextureFormatAttribs(Desc.DepthFormat).Name, ")"); + } + + // Validate motion vectors texture + if (Attribs.pMotionVectorsSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pMotionVectorsSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pMotionVectorsSRV->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Motion vectors view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Motion vectors texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.MotionFormat, + "Motion vectors view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected MotionFormat (", GetTextureFormatAttribs(Desc.MotionFormat).Name, ")"); + } + + // Validate exposure texture + if (Attribs.pExposureTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pExposureTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pExposureTextureSRV->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Exposure texture view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + 
DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width == 1 && TexDesc.Height == 1, + "Exposure texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be 1x1"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ExposureFormat, + "Exposure texture view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ExposureFormat (", GetTextureFormatAttribs(Desc.ExposureFormat).Name, ")"); + } + + // Validate reactive mask texture + if (Attribs.pReactiveMaskTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pReactiveMaskTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = Attribs.pReactiveMaskTextureSRV->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Reactive mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, Desc.ReactiveMaskFormat != TEX_FORMAT_UNKNOWN, + "Reactive mask texture '", TexDesc.Name, "' provided but ReactiveMaskFormat was not set in SuperResolutionDesc"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Reactive mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.Format == Desc.ReactiveMaskFormat, + "Reactive mask view '", TexDesc.Name, "' format (", GetTextureFormatAttribs(ViewDesc.Format).Name, + ") does not match the expected ReactiveMaskFormat (", GetTextureFormatAttribs(Desc.ReactiveMaskFormat).Name, ")"); + } + + // Validate ignore history mask texture + if (Attribs.pIgnoreHistoryMaskTextureSRV != nullptr) + { + const TextureDesc& TexDesc = Attribs.pIgnoreHistoryMaskTextureSRV->GetTexture()->GetDesc(); + const TextureViewDesc& ViewDesc = 
Attribs.pIgnoreHistoryMaskTextureSRV->GetDesc(); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, ViewDesc.ViewType == TEXTURE_VIEW_SHADER_RESOURCE, + "Ignore history mask view '", TexDesc.Name, "' must be TEXTURE_VIEW_SHADER_RESOURCE"); + DEV_CHECK_SUPER_RESOLUTION(Desc.Name, TexDesc.Width >= Desc.InputWidth && TexDesc.Height >= Desc.InputHeight, + "Ignore history mask texture '", TexDesc.Name, "' dimensions (", TexDesc.Width, "x", TexDesc.Height, + ") must be at least the upscaler input resolution (", Desc.InputWidth, "x", Desc.InputHeight, ")"); + } } #endif } diff --git a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp index fa1ff59a25..c20b0d3e4d 100644 --- a/Graphics/SuperResolution/src/SuperResolutionFactory.cpp +++ b/Graphics/SuperResolution/src/SuperResolutionFactory.cpp @@ -56,6 +56,10 @@ std::unique_ptr CreateDSRProviderD3D12(IRenderDevice* p std::unique_ptr CreateMetalFXProvider(IRenderDevice* pDevice); #endif +#if DILIGENT_FSR_SUPPORTED +std::unique_ptr CreateFSRProvider(IRenderDevice* pDevice); +#endif + namespace { @@ -104,6 +108,9 @@ class SuperResolutionFactory : public ObjectBase #endif #ifdef DILIGENT_METALFX_SUPPORTED AddProvider(pDevice, CreateMetalFXProvider, "MetalFX"); +#endif +#ifdef DILIGENT_FSR_SUPPORTED + AddProvider(pDevice, CreateFSRProvider, "FSR Spatial"); #endif (void)AddProvider; } @@ -133,8 +140,9 @@ class SuperResolutionFactory : public ObjectBase virtual void DILIGENT_CALL_TYPE GetSourceSettings(const SuperResolutionSourceSettingsAttribs& Attribs, SuperResolutionSourceSettings& Settings) const override final { - Settings = {}; - if (const ProviderInfo* pEntry = FindProvider(Attribs.VariantId)) + Settings = {}; + const auto [pEntry, pInfo] = FindProvider(Attribs.VariantId); + if (pEntry != nullptr) { pEntry->Provider->GetSourceSettings(Attribs, Settings); } @@ -152,7 +160,7 @@ class SuperResolutionFactory : public ObjectBase *ppUpscaler = nullptr; - const ProviderInfo* 
pEntry = FindProvider(Desc.VariantId); + const auto [pEntry, pInfo] = FindProvider(Desc.VariantId); if (pEntry == nullptr) { LOG_ERROR_MESSAGE("Super resolution variant not found for the specified VariantId. Call EnumerateVariants() to get valid variant IDs."); @@ -161,7 +169,7 @@ class SuperResolutionFactory : public ObjectBase try { - pEntry->Provider->CreateSuperResolution(Desc, ppUpscaler); + pEntry->Provider->CreateSuperResolution(Desc, *pInfo, ppUpscaler); } catch (...) { @@ -191,17 +199,17 @@ class SuperResolutionFactory : public ObjectBase std::vector Variants; }; - const ProviderInfo* FindProvider(const INTERFACE_ID& VariantId) const + std::pair FindProvider(const INTERFACE_ID& VariantId) const { for (const ProviderInfo& ProvInfo : m_Providers) { for (const SuperResolutionInfo& SRInfo : ProvInfo.Variants) { if (SRInfo.VariantId == VariantId) - return &ProvInfo; + return {&ProvInfo, &SRInfo}; } } - return nullptr; + return {nullptr, nullptr}; } private: From 7515cc0561cfb23e4a6a98e096cd793c55bb5d8b Mon Sep 17 00:00:00 2001 From: MikhailGorobets Date: Sat, 21 Mar 2026 00:07:18 +0600 Subject: [PATCH 14/14] Integrate FSR spatial upscaler --- .../FormatValidation/validate_format_linux.sh | 1 + .../FormatValidation/validate_format_mac.sh | 1 + .../FormatValidation/validate_format_win.bat | 1 + Graphics/SuperResolution/CMakeLists.txt | 89 +- .../include/SuperResolutionProvider.hpp | 2 +- .../include/SuperResolutionVariants.hpp | 4 + .../SuperResolution/shaders/FSRStructures.fxh | 33 + .../shaders/FSR_ContrastAdaptiveSharpening.fx | 36 + .../shaders/FSR_EdgeAdaptiveUpsampling.fx | 73 + .../SuperResolution/shaders/FSR_FullQuad.fx | 14 + .../shaders/fsr1/ffx_common_types.h | 560 ++++ .../SuperResolution/shaders/fsr1/ffx_core.h | 80 + .../shaders/fsr1/ffx_core_cpu.h | 328 ++ .../shaders/fsr1/ffx_core_glsl.h | 1790 ++++++++++ .../shaders/fsr1/ffx_core_gpu_common.h | 2784 +++++++++++++++ .../shaders/fsr1/ffx_core_gpu_common_half.h | 2979 +++++++++++++++++ 
.../shaders/fsr1/ffx_core_hlsl.h | 1655 +++++++++ .../shaders/fsr1/ffx_core_portability.h | 51 + .../SuperResolution/shaders/fsr1/ffx_fsr1.h | 1251 +++++++ Graphics/SuperResolution/src/FSRProvider.cpp | 325 ++ .../src/SuperResolutionTest.cpp | 12 +- 21 files changed, 12063 insertions(+), 6 deletions(-) create mode 100644 Graphics/SuperResolution/shaders/FSRStructures.fxh create mode 100644 Graphics/SuperResolution/shaders/FSR_ContrastAdaptiveSharpening.fx create mode 100644 Graphics/SuperResolution/shaders/FSR_EdgeAdaptiveUpsampling.fx create mode 100644 Graphics/SuperResolution/shaders/FSR_FullQuad.fx create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_common_types.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_cpu.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_glsl.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common_half.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_hlsl.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_core_portability.h create mode 100644 Graphics/SuperResolution/shaders/fsr1/ffx_fsr1.h create mode 100644 Graphics/SuperResolution/src/FSRProvider.cpp diff --git a/BuildTools/FormatValidation/validate_format_linux.sh b/BuildTools/FormatValidation/validate_format_linux.sh index 1c68786425..b24a2b383c 100755 --- a/BuildTools/FormatValidation/validate_format_linux.sh +++ b/BuildTools/FormatValidation/validate_format_linux.sh @@ -6,6 +6,7 @@ validate_format ../../Common ../../Graphics ../../Platforms ../../Primitives ../ --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions.h \ --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions_inc.h \ --exclude ../../Graphics/GraphicsEngineVulkan/shaders \ + --exclude ../../Graphics/SuperResolution/shaders \ --exclude 
../../Graphics/GraphicsEngine.NET \ --exclude ../../Tests/DiligentCoreAPITest/assets diff --git a/BuildTools/FormatValidation/validate_format_mac.sh b/BuildTools/FormatValidation/validate_format_mac.sh index 13be2c4bb4..262239264f 100755 --- a/BuildTools/FormatValidation/validate_format_mac.sh +++ b/BuildTools/FormatValidation/validate_format_mac.sh @@ -4,5 +4,6 @@ python3 clang-format-validate.py --clang-format-executable ./clang-format_mac_10 --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions.h \ --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions_inc.h \ --exclude ../../Graphics/GraphicsEngineVulkan/shaders \ +--exclude ../../Graphics/SuperResolution/shaders \ --exclude ../../Graphics/GraphicsEngine.NET \ --exclude ../../Tests/DiligentCoreAPITest/assets diff --git a/BuildTools/FormatValidation/validate_format_win.bat b/BuildTools/FormatValidation/validate_format_win.bat index 90a9c1cfe1..cf07ea4243 100644 --- a/BuildTools/FormatValidation/validate_format_win.bat +++ b/BuildTools/FormatValidation/validate_format_win.bat @@ -3,5 +3,6 @@ python clang-format-validate.py --color never --clang-format-executable clang-fo --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions.h ^ --exclude ../../Graphics/HLSL2GLSLConverterLib/include/GLSLDefinitions_inc.h ^ --exclude ../../Graphics/GraphicsEngineVulkan/shaders ^ +--exclude ../../Graphics/SuperResolution/shaders ^ --exclude ../../Graphics/GraphicsEngine.NET ^ --exclude ../../Tests/DiligentCoreAPITest/assets diff --git a/Graphics/SuperResolution/CMakeLists.txt b/Graphics/SuperResolution/CMakeLists.txt index 4d1a124324..1fa66a4299 100644 --- a/Graphics/SuperResolution/CMakeLists.txt +++ b/Graphics/SuperResolution/CMakeLists.txt @@ -6,6 +6,15 @@ project(Diligent-SuperResolution CXX) set(DILIGENT_DLSS_SUPPORTED FALSE CACHE INTERNAL "DLSS is not supported") set(DILIGENT_DSR_SUPPORTED FALSE CACHE INTERNAL "DirectSR is not supported") +set(DILIGENT_FSR_SUPPORTED FALSE CACHE 
INTERNAL "FSR is not supported") + +if(NOT FILE2STRING_PATH STREQUAL "") + set(DILIGENT_FSR_SUPPORTED TRUE CACHE INTERNAL "FSR is supported") +endif() + +if(${DILIGENT_NO_FSR}) + set(DILIGENT_FSR_SUPPORTED FALSE CACHE INTERNAL "FSR is forcibly disabled") +endif() if(PLATFORM_WIN32 AND NOT MINGW_BUILD AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (D3D11_SUPPORTED OR D3D12_SUPPORTED OR VULKAN_SUPPORTED) @@ -61,6 +70,70 @@ set(SOURCE src/SuperResolutionFactory.cpp ) +set(FSR_SHADERS + shaders/FSR_FullQuad.fx + shaders/FSRStructures.fxh + shaders/FSR_EdgeAdaptiveUpsampling.fx + shaders/FSR_ContrastAdaptiveSharpening.fx + shaders/fsr1/ffx_common_types.h + shaders/fsr1/ffx_core.h + shaders/fsr1/ffx_core_glsl.h + shaders/fsr1/ffx_core_gpu_common.h + shaders/fsr1/ffx_core_gpu_common_half.h + shaders/fsr1/ffx_core_hlsl.h + shaders/fsr1/ffx_core_portability.h + shaders/fsr1/ffx_fsr1.h +) +set_source_files_properties(${FSR_SHADERS} PROPERTIES VS_TOOL_OVERRIDE "None") + +set(FSR_SHADER_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/shaders_inc/FSR) +file(MAKE_DIRECTORY ${FSR_SHADER_OUTPUT_DIR}) + +set(FSR_SHADERS_LIST_FILE ${FSR_SHADER_OUTPUT_DIR}/FSRShaderList.h) + +if(DILIGENT_FSR_SUPPORTED) + find_package(Python3 REQUIRED) + + file(WRITE ${FSR_SHADERS_LIST_FILE} + "static const MemoryShaderSourceFileInfo g_FSRShaders[] =\n" + "{" + ) + + foreach(FILE ${FSR_SHADERS}) + get_filename_component(FILE_NAME ${FILE} NAME) + set(CONVERTED_FILE ${FSR_SHADER_OUTPUT_DIR}/${FILE_NAME}.h) + add_custom_command(OUTPUT ${CONVERTED_FILE} + COMMAND ${Python3_EXECUTABLE} ${FILE2STRING_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/${FILE} ${CONVERTED_FILE} + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${FILE} + COMMENT "Processing FSR shader ${FILE}" + VERBATIM) + + file(APPEND ${FSR_SHADERS_LIST_FILE} + "\n {" + "\n \"${FILE_NAME}\"," + "\n #include \"${FILE_NAME}.h\"" + "\n }," + ) + + list(APPEND FSR_SHADERS_INC_LIST ${CONVERTED_FILE}) + endforeach() + + file(APPEND ${FSR_SHADERS_LIST_FILE} + "\n};\n" + ) + + 
set_source_files_properties(${FSR_SHADERS_INC_LIST} PROPERTIES GENERATED TRUE) +else() + message(WARNING "File2String utility is not found. FSR is disabled.") +endif() + +if(DILIGENT_FSR_SUPPORTED) + list(APPEND SOURCE src/FSRProvider.cpp) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set_source_files_properties(src/FSRProvider.cpp PROPERTIES COMPILE_OPTIONS "-Wno-unused-function") + endif() +endif() + if(DILIGENT_DLSS_SUPPORTED) list(APPEND INCLUDE include/SuperResolutionDLSS.hpp) list(APPEND SOURCE src/SuperResolutionDLSS.cpp) @@ -92,9 +165,17 @@ target_include_directories(Diligent-SuperResolutionInterface INTERFACE interface target_compile_definitions(Diligent-SuperResolutionInterface INTERFACE SUPER_RESOLUTION_SUPPORTED=1) add_library(Diligent-SuperResolution-static STATIC - ${SOURCE} ${INTERFACE} ${INCLUDE} + ${SOURCE} + ${INTERFACE} + ${INCLUDE} + ${FSR_SHADERS} + ${FSR_SHADERS_INC_LIST} + ${FSR_SHADERS_LIST_FILE} readme.md ) +source_group("shaders/FSR" FILES ${FSR_SHADERS}) +source_group("generated/FSR" FILES ${FSR_SHADERS_INC_LIST} ${FSR_SHADERS_LIST_FILE}) + add_library(Diligent-SuperResolution-shared SHARED readme.md ) @@ -109,6 +190,7 @@ PRIVATE ../GraphicsEngine/include ../GraphicsEngineD3DBase/include ../GraphicsEngineNextGenBase/include + ${CMAKE_CURRENT_BINARY_DIR}/shaders_inc/FSR ) if(DILIGENT_DLSS_SUPPORTED) @@ -129,6 +211,10 @@ if(DILIGENT_DSR_SUPPORTED) endif() endif() +if(DILIGENT_FSR_SUPPORTED) + target_compile_definitions(Diligent-SuperResolution-static PRIVATE DILIGENT_FSR_SUPPORTED=1) +endif() + target_compile_definitions(Diligent-SuperResolution-shared PUBLIC DILIGENT_SUPER_RESOLUTION_SHARED=1) target_link_libraries(Diligent-SuperResolution-static @@ -139,6 +225,7 @@ PRIVATE Diligent-Common Diligent-GraphicsAccessories Diligent-ShaderTools + Diligent-GraphicsTools ) if(DILIGENT_DSR_SUPPORTED) diff --git a/Graphics/SuperResolution/include/SuperResolutionProvider.hpp 
b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp index 8457ae4609..4606ddb826 100644 --- a/Graphics/SuperResolution/include/SuperResolutionProvider.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionProvider.hpp @@ -61,7 +61,7 @@ class SuperResolutionProvider case SUPER_RESOLUTION_OPTIMIZATION_TYPE_HIGH_PERFORMANCE: ScaleFactor = 0.5f; break; case SUPER_RESOLUTION_OPTIMIZATION_TYPE_MAX_PERFORMANCE: ScaleFactor = 1.0f / 3.0f; break; default: ScaleFactor = 1.0f / 1.7f; break; - // clang-format on + // clang-format on } Settings.OptimalInputWidth = std::max(1u, static_cast(Attribs.OutputWidth * ScaleFactor)); diff --git a/Graphics/SuperResolution/include/SuperResolutionVariants.hpp b/Graphics/SuperResolution/include/SuperResolutionVariants.hpp index 1499aea97f..89069caefe 100644 --- a/Graphics/SuperResolution/include/SuperResolutionVariants.hpp +++ b/Graphics/SuperResolution/include/SuperResolutionVariants.hpp @@ -46,4 +46,8 @@ static constexpr INTERFACE_ID VariantId_MetalFXSpatial = static constexpr INTERFACE_ID VariantId_MetalFXTemporal = {0xc4d70002, 0xa1b2, 0x4c3d, {0x8e, 0x9f, 0x0a, 0x1b, 0x2c, 0x3d, 0x4e, 0x5f}}; +// {F5A10001-B2C3-4D5E-9F01-2A3B4C5D6E7F} +static constexpr INTERFACE_ID VariantId_FSRSpatial = + {0xf5a10001, 0xb2c3, 0x4d5e, {0x9f, 0x01, 0x2a, 0x3b, 0x4c, 0x5d, 0x6e, 0x7f}}; + } // namespace Diligent diff --git a/Graphics/SuperResolution/shaders/FSRStructures.fxh b/Graphics/SuperResolution/shaders/FSRStructures.fxh new file mode 100644 index 0000000000..58e92fcef2 --- /dev/null +++ b/Graphics/SuperResolution/shaders/FSRStructures.fxh @@ -0,0 +1,33 @@ +#ifndef _FSR_STRUCTURES_FXH_ +#define _FSR_STRUCTURES_FXH_ + +#ifndef __cplusplus +# ifndef DEFAULT_VALUE +# define DEFAULT_VALUE(x) +# endif +#elif !defined(DEFAULT_VALUE) +# define DEFAULT_VALUE(x) = x +#endif + +#ifndef __cplusplus +# ifndef CHECK_STRUCT_ALIGNMENT +# define CHECK_STRUCT_ALIGNMENT(s) +# endif +#endif + +struct FSRAttribs +{ + uint4 EASUConstants0; + uint4 
EASUConstants1; + uint4 EASUConstants2; + uint4 EASUConstants3; + uint4 RCASConstants; + float4 SourceSize; +}; + +#ifdef CHECK_STRUCT_ALIGNMENT +CHECK_STRUCT_ALIGNMENT(FSRAttribs); +#endif + + +#endif //_FSR_STRUCTURES_FXH_ diff --git a/Graphics/SuperResolution/shaders/FSR_ContrastAdaptiveSharpening.fx b/Graphics/SuperResolution/shaders/FSR_ContrastAdaptiveSharpening.fx new file mode 100644 index 0000000000..1a363fdd3f --- /dev/null +++ b/Graphics/SuperResolution/shaders/FSR_ContrastAdaptiveSharpening.fx @@ -0,0 +1,36 @@ +#include "FSRStructures.fxh" + +cbuffer cbFSRAttribs +{ + FSRAttribs g_FSRAttribs; +} + +Texture2D g_TextureSource; + +#define FFX_GPU +#define FFX_HLSL +#define FFX_HALF 0 +#define FFX_HLSL_SM 50 +#include "ffx_core.h" + +#define FSR_RCAS_F 1 +#define FSR_RCAS_DENOISE 1 + +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 Position) +{ + return g_TextureSource.Load(FfxInt32x3(Position, 0)); +} + +void FsrRcasInputF(FFX_PARAMETER_INOUT FfxFloat32 R, FFX_PARAMETER_INOUT FfxFloat32 G, FFX_PARAMETER_INOUT FfxFloat32 B) +{ + +} + +#include "ffx_fsr1.h" + +FfxFloat32x4 ComputeContrastAdaptiveSharpeningPS(in float4 Position : SV_Position) : SV_Target0 +{ + FfxFloat32x3 ResultColor = FfxFloat32x3(0.0, 0.0, 0.0); + FsrRcasF(ResultColor.r, ResultColor.g, ResultColor.b, FfxUInt32x2(Position.xy), g_FSRAttribs.RCASConstants); + return FfxFloat32x4(ResultColor, 1.0); +} diff --git a/Graphics/SuperResolution/shaders/FSR_EdgeAdaptiveUpsampling.fx b/Graphics/SuperResolution/shaders/FSR_EdgeAdaptiveUpsampling.fx new file mode 100644 index 0000000000..297e7505aa --- /dev/null +++ b/Graphics/SuperResolution/shaders/FSR_EdgeAdaptiveUpsampling.fx @@ -0,0 +1,73 @@ +#include "FSRStructures.fxh" + +cbuffer cbFSRAttribs +{ + FSRAttribs g_FSRAttribs; +} + +Texture2D g_TextureSource; +SamplerState g_TextureSource_sampler; + +#define FFX_GPU +#define FFX_HLSL +#define FFX_HALF 0 +#define FFX_HLSL_SM 50 +#include "ffx_core.h" + +#define FFX_FSR_EASU_FLOAT 1 + +FfxFloat32x4 
FsrEasuRF(FfxFloat32x2 Texcoord) +{ +#ifdef FSR_FEATURE_TEXTURE_GATHER + return g_TextureSource.GatherRed(g_TextureSource_sampler, Texcoord); +#else + float2 Position = g_FSRAttribs.SourceSize.xy * Texcoord - float2(0.5, 0.5); + FfxFloat32x4 Gather; + Gather.x = g_TextureSource.Load(int3(int2(Position) + int2(0, 1), 0)).r; + Gather.y = g_TextureSource.Load(int3(int2(Position) + int2(1, 1), 0)).r; + Gather.z = g_TextureSource.Load(int3(int2(Position) + int2(1, 0), 0)).r; + Gather.w = g_TextureSource.Load(int3(int2(Position) + int2(0, 0), 0)).r; + return Gather; +#endif +} + +FfxFloat32x4 FsrEasuGF(FfxFloat32x2 Texcoord) +{ +#ifdef FSR_FEATURE_TEXTURE_GATHER + return g_TextureSource.GatherGreen(g_TextureSource_sampler, Texcoord); +#else + float2 Position = g_FSRAttribs.SourceSize.xy * Texcoord - float2(0.5, 0.5); + FfxFloat32x4 Gather; + Gather.x = g_TextureSource.Load(int3(int2(Position) + int2(0, 1), 0)).g; + Gather.y = g_TextureSource.Load(int3(int2(Position) + int2(1, 1), 0)).g; + Gather.z = g_TextureSource.Load(int3(int2(Position) + int2(1, 0), 0)).g; + Gather.w = g_TextureSource.Load(int3(int2(Position) + int2(0, 0), 0)).g; + return Gather; +#endif +} + +FfxFloat32x4 FsrEasuBF(FfxFloat32x2 Texcoord) +{ +#ifdef FSR_FEATURE_TEXTURE_GATHER + return g_TextureSource.GatherBlue(g_TextureSource_sampler, Texcoord); +#else + float2 Position = g_FSRAttribs.SourceSize.xy * Texcoord - float2(0.5, 0.5); + FfxFloat32x4 Gather; + Gather.x = g_TextureSource.Load(int3(int2(Position) + int2(0, 1), 0)).b; + Gather.y = g_TextureSource.Load(int3(int2(Position) + int2(1, 1), 0)).b; + Gather.z = g_TextureSource.Load(int3(int2(Position) + int2(1, 0), 0)).b; + Gather.w = g_TextureSource.Load(int3(int2(Position) + int2(0, 0), 0)).b; + return Gather; +#endif +} + +#include "ffx_fsr1.h" + +FfxFloat32x4 ComputeEdgeAdaptiveUpsamplingPS(in float4 Position : SV_Position) : SV_Target0 +{ + FfxFloat32x3 ResultColor = FfxFloat32x3(0.0, 0.0, 0.0); + ffxFsrEasuFloat(ResultColor, 
FfxUInt32x2(Position.xy), + g_FSRAttribs.EASUConstants0, g_FSRAttribs.EASUConstants1, + g_FSRAttribs.EASUConstants2, g_FSRAttribs.EASUConstants3); + return FfxFloat32x4(ResultColor, 1.0); +} diff --git a/Graphics/SuperResolution/shaders/FSR_FullQuad.fx b/Graphics/SuperResolution/shaders/FSR_FullQuad.fx new file mode 100644 index 0000000000..03535a368c --- /dev/null +++ b/Graphics/SuperResolution/shaders/FSR_FullQuad.fx @@ -0,0 +1,14 @@ +struct FSR_VSOutput +{ + float4 Position : SV_Position; +}; + +void FSR_FullQuadVS(in uint VertexId : SV_VertexID, out FSR_VSOutput VSOut) +{ + float2 PosXY[3]; + PosXY[0] = float2(-1.0, -1.0); + PosXY[1] = float2(-1.0, +3.0); + PosXY[2] = float2(+3.0, -1.0); + + VSOut.Position = float4(PosXY[VertexId % 3u], 0.0, 1.0); +} diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_common_types.h b/Graphics/SuperResolution/shaders/fsr1/ffx_common_types.h new file mode 100644 index 0000000000..4cc3092382 --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_common_types.h @@ -0,0 +1,560 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +#ifndef FFX_COMMON_TYPES_H +#define FFX_COMMON_TYPES_H + +#if defined(FFX_CPU) +#define FFX_PARAMETER_IN +#define FFX_PARAMETER_OUT +#define FFX_PARAMETER_INOUT +#define FFX_PARAMETER_UNIFORM +#elif defined(FFX_HLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM uniform +#elif defined(FFX_GLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! +#endif // #if defined(FFX_CPU) + +#if defined(FFX_CPU) +/// A typedef for a boolean value. +/// +/// @ingroup CPUTypes +typedef bool FfxBoolean; + +/// A typedef for a unsigned 8bit integer. +/// +/// @ingroup CPUTypes +typedef uint8_t FfxUInt8; + +/// A typedef for a unsigned 16bit integer. +/// +/// @ingroup CPUTypes +typedef uint16_t FfxUInt16; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32; + +/// A typedef for a unsigned 64bit integer. +/// +/// @ingroup CPUTypes +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPUTypes +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPUTypes +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. +/// +/// @ingroup CPUTypes +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. 
+/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4[4]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x4[4]; +#endif // #if defined(FFX_CPU) + +#if defined(FFX_HLSL) + +#define FfxFloat32Mat4 matrix +#define FfxFloat32Mat3 matrix + +/// A typedef for a boolean value. +/// +/// @ingroup HLSLTypes +#define FfxBoolean bool + +#if FFX_HLSL_SM>=62 + +/// @defgroup HLSL62Types HLSL 6.2 And Above Types +/// HLSL 6.2 and above type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +/// A typedef for a floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t2 FfxFloat32x2; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t3 FfxFloat32x3; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t4 FfxFloat32x4; + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +typedef float4x4 FfxFloat32x4x4; +typedef float3x3 FfxFloat32x3x3; +typedef float2x2 FfxFloat32x2x2; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t FfxUInt32; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t2 FfxUInt32x2; + +/// A typedef for a 3-dimensional 32bit unsigned integer. 
+/// +/// @ingroup HLSL62Types +typedef uint32_t3 FfxUInt32x3; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t4 FfxUInt32x4; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t FfxInt32; + +/// A typedef for a 2-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t2 FfxInt32x2; + +/// A typedef for a 3-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t3 FfxInt32x3; + +/// A typedef for a 4-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t4 FfxInt32x4; + +#else // #if FFX_HLSL_SM>=62 + +/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types +/// HLSL 6.1 and below type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +#define FfxFloat32 float +#define FfxFloat32x2 float2 +#define FfxFloat32x3 float3 +#define FfxFloat32x4 float4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x3 float3x3 +#define FfxFloat32x2x2 float2x2 + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +#define FfxUInt32 uint +#define FfxUInt32x2 uint2 +#define FfxUInt32x3 uint3 +#define FfxUInt32x4 uint4 + +#define FfxInt32 int +#define FfxInt32x2 int2 +#define FfxInt32x3 int3 +#define FfxInt32x4 int4 + +#endif // #if FFX_HLSL_SM>=62 + +#if FFX_HALF + +#if FFX_HLSL_SM >= 62 + +typedef float16_t FfxFloat16; +typedef float16_t2 FfxFloat16x2; +typedef float16_t3 FfxFloat16x3; +typedef float16_t4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef uint16_t FfxUInt16; +typedef uint16_t2 FfxUInt16x2; +typedef uint16_t3 FfxUInt16x3; +typedef uint16_t4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. 
+/// +/// @ingroup HLSLTypes +typedef int16_t FfxInt16; +typedef int16_t2 FfxInt16x2; +typedef int16_t3 FfxInt16x3; +typedef int16_t4 FfxInt16x4; +#else // #if FFX_HLSL_SM>=62 +typedef min16float FfxFloat16; +typedef min16float2 FfxFloat16x2; +typedef min16float3 FfxFloat16x3; +typedef min16float4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16uint FfxUInt16; +typedef min16uint2 FfxUInt16x2; +typedef min16uint3 FfxUInt16x3; +typedef min16uint4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16int FfxInt16; +typedef min16int2 FfxInt16x2; +typedef min16int3 FfxInt16x3; +typedef min16int4 FfxInt16x4; +#endif // #if FFX_HLSL_SM>=62 + +#endif // FFX_HALF + +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#define FfxFloat32Mat4 mat4 +#define FfxFloat32Mat3 mat3 + +/// A typedef for a boolean value. +/// +/// @ingroup GLSLTypes +#define FfxBoolean bool +#define FfxFloat32 float +#define FfxFloat32x2 vec2 +#define FfxFloat32x3 vec3 +#define FfxFloat32x4 vec4 +#define FfxUInt32 uint +#define FfxUInt32x2 uvec2 +#define FfxUInt32x3 uvec3 +#define FfxUInt32x4 uvec4 +#define FfxInt32 int +#define FfxInt32x2 ivec2 +#define FfxInt32x3 ivec3 +#define FfxInt32x4 ivec4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. 
+#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x3 mat3 +#define FfxFloat32x2x2 mat2 + +#if FFX_HALF +#define FfxFloat16 float16_t +#define FfxFloat16x2 f16vec2 +#define FfxFloat16x3 f16vec3 +#define FfxFloat16x4 f16vec4 +#define FfxUInt16 uint16_t +#define FfxUInt16x2 u16vec2 +#define FfxUInt16x3 u16vec3 +#define FfxUInt16x4 u16vec4 +#define FfxInt16 int16_t +#define FfxInt16x2 i16vec2 +#define FfxInt16x3 i16vec3 +#define FfxInt16x4 i16vec4 +#endif // FFX_HALF +#endif // #if defined(FFX_GLSL) + +// Global toggles: +// #define FFX_HALF (1) +// #define FFX_HLSL_SM (62) + +#if FFX_HALF + +#if FFX_HLSL_SM >= 62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#else //FFX_HLSL_SM>=62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType ); +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); + +#endif //FFX_HLSL_SM>=62 + +#else //FFX_HALF + +/* +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define 
FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; +*/ + + +#endif //FFX_HALF + +#if defined(FFX_GPU) +// Common typedefs: +#if defined(FFX_HLSL) + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 float2 +#define FFX_MIN16_F3 float3 +#define FFX_MIN16_F4 float4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 int2 +#define FFX_MIN16_I3 int3 +#define FFX_MIN16_I4 int4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uint2 +#define FFX_MIN16_U3 uint3 +#define FFX_MIN16_U4 uint4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 float2 +#define FFX_16BIT_F3 float3 +#define FFX_16BIT_F4 float4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 int2 +#define FFX_16BIT_I3 int3 +#define FFX_16BIT_I4 int4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uint2 +#define FFX_16BIT_U3 uint3 +#define FFX_16BIT_U4 uint4 + +/* + +FFX_MIN16_SCALAR( FFX_MIN16_F , float ); +FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_I, int ); +FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_U, uint ); +FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 ); + +FFX_16BIT_SCALAR( FFX_F16_t , float ); +FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 ); +FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 ); +FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 ); + +FFX_16BIT_SCALAR( FFX_I16_t, int ); +FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 ); 
+FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 ); +FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 ); + +FFX_16BIT_SCALAR( FFX_U16_t, uint ); +FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 ); +FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 ); +FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 ); +*/ + +/* +#define TYPEDEF_MIN16_TYPES(Prefix) \ +typedef FFX_MIN16_F Prefix##_F; \ +typedef FFX_MIN16_F2 Prefix##_F2; \ +typedef FFX_MIN16_F3 Prefix##_F3; \ +typedef FFX_MIN16_F4 Prefix##_F4; \ +typedef FFX_MIN16_I Prefix##_I; \ +typedef FFX_MIN16_I2 Prefix##_I2; \ +typedef FFX_MIN16_I3 Prefix##_I3; \ +typedef FFX_MIN16_I4 Prefix##_I4; \ +typedef FFX_MIN16_U Prefix##_U; \ +typedef FFX_MIN16_U2 Prefix##_U2; \ +typedef FFX_MIN16_U3 Prefix##_U3; \ +typedef FFX_MIN16_U4 Prefix##_U4; + +#define TYPEDEF_16BIT_TYPES(Prefix) \ +typedef FFX_16BIT_F Prefix##_F; \ +typedef FFX_16BIT_F2 Prefix##_F2; \ +typedef FFX_16BIT_F3 Prefix##_F3; \ +typedef FFX_16BIT_F4 Prefix##_F4; \ +typedef FFX_16BIT_I Prefix##_I; \ +typedef FFX_16BIT_I2 Prefix##_I2; \ +typedef FFX_16BIT_I3 Prefix##_I3; \ +typedef FFX_16BIT_I4 Prefix##_I4; \ +typedef FFX_16BIT_U Prefix##_U; \ +typedef FFX_16BIT_U2 Prefix##_U2; \ +typedef FFX_16BIT_U3 Prefix##_U3; \ +typedef FFX_16BIT_U4 Prefix##_U4; + +#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ +typedef FfxFloat32 Prefix##_F; \ +typedef FfxFloat32x2 Prefix##_F2; \ +typedef FfxFloat32x3 Prefix##_F3; \ +typedef FfxFloat32x4 Prefix##_F4; \ +typedef FfxInt32 Prefix##_I; \ +typedef FfxInt32x2 Prefix##_I2; \ +typedef FfxInt32x3 Prefix##_I3; \ +typedef FfxInt32x4 Prefix##_I4; \ +typedef FfxUInt32 Prefix##_U; \ +typedef FfxUInt32x2 Prefix##_U2; \ +typedef FfxUInt32x3 Prefix##_U3; \ +typedef FfxUInt32x4 Prefix##_U4; +*/ + +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#if FFX_HALF + +#define FFX_MIN16_F float16_t +#define FFX_MIN16_F2 f16vec2 +#define FFX_MIN16_F3 f16vec3 +#define FFX_MIN16_F4 f16vec4 + +#define FFX_MIN16_I int16_t +#define FFX_MIN16_I2 i16vec2 +#define FFX_MIN16_I3 i16vec3 +#define FFX_MIN16_I4 
i16vec4 + +#define FFX_MIN16_U uint16_t +#define FFX_MIN16_U2 u16vec2 +#define FFX_MIN16_U3 u16vec3 +#define FFX_MIN16_U4 u16vec4 + +#define FFX_16BIT_F float16_t +#define FFX_16BIT_F2 f16vec2 +#define FFX_16BIT_F3 f16vec3 +#define FFX_16BIT_F4 f16vec4 + +#define FFX_16BIT_I int16_t +#define FFX_16BIT_I2 i16vec2 +#define FFX_16BIT_I3 i16vec3 +#define FFX_16BIT_I4 i16vec4 + +#define FFX_16BIT_U uint16_t +#define FFX_16BIT_U2 u16vec2 +#define FFX_16BIT_U3 u16vec3 +#define FFX_16BIT_U4 u16vec4 + +#else // FFX_HALF + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 vec2 +#define FFX_MIN16_F3 vec3 +#define FFX_MIN16_F4 vec4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 ivec2 +#define FFX_MIN16_I3 ivec3 +#define FFX_MIN16_I4 ivec4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uvec2 +#define FFX_MIN16_U3 uvec3 +#define FFX_MIN16_U4 uvec4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 vec2 +#define FFX_16BIT_F3 vec3 +#define FFX_16BIT_F4 vec4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 ivec2 +#define FFX_16BIT_I3 ivec3 +#define FFX_16BIT_I4 ivec4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uvec2 +#define FFX_16BIT_U3 uvec3 +#define FFX_16BIT_U4 uvec4 + +#endif // FFX_HALF + +#endif // #if defined(FFX_GLSL) + +#endif // #if defined(FFX_GPU) +#endif // #ifndef FFX_COMMON_TYPES_H diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core.h new file mode 100644 index 0000000000..02f6b3f624 --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core.h @@ -0,0 +1,80 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ + +/// @defgroup FfxGPU GPU +/// The FidelityFX SDK GPU References +/// +/// @ingroup ffxSDK + +/// @defgroup FfxHLSL HLSL References +/// FidelityFX SDK HLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGLSL GLSL References +/// FidelityFX SDK GLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGPUEffects FidelityFX GPU References +/// FidelityFX Effect GPU Reference Documentation +/// +/// @ingroup FfxGPU + +/// @defgroup GPUCore GPU Core +/// GPU defines and functions +/// +/// @ingroup FfxGPU + +#if !defined(FFX_CORE_H) +#define FFX_CORE_H + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler + +#include "ffx_common_types.h" + +#if defined(FFX_CPU) + #include "ffx_core_cpu.h" +#endif // #if defined(FFX_CPU) + +#if defined(FFX_GLSL) && defined(FFX_GPU) + #include "ffx_core_glsl.h" +#endif // #if defined(FFX_GLSL) && defined(FFX_GPU) + +#if defined(FFX_HLSL) && defined(FFX_GPU) + #include "ffx_core_hlsl.h" +#endif // #if defined(FFX_HLSL) && defined(FFX_GPU) + +#if defined(FFX_GPU) + #include "ffx_core_gpu_common.h" + #include "ffx_core_gpu_common_half.h" + #include "ffx_core_portability.h" +#endif // #if defined(FFX_GPU) + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#endif // #if !defined(FFX_CORE_H) diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_cpu.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_cpu.h new file mode 100644 index 0000000000..377ada7b4d --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_cpu.h @@ -0,0 +1,328 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. +/// +/// @ingroup CPU +#define FFX_TRUE (1) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup CPU +#define FFX_FALSE (0) + +#if !defined(FFX_STATIC) +/// A define to abstract declaration of static variables and functions. +/// +/// @ingroup CPU +#define FFX_STATIC static +#endif // #if !defined(FFX_STATIC) + +/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. +/// +/// @param [in] x A 32bit floating value. +/// +/// @returns +/// An unsigned 32bit integer value containing the bit pattern of x. 
+/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = x; + return bits.u; +} + +FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return a[0] * b[0] + a[1] * b[1]; +} + +FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} + +FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; +} + +/// Compute the linear interpolation between two values. +/// +/// Evaluated with plain CPU arithmetic (no shader intrinsic is called). Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return y * t + (-x * t + x); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] a The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of a. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a) +{ + return 1.0f / a; +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates a - floor(a). +/// +/// @param [in] a The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of a. 
+/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a) +{ + return a - floor(a); +} + +/// Compute the reciprocal square root of a value. +/// +/// @param [in] a The value to compute the reciprocal square root for. +/// +/// @returns +/// The reciprocal square root value of a. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a) +{ + return ffxReciprocal(ffxSqrt(a)); +} + +FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return x > y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return x > y ? x : y; +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] a The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of a. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a) +{ + return ffxMin(1.0f, ffxMax(0.0f, a)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] + b; + d[1] = a[1] + b; + d[2] = a[2] + b; + return; +} + +FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + return; +} + +FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d[0] = a[0] * b[0]; + d[1] = a[1] * b[1]; + d[2] = a[2] * b[2]; + return; +} + +FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] * b; + d[1] = a[1] * b; + d[2] = a[2] * b; + return; +} + +FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = ffxReciprocal(a[0]); + d[1] = ffxReciprocal(a[1]); + d[2] = 
ffxReciprocal(a[2]); + return; +} + +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. +/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) +{ + static FfxUInt16 base[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, + 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, + 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, + 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, + 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff + }; + + static FfxUInt8 shift[512] = { + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + }; + + union + { 
+ FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = f; + FfxUInt32 u = bits.u; + FfxUInt32 i = u >> 23; + return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); +} + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of a into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] a A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a) +{ + return f32tof16(a[0]) + (f32tof16(a[1]) << 16); +} diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_glsl.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_glsl.h new file mode 100644 index 0000000000..d23902b9ac --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_glsl.h @@ -0,0 +1,1790 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +/// @defgroup GLSLCore GLSL Core +/// GLSL core defines and functions +/// +/// @ingroup FfxGLSL + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_GROUPSHARED shared + +/// A define for abstracting compute memory barriers between shading languages. +/// NOTE(review): expands to two statements (groupMemoryBarrier(); barrier()), so do not use it as the unbraced body of an if/else/loop. +/// @ingroup GLSLCore +#define FFX_GROUP_MEMORY_BARRIER() groupMemoryBarrier(); barrier() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_ADD(x, y) atomicAdd(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. Expands to nothing in GLSL. +/// +/// @ingroup GLSLCore +#define FFX_STATIC + +/// A define for abstracting loop unrolling between shading languages. Expands to nothing in GLSL. +/// +/// @ingroup GLSLCore +#define FFX_UNROLL + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_GREATER_THAN(x, y) greaterThan(x, y) + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y) + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_LESS_THAN(x, y) lessThan(x, y) + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y) + +/// A define for abstracting an 'equal' comparison operator between two types. 
+/// +/// @ingroup GLSLCore +#define FFX_EQUAL(x, y) equal(x, y) + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_NOT_EQUAL(x, y) notEqual(x, y) + +/// A define for abstracting matrix multiply operations between shading languages. +/// NOTE(review): the arguments are not parenthesized inside the expansion; parenthesize compound expressions at the call site. +/// @ingroup GLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) (a * b) + +/// A define for abstracting vector transformations between shading languages. +/// NOTE(review): the arguments are not parenthesized inside the expansion; parenthesize compound expressions at the call site. +/// @ingroup GLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) (a * b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_MODULO(a, b) (mod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x)) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x)) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x)) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x)) + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x)) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. 
+/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x)) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x)) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x)) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x)) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_FLOAT16(x) FFX_MIN16_F(x) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x)) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x)) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x)) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_UINT16(x) FFX_MIN16_U(x) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x)) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. 
+/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x)) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x)) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_INT16(x) FFX_MIN16_I(x) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x)) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x)) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x)) + + #extension GL_EXT_shader_explicit_arithmetic_types : require +#if !defined(FFX_SKIP_EXT) +#if FFX_HALF + #extension GL_EXT_shader_16bit_storage : require +#endif // FFX_HALF + +#if defined(FFX_LONG) + #extension GL_ARB_gpu_shader_int64 : require + #extension GL_NV_shader_atomic_int64 : require +#endif // #if defined(FFX_LONG) + +#if defined(FFX_WAVE) + #extension GL_KHR_shader_subgroup_arithmetic : require + #extension GL_KHR_shader_subgroup_ballot : require + #extension GL_KHR_shader_subgroup_quad : require + #extension GL_KHR_shader_subgroup_shuffle : require +#endif // #if defined(FFX_WAVE) +#endif // #if !defined(FFX_SKIP_EXT) + +// Forward declarations +FfxFloat32 ffxSqrt(FfxFloat32 x); +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x); +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x); +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x); + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. 
+/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return uintBitsToFloat(x); +} + +/// Interprets the bit pattern of each component of x as a floating-point number. +/// +/// @param [in] x The input vector. +/// +/// @returns +/// The input reinterpreted component-wise as floating-point numbers. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return uintBitsToFloat(x); +} + +/// Interprets the bit pattern of each component of x as a floating-point number. +/// +/// @param [in] x The input vector. +/// +/// @returns +/// The input reinterpreted component-wise as floating-point numbers. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return uintBitsToFloat(x); +} + +/// Interprets the bit pattern of each component of x as a floating-point number. +/// +/// @param [in] x The input vector. +/// +/// @returns +/// The input reinterpreted component-wise as floating-point numbers. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return uintBitsToFloat(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup GLSLCore +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return floatBitsToUint(x); +} + +/// Interprets the bit pattern of each component of x as an unsigned integer. +/// +/// @param [in] x The input vector. +/// +/// @returns +/// The input reinterpreted component-wise as unsigned integers. +/// +/// @ingroup GLSLCore +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return floatBitsToUint(x); +} + +/// Interprets the bit pattern of each component of x as an unsigned integer. +/// +/// @param [in] x The input vector. +/// +/// @returns +/// The input reinterpreted component-wise as unsigned integers. +/// +/// @ingroup GLSLCore +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return floatBitsToUint(x); +} + +/// Interprets the bit pattern of each component of x as an unsigned integer. +/// +/// @param [in] x The input vector. 
+/// +/// @returns +/// The input reinterpreted component-wise as unsigned integers. +/// +/// @ingroup GLSLCore +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return floatBitsToUint(x); +} + +/// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent. +/// +/// @param [in] value The value to convert. +/// +/// @returns +/// The result of packHalf2x16 on (value, 0.0): the 16bit encoding of value packed together with that of 0.0. +/// +/// @ingroup GLSLCore +FfxUInt32 f32tof16(FfxFloat32 value) +{ + return packHalf2x16(FfxFloat32x2(value, 0.0)); +} + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. +/// +/// @ingroup GLSLCore +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. 
+/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup GLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup GLSLCore +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup GLSLCore +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup GLSLCore +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component. 
+/// +/// @ingroup GLSLCore +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +/// Extract a bit field from src via the GLSL bitfieldExtract built-in, with off and bits converted to FfxInt32. +/// +/// @ingroup GLSLCore +FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +{ + return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits)); +} + +/// Merge ins into src under mask: bits set in mask are taken from ins, all other bits from src. +/// +/// @ingroup GLSLCore +FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +{ + return (ins & mask) | (src & (~mask)); +} + +// Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate. +/// +/// +/// @ingroup GLSLCore +FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +{ + return bitfieldInsert(src, ins, 0, FfxInt32(bits)); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. 
+/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. 
+/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return mix(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the GLSL mix intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return mix(x, y, t); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. 
+/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN or RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. 
+/// +/// @ingroup GLSLCore +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on +/// GCN/RDNA hardware. 
+/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. 
+/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + + +/// Compute the minimum of three values. 
+/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN and RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. 
+/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. 
+/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup GLSLCore +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the reciprocal of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rcp can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup GLSLCore +FfxFloat32 rcp(FfxFloat32 x) +{ + return FfxFloat32(1.0) / x; +} + +/// Compute the reciprocal of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rcp can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup GLSLCore +FfxFloat32x2 rcp(FfxFloat32x2 x) +{ + return ffxBroadcast2(1.0) / x; +} + +/// Compute the reciprocal of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rcp can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup GLSLCore +FfxFloat32x3 rcp(FfxFloat32x3 x) +{ + return ffxBroadcast3(1.0) / x; +} + +/// Compute the reciprocal of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rcp can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. 
+/// +/// @ingroup GLSLCore +FfxFloat32x4 rcp(FfxFloat32x4 x) +{ + return ffxBroadcast4(1.0) / x; +} + +/// Compute the reciprocal square root of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup GLSLCore +FfxFloat32 rsqrt(FfxFloat32 x) +{ + return FfxFloat32(1.0) / ffxSqrt(x); +} + +/// Compute the reciprocal square root of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup GLSLCore +FfxFloat32x2 rsqrt(FfxFloat32x2 x) +{ + return ffxBroadcast2(1.0) / ffxSqrt(x); +} + +/// Compute the reciprocal square root of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup GLSLCore +FfxFloat32x3 rsqrt(FfxFloat32x3 x) +{ + return ffxBroadcast3(1.0) / ffxSqrt(x); +} + +/// Compute the reciprocal square root of a value. +/// +/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup GLSLCore +FfxFloat32x4 rsqrt(FfxFloat32x4 x) +{ + return ffxBroadcast4(1.0) / ffxSqrt(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. 
+/// +/// @ingroup GLSLCore +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0)); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0)); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0)); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0)); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return fract(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. 
+/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return fract(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return fract(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return fract(x); +} + +FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + return packHalf2x16(v); +} + +FfxFloat32x2 ffxUnpackF32(FfxUInt32 u){ + return unpackHalf2x16(u); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. 
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){
+    // s2 is 1.0 for any non-zero v and 0.0 for v == 0.
+    FfxFloat32 s = sign(v);
+    FfxFloat32 s2 = s*s;
+    // (s2 - 1.0) is exactly 0.0 or -1.0, so adding it to v last keeps the denominator exactly v
+    // for non-zero v and exactly -1.0 for v == 0 (giving a zero quotient). Evaluating
+    // (v + s2) - 1.0 instead rounds away small |v| and can produce a zero denominator.
+    return s2/(v + (s2 - 1.0));
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){
+    // Per component: s2 is 1.0 for non-zero input and 0.0 for zero input.
+    FfxFloat32x2 s = sign(v);
+    FfxFloat32x2 s2 = s*s;
+    // Form (s2 - 1) first so the denominator is exactly v (or exactly -1 where v == 0);
+    // (v + s2) - 1 would round away small |v|.
+    return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0)));
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){
+    // Per component: s2 is 1.0 for non-zero input and 0.0 for zero input.
+    FfxFloat32x3 s = sign(v);
+    FfxFloat32x3 s2 = s*s;
+    // Form (s2 - 1) first so the denominator is exactly v (or exactly -1 where v == 0);
+    // (v + s2) - 1 would round away small |v|.
+    return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0)));
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){
+    // Per component: s2 is 1.0 for non-zero input and 0.0 for zero input.
+    FfxFloat32x4 s = sign(v);
+    FfxFloat32x4 s2 = s*s;
+    // Form (s2 - 1) first so the denominator is exactly v (or exactly -1 where v == 0);
+    // (v + s2) - 1 would round away small |v|.
+    return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)));
+}
+#if FFX_HALF
+// Expands to unpackFloat2x16 applied to x converted to FfxUInt32.
+#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))
+
+/// Pack a two-component half vector into a single FfxUInt32 by calling packHalf2x16.
+FfxUInt32 ffxPackF16(FfxFloat16x2 v){
+    return packHalf2x16(v);
+}
+
+/// Unpack an FfxUInt32 with unpackHalf2x16 and convert the result to FfxFloat16x2.
+FfxFloat16x2 ffxUnpackF16(FfxUInt32 u){
+    return FfxFloat16x2(unpackHalf2x16(u));
+}
+
+/// Build an FfxFloat16x4 from two packed values: x.x supplies the first pair of components
+/// and x.y the second pair, each expanded with unpackFloat2x16.
+FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
+{
+    return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y));
+}
+// Conversions from packed 32-bit values to 16-bit vectors.
+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
+#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x))
+#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x)))
+//------------------------------------------------------------------------------------------------------------------------------
+// Conversions from 16-bit vectors to packed 32-bit values.
+#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x))
+/// Pack an FfxFloat16x4 into two 32-bit values: x.xy goes into the first component and
+/// x.zw into the second, each packed with packFloat2x16.
+FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x)
+{
+    return FfxUInt32x2(packFloat2x16(x.xy), packFloat2x16(x.zw));
+}
+#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x))
+#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x))
+#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x)))
+//==============================================================================================================================
+// Half-to-uint16 conversions via halfBitsToUint16.
+#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x))
+#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x))
+#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x))
+#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+// Uint16-to-half conversions via uint16BitsToHalf.
+#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x))
+#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x))
+#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x))
+#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x))
+//==============================================================================================================================
+/// Return a unchanged; the one-component member of the half broadcast family.
+FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a)
+{
+    return FfxFloat16(a);
+}
+/// Replicate a into both components of an FfxFloat16x2.
+FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a)
+{
+    return FfxFloat16x2(a, a);
+}
+/// Replicate a into all three components of an FfxFloat16x3.
+FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a)
+{
+    return FfxFloat16x3(a, a, a);
+}
+/// Replicate a into all four components of an FfxFloat16x4.
+FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a)
+{
+    return FfxFloat16x4(a, a, a, a);
+}
+// Macro forms: convert a to FfxFloat16, then construct the vector from that single value.
+#define FFX_BROADCAST_FLOAT16(a) FfxFloat16(a)
+#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a))
+#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a))
+#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a))
+//------------------------------------------------------------------------------------------------------------------------------
+/// Return a unchanged; the one-component member of the int16 broadcast family.
+FfxInt16 ffxBroadcastInt16(FfxInt16 a)
+{
+    return FfxInt16(a);
+}
+/// Replicate a into both components of an FfxInt16x2.
+FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a)
+{
+    return FfxInt16x2(a, a);
+}
+/// Replicate a into all three components of an FfxInt16x3.
+FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a)
+{
+    return FfxInt16x3(a, a, a);
+}
+/// Replicate a into all four components of an FfxInt16x4.
+FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a)
+{
+    return FfxInt16x4(a, a, a, a);
+}
+// Macro forms: convert a to FfxInt16, then construct the vector from that single value.
+#define FFX_BROADCAST_INT16(a) FfxInt16(a)
+#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a))
+#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a))
+#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a))
+//------------------------------------------------------------------------------------------------------------------------------
+/// Return a unchanged; the one-component member of the uint16 broadcast family.
+FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a)
+{
+    return FfxUInt16(a);
+}
+/// Replicate a into both components of an FfxUInt16x2.
+FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a)
+{
+    return FfxUInt16x2(a, a);
+}
+/// Replicate a into all three components of an FfxUInt16x3.
+FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a)
+{
+    return FfxUInt16x3(a, a, a);
+}
+/// Replicate a into all four components of an FfxUInt16x4.
+FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a)
+{
+    return FfxUInt16x4(a, a, a, a);
+}
+// Macro forms: convert a to FfxUInt16, then construct the vector from that single value.
+#define FFX_BROADCAST_UINT16(a) FfxUInt16(a)
+#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a))
+#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a))
+#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a))
+//==============================================================================================================================
+/// Convert a to FfxInt16, take abs of that value, and convert the result back to FfxUInt16.
+FfxUInt16 ffxAbsHalf(FfxUInt16 a)
+{
+    return FfxUInt16(abs(FfxInt16(a)));
+}
+/// Two-component form of ffxAbsHalf (conversion to FfxInt16x2, abs, conversion back).
+FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a)
+{
+    return FfxUInt16x2(abs(FfxInt16x2(a)));
+}
+/// Three-component form of ffxAbsHalf (conversion to FfxInt16x3, abs, conversion back).
+FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a)
+{
+    return FfxUInt16x3(abs(FfxInt16x3(a)));
+}
+/// Four-component form of ffxAbsHalf (conversion to FfxInt16x4, abs, conversion back).
+FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a)
+{
+    return FfxUInt16x4(abs(FfxInt16x4(a)));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+/// Clamp x to the range [n, m] by calling the clamp intrinsic.
+FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m)
+{
+    return clamp(x, n, m);
+}
+/// Two-component form of ffxClampHalf.
+FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m)
+{
+    return clamp(x, n, m);
+}
+/// Three-component form of ffxClampHalf.
+FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m)
+{
+    return clamp(x, n, m);
+}
+/// Clamp each component of x to the range [n, m] by calling the clamp intrinsic.
+FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m)
+{
+    return clamp(x, n, m);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+/// Half-precision overload of ffxFract: returns fract(x).
+FfxFloat16 ffxFract(FfxFloat16 x)
+{
+    return fract(x);
+}
+/// Two-component half-precision overload of ffxFract: returns fract(x).
+FfxFloat16x2 ffxFract(FfxFloat16x2 x)
+{
+    return fract(x);
+}
+/// Three-component half-precision overload of ffxFract: returns fract(x).
+FfxFloat16x3 ffxFract(FfxFloat16x3 x)
+{
+    return fract(x);
+}
+/// Four-component half-precision overload of ffxFract: returns fract(x).
+FfxFloat16x4 ffxFract(FfxFloat16x4 x)
+{
+    return fract(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+/// Half-precision overload of ffxLerp: returns mix(x, y, a).
+FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+/// Two-component half-precision ffxLerp with a single scalar weight a.
+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+/// Two-component half-precision ffxLerp with a per-component weight a.
+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a)
+{
+    return mix(x, y, a);
+}
+/// Three-component half-precision ffxLerp with a per-component weight a.
+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a)
+{
+    return mix(x, y, a);
+}
+/// Three-component half-precision ffxLerp with a single scalar weight a.
+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+/// Four-component half-precision ffxLerp with a single scalar weight a.
+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+/// Four-component half-precision ffxLerp with a per-component weight a.
+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
+{
+    return mix(x, y, a);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// No packed version of ffxMax3.
+FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// No packed version of ffxMin3. +FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxReciprocalHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) / x; +} +FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) / x; +} +FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x) +{ 
+ return FFX_BROADCAST_FLOAT16X3(1.0) / x; +} +FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) / x; +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) / sqrt(x); +} +FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) / sqrt(x); +} +FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) / sqrt(x); +} +FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) / sqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxSaturate(FfxFloat16 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16(0.0), FFX_BROADCAST_FLOAT16(1.0)); +} +FfxFloat16x2 ffxSaturate(FfxFloat16x2 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X2(0.0), FFX_BROADCAST_FLOAT16X2(1.0)); +} +FfxFloat16x3 ffxSaturate(FfxFloat16x3 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X3(0.0), FFX_BROADCAST_FLOAT16X3(1.0)); +} +FfxFloat16x4 ffxSaturate(FfxFloat16x4 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X4(0.0), FFX_BROADCAST_FLOAT16X4(1.0)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b) +{ + return FfxUInt16(FfxInt16(a) >> FfxInt16(b)); +} +FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b) +{ + return FfxUInt16x2(FfxInt16x2(a) >> FfxInt16x2(b)); +} +FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b) +{ + return FfxUInt16x3(FfxInt16x3(a) >> FfxInt16x3(b)); +} +FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b) +{ + return FfxUInt16x4(FfxInt16x4(a) >> FfxInt16x4(b)); +} +#endif // FFX_HALF + +#if 
defined(FFX_WAVE) +// AWaveXor*: forward 'v' and the mask 'x' to subgroupShuffleXor, where 'x' must be a compile time literal. +FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxBoolean AWaveIsFirstLane() +{ + return subgroupElect(); +} +FfxUInt32 AWaveLaneIndex() +{ + return gl_SubgroupInvocationID; +} +FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x ) +{ + return subgroupShuffle(v, x); +} +FfxUInt32 AWavePrefixCountBits(FfxBoolean v) +{ + return subgroupBallotExclusiveBitCount(subgroupBallot(v)); +} +FfxUInt32 AWaveActiveCountBits(FfxBoolean v) +{ + return subgroupBallotBitCount(subgroupBallot(v)); +} +FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v) +{ + return subgroupBroadcastFirst(v); +} +FfxUInt32 WaveOr(FfxUInt32 a) +{ + return subgroupOr(a); +} +FfxFloat32 WaveMin(FfxFloat32 a) +{ + return subgroupMin(a); +} +FfxFloat32 WaveMax(FfxFloat32 a) +{ + return subgroupMax(a); +} +FfxUInt32 WaveLaneCount() +{ + return gl_SubgroupSize; +} +#if defined(FFX_WAVE_ALL_TRUE) +FfxBoolean WaveAllTrue(FfxBoolean v) +{ + return subgroupAll(v); +} +#endif +// QuadReadX / QuadReadY: return subgroupQuadSwapHorizontal(v) / subgroupQuadSwapVertical(v) respectively. +FfxFloat32 QuadReadX(FfxFloat32 v) +{ + return subgroupQuadSwapHorizontal(v); +} +FfxFloat32x2 QuadReadX(FfxFloat32x2 v) +{ + return subgroupQuadSwapHorizontal(v); +} +FfxFloat32 QuadReadY(FfxFloat32 v) +{ + return subgroupQuadSwapVertical(v); +} +FfxFloat32x2 QuadReadY(FfxFloat32x2 v) +{ + 
return subgroupQuadSwapVertical(v); +} + +//------------------------------------------------------------------------------------------------------------------------------ +#if FFX_HALF +// ffxWaveXor* (16-bit): each overload converts the 16-bit vector with an FFX_*_TO_UINT32* macro, applies subgroupShuffleXor, and converts the result back. +FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_FLOAT16X2(subgroupShuffleXor(FFX_FLOAT16X2_TO_UINT32(v), x)); +} +FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_FLOAT16X4(subgroupShuffleXor(FFX_FLOAT16X4_TO_UINT32X2(v), x)); +} +FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_UINT16X2(subgroupShuffleXor(FFX_UINT16X2_TO_UINT32(v), x)); +} +FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_UINT16X4(subgroupShuffleXor(FFX_UINT16X4_TO_UINT32X2(v), x)); +} +#endif // FFX_HALF +#endif // #if defined(FFX_WAVE) diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common.h new file mode 100644 index 0000000000..24adcd18e4 --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common.h @@ -0,0 +1,2784 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +/// A define for a true value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFX_TRUE (true) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFX_FALSE (false) + +/// A define value for positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) + +/// A define value for negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) + +/// A define value for PI, truncated to six significant digits. +/// +/// @ingroup GPUCore +#define FFX_PI (3.14159) + +FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The reciprocal, 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxReciprocal(FfxFloat32 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The reciprocal, 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The reciprocal, 1 / value. 
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The reciprocal, 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) +{ + return rcp(value); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. 
+/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. 
+/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. 
+/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. 
+/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. 
+/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsSigned(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. 
+/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test for having the sign set. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. 
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Convert a 32bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] value The bit pattern of the floating point value to make sortable, passed as an unsigned integer. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) +{ + return value ^ ((AShrSU1(value, FfxUInt32(31u))) | FfxUInt32(0x80000000u)); +} + +/// Convert a sortable integer to a 32bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). 
+/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] value The sortable integer value to convert back. +/// +/// @returns +/// The floating point bit pattern, returned as an unsigned integer. +/// +/// @ingroup GPUCore +FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) +{ + return value ^ ((~AShrSU1(value, FfxUInt32(31u))) | FfxUInt32(0x80000000u)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the square root of. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1u)) + FfxUInt32(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) +{ + FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + FfxFloat32(2.0)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal square root of. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x5f347d74u) - (ffxAsUInt32(value) >> FfxUInt32(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the square root of. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. 
+/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) +{ + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast2(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal square root of. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the square root of. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal of. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) +{ + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast3(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to approximate the reciprocal square root of. 
+/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) +{ + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast4(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); +} + +/// Calculate the dot product of the two-component vectors 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return dot(a, b); +} + +/// Calculate the dot product of the three-component vectors 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. 
+/// +/// @ingroup GPUCore +FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return dot(a, b); +} + +/// Calculate the dot product of the four-component vectors 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return dot(a, b); +} + + +/// Compute an approximate conversion from PQ to Gamma2 space (4th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space (8th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. 
+/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); + FfxFloat32 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. 
+/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); + FfxFloat32 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space (4th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. 
+/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space (8th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. 
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); + FfxFloat32x2 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. 
+/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); + FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space (4th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space (8th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. 
+/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); + FfxFloat32x3 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. 
+/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); + FfxFloat32x3 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). 
The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space (4th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space (8th power of the input). +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space, refining the fast estimate b with one Newton-Raphson step on b^4 = a. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); + FfxFloat32x4 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. 
+/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space, refining the fast estimate with one Newton-Raphson step. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); + FfxFloat32x4 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +// An approximation of sine, computed as value * abs(value) - value. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate sine for. +// +// @returns +// The approximate sine of value. 
+FfxFloat32 ffxParabolicSin(FfxFloat32 value) +{ + return value * abs(value) - value; +} + +// An approximation of sine (two-component version), computed as x * abs(x) - x. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] x The value to calculate approximate sine for. +// +// @returns +// The approximate sine of x. +FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x) +{ + return x * abs(x) - x; +} + +// An approximation of cosine, computed by remapping the input and calling ffxParabolicSin. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] x The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of x. +FfxFloat32 ffxParabolicCos(FfxFloat32 x) +{ + x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + x = x * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(x); +} + +// An approximation of cosine (two-component version), computed by remapping the input and calling ffxParabolicSin. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] x The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of x. +FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x) +{ + x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); + x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); + return ffxParabolicSin(x); +} + +// An approximation of both sine and cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] x The value to calculate approximate sine and cosine for. +// +// @returns +// A FfxFloat32x2 containing the approximate sine of x in the first component and the approximate cosine of x in the second. 
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x) +{ + FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + y = y * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(FfxFloat32x2(x, y)); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation on a single value (exposed as the one-argument overload of ffxZeroOneAnd). +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. 
+/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) +{ + return x ^ FfxUInt32(1); +} + +/// Conditional free logic NOT operation on a single value (exposed as the one-argument overload of ffxZeroOneAnd). +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) +{ + return x ^ ffxBroadcast2(1u); +} + +/// Conditional free logic NOT operation on a single value (exposed as the one-argument overload of ffxZeroOneAnd). +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) +{ + return x ^ ffxBroadcast3(1u); +} + +/// Conditional free logic NOT operation on a single value (exposed as the one-argument overload of ffxZeroOneAnd). +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) +{ + return x ^ ffxBroadcast4(1u); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. 
+/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Conditional free logic NOT operation on a FfxFloat32 value, with the result converted to an unsigned integer. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation (1.0 - x) converted to FfxUInt32. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) +{ + return FfxUInt32(FfxFloat32(1.0) - x); +} + +/// Conditional free logic NOT operation on FfxFloat32x2 values, with the result converted to unsigned integers. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation (1.0 - x) converted to FfxUInt32x2. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) +{ + return FfxUInt32x2(ffxBroadcast2(1.0) - x); +} + +/// Conditional free logic NOT operation on FfxFloat32x3 values, with the result converted to unsigned integers. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation (1.0 - x) converted to FfxUInt32x3. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) +{ + return FfxUInt32x3(ffxBroadcast3(1.0) - x); +} + +/// Conditional free logic NOT operation on FfxFloat32x4 values, with the result converted to unsigned integers. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation (1.0 - x) converted to FfxUInt32x4. 
+/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) +{ + return FfxUInt32x4(ffxBroadcast4(1.0) - x); +} + +/// Conditional free logic AND operation using two values followed by an OR operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by an OR operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by an OR operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by an OR operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Conditional free logic NOT operation on FfxFloat32 values, computed as 1.0 - x. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation.
+/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) +{ + return FfxFloat32(1.0) - x; +} + +/// Conditional free logic NOT operation on FfxFloat32 values, computed as 1.0 - x. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) +{ + return ffxBroadcast2(1.0) - x; +} + +/// Conditional free logic NOT operation on FfxFloat32 values, computed as 1.0 - x. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) +{ + return ffxBroadcast3(1.0) - x; +} + +/// Conditional free logic NOT operation on FfxFloat32 values, computed as 1.0 - x. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) +{ + return ffxBroadcast4(1.0) - x; +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Choose between two FfxFloat32 values using a zero-or-one selector, computed as x * y + ((-x) * z + z). +/// +/// @param [in] x The selector: 1.0 selects y and 0.0 selects z. +/// @param [in] y The value to return when x is 1.0. +/// @param [in] z The value to return when x is 0.0. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + FfxFloat32 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values using a zero-or-one selector, computed as x * y + ((-x) * z + z). +/// +/// @param [in] x The selector: 1.0 selects y and 0.0 selects z. +/// @param [in] y The value to return when x is 1.0. +/// @param [in] z The value to return when x is 0.0. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + FfxFloat32x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values using a zero-or-one selector, computed as x * y + ((-x) * z + z). +/// +/// @param [in] x The selector: 1.0 selects y and 0.0 selects z. +/// @param [in] y The value to return when x is 1.0. +/// @param [in] z The value to return when x is 0.0.
+/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + FfxFloat32x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values using a zero-or-one selector, computed as x * y + ((-x) * z + z). +/// +/// @param [in] x The selector: 1.0 selects y and 0.0 selects z. +/// @param [in] y The value to return when x is 1.0. +/// @param [in] z The value to return when x is 0.0. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + FfxFloat32x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test.
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Convert a linear color to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The linear color to convert to Rec.709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); +} + +/// Convert a linear color to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The linear color to convert to Rec.709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); +} + +/// Convert a linear color to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.)
For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The linear color to convert to Rec.709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power) +{ + return pow(value, FfxFloat32(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast2(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast3(power)); +} + +/// Compute a PQ value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to PQ. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPQToLinear(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); +} + +/// Compute a PQ value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to PQ. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); +} + +/// Compute a PQ value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to PQ. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); +} + +/// Compute a SRGB value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to SRGB. +/// +/// @returns +/// A value in SRGB space.
+/// +/// @ingroup GPUCore +FfxFloat32 ffxSrgbToLinear(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); +} + +/// Compute a SRGB value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to SRGB. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); +} + +/// Compute a SRGB value from a linear value (despite the 'ToLinear' in its name). +/// +/// @param [in] value The linear value to convert to SRGB. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space.
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The gamma-space value to convert to linear. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +{ + return pow(color, FfxFloat32(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The gamma-space value to convert to linear. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast2(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// +/// @param [in] color The gamma-space value to convert to linear. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast3(power)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// This inverts the curve implemented by ffxPQToLinear, using reciprocal exponents and a saturated numerator. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// This inverts the curve implemented by ffxPQToLinear, using reciprocal exponents and a saturated numerator. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// This inverts the curve implemented by ffxPQToLinear, using reciprocal exponents and a saturated numerator. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space.
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses value / 12.92 when value is below 0.04045 / 12.92, and pow((value + 0.055) / 1.055, 2.4) otherwise. NOTE(review): the sRGB standard places this breakpoint at 0.04045 on the encoded value - confirm the division by 12.92 is intended. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses value / 12.92 when value is below 0.04045 / 12.92, and pow((value + 0.055) / 1.055, 2.4) otherwise. NOTE(review): the sRGB standard places this breakpoint at 0.04045 on the encoded value - confirm the division by 12.92 is intended. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses value / 12.92 when value is below 0.04045 / 12.92, and pow((value + 0.055) / 1.055, 2.4) otherwise. NOTE(review): the sRGB standard places this breakpoint at 0.04045 on the encoded value - confirm the division by 12.92 is intended. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space.
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The remapping of 64-wide lane indices to 8x8 is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common_half.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common_half.h new file mode 100644 index 0000000000..6ed2c8cacb --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_gpu_common_half.h @@ -0,0 +1,2979 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +#if FFX_HALF +#if FFX_HLSL_SM >= 62 +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u) +#else +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u) +#endif // #if FFX_HLSL_SM>=62 + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values.
+/// +/// @ingroup GPUCore +FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values.
+/// +/// @ingroup GPUCore +FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSqrt(FfxFloat16 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x.
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) +{ + return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) +{ + return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) +{ + return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s.
+/// +/// @ingroup GPUCore +FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) +{ + return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) +{ + return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) +{ + return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0.
+/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) +{ + return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) +{ + return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) +{ + return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) +{ + return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) +{ + return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) +{ + return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); +} + +/// Convert a 16bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a sortable integer to a 16bit floating point value.
+/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer value to make floating point. +/// +/// @returns +/// The floating point value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a pair of 16bit floating point values to a pair of sortable integers. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point values to make sortable. +/// +/// @returns +/// The sortable integer values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Convert a pair of sortable integers to a pair of 16bit floating point values. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer values to make floating point. +/// +/// @returns +/// The floating point values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. 
+/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y0 [Zero] X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) +{ + return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y1 [Zero] X1 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y2 [Zero] X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y3 [Zero] X3 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) +{ + return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. 
+/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X0 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X2 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. 
+/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X0 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X0 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X2 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. 
+/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 X2 Y0 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 Y0 X2 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) +{ + return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt16x2 value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte0Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x????
+/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3Y2Y1X2, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte1Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3Y2X2Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+/// +/// r=ffxPermuteUByte2Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3X2Y1Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// r=ffxPermuteUByte3Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesX2Y2Y1Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
+/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. 
+/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt16x2 value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + y = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3Y2Y1X2, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value.
+/// +/// NOTE(review): r.y is built by ffxPackBytesY3Y2X2Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3X2Y1Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value.
+/// +/// NOTE(review): r.y is built by ffxPackBytesX2Y2Y1Y0, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// NOTE(review): r.y is built by ffxPackBytesY3Y2Y1X2, which takes bits [16:23] of d.y rather than bits [0:7] as documented here; confirm which is intended. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // b: i scaled by 1/32768 with a 0.25/32768 bias, converted to a 32-bit word; the XOR with 0x00800080u flips bits 7 and 23.
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
+    // d.x and d.y are each paired with b and handed to their own ffxPackBytes* helper.
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b)));
+}
+
+/// Given an FfxUInt32x2 value d, an FfxFloat16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],
+/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value to be packed.
+/// @param [in] i The FfxFloat16x2 value to be packed.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // b: i scaled by 1/32768 with a 0.25/32768 bias, converted to a 32-bit word; the XOR with 0x00800080u flips bits 7 and 23.
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
+    // d.x and d.y are each paired with b and handed to their own ffxPackBytes* helper.
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b)));
+}
+
+/// Given an FfxUInt32x2 value d, an FfxFloat16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],
+/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value to be packed.
+/// @param [in] i The FfxFloat16x2 value to be packed.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // b: i scaled by 1/32768 with a 0.25/32768 bias, converted to a 32-bit word; the XOR with 0x00800080u flips bits 7 and 23.
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
+    // d.x and d.y are each paired with b and handed to their own ffxPackBytes* helper.
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b)));
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Mirrors the packing direction above, which scales by 1/32768 and adds 0.25/32768: here the value is scaled by 32768.0 and 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered value is scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered value is scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered value is scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
+///
+/// Handles zero-based signed byte values: the gathered word is XORed with 0x00800080u (bits 7 and 23) before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Same scale (32768.0) and offset (-0.25) as the non-zero-based variants, applied after the XOR.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.
+///
+/// Handles zero-based signed byte values: the gathered word is XORed with 0x00800080u (bits 7 and 23) before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered word is XORed with 0x00800080u, converted, scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///
+/// Handles zero-based signed byte values: the gathered word is XORed with 0x00800080u (bits 7 and 23) before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered word is XORed with 0x00800080u, converted, scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Given an FfxUInt32x2 value i and a resulting FfxFloat16x2 value r, this function unpacks i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.
+///
+/// Handles zero-based signed byte values: the gathered word is XORed with 0x00800080u (bits 7 and 23) before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // The gathered word is XORed with 0x00800080u, converted, scaled by 32768.0 and then 0.25 is subtracted.
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is to be approximated.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16(a) is shifted right by one and offset by the constant 0x1de2.
+    return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2));
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is to be approximated.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X2(a) is shifted right by one and offset by the constant 0x1de2.
+    return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2));
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is to be approximated.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X3(a) is shifted right by one and offset by the constant 0x1de2.
+    return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16(a) is subtracted from the constant 0x7784.
+    return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X2(a) is subtracted from the constant 0x7784.
+    return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X3(a) is subtracted from the constant 0x7784.
+    return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X4(a) is subtracted from the constant 0x7784.
+    return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
+{
+    // b is an initial estimate of the same form as the low-quality version, using the constant 0x778d instead of 0x7784.
+    FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a));
+    // One refinement step: b * (2 - a * b).
+    return b * (-b * a + FFX_BROADCAST_FLOAT16(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
+{
+    // b is an initial integer-only estimate: FFX_TO_UINT16X2(a) subtracted from the constant 0x778d.
+    FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a));
+    // One refinement step: b * (2 - a * b).
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X2(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
+{
+    // b is an initial integer-only estimate: FFX_TO_UINT16X3(a) subtracted from the constant 0x778d.
+    FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a));
+    // One refinement step: b * (2 - a * b).
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X3(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
+{
+    // b is an initial integer-only estimate: FFX_TO_UINT16X4(a) subtracted from the constant 0x778d.
+    FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a));
+    // One refinement step: b * (2 - a * b).
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X4(2.0));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16(a) shifted right by one is subtracted from the constant 0x59a3.
+    return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X2(a) shifted right by one is subtracted from the constant 0x59a3.
+    return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X3(a) shifted right by one is subtracted from the constant 0x59a3.
+    return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is to be approximated.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
+{
+    // Integer-only estimate: FFX_TO_UINT16X4(a) shifted right by one is subtracted from the constant 0x59a3.
+    return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1)));
+}
+
+/// An approximation of sine, evaluated as x * |x| - x.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of x.
+FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x)
+{
+    return x * abs(x) - x;
+}
+
+/// An approximation of sine, evaluated per component as x * |x| - x.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of x.
+FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x)
+{
+    return x * abs(x) - x;
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of x.
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x)
+{
+    // Remap the input as fract(x * 0.5 + 0.75) * 2 - 1, then reuse the parabolic sine on the remapped value.
+    x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
+    x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(x);
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of x.
+FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x)
+{
+    // Remap the input as fract(x * 0.5 + 0.75) * 2 - 1, then reuse the parabolic sine on the remapped value.
+    x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75));
+    x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0);
+    return ffxParabolicSinHalf(x);
+}
+
+/// An approximation of both sine and cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine and cosine for.
+///
+/// @returns
+/// A FfxFloat16x2 containing approximations of both sine (x component) and cosine (y component) of x.
+FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
+{
+    // y is x remapped exactly as in ffxParabolicCosHalf, so a single two-component sine call yields both results.
+    FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
+    y = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(FfxFloat16x2(x, y));
+}
+
+/// Conditional free logic AND operation using two FfxUInt16 values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two FfxUInt16x2 values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two FfxUInt16x3 values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two FfxUInt16x4 values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic NOT operation on an FfxUInt16 value, implemented as x XOR 1.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
+{
+    // The XOR with 1 toggles only the least-significant bit of x.
+    return x ^ FFX_BROADCAST_UINT16(1);
+}
+
+/// Conditional free logic NOT operation on an FfxUInt16x2 value, implemented as x XOR 1.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
+{
+    // The XOR with 1 toggles only the least-significant bit of each component.
+    return x ^ FFX_BROADCAST_UINT16X2(1);
+}
+
+/// Conditional free logic NOT operation on an FfxUInt16x3 value, implemented as x XOR 1.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
+{
+    // The XOR with 1 toggles only the least-significant bit of each component.
+    return x ^ FFX_BROADCAST_UINT16X3(1);
+}
+
+/// Conditional free logic NOT operation on an FfxUInt16x4 value, implemented as x XOR 1.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
+{
+    // The XOR with 1 toggles only the least-significant bit of each component.
+    return x ^ FFX_BROADCAST_UINT16X4(1);
+}
+
+/// Conditional free logic OR operation using two FfxUInt16 values, implemented as max(x, y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxUInt16x2 values, implemented as max(x, y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxUInt16x3 values, implemented as max(x, y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxUInt16x4 values, implemented as max(x, y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Convert an FfxFloat16 value between 0.0 and 1.0 to an FfxUInt16.
+///
+/// @param [in] x The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
+{
+    // x is multiplied by FFX_TO_FLOAT16 applied to the integer 1, and the product goes through FFX_TO_UINT16.
+    return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1)));
+}
+
+/// Convert an FfxFloat16x2 value with components between 0.0 and 1.0 to an FfxUInt16x2.
+///
+/// @param [in] x The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
+{
+    // x is multiplied by FFX_TO_FLOAT16X2 applied to an all-ones integer vector, and the product goes through FFX_TO_UINT16X2.
+    return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1)));
+}
+
+/// Convert an FfxFloat16x3 value with components between 0.0 and 1.0 to an FfxUInt16x3.
+///
+/// @param [in] x The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
+{
+    // x is multiplied by FFX_TO_FLOAT16X3 applied to an all-ones integer vector, and the product goes through FFX_TO_UINT16X3.
+    return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1)));
+}
+
+/// Convert an FfxFloat16x4 value with components between 0.0 and 1.0 to an FfxUInt16x4.
+///
+/// @param [in] x The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
+{
+    // x is multiplied by FFX_TO_FLOAT16X4 applied to an all-ones integer vector, and the product goes through FFX_TO_UINT16X4.
+    return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1)));
+}
+
+/// Convert an FfxUInt16 value between 0 and 1 to an FfxFloat16.
+///
+/// @param [in] x The value to be converted to an FfxFloat16.
+///
+/// @returns
+/// The converted FfxFloat16 value.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
+{
+    // x is multiplied by FFX_TO_UINT16 applied to FFX_TO_FLOAT16(1.0), and the product goes through FFX_TO_FLOAT16.
+    return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)));
+}
+
+/// Convert an FfxUInt16x2 value with components between 0 and 1 to an FfxFloat16x2.
+///
+/// @param [in] x The value to be converted to an FfxFloat16x2.
+///
+/// @returns
+/// The converted FfxFloat16x2 value.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
+{
+    // NOTE(review): the scalar overload multiplies by FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)), while this overload first builds an
+    // FfxUInt16x2 from FFX_TO_FLOAT16(1.0) values and then applies FFX_TO_UINT16X2 - confirm both yield the same multiplier.
+    return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
+}
+
+/// Convert an FfxUInt16x3 value with components between 0 and 1 to an FfxFloat16x3.
+///
+/// @param [in] x The value to be converted to an FfxFloat16x3.
+///
+/// @returns
+/// The converted FfxFloat16x3 value.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
+{
+    // NOTE(review): the multiplier is built as an FfxUInt16x3 of FFX_TO_FLOAT16(1.0) values, unlike the scalar overload - confirm equivalence.
+    return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
+}
+
+/// Convert an FfxUInt16x4 value with components between 0 and 1 to an FfxFloat16x4.
+///
+/// @param [in] x The value to be converted to an FfxFloat16x4.
+///
+/// @returns
+/// The converted FfxFloat16x4 value.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
+{
+    // NOTE(review): the multiplier is built as an FfxUInt16x4 of FFX_TO_FLOAT16(1.0) values, unlike the scalar overload - confirm equivalence.
+    return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
+}
+
+/// Conditional free logic AND operation using two half-precision values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    // The AND is realised as a component-wise minimum.
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two half-precision values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two half-precision values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// Evaluates (-x) * y + 1.0, i.e. 1 - x * y.
+/// NOTE(review): the function name says "AndOr" while this description says AND NOT - confirm the intended naming.
+///
+/// @param [in] x The first value to be fed into the AND NOT operator.
+/// @param [in] y The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// @param [in] x The first value to be fed into the AND NOT operator.
+/// @param [in] y The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    // Evaluates 1 - x * y per component.
+    return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// Evaluates (-x) * y + 1.0, i.e. 1 - x * y, per component.
+///
+/// @param [in] x The first value to be fed into the AND NOT operator.
+/// @param [in] y The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// Evaluates (-x) * y + 1.0, i.e. 1 - x * y, per component.
+///
+/// @param [in] x The first value to be fed into the AND NOT operator.
+/// @param [in] y The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0);
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// Evaluates saturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return FfxFloat16(ffxSaturate(x * y + z));
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return FfxFloat16x2(ffxSaturate(x * y + z)); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return FfxFloat16x3(ffxSaturate(x * y + z)); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return FfxFloat16x4(ffxSaturate(x * y + z)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) +{ + return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. 
+/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) +{ + return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) +{ + return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) +{ + return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) - x; +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. 
+/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + FfxFloat16 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + FfxFloat16x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + FfxFloat16x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. 
+/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + FfxFloat16x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) +{ + return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) +{ + return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) +{ + return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) +{ + return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) 
For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. 
+/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) +{ + return sqrt(c); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. 
+/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(x)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} + +#endif // FFX_HALF diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_hlsl.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_hlsl.h new file mode 100644 index 0000000000..f94af75bc2 --- /dev/null +++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_hlsl.h @@ -0,0 +1,1655 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +/// @defgroup HLSLCore HLSL Core +/// HLSL core defines and functions +/// +/// @ingroup FfxHLSL + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUPSHARED groupshared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync + +/// A define for abstracting compute atomic additions between shading languages. 
+/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup HLSLCore +#define FFX_STATIC static + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_UNROLL [unroll] + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN(x, y) ((x) > (y)) + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y)) + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN(x, y) ((x) < (y)) + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y)) + +/// A define for abstracting an 'equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_EQUAL(x, y) ((x) == (y)) + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_NOT_EQUAL(x, y) ((x) != (y)) + +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MODULO(a, b) (fmod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. 
+/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X2(x) FfxInt32(x) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X3(x) FfxInt32(x) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X4(x) FfxInt32(x) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. 
+/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) + +/// Pack 2x32-bit floating point values in a single 32bit value. 
+/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup HLSLCore +#ifndef GLSL +FfxUInt32 packHalf2x16(FfxFloat32x2 value) +{ + return f32tof16(value.x) | (f32tof16(value.y) << 16u); +} +#endif + + + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. 
+/// +/// @ingroup HLSLCore +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component.
+/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +/// Extract a bit field from an unsigned integer. +/// +/// Computes (src >> off) masked to its lowest bits bits. +/// +/// NOTE(review): the mask is built as (1u << bits) - 1u, so bits is assumed to be less than 32 - confirm against callers. +/// +/// @param [in] src The value to extract the field from. +/// @param [in] off The bit offset at which the field starts. +/// @param [in] bits The width of the field in bits. +/// +/// @returns +/// The extracted field, shifted down to bit 0. +/// +/// @ingroup HLSLCore +FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1u; + return (src >> off) & mask; +} + +/// Merge two values under a bit mask. +/// +/// @param [in] src The value supplying the bits where mask is 0. +/// @param [in] ins The value supplying the bits where mask is 1. +/// @param [in] mask The bit mask selecting which bits are taken from ins. +/// +/// @returns +/// (ins & mask) | (src & ~mask). +/// +/// @ingroup HLSLCore +FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +{ + return (ins & mask) | (src & (~mask)); +} + +/// Replace the lowest bits of a value. +/// +/// Same merge as bitfieldInsert, with the mask built from a bit count instead of being passed in. +/// +/// NOTE(review): the mask is built as (1u << bits) - 1u, so bits is assumed to be less than 32 - confirm against callers. +/// +/// @param [in] src The value whose upper bits are kept. +/// @param [in] ins The value supplying the lowest bits bits. +/// @param [in] bits The number of low bits to take from ins. +/// +/// @returns +/// src with its lowest bits bits replaced by those of ins. +/// +/// @ingroup HLSLCore +FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1u; + return (ins & mask) | (src & (~mask)); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number.
+/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return asfloat(x); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values.
+/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return lerp(x, y, t); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return saturate(x); +} + +/// Compute the fractional part of a decimal value.
+/// +/// This function calculates x - floor(x), where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x), where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x), where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x), where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware.
It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return x - floor(x); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z.
+/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The component-wise maximum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z.
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ?
max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The component-wise median of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the minimum of three values.
+/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation.
+/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The component-wise minimum of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + + +/// Shift a right by b using signed integer operands, then convert the result back to unsigned. +/// +/// @param [in] a The value to shift; converted to a signed integer before the shift. +/// @param [in] b The shift amount; converted to a signed integer before the shift. +/// +/// @returns +/// The shifted value converted back to an unsigned integer. +FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Pack a 2-dimensional floating point vector into a single 32-bit value. +/// +/// Each component is converted with f32tof16; x is stored in the lower 16 bits and y in the upper 16 bits. +/// +/// @param [in] v The 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// The packed 32-bit value. +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16u); +} + +/// Unpack a 32-bit value into a 2-dimensional floating point vector. +/// +/// The lower 16 bits become x and the upper 16 bits become y, each converted with f16tof32. +/// +/// @param [in] a The packed 32-bit value. +/// +/// @returns +/// The unpacked 2-dimensional floating point vector. +FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ + return f16tof32(FfxUInt32x2(a & 0xFFFFu, a >> 16u)); +} + +//============================================================================================================================== +// HLSL HALF +//============================================================================================================================== +//============================================================================================================================== +// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). +// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ +FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) +{ + FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFFu, x >> 16u)); + return FFX_MIN16_F2(t); +} +FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); +} +FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) +{ + FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFFu, x >> 16u); + return FFX_MIN16_U2(t); +} +FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); +} + +/// @brief Inverts the value while avoiding division by zero.
If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = sign(v); + FfxFloat32 s2 = s*s; + // Group (s2 - 1) so the denominator is exactly v for v != 0 and -1 for v == 0; (v + s2) - 1 rounds tiny v to 0. + return s2/(v + (s2 - 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = sign(v); + FfxFloat32x2 s2 = s*s; + // Group (s2 - 1) so the denominator is exactly v for v != 0 and -1 for v == 0; (v + s2) - 1 rounds tiny v to 0. + return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = sign(v); + FfxFloat32x3 s2 = s*s; + // Group (s2 - 1) so the denominator is exactly v for v != 0 and -1 for v == 0; (v + s2) - 1 rounds tiny v to 0. + return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = sign(v); + FfxFloat32x4 s2 = s*s; + // Group (s2 - 1) so the denominator is exactly v for v != 0 and -1 for v == 0; (v + s2) - 1 rounds tiny v to 0. + return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); +} + +#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#if FFX_HALF + +#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) +#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) + +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ + return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); +} + +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) +{ + return f32tof16(x.x) + (f32tof16(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) +{ + return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw)); +} +FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x) +{ + return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) +{ + return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw)); +} +#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x)) +#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x)) +#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x)) +#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x)) + +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_UINT16(x) asuint16(x) +#define FFX_TO_UINT16X2(x) asuint16(x) +#define FFX_TO_UINT16X3(x) asuint16(x) +#define FFX_TO_UINT16X4(x) asuint16(x) +#else +#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a))) +#define FFX_TO_UINT16X2(a)
FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) +#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) +#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +// FFX_TO_FLOAT16*: produce half floats from 16-bit unsigned bit patterns. Uses asfloat16 when FFX_HLSL_SM >= 62 and +// FFX_NO_16_BIT_CAST is not defined; otherwise converts each component through f16tof32. +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_FLOAT16(x) asfloat16(x) +#define FFX_TO_FLOAT16X2(x) asfloat16(x) +#define FFX_TO_FLOAT16X3(x) asfloat16(x) +#define FFX_TO_FLOAT16X4(x) asfloat16(x) +#else +#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a))) +#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) +#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) +#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +//============================================================================================================================== +// FFX_BROADCAST_*16*: every vector width expands to the same scalar FFX_MIN16_F / FFX_MIN16_I / FFX_MIN16_U expression. +#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a) +#define
FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)
+
+//==============================================================================================================================
+// ffxAbsHalf: convert the unsigned input to the signed 16-bit type, take abs() of that,
+// and convert the result back to the unsigned type. One overload per vector width.
+FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
+{
+    FFX_MIN16_I asSigned = FFX_MIN16_I(a);
+    return FFX_MIN16_U(abs(asSigned));
+}
+FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
+{
+    FFX_MIN16_I2 asSigned = FFX_MIN16_I2(a);
+    return FFX_MIN16_U2(abs(asSigned));
+}
+FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
+{
+    FFX_MIN16_I3 asSigned = FFX_MIN16_I3(a);
+    return FFX_MIN16_U3(abs(asSigned));
+}
+FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
+{
+    FFX_MIN16_I4 asSigned = FFX_MIN16_I4(a);
+    return FFX_MIN16_U4(abs(asSigned));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxClampHalf: limit x from above by m first, then from below by n (so n wins if n > m).
+FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
+{
+    FFX_MIN16_F cappedAbove = min(x, m);
+    return max(n, cappedAbove);
+}
+FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
+{
+    FFX_MIN16_F2 cappedAbove = min(x, m);
+    return max(n, cappedAbove);
+}
+FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
+{
+    FFX_MIN16_F3 cappedAbove = min(x, m);
+    return max(n, cappedAbove);
+}
+FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
+{
+    FFX_MIN16_F4 cappedAbove = min(x, m);
+    return max(n, cappedAbove);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// V_FRACT_F16 (note DX frac() is different).
+// ffxFract: x minus floor(x), evaluated per component.
+FFX_MIN16_F ffxFract(FFX_MIN16_F x)
+{
+    FFX_MIN16_F wholePart = floor(x);
+    return x - wholePart;
+}
+FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
+{
+    FFX_MIN16_F2 wholePart = floor(x);
+    return x - wholePart;
+}
+FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
+{
+    FFX_MIN16_F3 wholePart = floor(x);
+    return x - wholePart;
+}
+FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
+{
+    FFX_MIN16_F4 wholePart = floor(x);
+    return x - wholePart;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxLerp: forwards to the lerp() intrinsic. Vector overloads exist both with a
+// per-component blend factor and with a single scalar blend factor.
+FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
+{
+    FFX_MIN16_F blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
+{
+    FFX_MIN16_F2 blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
+{
+    FFX_MIN16_F2 blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
+{
+    FFX_MIN16_F3 blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
+{
+    FFX_MIN16_F3 blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
+{
+    FFX_MIN16_F4 blended = lerp(x, y, a);
+    return blended;
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
+{
+    FFX_MIN16_F4 blended = lerp(x, y, a);
+    return blended;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxMax3Half: largest of three values, built from two max() calls (y and z first).
+FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+    FFX_MIN16_F largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+    FFX_MIN16_F2 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+    FFX_MIN16_F3 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+    FFX_MIN16_F4 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxMin3Half: smallest of three values, built from two min() calls (y and z first).
+FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+    FFX_MIN16_F smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+    FFX_MIN16_F2 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+// ffxMin3Half (3- and 4-wide overloads): smallest of three values per component.
+FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+    FFX_MIN16_F3 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+    FFX_MIN16_F4 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxMed3Half (float): median of three. Order x and y into (low, high), cap the high
+// one by z, then take the larger of that and the low one.
+FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+    FFX_MIN16_F lowXY  = min(x, y);
+    FFX_MIN16_F highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+    FFX_MIN16_F2 lowXY  = min(x, y);
+    FFX_MIN16_F2 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+    FFX_MIN16_F3 lowXY  = min(x, y);
+    FFX_MIN16_F3 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+    FFX_MIN16_F4 lowXY  = min(x, y);
+    FFX_MIN16_F4 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxMed3Half (signed integer): same median-of-three construction as the float overloads.
+FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
+{
+    FFX_MIN16_I lowXY  = min(x, y);
+    FFX_MIN16_I highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
+{
+    FFX_MIN16_I2 lowXY  = min(x, y);
+    FFX_MIN16_I2 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
+{
+    FFX_MIN16_I3 lowXY  = min(x, y);
+    FFX_MIN16_I3 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
+{
+    FFX_MIN16_I4 lowXY  = min(x, y);
+    FFX_MIN16_I4 highXY = max(x, y);
+    return max(lowXY, min(highXY, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxReciprocalHalf: forwards to the rcp() intrinsic.
+FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
+{
+    FFX_MIN16_F inverse = rcp(x);
+    return inverse;
+}
+FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
+{
+    FFX_MIN16_F2 inverse = rcp(x);
+    return inverse;
+}
+FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
+{
+    FFX_MIN16_F3 inverse = rcp(x);
+    return inverse;
+}
+FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
+{
+    FFX_MIN16_F4 inverse = rcp(x);
+    return inverse;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxReciprocalSquareRootHalf: forwards to the rsqrt() intrinsic.
+FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
+{
+    FFX_MIN16_F inverseRoot = rsqrt(x);
+    return inverseRoot;
+}
+FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
+{
+    FFX_MIN16_F2 inverseRoot = rsqrt(x);
+    return inverseRoot;
+}
+FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
+{
+    FFX_MIN16_F3 inverseRoot = rsqrt(x);
+    return inverseRoot;
+}
+FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
+{
+    FFX_MIN16_F4 inverseRoot = rsqrt(x);
+    return inverseRoot;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxSaturate (16-bit overloads): forwards to the saturate() intrinsic.
+FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
+{
+    FFX_MIN16_F unitRange = saturate(x);
+    return unitRange;
+}
+FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
+{
+    FFX_MIN16_F2 unitRange = saturate(x);
+    return unitRange;
+}
+FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
+{
+    FFX_MIN16_F3 unitRange = saturate(x);
+    return unitRange;
+}
+FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
+{
+    FFX_MIN16_F4 unitRange = saturate(x);
+    return unitRange;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// ffxBitShiftRightHalf: both operands are converted to the signed 16-bit type before
+// the >> is applied, and the shifted value is converted back to the unsigned type.
+FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
+{
+    FFX_MIN16_I signedValue = FFX_MIN16_I(a);
+    FFX_MIN16_I signedCount = FFX_MIN16_I(b);
+    return FFX_MIN16_U(signedValue >> signedCount);
+}
+FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
+{
+    FFX_MIN16_I2 signedValue = FFX_MIN16_I2(a);
+    FFX_MIN16_I2 signedCount = FFX_MIN16_I2(b);
+    return FFX_MIN16_U2(signedValue >> signedCount);
+}
+FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
+{
+    FFX_MIN16_I3 signedValue = FFX_MIN16_I3(a);
+    FFX_MIN16_I3 signedCount = FFX_MIN16_I3(b);
+    return FFX_MIN16_U3(signedValue >> signedCount);
+}
+FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
+{
+    FFX_MIN16_I4 signedValue = FFX_MIN16_I4(a);
+    FFX_MIN16_I4 signedCount = FFX_MIN16_I4(b);
+    return FFX_MIN16_U4(signedValue >> signedCount);
+}
+#endif // FFX_HALF
+
+//==============================================================================================================================
+//                                                          HLSL WAVE
+//==============================================================================================================================
+#if defined(FFX_WAVE)
+// Where 'x' must be a compile time literal.
+// AWaveXor*: read 'v' from the lane whose index is this lane's index XOR 'x'.
+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+// Unsigned variants. Note that all four widths share the single name AWaveXorU1
+// and are told apart by overload resolution only.
+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return WaveReadLaneAt(v, partnerLane);
+}
+// Thin one-to-one forwarders onto the wave intrinsics.
+FfxBoolean AWaveIsFirstLane()
+{
+    return WaveIsFirstLane();
+}
+FfxUInt32 AWaveLaneIndex()
+{
+    return WaveGetLaneIndex();
+}
+// Reads 'v' from the lane with index 'x' (an absolute index, no XOR applied here).
+FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, x);
+}
+FfxUInt32 AWavePrefixCountBits(FfxBoolean v)
+{
+    return WavePrefixCountBits(v);
+}
+FfxUInt32 AWaveActiveCountBits(FfxBoolean v)
+{
+    return WaveActiveCountBits(v);
+}
+FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v)
+{
+    return WaveReadLaneFirst(v);
+}
+// Wave-wide reductions.
+FfxUInt32 WaveOr(FfxUInt32 a)
+{
+    return WaveActiveBitOr(a);
+}
+FfxFloat32 WaveMin(FfxFloat32 a)
+{
+    return WaveActiveMin(a);
+}
+FfxFloat32 WaveMax(FfxFloat32 a)
+{
+    return WaveActiveMax(a);
+}
+FfxUInt32 WaveLaneCount()
+{
+    return WaveGetLaneCount();
+}
+FfxBoolean WaveAllTrue(FfxBoolean v)
+{
+    return WaveActiveAllTrue(v);
+}
+// Quad neighbour reads, forwarded to QuadReadAcrossX / QuadReadAcrossY.
+FfxFloat32 QuadReadX(FfxFloat32 v)
+{
+    return QuadReadAcrossX(v);
+}
+FfxFloat32x2 QuadReadX(FfxFloat32x2 v)
+{
+    return QuadReadAcrossX(v);
+}
+FfxFloat32 QuadReadY(FfxFloat32 v)
+{
+    return QuadReadAcrossY(v);
+}
+FfxFloat32x2 QuadReadY(FfxFloat32x2 v)
+{
+    return QuadReadAcrossY(v);
+}
+
+#if
FFX_HALF
+// 16-bit XOR-lane reads: the value is packed into 32-bit words with the
+// FFX_*_TO_UINT32* macros, read from lane (this lane index XOR x), and unpacked again.
+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), partnerLane));
+}
+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), partnerLane));
+}
+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), partnerLane));
+}
+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
+{
+    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
+    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), partnerLane));
+}
+#endif // FFX_HALF
+#endif // #if defined(FFX_WAVE)
diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_core_portability.h b/Graphics/SuperResolution/shaders/fsr1/ffx_core_portability.h
new file mode 100644
index 0000000000..84a62d6b84
--- /dev/null
+++ b/Graphics/SuperResolution/shaders/fsr1/ffx_core_portability.h
@@ -0,0 +1,51 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// In all helpers below 'd' is passed by value: its incoming value is ignored and the caller must use the return value.
+FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    return a + ffxBroadcast3(b); // add scalar b to every component of a
+}
+
+// Returns a copy of 'a'.
+FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    return a;
+}
+
+// Returns the component-wise product a * b.
+FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    return a * b;
+}
+
+// Returns 'a' with every component multiplied by the scalar 'b'.
+FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    return a * ffxBroadcast3(b);
+}
+
+// Returns rcp() of 'a'.
+FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    return rcp(a);
+}
diff --git a/Graphics/SuperResolution/shaders/fsr1/ffx_fsr1.h b/Graphics/SuperResolution/shaders/fsr1/ffx_fsr1.h
new file mode 100644
index 0000000000..e03ad53990
--- /dev/null
+++ b/Graphics/SuperResolution/shaders/fsr1/ffx_fsr1.h
@@ -0,0 +1,1251 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// @defgroup FfxGPUFsr1 FidelityFX FSR1
+/// FidelityFX Super Resolution 1 GPU documentation
+///
+/// @ingroup FfxGPUEffects
+
+/// Fill the four EASU constant vectors (usable from CPU or GPU code).
+///
+/// Every entry is a 32-bit float stored through ffxAsUInt32(); the constants are only written here, never read.
+///
+/// @param [out] con0                             xy: inputViewport / outputSize per axis; zw: 0.5 * that ratio - 0.5.
+/// @param [out] con1                             xy: 1 / inputSize per axis; zw: (+1 / inputSizeX, -1 / inputSizeY).
+/// @param [out] con2                             xy: (-1 / inputSizeX, 2 / inputSizeY); zw: (+1 / inputSizeX, 2 / inputSizeY).
+/// @param [out] con3                             xy: (0 / inputSizeX, 4 / inputSizeY); zw: integer zero.
+/// @param [in] inputViewportInPixelsX            The rendered image resolution being upscaled in X dimension.
+/// @param [in] inputViewportInPixelsY            The rendered image resolution being upscaled in Y dimension.
+/// @param [in] inputSizeInPixelsX                The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension.
+/// @param [in] inputSizeInPixelsY                The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension.
+/// @param [in] outputSizeInPixelsX               The display resolution which the input image gets upscaled to in X dimension.
+/// @param [in] outputSizeInPixelsY               The display resolution which the input image gets upscaled to in Y dimension.
+///
+/// @ingroup FfxGPUFsr1
+FFX_STATIC void ffxFsrPopulateEasuConstants(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32     inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsY)
+{
+    // Reciprocals shared by the entries below.
+    FfxFloat32 rcpOutputX = ffxReciprocal(outputSizeInPixelsX);
+    FfxFloat32 rcpOutputY = ffxReciprocal(outputSizeInPixelsY);
+    FfxFloat32 rcpInputX  = ffxReciprocal(inputSizeInPixelsX);
+    FfxFloat32 rcpInputY  = ffxReciprocal(inputSizeInPixelsY);
+
+    // Output integer position to a pixel position in viewport.
+    con0[0] = ffxAsUInt32(inputViewportInPixelsX * rcpOutputX);
+    con0[1] = ffxAsUInt32(inputViewportInPixelsY * rcpOutputY);
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * rcpOutputX - FfxFloat32(0.5));
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * rcpOutputY - FfxFloat32(0.5));
+
+    // Viewport pixel position to normalized image space.
+    // This is used to get upper-left of 'F' tap.
+    con1[0] = ffxAsUInt32(rcpInputX);
+    con1[1] = ffxAsUInt32(rcpInputY);
+
+    // Centers of gather4, first offset from upper-left of 'F'.
+    //      +---+---+
+    //      |   |   |
+    //      +--(0)--+
+    //      | b | c |
+    //  +---F---+---+---+
+    //  | e | f | g | h |
+    //  +--(1)--+--(2)--+
+    //  | i | j | k | l |
+    //  +---+---+---+---+
+    //      | n | o |
+    //      +--(3)--+
+    //      |   |   |
+    //      +---+---+
+    con1[2] = ffxAsUInt32(FfxFloat32(1.0) * rcpInputX);
+    con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * rcpInputY);
+
+    // These are from (0) instead of 'F'.
+    con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * rcpInputX);
+    con2[1] = ffxAsUInt32(FfxFloat32(2.0) * rcpInputY);
+    con2[2] = ffxAsUInt32(FfxFloat32(1.0) * rcpInputX);
+    con2[3] = ffxAsUInt32(FfxFloat32(2.0) * rcpInputY);
+    con3[0] = ffxAsUInt32(FfxFloat32(0.0) * rcpInputX);
+    con3[1] = ffxAsUInt32(FfxFloat32(4.0) * rcpInputY);
+
+    // The last two slots carry no data.
+    con3[3] = 0u;
+    con3[2] = con3[3];
+}
+
+/// Setup required constant values for EASU (works on CPU or GPU).
+///
+/// @param [out] con0
+/// @param [out] con1
+/// @param [out] con2
+/// @param [out] con3
+/// @param [in] inputViewportInPixelsX            The resolution of the input in the X dimension.
+/// @param [in] inputViewportInPixelsY            The resolution of the input in the Y dimension.
+/// @param [in] inputSizeInPixelsX                The input size in pixels in the X dimension.
+/// @param [in] inputSizeInPixelsY                The input size in pixels in the Y dimension.
+/// @param [in] outputSizeInPixelsX               The output size in pixels in the X dimension.
+/// @param [in] outputSizeInPixelsY               The output size in pixels in the Y dimension.
+/// @param [in] inputOffsetInPixelsX              The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
+/// @param [in] inputOffsetInPixelsY              The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
+///
+/// @ingroup FfxGPUFsr1
+FFX_STATIC void ffxFsrPopulateEasuConstantsOffset(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32     inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32     inputOffsetInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32     inputOffsetInPixelsY)
+{
+    // Start from the constants of the no-offset variant.
+    ffxFsrPopulateEasuConstants(con0, con1, con2, con3,
+                                inputViewportInPixelsX, inputViewportInPixelsY,
+                                inputSizeInPixelsX, inputSizeInPixelsY,
+                                outputSizeInPixelsX, outputSizeInPixelsY);
+
+    // Recompute the position bias with the input offset added on top; only con0.zw differ from the base variant.
+    FfxFloat32 biasWithOffsetX = FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX;
+    FfxFloat32 biasWithOffsetY = FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY;
+    con0[2] = ffxAsUInt32(biasWithOffsetX);
+    con0[3] = ffxAsUInt32(biasWithOffsetY);
+}
+
+#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuGF(FfxFloat32x2
p);
+FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p);
+
+// Filtering for a given tap for the scalar.
+// Computes this tap's filter weight from its rotated, anisotropically scaled offset and
+// adds (color * weight, weight) into the two running accumulators. The weight can be
+// negative (negative lobe), so the accumulated color is not yet a normalized average.
+void fsrEasuTapFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor,      // Accumulated color, with negative lobe.
+    FFX_PARAMETER_INOUT FfxFloat32   accumulatedWeight,     // Accumulated weight.
+    FFX_PARAMETER_IN FfxFloat32x2    pixelOffset,           // Pixel offset from resolve position to tap.
+    FFX_PARAMETER_IN FfxFloat32x2    gradientDirection,     // Gradient direction.
+    FFX_PARAMETER_IN FfxFloat32x2    length,                // Length.
+    FFX_PARAMETER_IN FfxFloat32      negativeLobeStrength,  // Negative lobe strength.
+    FFX_PARAMETER_IN FfxFloat32      clippingPoint,         // Clipping point.
+    FFX_PARAMETER_IN FfxFloat32x3    color)                 // Tap color.
+{
+    // Rotate offset by direction.
+    FfxFloat32x2 rotatedOffset;
+    rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y);
+    rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x);
+
+    // Anisotropy.
+    rotatedOffset *= length;
+
+    // Compute distance^2.
+    FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y;
+
+    // Limit to the window as at corner, 2 taps can easily be outside.
+    distanceSquared = ffxMin(distanceSquared, clippingPoint);
+
+    // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+    //  |_______________________________________|   |_______________|
+    //                   base                             window
+    // The general form of the 'base' is,
+    //  (a*(b*x^2-1)^2-(a-1))
+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+    // Here weightB becomes the 'base' term and weightA the 'window' term, with
+    // negativeLobeStrength taking the place of the fixed 1/4 factor in the formula above.
+    FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0);
+    FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0);
+    weightB *= weightB;
+    weightA *= weightA;
+    weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0));
+    FfxFloat32 weight = weightB * weightA;
+
+    // Do weighted average.
+    accumulatedColor += color * weight;
+    accumulatedWeight += weight;
+}
+
+// Accumulate direction and length.
+// Adds one bilinear-weighted contribution to 'direction' and 'length' from five luma
+// values laid out as a '+' (lA above, lB left, lC center, lD right, lE below).
+// biS/biT/biU/biV pick which bilinear corner weight is used; they are tested in that
+// order, so if more than one is true the last one tested wins.
+void fsrEasuSetFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x2 direction,
+    FFX_PARAMETER_INOUT FfxFloat32   length,
+    FFX_PARAMETER_IN FfxFloat32x2    pp,
+    FFX_PARAMETER_IN FfxBoolean      biS,
+    FFX_PARAMETER_IN FfxBoolean      biT,
+    FFX_PARAMETER_IN FfxBoolean      biU,
+    FFX_PARAMETER_IN FfxBoolean      biV,
+    FFX_PARAMETER_IN FfxFloat32      lA,
+    FFX_PARAMETER_IN FfxFloat32      lB,
+    FFX_PARAMETER_IN FfxFloat32      lC,
+    FFX_PARAMETER_IN FfxFloat32      lD,
+    FFX_PARAMETER_IN FfxFloat32      lE)
+{
+    // Compute bilinear weight, branches factor out as predicates are compile-time immediates.
+    //  s t
+    //  u v
+    FfxFloat32 weight = FfxFloat32(0.0);
+    if (biS)
+        weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y);
+    if (biT)
+        weight = pp.x * (FfxFloat32(1.0) - pp.y);
+    if (biU)
+        weight = (FfxFloat32(1.0) - pp.x) * pp.y;
+    if (biV)
+        weight = pp.x * pp.y;
+
+    // Direction is the '+' diff.
+    //    a
+    //  b c d
+    //    e
+    // Then takes magnitude from abs average of both sides of 'c'.
+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+    FfxFloat32 dc = lD - lC;
+    FfxFloat32 cb = lC - lB;
+    FfxFloat32 lengthX = max(abs(dc), abs(cb));
+    lengthX = ffxApproximateReciprocal(lengthX);
+    FfxFloat32 directionX = lD - lB;
+    direction.x += directionX * weight;
+    lengthX = ffxSaturate(abs(directionX) * lengthX);
+    lengthX *= lengthX;
+    length += lengthX * weight;
+
+    // Repeat for the y axis.
+    FfxFloat32 ec = lE - lC;
+    FfxFloat32 ca = lC - lA;
+    FfxFloat32 lengthY = max(abs(ec), abs(ca));
+    lengthY = ffxApproximateReciprocal(lengthY);
+    FfxFloat32 directionY = lE - lA;
+    direction.y += directionY * weight;
+    lengthY = ffxSaturate(abs(directionY) * lengthY);
+    lengthY *= lengthY;
+    length += lengthY * weight;
+}
+
+/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations.
+///
+/// Samples a 12-tap neighbourhood through the FsrEasuRF/GF/BF callbacks, estimates an edge
+/// direction and length from approximate luma, filters the taps with fsrEasuTapFloat and
+/// clamps the normalized result to the min/max of the four nearest taps (f, g, j, k).
+///
+/// @param [out] pix    The computed color of a pixel.
+/// @param [in] ip      Integer pixel position within the output.
+/// @param [in] con0    The first constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con1    The second constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con2    The third constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con3    The fourth constant value generated by ffxFsrPopulateEasuConstants.
+///
+/// @ingroup FfxGPUFsr1
+void ffxFsrEasuFloat(
+    FFX_PARAMETER_OUT FfxFloat32x3 pix,
+    FFX_PARAMETER_IN FfxUInt32x2   ip,
+    FFX_PARAMETER_IN FfxUInt32x4   con0,
+    FFX_PARAMETER_IN FfxUInt32x4   con1,
+    FFX_PARAMETER_IN FfxUInt32x4   con2,
+    FFX_PARAMETER_IN FfxUInt32x4   con3)
+{
+    // Get position of 'f'.
+    // After this, fp holds the integer tap position and pp the fractional offset from it.
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;
+
+    // 12-tap kernel.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // Gather 4 ordering.
+    //  a b
+    //  r g
+    // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+    //    a b    <- unused (z)
+    //    r g
+    //  a b a b
+    //  r g r g
+    //    a b
+    //    r g    <- unused (z)
+    // Allowing dead-code removal to remove the 'z's.
+    FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+
+    // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+    FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy);
+    FfxFloat32x4 bczzR = FsrEasuRF(p0);
+    FfxFloat32x4 bczzG = FsrEasuGF(p0);
+    FfxFloat32x4 bczzB = FsrEasuBF(p0);
+    FfxFloat32x4 ijfeR = FsrEasuRF(p1);
+    FfxFloat32x4 ijfeG = FsrEasuGF(p1);
+    FfxFloat32x4 ijfeB = FsrEasuBF(p1);
+    FfxFloat32x4 klhgR = FsrEasuRF(p2);
+    FfxFloat32x4 klhgG = FsrEasuGF(p2);
+    FfxFloat32x4 klhgB = FsrEasuBF(p2);
+    FfxFloat32x4 zzonR = FsrEasuRF(p3);
+    FfxFloat32x4 zzonG = FsrEasuGF(p3);
+    FfxFloat32x4 zzonB = FsrEasuBF(p3);
+
+    // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+    FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG);
+    FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG);
+    FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG);
+    FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG);
+
+    // Rename.
+    // The component picked for each tap follows the letter order in the variable name.
+    FfxFloat32 bL = bczzL.x;
+    FfxFloat32 cL = bczzL.y;
+    FfxFloat32 iL = ijfeL.x;
+    FfxFloat32 jL = ijfeL.y;
+    FfxFloat32 fL = ijfeL.z;
+    FfxFloat32 eL = ijfeL.w;
+    FfxFloat32 kL = klhgL.x;
+    FfxFloat32 lL = klhgL.y;
+    FfxFloat32 hL = klhgL.z;
+    FfxFloat32 gL = klhgL.w;
+    FfxFloat32 oL = zzonL.z;
+    FfxFloat32 nL = zzonL.w;
+
+    // Accumulate for bilinear interpolation.
+    // One call per '+' pattern centred on f, g, j and k, each with its own corner weight.
+    FfxFloat32x2 dir = ffxBroadcast2(0.0);
+    FfxFloat32 len = FfxFloat32(0.0);
+    fsrEasuSetFloat(dir, len, pp, FFX_TRUE, FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE, FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE, FFX_FALSE, fL, iL, jL, kL, nL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE, gL, jL, kL, lL, oL);
+
+    // Normalize with approximation, and cleanup close to zero.
+    // When the squared length is below 1/32768 the direction is replaced: dir.x is forced to 1 and the scale to 1.
+    FfxFloat32x2 dir2 = dir * dir;
+    FfxFloat32 dirR = dir2.x + dir2.y;
+    FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0);
+    dirR = ffxApproximateReciprocalSquareRoot(dirR);
+    dirR = zro ? FfxFloat32(1.0) : dirR;
+    dir.x = zro ? FfxFloat32(1.0) : dir.x;
+    dir *= ffxBroadcast2(dirR);
+
+    // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+    len = len * FfxFloat32(0.5);
+    len *= len;
+
+    // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+    FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y)));
+
+    // Anisotropic length after rotation,
+    //  x := 1.0 lerp to 'stretch' on edges
+    //  y := 1.0 lerp to 2x on edges
+    FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len);
+
+    // Based on the amount of 'edge',
+    // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+    FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len;
+
+    // Set distance^2 clipping point to the end of the adjustable window.
+    FfxFloat32 clp = ffxApproximateReciprocal(lob);
+
+    // Accumulation mixed with min/max of 4 nearest.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // The four taps used below are f (ijfe.z), g (klhg.w), j (ijfe.y) and k (klhg.x).
+    FfxFloat32x3 min4 =
+        ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)),
+               FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+    FfxFloat32x3 max4 =
+        max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+
+    // Accumulation.
+    // Each call passes the tap's offset relative to 'f' minus the fractional position pp.
+    FfxFloat32x3 aC = ffxBroadcast3(0.0);
+    FfxFloat32 aW = FfxFloat32(0.0);
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x));  // b
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y));  // c
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x));  // i
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y));   // j
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z));   // f
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w));  // e
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));   // k
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y));   // l
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z));   // h
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w));   // g
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z));   // o
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w));   // n
+
+    // Normalize and dering.
+    // Divide by the accumulated weight, then clamp into [min4, max4].
+    pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));
+}
+#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p);
+
+// This runs 2 taps in parallel.
+// Each FfxFloat16x2 argument carries one value per tap: offX/offY are the two taps' offsets,
+// cR/cG/cB their colors, and aCR/aCG/aCB/aW the per-tap running sums that get updated.
+// dir, len, lob and clp are shared by both taps.
+void FsrEasuTapH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCR,
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCG,
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCB,
+    FFX_PARAMETER_INOUT FfxFloat16x2 aW,
+    FFX_PARAMETER_IN FfxFloat16x2    offX,
+    FFX_PARAMETER_IN FfxFloat16x2    offY,
+    FFX_PARAMETER_IN FfxFloat16x2    dir,
+    FFX_PARAMETER_IN FfxFloat16x2    len,
+    FFX_PARAMETER_IN FfxFloat16      lob,
+    FFX_PARAMETER_IN FfxFloat16      clp,
+    FFX_PARAMETER_IN FfxFloat16x2    cR,
+    FFX_PARAMETER_IN FfxFloat16x2    cG,
+    FFX_PARAMETER_IN FfxFloat16x2    cB)
+{
+    // Rotate both offsets by 'dir', then scale by the anisotropic length.
+    FfxFloat16x2 vX, vY;
+    vX = offX * dir.xx + offY * dir.yy;
+    vY = offX * (-dir.yy) + offY * dir.xx;
+    vX *= len.x;
+    vY *= len.y;
+    // Squared distance per tap, limited to the clipping point.
+    FfxFloat16x2 d2 = vX * vX + vY * vY;
+    d2 = min(d2, FFX_BROADCAST_FLOAT16X2(clp));
+    // Same base * window weight construction as fsrEasuTapFloat, evaluated for two taps at once.
+    FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    wB *= wB;
+    wA *= wA;
+    wB = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0));
+    FfxFloat16x2 w = wB * wA;
+    // Accumulate weighted color per channel and the weight itself.
+    aCR += cR * w;
+    aCG += cG * w;
+    aCB += cB * w;
+    aW += w;
+}
+
+// This runs 2 taps in parallel.
+// Packed counterpart of fsrEasuSetFloat: every FfxFloat16x2 holds the values for two
+// '+' patterns side by side. dirPX/dirPY/lenP keep the two partial sums separate.
+// biST selects the top-row bilinear weights, biUV the bottom-row ones; biUV is tested
+// last and therefore wins if both are set.
+void FsrEasuSetH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 dirPX,
+    FFX_PARAMETER_INOUT FfxFloat16x2 dirPY,
+    FFX_PARAMETER_INOUT FfxFloat16x2 lenP,
+    FFX_PARAMETER_IN FfxFloat16x2    pp,
+    FFX_PARAMETER_IN FfxBoolean      biST,
+    FFX_PARAMETER_IN FfxBoolean      biUV,
+    FFX_PARAMETER_IN FfxFloat16x2    lA,
+    FFX_PARAMETER_IN FfxFloat16x2    lB,
+    FFX_PARAMETER_IN FfxFloat16x2    lC,
+    FFX_PARAMETER_IN FfxFloat16x2    lD,
+    FFX_PARAMETER_IN FfxFloat16x2    lE)
+{
+    FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0);
+
+    // w = (1 - pp.x, pp.x) scaled by (1 - pp.y) for the top row ...
+    if (biST)
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y);
+
+    // ... or by pp.y for the bottom row.
+    if (biUV)
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y);
+
+    // ABS is not free in the packed FP16 path.
+    FfxFloat16x2 dc = lD - lC;
+    FfxFloat16x2 cb = lC - lB;
+    FfxFloat16x2 lenX = max(abs(dc), abs(cb));
+    lenX = ffxReciprocalHalf(lenX);
+
+    // Horizontal direction and length contribution.
+    FfxFloat16x2 dirX = lD - lB;
+    dirPX += dirX * w;
+    lenX = FfxFloat16x2(ffxSaturate(abs(dirX) * lenX));
+    lenX *= lenX;
+    lenP += lenX * w;
+    // Vertical direction and length contribution.
+    FfxFloat16x2 ec = lE - lC;
+    FfxFloat16x2 ca = lC - lA;
+    FfxFloat16x2 lenY = max(abs(ec), abs(ca));
+    lenY = ffxReciprocalHalf(lenY);
+    FfxFloat16x2 dirY = lE - lA;
+    dirPY += dirY * w;
+    lenY = FfxFloat16x2(ffxSaturate(abs(dirY) * lenY));
+    lenY *= lenY;
+    lenP += lenY * w;
+}
+
+void FsrEasuH(
+    FFX_PARAMETER_OUT FfxFloat16x3 pix,
+    FFX_PARAMETER_IN FfxUInt32x2   ip,
+    FFX_PARAMETER_IN FfxUInt32x4   con0,
+    FFX_PARAMETER_IN FfxUInt32x4   con1,
+    FFX_PARAMETER_IN FfxUInt32x4   con2,
+    FFX_PARAMETER_IN FfxUInt32x4   con3)
+{
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;
+    FfxFloat16x2 ppp = FfxFloat16x2(pp);
+
+    FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+    FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy);
+    FfxFloat16x4 bczzR = FsrEasuRH(p0);
+
FfxFloat16x4 bczzG = FsrEasuGH(p0); + FfxFloat16x4 bczzB = FsrEasuBH(p0); + FfxFloat16x4 ijfeR = FsrEasuRH(p1); + FfxFloat16x4 ijfeG = FsrEasuGH(p1); + FfxFloat16x4 ijfeB = FsrEasuBH(p1); + FfxFloat16x4 klhgR = FsrEasuRH(p2); + FfxFloat16x4 klhgG = FsrEasuGH(p2); + FfxFloat16x4 klhgB = FsrEasuBH(p2); + FfxFloat16x4 zzonR = FsrEasuRH(p3); + FfxFloat16x4 zzonG = FsrEasuGH(p3); + FfxFloat16x4 zzonB = FsrEasuBH(p3); + + FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG); + FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG); + FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG); + FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG); + FfxFloat16 bL = bczzL.x; + FfxFloat16 cL = bczzL.y; + FfxFloat16 iL = ijfeL.x; + FfxFloat16 jL = ijfeL.y; + FfxFloat16 fL = ijfeL.z; + FfxFloat16 eL = ijfeL.w; + FfxFloat16 kL = klhgL.x; + FfxFloat16 lL = klhgL.y; + FfxFloat16 hL = klhgL.z; + FfxFloat16 gL = klhgL.w; + FfxFloat16 oL = zzonL.z; + FfxFloat16 nL = zzonL.w; + + // This part is different, accumulating 2 taps in parallel. 
+ FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 lenP = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(true), + FfxBoolean(false), + FfxFloat16x2(bL, cL), + FfxFloat16x2(eL, fL), + FfxFloat16x2(fL, gL), + FfxFloat16x2(gL, hL), + FfxFloat16x2(jL, kL)); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(false), + FfxBoolean(true), + FfxFloat16x2(fL, gL), + FfxFloat16x2(iL, jL), + FfxFloat16x2(jL, kL), + FfxFloat16x2(kL, lL), + FfxFloat16x2(nL, oL)); + FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); + FfxFloat16 len = lenP.r + lenP.g; + + FfxFloat16x2 dir2 = dir * dir; + FfxFloat16 dirR = dir2.x + dir2.y; + FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); + dirR = ffxApproximateReciprocalSquareRootHalf(dirR); + dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; + dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x; + dir *= FFX_BROADCAST_FLOAT16X2(dirR); + len = len * FFX_BROADCAST_FLOAT16(0.5); + len *= len; + FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); + FfxFloat16x2 len2 = + FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len); + FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; + FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); + + // FP16 is different, using packed trick to do min and max in same operation. 
+ FfxFloat16x2 bothR = + max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); + FfxFloat16x2 bothG = + max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); + FfxFloat16x2 bothB = + max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); + + // This part is different for FP16, working pairs of taps at a time. + FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); + FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); + FfxFloat16 aW = pW.x + pW.y; + + // Slightly different for FP16 version due to combined min and max. 
+ pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_HALF) && defined(FFX_FSR_EASU_HALF) + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING +// +//------------------------------------------------------------------------------------------------------------------------------ +// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. +// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. +// RCAS also has a built in process to limit sharpening of what it detects as possible noise. +// RCAS sharper does not support scaling, as it should be applied after EASU scaling. +// Pass EASU output straight into RCAS, no color conversions necessary. +//------------------------------------------------------------------------------------------------------------------------------ +// RCAS is based on the following logic. +// RCAS uses a 5 tap filter in a cross pattern (same as CAS), +// w n +// w 1 w for taps w m e +// w s +// Where 'w' is the negative lobe weight. 
+//  output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+//       0.25
+//  0.25  -1  0.25
+//       0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+//  GLSL example for the required callbacks :
+//
+//  FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));}
+//  void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b)
+//  {
+//    //do any simple input color conversions here or leave empty if none needed
+//  }
+//
+//  FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+//  Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+//
+//  FfxUInt32x4 con;
+//  FsrRcasCon(con,
+//   0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+// Used (negated) as the lower bound of the sharpening lobe in every FsrRcas* variant below.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+// Writes the four RCAS constants into 'con':
+//   con[0] = bit pattern of 'sharpness' as a 32-bit float (read back via ffxAsFloat(con.x) in FsrRcasF),
+//   con[1] = 'sharpness' packed twice as two 16-bit halves (read back via FFX_UINT32_TO_FLOAT16X2(con.y) in the half variants),
+//   con[2] = con[3] = 0.
+// NOTE(review): 'sharpness' is stored exactly as passed in - no stops-to-linear conversion is performed
+// here even though the comments say so. With sharpness == 0.0 the stored multiplier is 0.0, which makes
+// the FsrRcas* functions scale the lobe by zero; that does not match "0.0 := maximum". Confirm whether
+// callers are expected to pass an already-linear value or whether a conversion such as exp2(-sharpness)
+// was dropped.
+FFX_STATIC void FsrRcasCon(FFX_PARAMETER_INOUT FfxUInt32x4 con,
+                           FFX_PARAMETER_IN FfxFloat32 sharpness) // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+    {
+        // Transform from stops to linear value.
+        FfxFloat32x2 hSharp;
+        hSharp[0] = sharpness;
+        hSharp[1] = sharpness;
+        con[0] = ffxAsUInt32(sharpness);
+        con[1] = packHalf2x16(hSharp);
+        con[2] = 0u;
+        con[3] = 0u;
+    }
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU) && defined(FSR_RCAS_F)
+    // Input callback prototypes that need to be implemented by calling shader
+    FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p);
+    void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // 32-bit RCAS for one pixel.
+    // Loads the centre pixel and its four cross neighbours through FsrRcasLoadF(), passes each through
+    // FsrRcasInputF(), derives a single negative lobe weight and writes the resolved colour to pixR/G/B.
+    // With FSR_RCAS_PASSTHROUGH_ALPHA defined, the centre pixel's alpha is copied to pixA unchanged.
+    void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
+                  out FfxFloat32 pixG,
+                  out FfxFloat32 pixB,
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+                  out FfxFloat32 pixA,
+#endif
+                  FfxUInt32x2 ip, // Integer pixel position in output.
+                  FfxUInt32x4 con)
+    { // Constant generated by FsrRcasCon(); only con.x is read here.
+        // Algorithm uses minimal 3x3 pixel neighborhood.
+        //    b
+        //  d e f
+        //    h
+        FfxInt32x2 sp = FfxInt32x2(ip);
+        FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
+        FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat32x4 ee = FsrRcasLoadF(sp);
+        FfxFloat32x3 e = ee.rgb;
+        pixA = ee.a;
+#else
+        FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
+#endif
+        FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
+        FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
+        // Rename (32-bit) or regroup (16-bit).
+        FfxFloat32 bR = b.r;
+        FfxFloat32 bG = b.g;
+        FfxFloat32 bB = b.b;
+        FfxFloat32 dR = d.r;
+        FfxFloat32 dG = d.g;
+        FfxFloat32 dB = d.b;
+        FfxFloat32 eR = e.r;
+        FfxFloat32 eG = e.g;
+        FfxFloat32 eB = e.b;
+        FfxFloat32 fR = f.r;
+        FfxFloat32 fG = f.g;
+        FfxFloat32 fB = f.b;
+        FfxFloat32 hR = h.r;
+        FfxFloat32 hG = h.g;
+        FfxFloat32 hB = h.b;
+        // Run optional input transform.
+        FsrRcasInputF(bR, bG, bB);
+        FsrRcasInputF(dR, dG, dB);
+        FsrRcasInputF(eR, eG, eB);
+        FsrRcasInputF(fR, fG, fB);
+        FsrRcasInputF(hR, hG, hB);
+        // Luma times 2.
+        FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
+        FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
+        FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
+        FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
+        FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
+        // Noise detection.
+        // nz ends up as 1 - 0.5 * saturate(|avg(b,d,f,h) - e| / (max5 - min5)); only applied under FSR_RCAS_DENOISE.
+        FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
+        nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
+        nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0);
+        // Min and max of ring.
+        FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
+        FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
+        FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
+        FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
+        FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
+        FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
+        // Immediate constants for peak range.
+        FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
+        FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
+        FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
+        FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
+        FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
+        FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+        FfxFloat32 lobeR = max(-hitMinR, hitMaxR);
+        FfxFloat32 lobeG = max(-hitMinG, hitMaxG);
+        FfxFloat32 lobeB = max(-hitMinB, hitMaxB);
+        // Clamp the lobe to [-FSR_RCAS_LIMIT, 0] and scale it by the sharpness constant.
+        FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
+        (con.x);
+        // Apply noise removal.
+#ifdef FSR_RCAS_DENOISE
+        lobe *= nz;
+#endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
+        pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+        pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+        pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                  NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H)
+    // Input callback prototypes that need to be implemented by calling shader
+    FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p);
+    void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // 16-bit RCAS for one pixel. Same sequence of steps as FsrRcasF, using the *Half helpers and
+    // reading the sharpness from the first half packed in con.y.
+    void FsrRcasH(
+    out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
+    out FfxFloat16 pixG,
+    out FfxFloat16 pixB,
+    #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+    out FfxFloat16 pixA,
+    #endif
+    FfxUInt32x2 ip, // Integer pixel position in output.
+    FfxUInt32x4 con){ // Constant generated by FsrRcasCon(); only con.y is read here.
+        // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
+        //    b
+        //  d e f
+        //    h
+        FfxInt16x2 sp=FfxInt16x2(ip);
+        FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee=FsrRcasLoadH(sp);
+        FfxFloat16x3 e=ee.rgb;pixA=ee.a;
+        #else
+        FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
+        #endif
+        FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
+        // Rename (32-bit) or regroup (16-bit).
+        FfxFloat16 bR=b.r;
+        FfxFloat16 bG=b.g;
+        FfxFloat16 bB=b.b;
+        FfxFloat16 dR=d.r;
+        FfxFloat16 dG=d.g;
+        FfxFloat16 dB=d.b;
+        FfxFloat16 eR=e.r;
+        FfxFloat16 eG=e.g;
+        FfxFloat16 eB=e.b;
+        FfxFloat16 fR=f.r;
+        FfxFloat16 fG=f.g;
+        FfxFloat16 fB=f.b;
+        FfxFloat16 hR=h.r;
+        FfxFloat16 hG=h.g;
+        FfxFloat16 hB=h.b;
+        // Run optional input transform.
+        FsrRcasInputH(bR,bG,bB);
+        FsrRcasInputH(dR,dG,dB);
+        FsrRcasInputH(eR,eG,eB);
+        FsrRcasInputH(fR,fG,fB);
+        FsrRcasInputH(hR,hG,hB);
+        // Luma times 2.
+        FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG);
+        FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG);
+        FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG);
+        FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG);
+        FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG);
+        // Noise detection.
+        FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL;
+        nz=FfxFloat16(ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))));
+        nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0);
+        // Min and max of ring.
+        FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+        FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+        FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+        FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+        FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+        FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+        // Immediate constants for peak range.
+        FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
+        FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
+        FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+        FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
+        FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
+        FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
+        FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
+        FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
+        FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
+        // Clamp the lobe to [-FSR_RCAS_LIMIT, 0] and scale it by the half-precision sharpness constant.
+        FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x;
+        // Apply noise removal.
+        #ifdef FSR_RCAS_DENOISE
+        lobe*=nz;
+        #endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0));
+        pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+        pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+        pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2)
+    // Input callback prototypes that need to be implemented by the calling shader
+    FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p);
+    void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store.
+    // pix0 receives lane x of pixR/pixG/pixB, pix1 receives lane y. Alpha is only written (as 0.0) under FFX_HLSL.
+    void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){
+        #ifdef FFX_HLSL
+        // Invoke a slower path for DX only, since it won't allow uninitialized values.
+        pix0.a=pix1.a=0.0;
+        #endif
+        pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x);
+        pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);}
+//------------------------------------------------------------------------------------------------------------------------------
+    // Packed 16-bit RCAS: processes the pixel at 'ip' (lane x) and the pixel 8 columns to its right
+    // (lane y) in one call. Same sequence of steps as FsrRcasH, applied to 2-wide vectors.
+    void FsrRcasHx2(
+    // Output values are for 2 8x8 tiles in a 16x8 region.
+    //  pix<R,G,B>.x = left 8x8 tile
+    //  pix<R,G,B>.y = right 8x8 tile
+    // This enables later processing to easily be packed as well.
+    out FfxFloat16x2 pixR,
+    out FfxFloat16x2 pixG,
+    out FfxFloat16x2 pixB,
+    #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+    out FfxFloat16x2 pixA,
+    #endif
+    FfxUInt32x2 ip, // Integer pixel position in output.
+    FfxUInt32x4 con){ // Constant generated by FsrRcasCon(); only con.y is read here.
+        // No scaling algorithm uses minimal 3x3 pixel neighborhood.
+        FfxInt16x2 sp0=FfxInt16x2(ip);
+        FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
+        FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
+        #else
+        FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
+        #endif
+        FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
+        // Second pixel: 8 columns to the right of the first.
+        FfxInt16x2 sp1=sp0+FfxInt16x2(8,0);
+        FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
+        FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
+        #else
+        FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
+        #endif
+        FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
+        // Arrays of Structures to Structures of Arrays conversion.
+        FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
+        FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
+        FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
+        FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
+        FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
+        FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
+        FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
+        FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
+        FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
+        FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
+        FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
+        FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
+        FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
+        FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
+        FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
+        // Run optional input transform.
+        FsrRcasInputHx2(bR,bG,bB);
+        FsrRcasInputHx2(dR,dG,dB);
+        FsrRcasInputHx2(eR,eG,eB);
+        FsrRcasInputHx2(fR,fG,fB);
+        FsrRcasInputHx2(hR,hG,hB);
+        // Luma times 2.
+        FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG);
+        FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG);
+        FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG);
+        FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG);
+        FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG);
+        // Noise detection.
+        FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL;
+        nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+        nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0);
+        // Min and max of ring.
+        FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+        FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+        FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+        FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+        FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+        FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+        // Immediate constants for peak range.
+        FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
+        FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
+        FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+        FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
+        FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
+        FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
+        FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
+        FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
+        FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
+        // Clamp the lobe to [-FSR_RCAS_LIMIT, 0] and scale both lanes by the same half-precision sharpness constant.
+        FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x);
+        // Apply noise removal.
+        #ifdef FSR_RCAS_DENOISE
+        lobe*=nz;
+        #endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0));
+        pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+        pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+        pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+//  FsrLfga*(
+//   color,   // In/out linear colorspace color {0 to 1} ranged.
+//   grain,   // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+//   amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
+//==============================================================================================================================
+#if defined(FFX_GPU)
+    // Adds grain 't' scaled by amount 'a' to colour 'c' in place.
+    // The grain is attenuated by min(1 - c, c), i.e. the distance of each channel to the nearer signal limit.
+    void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a)
+    {
+        FfxFloat32x3 scaledGrain = t * ffxBroadcast3(a);
+        FfxFloat32x3 headroom    = ffxMin(ffxBroadcast3(1.0) - c, c);
+        c += scaledGrain * headroom;
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU) && FFX_HALF == 1
+    // Half precision version (slower). Same formula as FsrLfgaF on 16-bit vectors.
+    void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a)
+    {
+        FfxFloat16x3 scaledGrain = t * FFX_BROADCAST_FLOAT16X3(a);
+        FfxFloat16x3 headroom    = min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c);
+        c += scaledGrain * headroom;
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed half precision version (faster).
+    // Each channel is a 2-wide vector, so two pixels are processed per call; the same amount 'a' applies to both.
+    void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a)
+    {
+        FfxFloat16x2 amount = FFX_BROADCAST_FLOAT16X2(a);
+        FfxFloat16x2 one    = FFX_BROADCAST_FLOAT16X2(1.0);
+        cR += (tR * amount) * min(one - cR, cR);
+        cG += (tG * amount) * min(one - cG, cG);
+        cB += (tB * amount) * min(one - cB, cB);
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+//  FsrSrtm*(color);    // {0 to FP16_MAX} converted to {0 to 1}.
+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(FFX_GPU)
+    // Forward tone-map: divides the colour by (largest channel + 1).
+    void FsrSrtmF(inout FfxFloat32x3 c)
+    {
+        FfxFloat32 peak = ffxMax3(c.r, c.g, c.b);
+        c *= ffxBroadcast3(rcp(peak + FfxFloat32(1.0)));
+    }
+    // Inverse tone-map: divides the colour by (1 - largest channel).
+    // The extra max solves the c=1.0 case (which is a /0).
+    void FsrSrtmInvF(inout FfxFloat32x3 c)
+    {
+        FfxFloat32 peak  = ffxMax3(c.r, c.g, c.b);
+        FfxFloat32 denom = max(FfxFloat32(1.0 / 32768.0), FfxFloat32(1.0) - peak);
+        c *= ffxBroadcast3(rcp(denom));
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU) && FFX_HALF == 1
+    // Half precision forward tone-map.
+    void FsrSrtmH(inout FfxFloat16x3 c)
+    {
+        FfxFloat16 peak = ffxMax3Half(c.r, c.g, c.b);
+        c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16(1.0)));
+    }
+    // Half precision inverse tone-map; the denominator is floored at 1/32768.
+    void FsrSrtmInvH(inout FfxFloat16x3 c)
+    {
+        FfxFloat16 peak  = ffxMax3Half(c.r, c.g, c.b);
+        FfxFloat16 denom = max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - peak);
+        c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(denom));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed forward tone-map: each channel carries two pixels, one per lane.
+    void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
+    {
+        FfxFloat16x2 peak  = ffxMax3Half(cR, cG, cB);
+        FfxFloat16x2 scale = ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16X2(1.0));
+        cR *= scale;
+        cG *= scale;
+        cB *= scale;
+    }
+    // Packed inverse tone-map; the denominator is floored at 1/32768 per lane.
+    void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB)
+    {
+        FfxFloat16x2 peak  = ffxMax3Half(cR, cG, cB);
+        FfxFloat16x2 scale = ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0), FFX_BROADCAST_FLOAT16X2(1.0) - peak));
+        cR *= scale;
+        cG *= scale;
+        cB *= scale;
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(FFX_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this computation. + // Output is {0 to <1}.
+    FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        // 'f' is added to the x coordinate before the conversion to float.
+        FfxFloat32 px = FfxFloat32(p.x + f);
+        FfxFloat32 py = FfxFloat32(p.y);
+        // Golden ratio (about 1.61803), used as the x multiplier.
+        FfxFloat32 golden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        // y multiplier; a number designed to provide a good visual pattern.
+        FfxFloat32 yScale = FfxFloat32(1.0 / 3.69);
+        // Only the fractional part is kept.
+        return ffxFract(px * golden + (py * yScale));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // This version is 8-bit gamma 2.0.
+    // The 'c' input is {0 to 1}.
+    // Output is {0 to 1} ready for image store.
+    void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
+    {
+        FfxFloat32x3 stepSize = ffxBroadcast3(1.0 / 255.0);
+        // Gamma 2.0 value rounded down onto the 8-bit grid.
+        FfxFloat32x3 lo = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * stepSize;
+        // Linear value of that grid point and of the next one up.
+        FfxFloat32x3 loLinear = lo * lo;
+        FfxFloat32x3 hi       = lo + stepSize;
+        FfxFloat32x3 hiLinear = hi * hi;
+        // Ratio of the lower to the upper grid point required to produce 'c'.
+        // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges).
+        // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL.
+        FfxFloat32x3 ratio = (c - hiLinear) * ffxApproximateReciprocalMedium(loLinear - hiLinear);
+        // Use the ratio as a cutoff to choose the lower or the upper grid point.
+        // ffxIsGreaterThanZero() is a MUL.
+        c = ffxSaturate(lo + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * stepSize);
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // This version is 10-bit gamma 2.0.
+    // The 'c' input is {0 to 1}.
+    // Output is {0 to 1} ready for image store.
+    void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
+    {
+        // Gamma 2.0 value of 'c', rounded down onto the 10-bit grid (steps of 1/1023).
+        FfxFloat32x3 n = ffxSqrt(c);
+        n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0);
+        // 'a' is the linear value of that grid point, 'b' the linear value of the next grid point up.
+        FfxFloat32x3 a = n * n;
+        FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0);
+        b = b * b;
+        // Ratio of 'a' to 'b' required to produce 'c'.
+        FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b);
+        // Use the ratio as a cutoff against the dither value to choose 'n' or 'n' plus one step.
+        c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0));
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+    // Same computation as FsrTepdDitF: the arithmetic is done in 32-bit floats and only the
+    // returned fractional part is converted to FfxFloat16.
+    FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        FfxFloat32 x = FfxFloat32(p.x + f);
+        FfxFloat32 y = FfxFloat32(p.y);
+        // The 1.61803 golden ratio.
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        // Number designed to provide a good visual pattern.
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69);
+        x = x * a + (y * b);
+        return FfxFloat16(ffxFract(x));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Half-precision 8-bit gamma 2.0 version; follows the same steps as FsrTepdC8F using the *Half helpers.
+    void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit)
+    {
+        // Gamma 2.0 value rounded down onto the 8-bit grid (steps of 1/255).
+        FfxFloat16x3 n = sqrt(c);
+        n = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0);
+        // Linear values of the lower ('a') and next-higher ('b') grid points.
+        FfxFloat16x3 a = n * n;
+        FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0);
+        b = b * b;
+        // Ratio of 'a' to 'b' required to produce 'c', then used as the dither cutoff.
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b);
+        c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Half-precision 10-bit gamma 2.0 version; identical to FsrTepdC8H except for the 1/1023 step.
+    void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
+    {
+        FfxFloat16x3 n = sqrt(c);
+        n = floor(n * FFX_BROADCAST_FLOAT16X3(1023.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0);
+        FfxFloat16x3 a = n * n;
+        FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0);
+        b = b * b;
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b);
+        c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0)));
+    }
+    //==============================================================================================================================
+    // This computes dither for positions 'p' and 'p+{8,0}'.
+    FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        // x.x is the x coordinate (plus 'f') of the first position, x.y the same coordinate shifted by 8.
+        FfxFloat32x2 x;
+        x.x = FfxFloat32(p.x + f);
+        x.y = x.x + FfxFloat32(8.0);
+        FfxFloat32 y = FfxFloat32(p.y);
+        // The 1.61803 golden ratio.
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        // Number designed to provide a good visual pattern.
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69);
+        x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b);
+        return FfxFloat16x2(ffxFract(x));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed 8-bit gamma 2.0 version: each channel argument and 'dit' carry two values.
+    // Applies the FsrTepdC8H steps to the R, G and B channels separately.
+    void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+    {
+        // Gamma 2.0 values rounded down onto the 8-bit grid.
+        FfxFloat16x2 nR = sqrt(cR);
+        FfxFloat16x2 nG = sqrt(cG);
+        FfxFloat16x2 nB = sqrt(cB);
+        nR = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        nG = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        nB = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        // Linear values of the lower grid points.
+        FfxFloat16x2 aR = nR * nR;
+        FfxFloat16x2 aG = nG * nG;
+        FfxFloat16x2 aB = nB * nB;
+        // Linear values of the next grid points up.
+        FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bR = bR * bR;
+        FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bG = bG * bG;
+        FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bB = bB * bB;
+        // Per-channel cutoff ratios.
+        FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR);
+        FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
+        FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
+        // Compare the dither value against the ratio to pick the lower grid point or the one above it.
+        cR = FfxFloat16x2(ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)));
+        cG = FfxFloat16x2(ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)));
+        cB = FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed 10-bit gamma 2.0 version; identical to FsrTepdC8Hx2 except for the 1/1023 step.
+    void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){
+        FfxFloat16x2 nR=sqrt(cR);
+        FfxFloat16x2 nG=sqrt(cG);
+        FfxFloat16x2 nB=sqrt(cB);
+        nR=floor(nR*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0);
+        nG=floor(nG*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0);
+        nB=floor(nB*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0);
+        FfxFloat16x2 aR=nR*nR;
+        FfxFloat16x2 aG=nG*nG;
+        FfxFloat16x2 aB=nB*nB;
+        FfxFloat16x2 bR=nR+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR;
+        FfxFloat16x2 bG=nG+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG;
+        FfxFloat16x2 bB=nB+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB;
+        FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR);
+        FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG);
+        FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB);
+        cR=FfxFloat16x2(ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)));
+        cG=FfxFloat16x2(ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)));
+        cB=FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0)));
+}
+#endif
diff --git a/Graphics/SuperResolution/src/FSRProvider.cpp b/Graphics/SuperResolution/src/FSRProvider.cpp
new file mode 100644
index 0000000000..aa03c7f81b
--- /dev/null
+++ b/Graphics/SuperResolution/src/FSRProvider.cpp
@@ -0,0 +1,325 @@
+/*
+ *  Copyright 2026 Diligent Graphics LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  In no event and under no legal theory, whether in tort (including negligence),
+ *  contract, or otherwise, unless required by applicable law (such as deliberate
+ *  and grossly negligent acts) or agreed to in writing, shall any Contributor be
+ *  liable for any damages, including any direct, indirect, special, incidental,
+ *  or consequential damages of any character arising as a result of this License or
+ *  out of the use or inability to use the software (including but not limited to damages
+ *  for loss of goodwill, work stoppage, computer failure or malfunction, or any and
+ *  all other commercial damages or losses), even if such Contributor has been advised
+ *  of the possibility of such damages.
+ */ + +#include "SuperResolutionProvider.hpp" +#include "SuperResolutionBase.hpp" +#include "SuperResolutionVariants.hpp" + +#include "RefCntAutoPtr.hpp" +#include "GraphicsTypesX.hpp" +#include "GraphicsUtilities.h" +#include "ShaderSourceFactoryUtils.h" +#include "CommonlyUsedStates.h" +#include "ShaderMacroHelper.hpp" +#include "BasicMath.hpp" +#include "EngineMemory.h" + +namespace Diligent +{ + +namespace HLSL +{ +#define FFX_CPU +#include "../shaders/fsr1/ffx_core.h" +#include "../shaders/fsr1/ffx_fsr1.h" +#undef FFX_CPU + +#include "../shaders/FSRStructures.fxh" +#include "FSRShaderList.h" +} // namespace HLSL + +namespace +{ + +void PopulateFSRAttribs(HLSL::FSRAttribs& Attribs, float InputWidth, float InputHeight, float OutputWidth, float OutputHeight, float Sharpness) +{ + const auto ToUint4 = [](const uint32_t(&arr)[4]) { + return uint4{arr[0], arr[1], arr[2], arr[3]}; + }; + + HLSL::FfxUInt32x4 Constant0{}, Constant1{}, Constant2{}, Constant3{}; + HLSL::ffxFsrPopulateEasuConstants(Constant0, Constant1, Constant2, Constant3, + InputWidth, InputHeight, + InputWidth, InputHeight, + OutputWidth, OutputHeight); + Attribs.EASUConstants0 = ToUint4(Constant0); + Attribs.EASUConstants1 = ToUint4(Constant1); + Attribs.EASUConstants2 = ToUint4(Constant2); + Attribs.EASUConstants3 = ToUint4(Constant3); + + HLSL::FfxUInt32x4 RCASConstant{}; + HLSL::FsrRcasCon(RCASConstant, Sharpness); + Attribs.RCASConstants = ToUint4(RCASConstant); + + Attribs.SourceSize = float4{InputWidth, InputHeight, 1.0f / InputWidth, 1.0f / InputHeight}; +} + +class SuperResolutionFSR final : public SuperResolutionBase +{ +public: + SuperResolutionFSR(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, + IPipelineState* pEASU_PSO, + IPipelineState* pRCAS_PSO); + + virtual void DILIGENT_CALL_TYPE Execute(const ExecuteSuperResolutionAttribs& Attribs) override final; + +private: + RefCntAutoPtr m_pEASU_PSO; + 
RefCntAutoPtr m_pRCAS_PSO; + RefCntAutoPtr m_pEASU_SRB; + RefCntAutoPtr m_pRCAS_SRB; + RefCntAutoPtr m_pConstantBuffer; + RefCntAutoPtr m_pIntermediateTexture; + float m_LastSharpness = -1.0f; +}; + + +SuperResolutionFSR::SuperResolutionFSR(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + const SuperResolutionDesc& Desc, + const SuperResolutionInfo& Info, + IPipelineState* pEASU_PSO, + IPipelineState* pRCAS_PSO) : + SuperResolutionBase{pRefCounters, Desc, Info}, + m_pEASU_PSO{pEASU_PSO}, + m_pRCAS_PSO{pRCAS_PSO} +{ + { + const float InputWidth = static_cast(Desc.InputWidth); + const float InputHeight = static_cast(Desc.InputHeight); + const float OutputWidth = static_cast(Desc.OutputWidth); + const float OutputHeight = static_cast(Desc.OutputHeight); + + HLSL::FSRAttribs DefaultAttribs{}; + PopulateFSRAttribs(DefaultAttribs, InputWidth, InputHeight, OutputWidth, OutputHeight, 1.0f); + + CreateUniformBuffer(pDevice, sizeof(HLSL::FSRAttribs), "FSR::ConstantBuffer", &m_pConstantBuffer, USAGE_DEFAULT, BIND_UNIFORM_BUFFER, CPU_ACCESS_NONE, &DefaultAttribs); + } + + const bool SharpeningEnabled = (m_Desc.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) != 0; + + if (SharpeningEnabled) + { + TextureDesc TexDesc; + TexDesc.Name = "FSR::EASU Output"; + TexDesc.Type = RESOURCE_DIM_TEX_2D; + TexDesc.Width = Desc.OutputWidth; + TexDesc.Height = Desc.OutputHeight; + TexDesc.Format = m_Desc.OutputFormat; + TexDesc.MipLevels = 1; + TexDesc.BindFlags = BIND_SHADER_RESOURCE | BIND_RENDER_TARGET; + pDevice->CreateTexture(TexDesc, nullptr, &m_pIntermediateTexture); + } + + // Initialize SRBs and bind mutable resources + { + m_pEASU_PSO->CreateShaderResourceBinding(&m_pEASU_SRB, true); + ShaderResourceVariableX{m_pEASU_SRB, SHADER_TYPE_PIXEL, "cbFSRAttribs"}.Set(m_pConstantBuffer); + + if (SharpeningEnabled) + { + m_pRCAS_PSO->CreateShaderResourceBinding(&m_pRCAS_SRB, true); + ShaderResourceVariableX{m_pRCAS_SRB, SHADER_TYPE_PIXEL, "cbFSRAttribs"}.Set(m_pConstantBuffer); 
+ ShaderResourceVariableX{m_pRCAS_SRB, SHADER_TYPE_PIXEL, "g_TextureSource"}.Set(m_pIntermediateTexture->GetDefaultView(TEXTURE_VIEW_SHADER_RESOURCE)); + } + } +} + +void SuperResolutionFSR::Execute(const ExecuteSuperResolutionAttribs& Attribs) +{ + ValidateExecuteSuperResolutionAttribs(m_Desc, m_Info, Attribs); + + IDeviceContext* pContext = Attribs.pContext; + + const bool SharpeningEnabled = (m_Desc.Flags & SUPER_RESOLUTION_FLAG_ENABLE_SHARPENING) != 0; + + if (SharpeningEnabled && m_LastSharpness != Attribs.Sharpness) + { + m_LastSharpness = Attribs.Sharpness; + + HLSL::FSRAttribs FSRAttribs{}; + PopulateFSRAttribs(FSRAttribs, + static_cast(m_Desc.InputWidth), static_cast(m_Desc.InputHeight), + static_cast(m_Desc.OutputWidth), static_cast(m_Desc.OutputHeight), + Attribs.Sharpness); + pContext->UpdateBuffer(m_pConstantBuffer, 0, sizeof(HLSL::FSRAttribs), &FSRAttribs, RESOURCE_STATE_TRANSITION_MODE_TRANSITION); + } + + { + ITextureView* pEASU_RTV = SharpeningEnabled ? m_pIntermediateTexture->GetDefaultView(TEXTURE_VIEW_RENDER_TARGET) : Attribs.pOutputTextureView; + ITextureView* pRTVs[] = {pEASU_RTV}; + pContext->SetRenderTargets(1, pRTVs, nullptr, RESOURCE_STATE_TRANSITION_MODE_TRANSITION); + pContext->SetPipelineState(m_pEASU_PSO); + + ShaderResourceVariableX{m_pEASU_SRB, SHADER_TYPE_PIXEL, "g_TextureSource"}.Set(Attribs.pColorTextureSRV); + + pContext->CommitShaderResources(m_pEASU_SRB, RESOURCE_STATE_TRANSITION_MODE_TRANSITION); + pContext->Draw({3, DRAW_FLAG_VERIFY_ALL}); + } + + if (SharpeningEnabled) + { + ITextureView* pRTVs[] = {Attribs.pOutputTextureView}; + pContext->SetRenderTargets(1, pRTVs, nullptr, RESOURCE_STATE_TRANSITION_MODE_TRANSITION); + pContext->SetPipelineState(m_pRCAS_PSO); + pContext->CommitShaderResources(m_pRCAS_SRB, RESOURCE_STATE_TRANSITION_MODE_TRANSITION); + pContext->Draw({3, DRAW_FLAG_VERIFY_ALL}); + } + + pContext->SetRenderTargets(0, nullptr, nullptr, RESOURCE_STATE_TRANSITION_MODE_NONE); +} + + +class FSRProvider final : 
public SuperResolutionProvider +{ +public: + FSRProvider(IRenderDevice* pDevice); + + virtual void EnumerateVariants(std::vector& Variants) override final + { + SuperResolutionInfo Info{}; + Info.VariantId = VariantId_FSRSpatial; + snprintf(Info.Name, sizeof(Info.Name), "Software: FSR Spatial"); + Info.Type = SUPER_RESOLUTION_TYPE_SPATIAL; + Info.SpatialCapFlags = SUPER_RESOLUTION_SPATIAL_CAP_FLAG_SHARPNESS; + Variants.push_back(Info); + } + + virtual void CreateSuperResolution(const SuperResolutionDesc& Desc, const SuperResolutionInfo& Info, ISuperResolution** ppUpscaler) override final + { + auto& Pipelines = GetOrCreatePipelines(Desc.OutputFormat); + auto* pUpscaler = NEW_RC_OBJ(GetRawAllocator(), "SuperResolutionFSR instance", SuperResolutionFSR)(m_pDevice, Desc, Info, Pipelines.pEASU_PSO, Pipelines.pRCAS_PSO); + pUpscaler->QueryInterface(IID_SuperResolution, reinterpret_cast(ppUpscaler)); + } + +private: + struct PipelineData + { + RefCntAutoPtr pEASU_PSO; + RefCntAutoPtr pRCAS_PSO; + }; + + PipelineData& GetOrCreatePipelines(TEXTURE_FORMAT OutputFormat); + + RefCntAutoPtr m_pDevice; + RefCntAutoPtr m_pShaderSourceFactory; + RefCntAutoPtr m_pVS; + RefCntAutoPtr m_pEASU_PS; + RefCntAutoPtr m_pRCAS_PS; + std::unordered_map> m_PipelineCache; +}; + + +FSRProvider::FSRProvider(IRenderDevice* pDevice) : + m_pDevice{pDevice} +{ + MemoryShaderSourceFactoryCreateInfo CI{HLSL::g_FSRShaders, _countof(HLSL::g_FSRShaders)}; + CreateMemoryShaderSourceFactory(CI, &m_pShaderSourceFactory); + + ShaderMacroHelper Macros; + if (pDevice->GetDeviceInfo().Type != RENDER_DEVICE_TYPE_GLES) + Macros.AddShaderMacro("FSR_FEATURE_TEXTURE_GATHER", 1); + + auto CreateShader = [&](SHADER_TYPE Type, const char* Name, const char* EntryPoint, const char* FilePath, const ShaderMacroArray& ShaderMacros = {}) { + ShaderCreateInfo ShaderCI; + ShaderCI.SourceLanguage = SHADER_SOURCE_LANGUAGE_HLSL; + ShaderCI.Desc.ShaderType = Type; + ShaderCI.Desc.Name = Name; + ShaderCI.EntryPoint = EntryPoint; + 
ShaderCI.FilePath = FilePath; + ShaderCI.Macros = ShaderMacros; + ShaderCI.pShaderSourceStreamFactory = m_pShaderSourceFactory; + RefCntAutoPtr pShader; + pDevice->CreateShader(ShaderCI, &pShader); + return pShader; + }; + + m_pVS = CreateShader(SHADER_TYPE_VERTEX, "FSR FullQuad VS", "FSR_FullQuadVS", "FSR_FullQuad.fx"); + m_pEASU_PS = CreateShader(SHADER_TYPE_PIXEL, "FSR EASU PS", "ComputeEdgeAdaptiveUpsamplingPS", "FSR_EdgeAdaptiveUpsampling.fx", Macros); + m_pRCAS_PS = CreateShader(SHADER_TYPE_PIXEL, "FSR RCAS PS", "ComputeContrastAdaptiveSharpeningPS", "FSR_ContrastAdaptiveSharpening.fx"); +} + +FSRProvider::PipelineData& FSRProvider::GetOrCreatePipelines(TEXTURE_FORMAT OutputFormat) +{ + auto It = m_PipelineCache.find(OutputFormat); + if (It != m_PipelineCache.end()) + return It->second; + + PipelineData& Data = m_PipelineCache[OutputFormat]; + + { + PipelineResourceLayoutDescX ResourceLayout; + ResourceLayout + .SetDefaultVariableType(SHADER_RESOURCE_VARIABLE_TYPE_DYNAMIC) + .AddVariable(SHADER_TYPE_PIXEL, "cbFSRAttribs", SHADER_RESOURCE_VARIABLE_TYPE_MUTABLE) + .AddImmutableSampler(SHADER_TYPE_PIXEL, "g_TextureSource_sampler", Sam_PointClamp); + + GraphicsPipelineStateCreateInfoX PSOCreateInfo{"FSR::EASU PSO"}; + PSOCreateInfo + .AddShader(m_pVS) + .AddShader(m_pEASU_PS) + .AddRenderTarget(OutputFormat) + .SetRasterizerDesc(RasterizerStateDesc{FILL_MODE_SOLID, CULL_MODE_NONE}) + .SetDepthStencilDesc(DepthStencilStateDesc{False, False}) + .SetResourceLayout(ResourceLayout); + + m_pDevice->CreateGraphicsPipelineState(PSOCreateInfo, &Data.pEASU_PSO); + } + + { + PipelineResourceLayoutDescX ResourceLayout; + ResourceLayout + .SetDefaultVariableType(SHADER_RESOURCE_VARIABLE_TYPE_DYNAMIC) + .AddVariable(SHADER_TYPE_PIXEL, "cbFSRAttribs", SHADER_RESOURCE_VARIABLE_TYPE_MUTABLE) + .AddVariable(SHADER_TYPE_PIXEL, "g_TextureSource", SHADER_RESOURCE_VARIABLE_TYPE_MUTABLE); + + GraphicsPipelineStateCreateInfoX PSOCreateInfo{"FSR::RCAS PSO"}; + PSOCreateInfo + 
.AddShader(m_pVS) + .AddShader(m_pRCAS_PS) + .AddRenderTarget(OutputFormat) + .SetRasterizerDesc(RasterizerStateDesc{FILL_MODE_SOLID, CULL_MODE_NONE}) + .SetDepthStencilDesc(DepthStencilStateDesc{False, False}) + .SetResourceLayout(ResourceLayout); + + m_pDevice->CreateGraphicsPipelineState(PSOCreateInfo, &Data.pRCAS_PSO); + } + + return Data; +} + +} // anonymous namespace + + +std::unique_ptr CreateFSRProvider(IRenderDevice* pDevice) +{ + return std::make_unique(pDevice); +} + +} // namespace Diligent diff --git a/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp b/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp index d017aee3fc..52dfe2870d 100644 --- a/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp +++ b/Tests/DiligentCoreAPITest/src/SuperResolutionTest.cpp @@ -395,7 +395,8 @@ TEST(SuperResolution_CInterface, SuperResolution) QueryAttribs.OutputHeight = 1080; QueryAttribs.OptimizationType = SUPER_RESOLUTION_OPTIMIZATION_TYPE_BALANCED; QueryAttribs.OutputFormat = TEX_FORMAT_RGBA16_FLOAT; - QueryAttribs.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + if (Variants[0].Type == SUPER_RESOLUTION_TYPE_TEMPORAL) + QueryAttribs.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; SuperResolutionSourceSettings SourceSettings; pFactory->GetSourceSettings(QueryAttribs, SourceSettings); @@ -409,9 +410,12 @@ TEST(SuperResolution_CInterface, SuperResolution) Desc.InputWidth = SourceSettings.OptimalInputWidth; Desc.InputHeight = SourceSettings.OptimalInputHeight; Desc.ColorFormat = TEX_FORMAT_RGBA16_FLOAT; - Desc.DepthFormat = TEX_FORMAT_R32_FLOAT; - Desc.MotionFormat = TEX_FORMAT_RG16_FLOAT; - Desc.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + if (Variants[0].Type == SUPER_RESOLUTION_TYPE_TEMPORAL) + { + Desc.DepthFormat = TEX_FORMAT_R32_FLOAT; + Desc.MotionFormat = TEX_FORMAT_RG16_FLOAT; + Desc.Flags = SUPER_RESOLUTION_FLAG_AUTO_EXPOSURE; + } RefCntAutoPtr pUpscaler; pFactory->CreateSuperResolution(Desc, &pUpscaler);