initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled

This commit is contained in:
2025-09-16 20:46:46 -04:00
commit 9d30169a8d
13378 changed files with 7050105 additions and 0 deletions

39
drivers/metal/README.md Normal file
View File

@@ -0,0 +1,39 @@
# Metal Rendering Device
This document aims to describe the Metal rendering device implementation in Godot.
## Future work / ideas
* Use placement heaps
* Explicit hazard tracking
* [MetalFX] upscaling support?
## Acknowledgments
The Metal rendering owes a lot to the work of the [MoltenVK] project, which is a Vulkan implementation on top of Metal.
In accordance with the Apache 2.0 license, the following copyright notices have been included where applicable:
```
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
/* */
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
/* (http://www.brenwill.com) */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/**************************************************************************/
```
[MoltenVK]: https://github.com/KhronosGroup/MoltenVK
[MetalFX]: https://developer.apple.com/documentation/metalfx?language=objc

52
drivers/metal/SCsub Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python
from misc.utility.scons_hints import *
Import("env")
env_metal = env.Clone()
# Thirdparty source files
thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
"spirv_cfg.cpp",
"spirv_cross.cpp",
"spirv_parser.cpp",
"spirv_msl.cpp",
"spirv_reflect.cpp",
"spirv_glsl.cpp",
"spirv_cross_parsed_ir.cpp",
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
env_metal.Prepend(CPPEXTPATH=[thirdparty_dir, thirdparty_dir + "/include"])
# Must enable exceptions for SPIRV-Cross; otherwise, it will abort the process on errors.
if "-fno-exceptions" in env_metal["CXXFLAGS"]:
env_metal["CXXFLAGS"].remove("-fno-exceptions")
env_metal.Append(CXXFLAGS=["-fexceptions"])
env_thirdparty = env_metal.Clone()
env_thirdparty.disable_warnings()
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
env_metal.drivers_sources += thirdparty_obj
# Enable C++20 for the Objective-C++ Metal code, which uses C++20 concepts.
if "-std=gnu++17" in env_metal["CXXFLAGS"]:
env_metal["CXXFLAGS"].remove("-std=gnu++17")
env_metal.Append(CXXFLAGS=["-std=gnu++20"])
# Enable module support
env_metal.Append(CCFLAGS=["-fmodules", "-fcxx-modules"])
# Driver source files
driver_obj = []
env_metal.add_source_files(driver_obj, "*.mm")
env.drivers_sources += driver_obj
# Needed to force rebuilding the driver files when the thirdparty library is updated.
env.Depends(driver_obj, thirdparty_obj)

View File

@@ -0,0 +1,122 @@
/**************************************************************************/
/* inflection_map.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#include "core/templates/hash_map.h"
#include "core/templates/local_vector.h"
/// An unordered map that splits elements between a fast-access vector of LinearCount consecutively
/// indexed elements, and a slower-access map holding sparse indexes larger than LinearCount.
///
/// \tparam KeyType is used to lookup values, and must be a type that is convertible to an unsigned integer.
/// \tparam ValueType must have an empty constructor (default or otherwise).
/// \tparam LinearCount
/// \tparam IndexType must be a type that is convertible to an unsigned integer (eg. uint8_t...uint64_t), and which is large enough to represent the number of values in this map.
template <typename KeyType, typename ValueType, size_t LinearCount, typename IndexType = uint16_t>
class InflectionMap {
public:
using value_type = ValueType;
class Iterator {
InflectionMap *map;
IndexType index;
public:
using iterator_category = std::forward_iterator_tag;
using value_type = ValueType;
using pointer = value_type *;
using reference = value_type &;
Iterator() :
map(nullptr), index(0) {}
Iterator(InflectionMap &p_m, const IndexType p_i) :
map(&p_m), index(p_i) {}
Iterator &operator=(const Iterator &p_it) {
map = p_it.map;
index = p_it.index;
return *this;
}
ValueType *operator->() { return &map->_values[index]; }
ValueType &operator*() { return map->_values[index]; }
operator ValueType *() { return &map->_values[index]; }
bool operator==(const Iterator &p_it) const { return map == p_it.map && index == p_it.index; }
bool operator!=(const Iterator &p_it) const { return map != p_it.map || index != p_it.index; }
Iterator &operator++() {
index++;
return *this;
}
Iterator operator++(int) {
Iterator t = *this;
index++;
return t;
}
bool is_valid() const { return index < map->_values.size(); }
};
const ValueType &operator[](const KeyType p_idx) const { return get_value(p_idx); }
ValueType &operator[](const KeyType p_idx) { return get_value(p_idx); }
Iterator begin() { return Iterator(*this, 0); }
Iterator end() { return Iterator(*this, _values.size()); }
bool is_empty() { return _values.is_empty(); }
size_t size() { return _values.size(); }
void reserve(size_t p_new_cap) { _values.reserve(p_new_cap); }
protected:
static constexpr IndexType INVALID = std::numeric_limits<IndexType>::max();
typedef struct IndexValue {
IndexType value = INVALID;
} IndexValue;
// Returns a reference to the value at the index.
// If the index has not been initialized, add an empty element at
// the end of the values array, and set the index to its position.
ValueType &get_value(KeyType p_idx) {
IndexValue *val_idx = p_idx < LinearCount ? &_linear_indexes[p_idx] : _inflection_indexes.getptr(p_idx);
if (val_idx == nullptr || val_idx->value == INVALID) {
_values.push_back({});
if (val_idx == nullptr) {
val_idx = &_inflection_indexes.insert(p_idx, {})->value;
}
val_idx->value = _values.size() - 1;
}
return _values[val_idx->value];
}
TightLocalVector<ValueType> _values;
HashMap<KeyType, IndexValue> _inflection_indexes;
IndexValue _linear_indexes[LinearCount];
};

View File

@@ -0,0 +1,162 @@
/**************************************************************************/
/* metal_device_properties.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
/* */
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
/* (http://www.brenwill.com) */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/**************************************************************************/
#import "servers/rendering/rendering_device.h"
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
/** The buffer index to use for vertex content. */
const static uint32_t VERT_CONTENT_BUFFER_INDEX = 0;
const static uint32_t MAX_COLOR_ATTACHMENT_COUNT = 8;
typedef NS_OPTIONS(NSUInteger, SampleCount) {
SampleCount1 = (1UL << 0),
SampleCount2 = (1UL << 1),
SampleCount4 = (1UL << 2),
SampleCount8 = (1UL << 3),
SampleCount16 = (1UL << 4),
SampleCount32 = (1UL << 5),
SampleCount64 = (1UL << 6),
};
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
uint32_t mslVersionMajor = 0;
uint32_t mslVersionMinor = 0;
MTLGPUFamily highestFamily = MTLGPUFamilyApple4;
bool supportsBCTextureCompression = false;
bool supportsDepth24Stencil8 = false;
bool supports32BitFloatFiltering = false;
bool supports32BitMSAA = false;
bool supportsMac = TARGET_OS_OSX;
MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2;
SampleCount supportedSampleCounts = SampleCount1;
long hostMemoryPageSize = 0;
bool layeredRendering = false;
bool multisampleLayeredRendering = false;
bool quadPermute = false; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdPermute = false; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdReduction = false; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
bool tessellationShader = false; /**< If true, tessellation shaders are supported. */
bool imageCubeArray = false; /**< If true, image cube arrays are supported. */
MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1;
/// If true, argument encoders are required to encode arguments into an argument buffer.
bool needs_arg_encoders = true;
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported by the GPU. */
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported by the GPU. */
bool supports_native_image_atomics = false; /**< If true, native image atomic operations are supported by the OS. */
};
struct MetalLimits {
uint64_t maxImageArrayLayers;
uint64_t maxFramebufferHeight;
uint64_t maxFramebufferWidth;
uint64_t maxImageDimension1D;
uint64_t maxImageDimension2D;
uint64_t maxImageDimension3D;
uint64_t maxImageDimensionCube;
uint64_t maxViewportDimensionX;
uint64_t maxViewportDimensionY;
MTLSize maxThreadsPerThreadGroup;
MTLSize maxComputeWorkGroupCount;
uint64_t maxBoundDescriptorSets;
uint64_t maxColorAttachments;
uint64_t maxTexturesPerArgumentBuffer;
uint64_t maxSamplersPerArgumentBuffer;
uint64_t maxBuffersPerArgumentBuffer;
uint64_t maxBufferLength;
uint64_t minUniformBufferOffsetAlignment;
uint64_t maxVertexDescriptorLayoutStride;
uint16_t maxViewports;
uint32_t maxPerStageBufferCount; /**< The total number of per-stage Metal buffers available for shader uniform content and attributes. */
uint32_t maxPerStageTextureCount; /**< The total number of per-stage Metal textures available for shader uniform content. */
uint32_t maxPerStageSamplerCount; /**< The total number of per-stage Metal samplers available for shader uniform content. */
uint32_t maxVertexInputAttributes;
uint32_t maxVertexInputBindings;
uint32_t maxVertexInputBindingStride;
uint32_t maxDrawIndexedIndexValue;
uint32_t maxShaderVaryings;
uint32_t maxThreadGroupMemoryAllocation;
double temporalScalerInputContentMinScale;
double temporalScalerInputContentMaxScale;
uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */
uint32_t maxSubgroupSize; /**< The maximum number of threads in a SIMD-group. */
BitField<RDD::ShaderStage> subgroupSupportedShaderStages;
BitField<RD::SubgroupOperations> subgroupSupportedOperations; /**< The subgroup operations supported by the device. */
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalDeviceProperties {
private:
void init_features(id<MTLDevice> p_device);
void init_limits(id<MTLDevice> p_device);
void init_os_props();
public:
MetalFeatures features;
MetalLimits limits;
// maj * 10000 + min * 100 + patch
uint32_t os_version;
SampleCount find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const;
MetalDeviceProperties(id<MTLDevice> p_device);
~MetalDeviceProperties();
private:
static const SampleCount sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX];
};

View File

@@ -0,0 +1,358 @@
/**************************************************************************/
/* metal_device_properties.mm */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
/* */
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
/* (http://www.brenwill.com) */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/**************************************************************************/
#import "metal_device_properties.h"
#include "servers/rendering/renderer_rd/effects/metal_fx.h"
#import <Metal/Metal.h>
#import <MetalFX/MetalFX.h>
#import <spirv_cross.hpp>
#import <spirv_msl.hpp>
// Common scaling multipliers.
#define KIBI (1024)
#define MEBI (KIBI * KIBI)
#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
#define MTLGPUFamilyApple9 (MTLGPUFamily)1009
#endif
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(1.0))
MTLGPUFamily &operator--(MTLGPUFamily &p_family) {
p_family = static_cast<MTLGPUFamily>(static_cast<int>(p_family) - 1);
if (p_family < MTLGPUFamilyApple1) {
p_family = MTLGPUFamilyApple9;
}
return p_family;
}
void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features = {};
features.highestFamily = MTLGPUFamilyApple1;
for (MTLGPUFamily family = MTLGPUFamilyApple9; family >= MTLGPUFamilyApple1; --family) {
if ([p_device supportsFamily:family]) {
features.highestFamily = family;
break;
}
}
if (@available(macOS 11, iOS 16.4, tvOS 16.4, *)) {
features.supportsBCTextureCompression = p_device.supportsBCTextureCompression;
} else {
features.supportsBCTextureCompression = false;
}
#if TARGET_OS_OSX
features.supportsDepth24Stencil8 = p_device.isDepth24Stencil8PixelFormatSupported;
#endif
if (@available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) {
features.supports32BitFloatFiltering = p_device.supports32BitFloatFiltering;
features.supports32BitMSAA = p_device.supports32BitMSAA;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.supports_gpu_address = true;
}
features.hostMemoryPageSize = sysconf(_SC_PAGESIZE);
for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) {
if ([p_device supportsTextureSampleCount:sc]) {
features.supportedSampleCounts |= sc;
}
}
features.layeredRendering = [p_device supportsFamily:MTLGPUFamilyApple5];
features.multisampleLayeredRendering = [p_device supportsFamily:MTLGPUFamilyApple7];
features.tessellationShader = [p_device supportsFamily:MTLGPUFamilyApple3];
features.imageCubeArray = [p_device supportsFamily:MTLGPUFamilyApple3];
features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple9] || ([p_device supportsFamily:MTLGPUFamilyApple8] && [p_device supportsFamily:MTLGPUFamilyMac2]);
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) {
features.supports_native_image_atomics = true;
}
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
features.supports_native_image_atomics = false;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.metal_fx_spatial = [MTLFXSpatialScalerDescriptor supportsDevice:p_device];
#ifdef METAL_MFXTEMPORAL_ENABLED
features.metal_fx_temporal = [MTLFXTemporalScalerDescriptor supportsDevice:p_device];
#else
features.metal_fx_temporal = false;
#endif
}
MTLCompileOptions *opts = [MTLCompileOptions new];
features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff;
features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff;
}
void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
using std::max;
using std::min;
// FST: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
// FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D texture.
limits.maxImageArrayLayers = 2048;
if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
// FST: Maximum 2D texture width and height.
limits.maxFramebufferWidth = 16384;
limits.maxFramebufferHeight = 16384;
limits.maxViewportDimensionX = 16384;
limits.maxViewportDimensionY = 16384;
// FST: Maximum 1D texture width.
limits.maxImageDimension1D = 16384;
// FST: Maximum 2D texture width and height.
limits.maxImageDimension2D = 16384;
// FST: Maximum cube map texture width and height.
limits.maxImageDimensionCube = 16384;
} else {
// FST: Maximum 2D texture width and height.
limits.maxFramebufferWidth = 8192;
limits.maxFramebufferHeight = 8192;
limits.maxViewportDimensionX = 8192;
limits.maxViewportDimensionY = 8192;
// FST: Maximum 1D texture width.
limits.maxImageDimension1D = 8192;
// FST: Maximum 2D texture width and height.
limits.maxImageDimension2D = 8192;
// FST: Maximum cube map texture width and height.
limits.maxImageDimensionCube = 8192;
}
// FST: Maximum 3D texture width, height, and depth.
limits.maxImageDimension3D = 2048;
limits.maxThreadsPerThreadGroup = p_device.maxThreadsPerThreadgroup;
// No effective limits.
limits.maxComputeWorkGroupCount = { std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max() };
// https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85
limits.maxBoundDescriptorSets = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
// FST: Maximum number of color render targets per render pass descriptor.
limits.maxColorAttachments = 8;
// Maximum number of textures the device can access, per stage, from an argument buffer.
if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
limits.maxTexturesPerArgumentBuffer = 1'000'000;
} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxTexturesPerArgumentBuffer = 96;
} else {
limits.maxTexturesPerArgumentBuffer = 31;
}
// Maximum number of samplers the device can access, per stage, from an argument buffer.
if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
limits.maxSamplersPerArgumentBuffer = 1024;
} else {
limits.maxSamplersPerArgumentBuffer = 16;
}
// Maximum number of buffers the device can access, per stage, from an argument buffer.
if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
limits.maxBuffersPerArgumentBuffer = std::numeric_limits<uint64_t>::max();
} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxBuffersPerArgumentBuffer = 96;
} else {
limits.maxBuffersPerArgumentBuffer = 31;
}
limits.minSubgroupSize = limits.maxSubgroupSize = 1;
// These values were taken from MoltenVK.
if (features.simdPermute) {
limits.minSubgroupSize = 4;
limits.maxSubgroupSize = 32;
} else if (features.quadPermute) {
limits.minSubgroupSize = limits.maxSubgroupSize = 4;
}
limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_COMPUTE_BIT);
if (features.tessellationShader) {
limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT);
}
limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_FRAGMENT_BIT);
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BASIC_BIT);
if (features.simdPermute || features.quadPermute) {
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_VOTE_BIT);
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BALLOT_BIT);
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_BIT);
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT);
}
if (features.simdReduction) {
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT);
}
if (features.quadPermute) {
limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT);
}
limits.maxBufferLength = p_device.maxBufferLength;
// FST: Maximum size of vertex descriptor layout stride.
limits.maxVertexDescriptorLayoutStride = std::numeric_limits<uint64_t>::max();
// Maximum number of viewports.
if ([p_device supportsFamily:MTLGPUFamilyApple5]) {
limits.maxViewports = 16;
} else {
limits.maxViewports = 1;
}
limits.maxPerStageBufferCount = 31;
limits.maxPerStageSamplerCount = 16;
if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
limits.maxPerStageTextureCount = 128;
} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxPerStageTextureCount = 96;
} else {
limits.maxPerStageTextureCount = 31;
}
limits.maxVertexInputAttributes = 31;
limits.maxVertexInputBindings = 31;
limits.maxVertexInputBindingStride = (2 * KIBI);
limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxThreadGroupMemoryAllocation = 32768;
} else if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
limits.maxThreadGroupMemoryAllocation = 16384;
} else {
limits.maxThreadGroupMemoryAllocation = 16352;
}
#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
limits.minUniformBufferOffsetAlignment = 64;
#endif
#if TARGET_OS_OSX
// This is Apple Silicon specific.
limits.minUniformBufferOffsetAlignment = 16;
#endif
limits.maxDrawIndexedIndexValue = std::numeric_limits<uint32_t>::max() - 1;
#ifdef METAL_MFXTEMPORAL_ENABLED
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
limits.temporalScalerInputContentMinScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMinScaleForDevice:p_device];
limits.temporalScalerInputContentMaxScale = (double)[MTLFXTemporalScalerDescriptor supportedInputContentMaxScaleForDevice:p_device];
} else {
// Defaults taken from macOS 14+
limits.temporalScalerInputContentMinScale = 1.0;
limits.temporalScalerInputContentMaxScale = 3.0;
}
#else
// Defaults taken from macOS 14+
limits.temporalScalerInputContentMinScale = 1.0;
limits.temporalScalerInputContentMaxScale = 3.0;
#endif
}
void MetalDeviceProperties::init_os_props() {
NSOperatingSystemVersion ver = NSProcessInfo.processInfo.operatingSystemVersion;
os_version = (uint32_t)ver.majorVersion * 10000 + (uint32_t)ver.minorVersion * 100 + (uint32_t)ver.patchVersion;
}
MetalDeviceProperties::MetalDeviceProperties(id<MTLDevice> p_device) {
init_features(p_device);
init_limits(p_device);
init_os_props();
}
MetalDeviceProperties::~MetalDeviceProperties() {
}
SampleCount MetalDeviceProperties::find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const {
SampleCount supported = features.supportedSampleCounts;
if (supported & sample_count[p_samples]) {
return sample_count[p_samples];
}
SampleCount requested_sample_count = sample_count[p_samples];
// Find the nearest supported sample count.
while (requested_sample_count > SampleCount1) {
if (supported & requested_sample_count) {
return requested_sample_count;
}
requested_sample_count = (SampleCount)(requested_sample_count >> 1);
}
return SampleCount1;
}
// region static members
const SampleCount MetalDeviceProperties::sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX] = {
SampleCount1,
SampleCount2,
SampleCount4,
SampleCount8,
SampleCount16,
SampleCount32,
SampleCount64,
};
// endregion

View File

@@ -0,0 +1,983 @@
/**************************************************************************/
/* metal_objects.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
/* */
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
/* (http://www.brenwill.com) */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/**************************************************************************/
#import "metal_device_properties.h"
#import "metal_utils.h"
#import "pixel_formats.h"
#import "sha256_digest.h"
#include "servers/rendering/rendering_device_driver.h"
#import <CommonCrypto/CommonDigest.h>
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import <QuartzCore/CAMetalLayer.h>
#import <simd/simd.h>
#import <zlib.h>
#import <initializer_list>
#import <optional>
// These types can be used in Vector and other containers that use
// pointer operations not supported by ARC.
namespace MTL {
#define MTL_CLASS(name) \
class name { \
public: \
name(id<MTL##name> obj = nil) : m_obj(obj) {} \
operator id<MTL##name>() const { \
return m_obj; \
} \
id<MTL##name> m_obj; \
};
MTL_CLASS(Texture)
} //namespace MTL
enum ShaderStageUsage : uint32_t {
None = 0,
Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
};
_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));
return p_a;
}
enum StageResourceUsage : uint32_t {
VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2),
VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2),
FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2),
FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2),
TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2),
ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2),
};
typedef LocalVector<__unsafe_unretained id<MTLResource>> ResourceVector;
typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap;
enum class MDCommandBufferStateType {
None,
Render,
Compute,
Blit,
};
enum class MDPipelineType {
None,
Render,
Compute,
};
class MDRenderPass;
class MDPipeline;
class MDRenderPipeline;
class MDComputePipeline;
class MDFrameBuffer;
class RenderingDeviceDriverMetal;
class MDUniformSet;
class MDShader;
#pragma mark - Resource Factory
struct ClearAttKey {
const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
const static uint32_t DEPTH_INDEX = COLOR_COUNT;
const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
enum Flags : uint16_t {
CLEAR_FLAGS_NONE = 0,
CLEAR_FLAGS_LAYERED = 1 << 0,
};
Flags flags = CLEAR_FLAGS_NONE;
uint16_t sample_count = 0;
uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
_FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }
_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
_FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
_FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
_FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
}
uint32_t hash() const {
uint32_t h = hash_murmur3_one_32(flags);
h = hash_murmur3_one_32(sample_count, h);
h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
return hash_fmix32(h);
}
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceFactory {
private:
RenderingDeviceDriverMetal *device_driver;
id<MTLFunction> new_func(NSString *p_source, NSString *p_name, NSError **p_error);
id<MTLFunction> new_clear_vert_func(ClearAttKey &p_key);
id<MTLFunction> new_clear_frag_func(ClearAttKey &p_key);
NSString *get_format_type_string(MTLPixelFormat p_fmt);
public:
id<MTLRenderPipelineState> new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error);
id<MTLDepthStencilState> new_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
MDResourceFactory(RenderingDeviceDriverMetal *p_device_driver) :
device_driver(p_device_driver) {}
~MDResourceFactory() = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceCache {
private:
typedef HashMap<ClearAttKey, id<MTLRenderPipelineState>, HashableHasher<ClearAttKey>> HashMap;
std::unique_ptr<MDResourceFactory> resource_factory;
HashMap clear_states;
struct {
id<MTLDepthStencilState> all;
id<MTLDepthStencilState> depth_only;
id<MTLDepthStencilState> stencil_only;
id<MTLDepthStencilState> none;
} clear_depth_stencil_state;
public:
id<MTLRenderPipelineState> get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error);
id<MTLDepthStencilState> get_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
explicit MDResourceCache(RenderingDeviceDriverMetal *p_device_driver) :
resource_factory(new MDResourceFactory(p_device_driver)) {}
~MDResourceCache() = default;
};
enum class MDAttachmentType : uint8_t {
None = 0,
Color = 1 << 0,
Depth = 1 << 1,
Stencil = 1 << 2,
};
_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {
flags::set(p_a, p_b);
return p_a;
}
_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {
return uint8_t(p_a) & uint8_t(p_b);
}
struct MDSubpass {
uint32_t subpass_index = 0;
uint32_t view_count = 0;
LocalVector<RDD::AttachmentReference> input_references;
LocalVector<RDD::AttachmentReference> color_references;
RDD::AttachmentReference depth_stencil_reference;
LocalVector<RDD::AttachmentReference> resolve_references;
MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;
};
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDAttachment {
private:
uint32_t index = 0;
uint32_t firstUseSubpassIndex = 0;
uint32_t lastUseSubpassIndex = 0;
public:
MTLPixelFormat format = MTLPixelFormatInvalid;
MDAttachmentType type = MDAttachmentType::None;
MTLLoadAction loadAction = MTLLoadActionDontCare;
MTLStoreAction storeAction = MTLStoreActionDontCare;
MTLLoadAction stencilLoadAction = MTLLoadActionDontCare;
MTLStoreAction stencilStoreAction = MTLStoreActionDontCare;
uint32_t samples = 1;
/*!
* @brief Returns true if this attachment is first used in the given subpass.
* @param p_subpass
* @return
*/
_FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == firstUseSubpassIndex;
}
/*!
* @brief Returns true if this attachment is last used in the given subpass.
* @param p_subpass
* @return
*/
_FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == lastUseSubpassIndex;
}
void linkToSubpass(MDRenderPass const &p_pass);
MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
PixelFormats &p_pf,
MDSubpass const &p_subpass,
id<MTLTexture> p_attachment,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
/** Returns whether this attachment should be cleared in the subpass. */
bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPass {
public:
Vector<MDAttachment> attachments;
Vector<MDSubpass> subpasses;
uint32_t get_sample_count() const {
return attachments.is_empty() ? 1 : attachments[0].samples;
}
MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
#pragma mark - Common State
// From RenderingDevice
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
bool state_begin = false;
_FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
DEV_ASSERT(state_begin);
if (commandBuffer == nil) {
commandBuffer = queue.commandBuffer;
}
return commandBuffer;
}
void _end_compute_dispatch();
void _end_blit();
#pragma mark - Render
void _render_set_dirty_state();
void _render_bind_uniform_sets();
void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
void _end_render_pass();
void _render_clear_render_area();
#pragma mark - Compute
void _compute_set_dirty_state();
void _compute_bind_uniform_sets();
public:
MDCommandBufferStateType type = MDCommandBufferStateType::None;
struct RenderState {
MDRenderPass *pass = nullptr;
MDFrameBuffer *frameBuffer = nullptr;
MDRenderPipeline *pipeline = nullptr;
LocalVector<RDD::RenderPassClearValue> clear_values;
LocalVector<MTLViewport> viewports;
LocalVector<MTLScissorRect> scissors;
std::optional<Color> blend_constants;
uint32_t current_subpass = UINT32_MAX;
Rect2i render_area = {};
bool is_rendering_entire_area = false;
MTLRenderPassDescriptor *desc = nil;
id<MTLRenderCommandEncoder> encoder = nil;
id<MTLBuffer> __unsafe_unretained index_buffer = nil; // Buffer is owned by RDD.
MTLIndexType index_type = MTLIndexTypeUInt16;
uint32_t index_offset = 0;
LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers;
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_DEPTH = 1 << 3, //! depth / stencil state
DIRTY_VERTEX = 1 << 4, //! vertex buffers
DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
DIRTY_BLEND = 1 << 7, //! blend state
DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
DIRTY_ALL = (1 << 9) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[2] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
_ALWAYS_INLINE_ const MDSubpass &get_subpass() const {
DEV_ASSERT(pass != nullptr);
return pass->subpasses[current_subpass];
}
_FORCE_INLINE_ void mark_viewport_dirty() {
if (viewports.is_empty()) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT);
}
_FORCE_INLINE_ void mark_scissors_dirty() {
if (scissors.is_empty()) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_SCISSOR);
}
_FORCE_INLINE_ void mark_vertex_dirty() {
if (vertex_buffers.is_empty()) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_VERTEX);
}
_FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list<uint32_t> l) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i : l) {
if (i < uniform_sets.size() && uniform_sets[i] != nullptr) {
uniform_set_mask |= 1 << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_uniforms_dirty(void) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
if (uniform_sets[i] != nullptr) {
uniform_set_mask |= 1 << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_BLEND);
}
MTLScissorRect clip_to_render_area(MTLScissorRect p_rect) const {
uint32_t raLeft = render_area.position.x;
uint32_t raRight = raLeft + render_area.size.width;
uint32_t raBottom = render_area.position.y;
uint32_t raTop = raBottom + render_area.size.height;
p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft));
p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom));
p_rect.width = MIN(p_rect.width, raRight - p_rect.x);
p_rect.height = MIN(p_rect.height, raTop - p_rect.y);
return p_rect;
}
Rect2i clip_to_render_area(Rect2i p_rect) const {
int32_t raLeft = render_area.position.x;
int32_t raRight = raLeft + render_area.size.width;
int32_t raBottom = render_area.position.y;
int32_t raTop = raBottom + render_area.size.height;
p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft));
p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom));
p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x);
p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y);
return p_rect;
}
} render;
// State specific for a compute pass.
struct ComputeState {
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_ALL = (1 << 3) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[1] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
_FORCE_INLINE_ void mark_uniforms_dirty(void) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
if (uniform_sets[i] != nullptr) {
uniform_set_mask |= 1 << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
} compute;
// State specific to a blit pass.
struct {
id<MTLBlitCommandEncoder> encoder = nil;
_FORCE_INLINE_ void reset() {
encoder = nil;
}
} blit;
_FORCE_INLINE_ id<MTLCommandBuffer> get_command_buffer() const {
return commandBuffer;
}
void begin();
void commit();
void end();
id<MTLBlitCommandEncoder> blit_command_encoder();
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
void bind_pipeline(RDD::PipelineID p_pipeline);
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
#pragma mark - Render Commands
void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
void render_set_viewport(VectorView<Rect2i> p_viewports);
void render_set_scissor(VectorView<Rect2i> p_scissors);
void render_set_blend_constants(const Color &p_constants);
void render_begin_pass(RDD::RenderPassID p_render_pass,
RDD::FramebufferID p_frameBuffer,
RDD::CommandBufferType p_cmd_buffer_type,
const Rect2i &p_rect,
VectorView<RDD::RenderPassClearValue> p_clear_values);
void render_next_subpass();
void render_draw(uint32_t p_vertex_count,
uint32_t p_instance_count,
uint32_t p_base_vertex,
uint32_t p_first_instance);
void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
void render_draw_indexed(uint32_t p_index_count,
uint32_t p_instance_count,
uint32_t p_first_index,
int32_t p_vertex_offset,
uint32_t p_first_instance);
void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
void render_end_pass();
#pragma mark - Compute Commands
void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
MDCommandBuffer(id<MTLCommandQueue> p_queue, RenderingDeviceDriverMetal *p_device_driver) :
device_driver(p_device_driver), queue(p_queue) {
type = MDCommandBufferStateType::None;
}
MDCommandBuffer() = default;
};
#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
#define MTLBindingAccess MTLArgumentAccess
#define MTLBindingAccessReadOnly MTLArgumentAccessReadOnly
#define MTLBindingAccessReadWrite MTLArgumentAccessReadWrite
#define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly
#endif
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo {
MTLDataType dataType = MTLDataTypeNone;
uint32_t index = 0;
MTLBindingAccess access = MTLBindingAccessReadOnly;
MTLResourceUsage usage = 0;
MTLTextureType textureType = MTLTextureType2D;
int imageFormat = 0;
uint32_t arrayLength = 0;
bool isMultisampled = false;
inline MTLArgumentDescriptor *new_argument_descriptor() const {
MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor;
desc.dataType = dataType;
desc.index = index;
desc.access = access;
desc.textureType = textureType;
desc.arrayLength = arrayLength;
return desc;
}
};
using RDC = RenderingDeviceCommons;
typedef API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) HashMap<RDC::ShaderStage, BindingInfo> BindingInfoMap;
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo {
uint32_t binding;
ShaderStageUsage active_stages = None;
BindingInfoMap bindings;
BindingInfoMap bindings_secondary;
};
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet {
LocalVector<UniformInfo> uniforms;
uint32_t buffer_size = 0;
HashMap<RDC::ShaderStage, uint32_t> offsets;
HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
};
struct ShaderCacheEntry;
enum class ShaderLoadStrategy {
IMMEDIATE,
LAZY,
/// The default strategy is to load the shader immediately.
DEFAULT = IMMEDIATE,
};
/// A Metal shader library.
@interface MDLibrary : NSObject {
ShaderCacheEntry *_entry;
NSString *_original_source;
};
- (id<MTLLibrary>)library;
- (NSError *)error;
- (void)setLabel:(NSString *)label;
#ifdef DEV_ENABLED
- (NSString *)originalSource;
#endif
+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
device:(id<MTLDevice>)device
source:(NSString *)source
options:(MTLCompileOptions *)options
strategy:(ShaderLoadStrategy)strategy;
+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
device:(id<MTLDevice>)device
#ifdef DEV_ENABLED
source:(NSString *)source
#endif
data:(dispatch_data_t)data;
@end
template <>
struct HashMapComparatorDefault<SHA256Digest> {
static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0;
}
};
/// A cache entry for a Metal shader library.
struct ShaderCacheEntry {
RenderingDeviceDriverMetal &owner;
/// A hash of the Metal shader source code.
SHA256Digest key;
CharString name;
RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX;
/// This reference must be weak, to ensure that when the last strong reference to the library
/// is released, the cache entry is freed.
MDLibrary *__weak library = nil;
/// Notify the cache that this entry is no longer needed.
void notify_free() const;
ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) :
owner(p_owner), key(p_key) {
}
~ShaderCacheEntry() = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
public:
CharString name;
Vector<UniformSet> sets;
bool uses_argument_buffers = true;
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};
MDLibrary *kernel;
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : public MDShader {
public:
struct {
struct {
int32_t binding = -1;
uint32_t size = 0;
} vert;
struct {
int32_t binding = -1;
uint32_t size = 0;
} frag;
} push_constants;
bool needs_view_mask_buffer = false;
MDLibrary *vert;
MDLibrary *frag;
MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
bool p_uses_argument_buffers,
MDLibrary *p_vert, MDLibrary *p_frag);
};
_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) {
p_a = StageResourceUsage(uint32_t(p_a) | p_b);
return p_a;
}
_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTLResourceUsage p_usage) {
return StageResourceUsage(p_usage << (p_stage * 2));
}
_FORCE_INLINE_ MTLResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) {
return MTLResourceUsage((p_usage >> (p_stage * 2)) & 0b11);
}
template <>
struct HashMapComparatorDefault<RDD::ShaderID> {
static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) {
return p_lhs.id == p_rhs.id;
}
};
struct BoundUniformSet {
id<MTLBuffer> buffer;
ResourceUsageMap usage_to_resources;
/// Perform a 2-way merge each key of `ResourceVector` resources from this set into the
/// destination set.
///
/// Assumes the vectors of resources are sorted.
void merge_into(ResourceUsageMap &p_dst) const;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet {
private:
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
public:
uint32_t index;
LocalVector<RDD::BoundUniform> uniforms;
HashMap<MDShader *, BoundUniformSet> bound_uniforms;
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index);
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index);
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline {
public:
MDPipelineType type;
explicit MDPipeline(MDPipelineType p_type) :
type(p_type) {}
virtual ~MDPipeline() = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPipeline final : public MDPipeline {
public:
id<MTLRenderPipelineState> state = nil;
id<MTLDepthStencilState> depth_stencil = nil;
uint32_t push_constant_size = 0;
uint32_t push_constant_stages_mask = 0;
SampleCount sample_count = SampleCount1;
struct {
MTLCullMode cull_mode = MTLCullModeNone;
MTLTriangleFillMode fill_mode = MTLTriangleFillModeFill;
MTLDepthClipMode clip_mode = MTLDepthClipModeClip;
MTLWinding winding = MTLWindingClockwise;
MTLPrimitiveType render_primitive = MTLPrimitiveTypePoint;
struct {
bool enabled = false;
} depth_test;
struct {
bool enabled = false;
float depth_bias = 0.0;
float slope_scale = 0.0;
float clamp = 0.0;
_FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
if (!enabled) {
return;
}
[p_enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp];
}
} depth_bias;
struct {
bool enabled = false;
uint32_t front_reference = 0;
uint32_t back_reference = 0;
_FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
if (!enabled) {
return;
}
[p_enc setStencilFrontReferenceValue:front_reference backReferenceValue:back_reference];
}
} stencil;
struct {
bool enabled = false;
float r = 0.0;
float g = 0.0;
float b = 0.0;
float a = 0.0;
_FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
//if (!enabled)
// return;
[p_enc setBlendColorRed:r green:g blue:b alpha:a];
}
} blend;
_FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
[p_enc setCullMode:cull_mode];
[p_enc setTriangleFillMode:fill_mode];
[p_enc setDepthClipMode:clip_mode];
[p_enc setFrontFacingWinding:winding];
depth_bias.apply(p_enc);
stencil.apply(p_enc);
blend.apply(p_enc);
}
} raster_state;
MDRenderShader *shader = nil;
MDRenderPipeline() :
MDPipeline(MDPipelineType::Render) {}
~MDRenderPipeline() final = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputePipeline final : public MDPipeline {
public:
id<MTLComputePipelineState> state = nil;
struct {
MTLSize local = {};
} compute_state;
MDComputeShader *shader = nil;
explicit MDComputePipeline(id<MTLComputePipelineState> p_state) :
MDPipeline(MDPipelineType::Compute), state(p_state) {}
~MDComputePipeline() final = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer {
Vector<MTL::Texture> textures;
public:
Size2i size;
MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
textures(p_textures), size(p_size) {}
MDFrameBuffer() {}
/// Returns the texture at the given index.
_ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const {
return textures[p_idx];
}
/// Returns true if the texture at the given index is not nil.
_ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {
return textures[p_idx] != nil;
}
/// Set the texture at the given index.
_ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) {
textures.write[p_idx] = p_texture;
}
/// Unset or nil the texture at the given index.
_ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {
textures.write[p_idx] = nil;
}
/// Resizes buffers to the specified size.
_ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {
textures.resize(p_size);
}
virtual ~MDFrameBuffer() = default;
};
// These functions are used to convert between Objective-C objects and
// the RIDs used by Godot, respecting automatic reference counting.
namespace rid {
// Converts an Objective-C object to a pointer, and incrementing the
// reference count.
_FORCE_INLINE_ void *owned(id p_id) {
return (__bridge_retained void *)p_id;
}
#define MAKE_ID(FROM, TO) \
_FORCE_INLINE_ TO make(FROM p_obj) { \
return TO(owned(p_obj)); \
}
MAKE_ID(id<MTLTexture>, RDD::TextureID)
MAKE_ID(id<MTLBuffer>, RDD::BufferID)
MAKE_ID(id<MTLSamplerState>, RDD::SamplerID)
MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID)
MAKE_ID(id<MTLCommandQueue>, RDD::CommandPoolID)
// Converts a pointer to an Objective-C object without changing the reference count.
_FORCE_INLINE_ auto get(RDD::ID p_id) {
return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil;
}
// Converts a pointer to an Objective-C object, and decrements the reference count.
_FORCE_INLINE_ auto release(RDD::ID p_id) {
return (__bridge_transfer ::id)(void *)p_id.id;
}
} // namespace rid

File diff suppressed because it is too large Load Diff

113
drivers/metal/metal_utils.h Normal file
View File

@@ -0,0 +1,113 @@
/**************************************************************************/
/* metal_utils.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import <os/log.h>
#import <functional>
#pragma mark - Boolean flags
namespace flags {
/*! Sets the flags within the value parameter specified by the mask parameter. */
template <typename Tv, typename Tm>
void set(Tv &p_value, Tm p_mask) {
using T = std::underlying_type_t<Tv>;
p_value = static_cast<Tv>(static_cast<T>(p_value) | static_cast<T>(p_mask));
}
/*! Clears the flags within the value parameter specified by the mask parameter. */
template <typename Tv, typename Tm>
void clear(Tv &p_value, Tm p_mask) {
using T = std::underlying_type_t<Tv>;
p_value = static_cast<Tv>(static_cast<T>(p_value) & ~static_cast<T>(p_mask));
}
/*! Returns whether the specified value has any of the bits specified in mask set to 1. */
template <typename Tv, typename Tm>
static constexpr bool any(Tv p_value, const Tm p_mask) {
return ((p_value & p_mask) != 0);
}
/*! Returns whether the specified value has all of the bits specified in mask set to 1. */
template <typename Tv, typename Tm>
static constexpr bool all(Tv p_value, const Tm p_mask) {
return ((p_value & p_mask) == p_mask);
}
} //namespace flags
#pragma mark - Alignment and Offsets
static constexpr bool is_power_of_two(uint64_t p_value) {
return p_value && ((p_value & (p_value - 1)) == 0);
}
static constexpr uint64_t round_up_to_alignment(uint64_t p_value, uint64_t p_alignment) {
DEV_ASSERT(is_power_of_two(p_alignment));
if (p_alignment == 0) {
return p_value;
}
uint64_t mask = p_alignment - 1;
uint64_t aligned_value = (p_value + mask) & ~mask;
return aligned_value;
}
class Defer {
public:
Defer(std::function<void()> func) :
func_(func) {}
~Defer() { func_(); }
private:
std::function<void()> func_;
};
#define CONCAT_INTERNAL(x, y) x##y
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)
#define DEFER const Defer &CONCAT(defer__, __LINE__) = Defer
extern os_log_t LOG_DRIVER;
// Used for dynamic tracing.
extern os_log_t LOG_INTERVALS;
_FORCE_INLINE_ static uint32_t make_msl_version(uint32_t p_major, uint32_t p_minor = 0, uint32_t p_patch = 0) {
return (p_major * 10000) + (p_minor * 100) + p_patch;
}
_FORCE_INLINE_ static void parse_msl_version(uint32_t p_version, uint32_t &r_major, uint32_t &r_minor) {
r_major = p_version / 10000;
r_minor = (p_version % 10000) / 100;
}

View File

@@ -0,0 +1,411 @@
/**************************************************************************/
/* pixel_formats.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
/**************************************************************************/
/* */
/* Portions of this code were derived from MoltenVK. */
/* */
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
/* (http://www.brenwill.com) */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/**************************************************************************/
#include "core/typedefs.h"
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
#import "inflection_map.h"
#import "metal_device_properties.h"
#include "servers/rendering/rendering_device.h"
#import <Metal/Metal.h>
#pragma mark -
#pragma mark Metal format capabilities
typedef enum : uint16_t {
kMTLFmtCapsNone = 0,
/*! The format can be used in a shader read operation. */
kMTLFmtCapsRead = (1 << 0),
/*! The format can be used in a shader filter operation during sampling. */
kMTLFmtCapsFilter = (1 << 1),
/*! The format can be used in a shader write operation. */
kMTLFmtCapsWrite = (1 << 2),
/*! The format can be used with atomic operations. */
kMTLFmtCapsAtomic = (1 << 3),
/*! The format can be used as a color attachment. */
kMTLFmtCapsColorAtt = (1 << 4),
/*! The format can be used as a depth-stencil attachment. */
kMTLFmtCapsDSAtt = (1 << 5),
/*! The format can be used with blend operations. */
kMTLFmtCapsBlend = (1 << 6),
/*! The format can be used as a destination for multisample antialias (MSAA) data. */
kMTLFmtCapsMSAA = (1 << 7),
/*! The format can be used as a resolve attachment. */
kMTLFmtCapsResolve = (1 << 8),
kMTLFmtCapsVertex = (1 << 9),
kMTLFmtCapsRF = (kMTLFmtCapsRead | kMTLFmtCapsFilter),
kMTLFmtCapsRC = (kMTLFmtCapsRead | kMTLFmtCapsColorAtt),
kMTLFmtCapsRCB = (kMTLFmtCapsRC | kMTLFmtCapsBlend),
kMTLFmtCapsRCM = (kMTLFmtCapsRC | kMTLFmtCapsMSAA),
kMTLFmtCapsRCMB = (kMTLFmtCapsRCM | kMTLFmtCapsBlend),
kMTLFmtCapsRWC = (kMTLFmtCapsRC | kMTLFmtCapsWrite),
kMTLFmtCapsRWCB = (kMTLFmtCapsRWC | kMTLFmtCapsBlend),
kMTLFmtCapsRWCM = (kMTLFmtCapsRWC | kMTLFmtCapsMSAA),
kMTLFmtCapsRWCMB = (kMTLFmtCapsRWCM | kMTLFmtCapsBlend),
kMTLFmtCapsRFCMRB = (kMTLFmtCapsRCMB | kMTLFmtCapsFilter | kMTLFmtCapsResolve),
kMTLFmtCapsRFWCMB = (kMTLFmtCapsRWCMB | kMTLFmtCapsFilter),
kMTLFmtCapsAll = (kMTLFmtCapsRFWCMB | kMTLFmtCapsResolve),
kMTLFmtCapsDRM = (kMTLFmtCapsDSAtt | kMTLFmtCapsRead | kMTLFmtCapsMSAA),
kMTLFmtCapsDRFM = (kMTLFmtCapsDRM | kMTLFmtCapsFilter),
kMTLFmtCapsDRMR = (kMTLFmtCapsDRM | kMTLFmtCapsResolve),
kMTLFmtCapsDRFMR = (kMTLFmtCapsDRFM | kMTLFmtCapsResolve),
kMTLFmtCapsChromaSubsampling = kMTLFmtCapsRF,
kMTLFmtCapsMultiPlanar = kMTLFmtCapsChromaSubsampling,
} MTLFmtCaps;
inline MTLFmtCaps operator|(MTLFmtCaps p_left, MTLFmtCaps p_right) {
return static_cast<MTLFmtCaps>(static_cast<uint32_t>(p_left) | p_right);
}
inline MTLFmtCaps &operator|=(MTLFmtCaps &p_left, MTLFmtCaps p_right) {
return (p_left = p_left | p_right);
}
#pragma mark -
#pragma mark Metal view classes
enum class MTLViewClass : uint8_t {
None,
Color8,
Color16,
Color32,
Color64,
Color128,
PVRTC_RGB_2BPP,
PVRTC_RGB_4BPP,
PVRTC_RGBA_2BPP,
PVRTC_RGBA_4BPP,
EAC_R11,
EAC_RG11,
EAC_RGBA8,
ETC2_RGB8,
ETC2_RGB8A1,
ASTC_4x4,
ASTC_5x4,
ASTC_5x5,
ASTC_6x5,
ASTC_6x6,
ASTC_8x5,
ASTC_8x6,
ASTC_8x8,
ASTC_10x5,
ASTC_10x6,
ASTC_10x8,
ASTC_10x10,
ASTC_12x10,
ASTC_12x12,
BC1_RGBA,
BC2_RGBA,
BC3_RGBA,
BC4_R,
BC5_RG,
BC6H_RGB,
BC7_RGBA,
Depth24_Stencil8,
Depth32_Stencil8,
BGRA10_XR,
BGR10_XR
};
#pragma mark -
#pragma mark Format descriptors
/** Enumerates the data type of a format. */
enum class MTLFormatType {
None, /**< Format type is unknown. */
ColorHalf, /**< A 16-bit floating point color. */
ColorFloat, /**< A 32-bit floating point color. */
ColorInt8, /**< A signed 8-bit integer color. */
ColorUInt8, /**< An unsigned 8-bit integer color. */
ColorInt16, /**< A signed 16-bit integer color. */
ColorUInt16, /**< An unsigned 16-bit integer color. */
ColorInt32, /**< A signed 32-bit integer color. */
ColorUInt32, /**< An unsigned 32-bit integer color. */
DepthStencil, /**< A depth and stencil value. */
Compressed, /**< A block-compressed color. */
};
struct Extent2D {
uint32_t width;
uint32_t height;
};
struct ComponentMapping {
RD::TextureSwizzle r = RD::TEXTURE_SWIZZLE_IDENTITY;
RD::TextureSwizzle g = RD::TEXTURE_SWIZZLE_IDENTITY;
RD::TextureSwizzle b = RD::TEXTURE_SWIZZLE_IDENTITY;
RD::TextureSwizzle a = RD::TEXTURE_SWIZZLE_IDENTITY;
};
/** Describes the properties of a DataFormat, including the corresponding Metal pixel and vertex format. */
struct DataFormatDesc {
RD::DataFormat dataFormat;
MTLPixelFormat mtlPixelFormat;
MTLPixelFormat mtlPixelFormatSubstitute;
MTLVertexFormat mtlVertexFormat;
MTLVertexFormat mtlVertexFormatSubstitute;
uint8_t chromaSubsamplingPlaneCount;
uint8_t chromaSubsamplingComponentBits;
Extent2D blockTexelSize;
uint32_t bytesPerBlock;
MTLFormatType formatType;
ComponentMapping componentMapping;
const char *name;
bool hasReportedSubstitution;
inline double bytesPerTexel() const { return (double)bytesPerBlock / (double)(blockTexelSize.width * blockTexelSize.height); }
inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid || chromaSubsamplingPlaneCount > 1); }
inline bool isSupportedOrSubstitutable() const { return isSupported() || (mtlPixelFormatSubstitute != MTLPixelFormatInvalid); }
inline bool vertexIsSupported() const { return (mtlVertexFormat != MTLVertexFormatInvalid); }
inline bool vertexIsSupportedOrSubstitutable() const { return vertexIsSupported() || (mtlVertexFormatSubstitute != MTLVertexFormatInvalid); }
bool needsSwizzle() const {
return (componentMapping.r != RD::TEXTURE_SWIZZLE_IDENTITY ||
componentMapping.g != RD::TEXTURE_SWIZZLE_IDENTITY ||
componentMapping.b != RD::TEXTURE_SWIZZLE_IDENTITY ||
componentMapping.a != RD::TEXTURE_SWIZZLE_IDENTITY);
}
};
/** Describes the properties of a MTLPixelFormat or MTLVertexFormat. */
struct MTLFormatDesc {
union {
MTLPixelFormat mtlPixelFormat;
MTLVertexFormat mtlVertexFormat;
};
RD::DataFormat dataFormat = RD::DATA_FORMAT_MAX;
MTLFmtCaps mtlFmtCaps;
MTLViewClass mtlViewClass;
MTLPixelFormat mtlPixelFormatLinear;
const char *name = nullptr;
inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid) && (mtlFmtCaps != kMTLFmtCapsNone); }
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) PixelFormats {
using DataFormat = RD::DataFormat;
public:
/** Returns whether the DataFormat is supported by the GPU bound to this instance. */
bool isSupported(DataFormat p_format);
/** Returns whether the DataFormat is supported by this implementation, or can be substituted by one that is. */
bool isSupportedOrSubstitutable(DataFormat p_format);
/** Returns whether the specified Metal MTLPixelFormat can be used as a depth format. */
_FORCE_INLINE_ bool isDepthFormat(MTLPixelFormat p_format) {
switch (p_format) {
case MTLPixelFormatDepth32Float:
case MTLPixelFormatDepth16Unorm:
case MTLPixelFormatDepth32Float_Stencil8:
#if TARGET_OS_OSX
case MTLPixelFormatDepth24Unorm_Stencil8:
#endif
return true;
default:
return false;
}
}
/** Returns whether the specified Metal MTLPixelFormat can be used as a stencil format. */
_FORCE_INLINE_ bool isStencilFormat(MTLPixelFormat p_format) {
switch (p_format) {
case MTLPixelFormatStencil8:
#if TARGET_OS_OSX
case MTLPixelFormatDepth24Unorm_Stencil8:
case MTLPixelFormatX24_Stencil8:
#endif
case MTLPixelFormatDepth32Float_Stencil8:
case MTLPixelFormatX32_Stencil8:
return true;
default:
return false;
}
}
/** Returns whether the specified Metal MTLPixelFormat is a PVRTC format. */
bool isPVRTCFormat(MTLPixelFormat p_format);
/** Returns the format type corresponding to the specified Godot pixel format, */
MTLFormatType getFormatType(DataFormat p_format);
/** Returns the format type corresponding to the specified Metal MTLPixelFormat, */
MTLFormatType getFormatType(MTLPixelFormat p_format);
/**
* Returns the Metal MTLPixelFormat corresponding to the specified Godot pixel
* or returns MTLPixelFormatInvalid if no corresponding MTLPixelFormat exists.
*/
MTLPixelFormat getMTLPixelFormat(DataFormat p_format);
/**
* Returns the DataFormat corresponding to the specified Metal MTLPixelFormat,
* or returns DATA_FORMAT_MAX if no corresponding DataFormat exists.
*/
DataFormat getDataFormat(MTLPixelFormat p_format);
/**
* Returns the size, in bytes, of a texel block of the specified Godot pixel.
* For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
*/
uint32_t getBytesPerBlock(DataFormat p_format);
/**
* Returns the size, in bytes, of a texel block of the specified Metal format.
* For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
*/
uint32_t getBytesPerBlock(MTLPixelFormat p_format);
/** Returns the number of planes of the specified chroma-subsampling (YCbCr) DataFormat */
uint8_t getChromaSubsamplingPlaneCount(DataFormat p_format);
/** Returns the number of bits per channel of the specified chroma-subsampling (YCbCr) DataFormat */
uint8_t getChromaSubsamplingComponentBits(DataFormat p_format);
/**
* Returns the size, in bytes, of a texel of the specified Godot format.
* The returned value may be fractional for certain compressed formats.
*/
float getBytesPerTexel(DataFormat p_format);
/**
* Returns the size, in bytes, of a texel of the specified Metal format.
* The returned value may be fractional for certain compressed formats.
*/
float getBytesPerTexel(MTLPixelFormat p_format);
/**
* Returns the size, in bytes, of a row of texels of the specified Godot pixel format.
*
* For compressed formats, this takes into consideration the compression block size,
* and p_texels_per_row should specify the width in texels, not blocks. The result is rounded
* up if p_texels_per_row is not an integer multiple of the compression block width.
*/
size_t getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_row);
/**
* Returns the size, in bytes, of a row of texels of the specified Metal format.
*
* For compressed formats, this takes into consideration the compression block size,
* and texelsPerRow should specify the width in texels, not blocks. The result is rounded
* up if texelsPerRow is not an integer multiple of the compression block width.
*/
size_t getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row);
/**
* Returns the size, in bytes, of a texture layer of the specified Godot pixel format.
*
* For compressed formats, this takes into consideration the compression block size,
* and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
* rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
*/
size_t getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);
/**
* Returns the size, in bytes, of a texture layer of the specified Metal format.
* For compressed formats, this takes into consideration the compression block size,
* and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
* rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
*/
size_t getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);
/** Returns whether or not the specified Godot format requires swizzling to use with Metal. */
bool needsSwizzle(DataFormat p_format);
/** Returns the Metal format capabilities supported by the specified Godot format, without substitution. */
MTLFmtCaps getCapabilities(DataFormat p_format, bool p_extended = false);
/** Returns the Metal format capabilities supported by the specified Metal format. */
MTLFmtCaps getCapabilities(MTLPixelFormat p_format, bool p_extended = false);
/**
* Returns the Metal MTLVertexFormat corresponding to the specified
* DataFormat as used as a vertex attribute format.
*/
MTLVertexFormat getMTLVertexFormat(DataFormat p_format);
#pragma mark Construction
explicit PixelFormats(id<MTLDevice> p_device, const MetalFeatures &p_feat);
protected:
DataFormatDesc &getDataFormatDesc(DataFormat p_format);
DataFormatDesc &getDataFormatDesc(MTLPixelFormat p_format);
MTLFormatDesc &getMTLPixelFormatDesc(MTLPixelFormat p_format);
MTLFmtCaps &getMTLPixelFormatCapsIf(MTLPixelFormat mtlPixFmt, bool cond);
MTLFormatDesc &getMTLVertexFormatDesc(MTLVertexFormat p_format);
void initDataFormatCapabilities();
void initMTLPixelFormatCapabilities();
void initMTLVertexFormatCapabilities(const MetalFeatures &p_feat);
void modifyMTLFormatCapabilities(const MetalFeatures &p_feat);
void buildDFFormatMaps();
void addMTLPixelFormatDescImpl(MTLPixelFormat p_pix_fmt, MTLPixelFormat p_pix_fmt_linear,
MTLViewClass p_view_class, MTLFmtCaps p_fmt_caps, const char *p_name);
void addMTLVertexFormatDescImpl(MTLVertexFormat p_vert_fmt, MTLFmtCaps p_vert_caps, const char *name);
id<MTLDevice> device;
InflectionMap<DataFormat, DataFormatDesc, RD::DATA_FORMAT_MAX> _data_format_descs;
InflectionMap<uint16_t, MTLFormatDesc, MTLPixelFormatX32_Stencil8 + 2> _mtl_pixel_format_descs; // The actual last enum value is not available on iOS.
TightLocalVector<MTLFormatDesc> _mtl_vertex_format_descs;
};
GODOT_CLANG_WARNING_POP

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
/**************************************************************************/
/* rendering_context_driver_metal.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#ifdef METAL_ENABLED
#include "servers/rendering/rendering_context_driver.h"
#include "servers/rendering/rendering_device_driver.h"
#import <CoreGraphics/CGGeometry.h>
#ifdef __OBJC__
#import "metal_objects.h"
#import <Metal/Metal.h>
#import <QuartzCore/CALayer.h>
@class CAMetalLayer;
@protocol CAMetalDrawable;
#else
typedef enum MTLPixelFormat {
MTLPixelFormatBGRA8Unorm = 80,
} MTLPixelFormat;
class MDCommandBuffer;
#endif
class PixelFormats;
class MDResourceCache;
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMetal : public RenderingContextDriver {
bool capture_available = false;
protected:
#ifdef __OBJC__
id<MTLDevice> metal_device = nullptr;
#else
void *metal_device = nullptr;
#endif
Device device; // There is only one device on Apple Silicon.
public:
Error initialize() final override;
const Device &device_get(uint32_t p_device_index) const final override;
uint32_t device_get_count() const final override;
bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const final override { return true; }
RenderingDeviceDriver *driver_create() final override;
void driver_free(RenderingDeviceDriver *p_driver) final override;
SurfaceID surface_create(const void *p_platform_data) final override;
void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) final override;
void surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) final override;
DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const final override;
uint32_t surface_get_width(SurfaceID p_surface) const final override;
uint32_t surface_get_height(SurfaceID p_surface) const final override;
void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) final override;
bool surface_get_needs_resize(SurfaceID p_surface) const final override;
void surface_destroy(SurfaceID p_surface) final override;
bool is_debug_utils_enabled() const final override { return capture_available; }
#pragma mark - Metal-specific methods
// Platform-specific data for the Windows embedded in this driver.
struct WindowPlatformData {
#ifdef __OBJC__
CAMetalLayer *__unsafe_unretained layer;
#else
void *layer;
#endif
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) Surface {
protected:
#ifdef __OBJC__
id<MTLDevice> device;
#else
void *device;
#endif
public:
uint32_t width = 0;
uint32_t height = 0;
DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED;
bool needs_resize = false;
double present_minimum_duration = 0.0;
Surface(
#ifdef __OBJC__
id<MTLDevice> p_device
#else
void *p_device
#endif
) :
device(p_device) {
}
virtual ~Surface() = default;
MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; }
virtual Error resize(uint32_t p_desired_framebuffer_count) = 0;
virtual RDD::FramebufferID acquire_next_frame_buffer() = 0;
virtual void present(MDCommandBuffer *p_cmd_buffer) = 0;
void set_max_fps(int p_max_fps) { present_minimum_duration = p_max_fps ? 1.0 / p_max_fps : 0.0; }
};
#ifdef __OBJC__
id<MTLDevice>
#else
void *
#endif
get_metal_device() const {
return metal_device;
}
#pragma mark - Initialization
RenderingContextDriverMetal();
~RenderingContextDriverMetal() override;
};
#endif // METAL_ENABLED

View File

@@ -0,0 +1,241 @@
/**************************************************************************/
/* rendering_context_driver_metal.mm */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#import "rendering_context_driver_metal.h"
#import "rendering_device_driver_metal.h"
@protocol MTLDeviceEx <MTLDevice>
#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300
- (void)setShouldMaximizeConcurrentCompilation:(BOOL)v;
#endif
@end
RenderingContextDriverMetal::RenderingContextDriverMetal() {
}
RenderingContextDriverMetal::~RenderingContextDriverMetal() {
}
Error RenderingContextDriverMetal::initialize() {
if (OS::get_singleton()->get_environment("MTL_CAPTURE_ENABLED") == "1") {
capture_available = true;
}
metal_device = MTLCreateSystemDefaultDevice();
#if TARGET_OS_OSX
if (@available(macOS 13.3, *)) {
[id<MTLDeviceEx>(metal_device) setShouldMaximizeConcurrentCompilation:YES];
}
#endif
device.type = DEVICE_TYPE_INTEGRATED_GPU;
device.vendor = Vendor::VENDOR_APPLE;
device.workarounds = Workarounds();
MetalDeviceProperties props(metal_device);
int version = (int)props.features.highestFamily - (int)MTLGPUFamilyApple1 + 1;
device.name = vformat("%s (Apple%d)", metal_device.name.UTF8String, version);
return OK;
}
const RenderingContextDriver::Device &RenderingContextDriverMetal::device_get(uint32_t p_device_index) const {
DEV_ASSERT(p_device_index < 1);
return device;
}
uint32_t RenderingContextDriverMetal::device_get_count() const {
return 1;
}
RenderingDeviceDriver *RenderingContextDriverMetal::driver_create() {
return memnew(RenderingDeviceDriverMetal(this));
}
void RenderingContextDriverMetal::driver_free(RenderingDeviceDriver *p_driver) {
memdelete(p_driver);
}
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) SurfaceLayer : public RenderingContextDriverMetal::Surface {
CAMetalLayer *__unsafe_unretained layer = nil;
LocalVector<MDFrameBuffer> frame_buffers;
LocalVector<id<MTLDrawable>> drawables;
uint32_t rear = -1;
uint32_t front = 0;
uint32_t count = 0;
public:
SurfaceLayer(CAMetalLayer *p_layer, id<MTLDevice> p_device) :
Surface(p_device), layer(p_layer) {
layer.allowsNextDrawableTimeout = YES;
layer.framebufferOnly = YES;
layer.opaque = OS::get_singleton()->is_layered_allowed() ? NO : YES;
layer.pixelFormat = get_pixel_format();
layer.device = p_device;
}
~SurfaceLayer() override {
layer = nil;
}
Error resize(uint32_t p_desired_framebuffer_count) override final {
if (width == 0 || height == 0) {
// Very likely the window is minimized, don't create a swap chain.
return ERR_SKIP;
}
CGSize drawableSize = CGSizeMake(width, height);
CGSize current = layer.drawableSize;
if (!CGSizeEqualToSize(current, drawableSize)) {
layer.drawableSize = drawableSize;
}
// Metal supports a maximum of 3 drawables.
p_desired_framebuffer_count = MIN(3U, p_desired_framebuffer_count);
layer.maximumDrawableCount = p_desired_framebuffer_count;
#if TARGET_OS_OSX
// Display sync is only supported on macOS.
switch (vsync_mode) {
case DisplayServer::VSYNC_MAILBOX:
case DisplayServer::VSYNC_ADAPTIVE:
case DisplayServer::VSYNC_ENABLED:
layer.displaySyncEnabled = YES;
break;
case DisplayServer::VSYNC_DISABLED:
layer.displaySyncEnabled = NO;
break;
}
#endif
drawables.resize(p_desired_framebuffer_count);
frame_buffers.resize(p_desired_framebuffer_count);
for (uint32_t i = 0; i < p_desired_framebuffer_count; i++) {
// Reserve space for the drawable texture.
frame_buffers[i].set_texture_count(1);
}
return OK;
}
RDD::FramebufferID acquire_next_frame_buffer() override final {
if (count == frame_buffers.size()) {
return RDD::FramebufferID();
}
rear = (rear + 1) % frame_buffers.size();
count++;
MDFrameBuffer &frame_buffer = frame_buffers[rear];
frame_buffer.size = Size2i(width, height);
id<CAMetalDrawable> drawable = layer.nextDrawable;
ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available");
drawables[rear] = drawable;
frame_buffer.set_texture(0, drawable.texture);
return RDD::FramebufferID(&frame_buffer);
}
void present(MDCommandBuffer *p_cmd_buffer) override final {
if (count == 0) {
return;
}
// Release texture and drawable.
frame_buffers[front].unset_texture(0);
id<MTLDrawable> drawable = drawables[front];
drawables[front] = nil;
count--;
front = (front + 1) % frame_buffers.size();
if (vsync_mode != DisplayServer::VSYNC_DISABLED) {
[p_cmd_buffer->get_command_buffer() presentDrawable:drawable afterMinimumDuration:present_minimum_duration];
} else {
[p_cmd_buffer->get_command_buffer() presentDrawable:drawable];
}
}
};
RenderingContextDriver::SurfaceID RenderingContextDriverMetal::surface_create(const void *p_platform_data) {
const WindowPlatformData *wpd = (const WindowPlatformData *)(p_platform_data);
Surface *surface = memnew(SurfaceLayer(wpd->layer, metal_device));
return SurfaceID(surface);
}
void RenderingContextDriverMetal::surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) {
Surface *surface = (Surface *)(p_surface);
if (surface->width == p_width && surface->height == p_height) {
return;
}
surface->width = p_width;
surface->height = p_height;
surface->needs_resize = true;
}
void RenderingContextDriverMetal::surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) {
Surface *surface = (Surface *)(p_surface);
if (surface->vsync_mode == p_vsync_mode) {
return;
}
surface->vsync_mode = p_vsync_mode;
surface->needs_resize = true;
}
DisplayServer::VSyncMode RenderingContextDriverMetal::surface_get_vsync_mode(SurfaceID p_surface) const {
Surface *surface = (Surface *)(p_surface);
return surface->vsync_mode;
}
uint32_t RenderingContextDriverMetal::surface_get_width(SurfaceID p_surface) const {
Surface *surface = (Surface *)(p_surface);
return surface->width;
}
uint32_t RenderingContextDriverMetal::surface_get_height(SurfaceID p_surface) const {
Surface *surface = (Surface *)(p_surface);
return surface->height;
}
void RenderingContextDriverMetal::surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) {
Surface *surface = (Surface *)(p_surface);
surface->needs_resize = p_needs_resize;
}
bool RenderingContextDriverMetal::surface_get_needs_resize(SurfaceID p_surface) const {
Surface *surface = (Surface *)(p_surface);
return surface->needs_resize;
}
void RenderingContextDriverMetal::surface_destroy(SurfaceID p_surface) {
Surface *surface = (Surface *)(p_surface);
memdelete(surface);
}

View File

@@ -0,0 +1,440 @@
/**************************************************************************/
/* rendering_device_driver_metal.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import "metal_objects.h"
#import "rendering_shader_container_metal.h"
#include "servers/rendering/rendering_device_driver.h"
#import <Metal/Metal.h>
#import <variant>
#ifdef DEBUG_ENABLED
#ifndef _DEBUG
#define _DEBUG
#endif
#endif
class RenderingContextDriverMetal;
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMetal : public RenderingDeviceDriver {
friend struct ShaderCacheEntry;
template <typename T>
using Result = std::variant<T, Error>;
#pragma mark - Generic
RenderingContextDriverMetal *context_driver = nullptr;
RenderingContextDriver::Device context_device;
id<MTLDevice> device = nil;
MetalDeviceProperties *device_properties = nullptr;
MetalDeviceProfile device_profile;
RenderingShaderContainerFormatMetal *shader_container_format = nullptr;
PixelFormats *pixel_formats = nullptr;
std::unique_ptr<MDResourceCache> resource_cache;
RDD::Capabilities capabilities;
RDD::MultiviewCapabilities multiview_capabilities;
RDD::FragmentShadingRateCapabilities fsr_capabilities;
RDD::FragmentDensityMapCapabilities fdm_capabilities;
id<MTLBinaryArchive> archive = nil;
uint32_t archive_count = 0;
id<MTLCommandQueue> device_queue = nil;
id<MTLCaptureScope> device_scope = nil;
String pipeline_cache_id;
Error _create_device();
void _check_capabilities();
#pragma mark - Shader Cache
ShaderLoadStrategy _shader_load_strategy = ShaderLoadStrategy::DEFAULT;
/**
* The shader cache is a map of hashes of the Metal source to shader cache entries.
*
* To prevent unbounded growth of the cache, cache entries are automatically freed when
* there are no more references to the MDLibrary associated with the cache entry.
*/
HashMap<SHA256Digest, ShaderCacheEntry *, HashableHasher<SHA256Digest>> _shader_cache;
void shader_cache_free_entry(const SHA256Digest &key);
public:
Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final;
#pragma mark - Memory
#pragma mark - Buffers
public:
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
virtual void buffer_free(BufferID p_buffer) override final;
virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final;
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
virtual void buffer_unmap(BufferID p_buffer) override final;
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
#pragma mark - Texture
private:
// Returns true if the texture is a valid linear format.
Result<bool> is_valid_linear(TextureFormat const &p_format) const;
void _get_sub_resource(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) const;
public:
virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final;
virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) override final;
virtual TextureID texture_create_shared(TextureID p_original_texture, const TextureView &p_view) override final;
virtual TextureID texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) override final;
virtual void texture_free(TextureID p_texture) override final;
virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final;
virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) override final;
virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final;
virtual void texture_unmap(TextureID p_texture) override final;
virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final;
virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final;
#pragma mark - Sampler
public:
virtual SamplerID sampler_create(const SamplerState &p_state) final override;
virtual void sampler_free(SamplerID p_sampler) final override;
virtual bool sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) override final;
#pragma mark - Vertex Array
private:
public:
virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final;
virtual void vertex_format_free(VertexFormatID p_vertex_format) override final;
#pragma mark - Barriers
virtual void command_pipeline_barrier(
CommandBufferID p_cmd_buffer,
BitField<PipelineStageBits> p_src_stages,
BitField<PipelineStageBits> p_dst_stages,
VectorView<MemoryBarrier> p_memory_barriers,
VectorView<BufferBarrier> p_buffer_barriers,
VectorView<TextureBarrier> p_texture_barriers) override final;
#pragma mark - Fences
private:
struct Fence {
dispatch_semaphore_t semaphore;
Fence() :
semaphore(dispatch_semaphore_create(0)) {}
};
public:
virtual FenceID fence_create() override final;
virtual Error fence_wait(FenceID p_fence) override final;
virtual void fence_free(FenceID p_fence) override final;
#pragma mark - Semaphores
public:
virtual SemaphoreID semaphore_create() override final;
virtual void semaphore_free(SemaphoreID p_semaphore) override final;
#pragma mark - Commands
// ----- QUEUE FAMILY -----
virtual CommandQueueFamilyID command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override final;
// ----- QUEUE -----
public:
virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final;
virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override final;
virtual void command_queue_free(CommandQueueID p_cmd_queue) override final;
// ----- POOL -----
virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final;
virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override final;
virtual void command_pool_free(CommandPoolID p_cmd_pool) override final;
// ----- BUFFER -----
private:
// Used to maintain references.
Vector<MDCommandBuffer *> command_buffers;
public:
virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final;
virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final;
virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final;
virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final;
virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) override final;
#pragma mark - Swapchain
private:
struct SwapChain {
RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID();
RenderPassID render_pass;
RDD::DataFormat data_format = DATA_FORMAT_MAX;
SwapChain() :
render_pass(nullptr) {}
};
void _swap_chain_release(SwapChain *p_swap_chain);
void _swap_chain_release_buffers(SwapChain *p_swap_chain);
public:
virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) override final;
virtual Error swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) override final;
virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) override final;
virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) override final;
virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) override final;
virtual void swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) override final;
virtual void swap_chain_free(SwapChainID p_swap_chain) override final;
#pragma mark - Frame Buffer
virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) override final;
virtual void framebuffer_free(FramebufferID p_framebuffer) override final;
#pragma mark - Shader
private:
// Serialization types need access to private state.
friend struct ShaderStageData;
friend struct SpecializationConstantData;
friend struct UniformData;
friend struct ShaderBinaryData;
friend struct PushConstantData;
public:
virtual ShaderID shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
virtual void shader_free(ShaderID p_shader) override final;
virtual void shader_destroy_modules(ShaderID p_shader) override final;
virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final;
#pragma mark - Uniform Set
public:
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) override final;
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;
#pragma mark - Commands
virtual void command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
#pragma mark Transfer
private:
enum class CopySource {
Buffer,
Texture,
};
void _copy_texture_buffer(CommandBufferID p_cmd_buffer,
CopySource p_source,
TextureID p_texture,
BufferID p_buffer,
VectorView<BufferTextureCopyRegion> p_regions);
public:
virtual void command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override final;
virtual void command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) override final;
virtual void command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) override final;
virtual void command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) override final;
virtual void command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) override final;
virtual void command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) override final;
virtual void command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) override final;
#pragma mark Pipeline
private:
Result<id<MTLFunction>> _create_function(MDLibrary *p_library, NSString *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants);
public:
virtual void pipeline_free(PipelineID p_pipeline_id) override final;
// ----- BINDING -----
virtual void command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_first_index, VectorView<uint32_t> p_data) override final;
// ----- CACHE -----
private:
String _pipeline_get_cache_path() const;
public:
virtual bool pipeline_cache_create(const Vector<uint8_t> &p_data) override final;
virtual void pipeline_cache_free() override final;
virtual size_t pipeline_cache_query_size() override final;
virtual Vector<uint8_t> pipeline_cache_serialize() override final;
#pragma mark Rendering
// ----- SUBPASS -----
virtual RenderPassID render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final;
virtual void render_pass_free(RenderPassID p_render_pass) override final;
// ----- COMMANDS -----
public:
virtual void command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) override final;
virtual void command_end_render_pass(CommandBufferID p_cmd_buffer) override final;
virtual void command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) override final;
virtual void command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) override final;
virtual void command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) override final;
virtual void command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) override final;
// Binding.
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
// Drawing.
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
virtual void command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) override final;
virtual void command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
virtual void command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
// Buffer binding.
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final;
virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final;
// Dynamic state.
virtual void command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) override final;
virtual void command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) override final;
// ----- PIPELINE -----
virtual PipelineID render_pipeline_create(
ShaderID p_shader,
VertexFormatID p_vertex_format,
RenderPrimitive p_render_primitive,
PipelineRasterizationState p_rasterization_state,
PipelineMultisampleState p_multisample_state,
PipelineDepthStencilState p_depth_stencil_state,
PipelineColorBlendState p_blend_state,
VectorView<int32_t> p_color_attachments,
BitField<PipelineDynamicStateFlags> p_dynamic_state,
RenderPassID p_render_pass,
uint32_t p_render_subpass,
VectorView<PipelineSpecializationConstant> p_specialization_constants) override final;
#pragma mark - Compute
// ----- COMMANDS -----
// Binding.
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;
// Dispatching.
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
virtual void command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) override final;
// ----- PIPELINE -----
virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) override final;
#pragma mark - Queries
// ----- TIMESTAMP -----
// Basic.
virtual QueryPoolID timestamp_query_pool_create(uint32_t p_query_count) override final;
virtual void timestamp_query_pool_free(QueryPoolID p_pool_id) override final;
virtual void timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) override final;
virtual uint64_t timestamp_query_result_to_time(uint64_t p_result) override final;
// Commands.
virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) override final;
virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final;
#pragma mark - Labels
virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final;
virtual void command_end_label(CommandBufferID p_cmd_buffer) override final;
#pragma mark - Debug
virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final;
#pragma mark - Submission
virtual void begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) override final;
virtual void end_segment() override final;
#pragma mark - Miscellaneous
virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final;
virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final;
virtual uint64_t get_total_memory_used() override final;
virtual uint64_t get_lazily_memory_used() override final;
virtual uint64_t limit_get(Limit p_limit) override final;
virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
virtual bool has_feature(Features p_feature) override final;
virtual const MultiviewCapabilities &get_multiview_capabilities() override final;
virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final;
virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final;
virtual String get_api_name() const override final { return "Metal"; }
virtual String get_api_version() const override final;
virtual String get_pipeline_cache_uuid() const override final;
virtual const Capabilities &get_capabilities() const override final;
virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
// Metal-specific.
id<MTLDevice> get_device() const { return device; }
PixelFormats &get_pixel_formats() const { return *pixel_formats; }
MDResourceCache &get_resource_cache() const { return *resource_cache; }
MetalDeviceProperties const &get_device_properties() const { return *device_properties; }
_FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) {
return (device_properties->limits.maxPerStageBufferCount - 1) - p_binding;
}
size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const;
size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const;
/******************/
RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver);
~RenderingDeviceDriverMetal();
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,310 @@
/**************************************************************************/
/* rendering_shader_container_metal.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import "sha256_digest.h"
#import "servers/rendering/rendering_device_driver.h"
#import "servers/rendering/rendering_shader_container.h"
constexpr uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535;
/// Metal buffer index for the view mask when rendering multi-view.
const uint32_t VIEW_MASK_BUFFER_INDEX = 24;
class RenderingShaderContainerFormatMetal;
class MinOsVersion {
uint32_t version;
public:
String to_compiler_os_version() const;
bool is_null() const { return version == UINT32_MAX; }
bool is_valid() const { return version != UINT32_MAX; }
MinOsVersion(const String &p_version);
explicit MinOsVersion(uint32_t p_version) :
version(p_version) {}
MinOsVersion() :
version(UINT32_MAX) {}
bool operator>(uint32_t p_other) {
return version > p_other;
}
};
/// @brief A minimal structure that defines a device profile for Metal.
///
/// This structure is used by the `RenderingShaderContainerMetal` class to
/// determine options for compiling SPIR-V to Metal source. It currently only
/// contains the minimum properties required to transform shaders from SPIR-V to Metal
/// and potentially compile to a `.metallib`.
struct MetalDeviceProfile {
enum class Platform : uint32_t {
macOS = 0,
iOS = 1,
};
/*! @brief The GPU family.
*
* NOTE: These values match Apple's MTLGPUFamily
*/
enum class GPU : uint32_t {
Apple1 = 1001,
Apple2 = 1002,
Apple3 = 1003,
Apple4 = 1004,
Apple5 = 1005,
Apple6 = 1006,
Apple7 = 1007,
Apple8 = 1008,
Apple9 = 1009,
};
enum class ArgumentBuffersTier : uint32_t {
Tier1 = 0,
Tier2 = 1,
};
struct Features {
uint32_t mslVersionMajor = 0;
uint32_t mslVersionMinor = 0;
ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;
bool simdPermute = false;
};
Platform platform = Platform::macOS;
GPU gpu = GPU::Apple4;
Features features;
static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu);
MetalDeviceProfile() = default;
private:
static Mutex profiles_lock; ///< Mutex to protect access to the profiles map.
static HashMap<uint32_t, MetalDeviceProfile> profiles;
};
class RenderingShaderContainerMetal : public RenderingShaderContainer {
GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer);
public:
struct HeaderData {
enum Flags : uint32_t {
NONE = 0,
NEEDS_VIEW_MASK_BUFFER = 1 << 0,
USES_ARGUMENT_BUFFERS = 1 << 1,
};
/// The base profile that was used to generate this shader.
MetalDeviceProfile profile;
/// The Metal language version specified when compiling SPIR-V to MSL.
/// Format is major * 10000 + minor * 100 + patch.
uint32_t msl_version = UINT32_MAX;
/*! @brief The minimum supported OS version for shaders baked to a `.metallib`.
*
* NOTE: This property is only valid when shaders are baked to a .metalllib
*
* Format is major * 10000 + minor * 100 + patch.
*/
MinOsVersion os_min_version;
uint32_t flags = NONE;
/// @brief Returns `true` if the shader is compiled with multi-view support.
bool needs_view_mask_buffer() const {
return flags & NEEDS_VIEW_MASK_BUFFER;
}
void set_needs_view_mask_buffer(bool p_value) {
if (p_value) {
flags |= NEEDS_VIEW_MASK_BUFFER;
} else {
flags &= ~NEEDS_VIEW_MASK_BUFFER;
}
}
/// @brief Returns `true` if the shader was compiled with argument buffer support.
bool uses_argument_buffers() const {
return flags & USES_ARGUMENT_BUFFERS;
}
void set_uses_argument_buffers(bool p_value) {
if (p_value) {
flags |= USES_ARGUMENT_BUFFERS;
} else {
flags &= ~USES_ARGUMENT_BUFFERS;
}
}
};
struct StageData {
uint32_t vertex_input_binding_mask = 0;
uint32_t is_position_invariant = 0; ///< <c>true</c> if the position output is invariant
uint32_t supports_fast_math = 0;
SHA256Digest hash; ///< SHA 256 hash of the shader code
uint32_t source_size = 0; ///< size of the source code in the returned bytes
uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled
uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data
};
struct BindingInfoData {
uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used.
uint32_t data_type = 0; // MTLDataTypeNone
uint32_t index = 0;
uint32_t access = 0; // MTLBindingAccessReadOnly
uint32_t usage = 0; // MTLResourceUsage (none)
uint32_t texture_type = 2; // MTLTextureType2D
uint32_t image_format = 0;
uint32_t array_length = 0;
uint32_t is_multisampled = 0;
};
struct UniformData {
/// Specifies the index into the `bindings` array for the shader stage.
///
/// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays.
static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
0, // SHADER_STAGE_VERTEX
1, // SHADER_STAGE_FRAGMENT
0, // SHADER_STAGE_TESSELATION_CONTROL
1, // SHADER_STAGE_TESSELATION_EVALUATION
0, // SHADER_STAGE_COMPUTE
};
/// Specifies the stages the uniform data is
/// used by the Metal shader.
uint32_t active_stages = 0;
/// The primary binding information for the uniform data.
///
/// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so
/// the array size is fixed to 2.
BindingInfoData bindings[2];
/// The secondary binding information for the uniform data.
///
/// This is typically a sampler for an image-sampler uniform
BindingInfoData bindings_secondary[2];
_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const {
return STAGE_INDEX[p_stage];
}
_FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
BindingInfoData &info = bindings[get_index_for_stage(p_stage)];
DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
info.shader_stage = p_stage;
return info;
}
_FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)];
DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
info.shader_stage = p_stage;
return info;
}
};
struct SpecializationData {
uint32_t used_stages = 0;
};
HeaderData mtl_reflection_data; // compliment to reflection_data
Vector<StageData> mtl_shaders; // compliment to shaders
private:
struct ToolchainProperties {
MinOsVersion os_version_min_required;
uint32_t metal_version = UINT32_MAX;
_FORCE_INLINE_ bool is_null() const { return os_version_min_required.is_null() || metal_version == UINT32_MAX; }
_FORCE_INLINE_ bool is_valid() const { return !is_null(); }
};
ToolchainProperties compiler_props;
void _initialize_toolchain_properties();
private:
const MetalDeviceProfile *device_profile = nullptr;
bool export_mode = false;
MinOsVersion min_os_version;
Vector<UniformData> mtl_reflection_binding_set_uniforms_data; // compliment to reflection_binding_set_uniforms_data
Vector<SpecializationData> mtl_reflection_specialization_data; // compliment to reflection_specialization_data
Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data);
public:
static constexpr uint32_t FORMAT_VERSION = 1;
void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; }
void set_device_profile(const MetalDeviceProfile *p_device_profile) { device_profile = p_device_profile; }
void set_min_os_version(const MinOsVersion p_min_os_version) { min_os_version = p_min_os_version; }
struct MetalShaderReflection {
Vector<Vector<UniformData>> uniform_sets;
Vector<SpecializationData> specialization_constants;
};
MetalShaderReflection get_metal_shader_reflection() const;
protected:
virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override;
virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _format() const override;
virtual uint32_t _format_version() const override;
virtual bool _set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) override;
};
class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat {
bool export_mode = false;
MinOsVersion min_os_version;
const MetalDeviceProfile *device_profile = nullptr;
public:
virtual Ref<RenderingShaderContainer> create_container() const override;
virtual ShaderLanguageVersion get_shader_language_version() const override;
virtual ShaderSpirvVersion get_shader_spirv_version() const override;
RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false, const MinOsVersion p_min_os_version = MinOsVersion());
virtual ~RenderingShaderContainerFormatMetal() = default;
};

View File

@@ -0,0 +1,843 @@
/**************************************************************************/
/* rendering_shader_container_metal.mm */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#import "rendering_shader_container_metal.h"
#import "metal_utils.h"
#import "core/io/marshalls.h"
#import "servers/rendering/rendering_device.h"
#import <Metal/Metal.h>
#import <spirv.hpp>
#import <spirv_msl.hpp>
#import <spirv_parser.hpp>
Mutex MetalDeviceProfile::profiles_lock;
HashMap<uint32_t, MetalDeviceProfile> MetalDeviceProfile::profiles;
const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) {
DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS);
MutexLock lock(profiles_lock);
uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu;
if (MetalDeviceProfile *profile = profiles.getptr(key)) {
return profile;
}
MetalDeviceProfile res;
res.platform = p_platform;
res.gpu = p_gpu;
if (p_platform == Platform::macOS) {
res.features.mslVersionMajor = 3;
res.features.mslVersionMinor = 2;
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
res.features.simdPermute = true;
} else if (p_platform == Platform::iOS) {
switch (p_gpu) {
case GPU::Apple1:
case GPU::Apple2:
case GPU::Apple3:
case GPU::Apple4:
case GPU::Apple5: {
res.features.simdPermute = false;
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1;
} break;
case GPU::Apple6:
case GPU::Apple7:
case GPU::Apple8:
case GPU::Apple9: {
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
res.features.simdPermute = true;
} break;
}
res.features.mslVersionMajor = 3;
res.features.mslVersionMinor = 1;
}
return &profiles.insert(key, res)->value;
}
void RenderingShaderContainerMetal::_initialize_toolchain_properties() {
if (compiler_props.is_valid()) {
return;
}
String sdk;
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS:
sdk = "macosx";
break;
case MetalDeviceProfile::Platform::iOS:
sdk = "iphoneos";
break;
}
Vector<String> parts{ "echo", R"("")", "|", "/usr/bin/xcrun", "-sdk", sdk, "metal", "-E", "-dM", "-x", "metal", "-", "|", "grep", "-E", R"(\"__METAL_VERSION__|__ENVIRONMENT_OS\")" };
// Compile metal shaders for the minimum supported target instead of the host machine
if (min_os_version.is_valid()) {
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS: {
parts.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::iOS: {
parts.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version());
break;
}
}
}
String s = " ";
List<String> args = { "-c", String(" ").join(parts) };
String r_pipe;
int exit_code;
Error err = OS::get_singleton()->execute("sh", args, &r_pipe, &exit_code, true);
ERR_FAIL_COND_MSG(err != OK, "Failed to determine Metal toolchain properties");
// Parse the lines, which are in the form:
//
// #define VARNAME VALUE
Vector<String> lines = r_pipe.split("\n", false);
for (String &line : lines) {
Vector<String> name_val = line.trim_prefix("#define ").split(" ");
if (name_val.size() != 2) {
continue;
}
if (name_val[0] == "__ENVIRONMENT_OS_VERSION_MIN_REQUIRED__") {
compiler_props.os_version_min_required = MinOsVersion((uint32_t)name_val[1].to_int());
} else if (name_val[0] == "__METAL_VERSION__") {
uint32_t ver = (uint32_t)name_val[1].to_int();
uint32_t maj = ver / 100;
uint32_t min = (ver % 100) / 10;
compiler_props.metal_version = make_msl_version(maj, min);
}
if (compiler_props.is_valid()) {
break;
}
}
return;
}
Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data) {
String name(shader_name.ptr());
if (name.contains_char(':')) {
name = name.replace_char(':', '_');
}
Error r_error;
Ref<FileAccess> source_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE,
name + "_" + itos(p_stage_data.hash.short_sha()),
"metal", false, &r_error);
ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary source file.");
if (!source_file->store_buffer((const uint8_t *)p_source, strlen(p_source))) {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unable to write temporary source file");
}
source_file->flush();
Ref<FileAccess> result_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE,
name + "_" + itos(p_stage_data.hash.short_sha()),
"metallib", false, &r_error);
ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary target file");
String sdk;
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS:
sdk = "macosx";
break;
case MetalDeviceProfile::Platform::iOS:
sdk = "iphoneos";
break;
}
// Build the .metallib binary.
{
List<String> args{ "-sdk", sdk, "metal", "-O3" };
// Compile metal shaders for the minimum supported target instead of the host machine.
if (min_os_version.is_valid()) {
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS: {
args.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::iOS: {
args.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version());
break;
}
}
} else {
WARN_PRINT_ONCE(vformat("Minimum target OS version is not set, so baking shaders for Metal will target the default version of your toolchain: %s", compiler_props.os_version_min_required.to_compiler_os_version()));
}
if (p_stage_data.is_position_invariant) {
args.push_back("-fpreserve-invariance");
}
args.push_back("-fmetal-math-mode=fast");
args.push_back(source_file->get_path_absolute());
args.push_back("-o");
args.push_back(result_file->get_path_absolute());
String r_pipe;
int exit_code;
Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true);
if (!r_pipe.is_empty()) {
print_line(r_pipe);
}
if (err != OK) {
ERR_PRINT(vformat("Metal compiler returned error code: %d", err));
}
if (exit_code != 0) {
ERR_PRINT(vformat("Metal compiler exited with error code: %d", exit_code));
}
int len = result_file->get_length();
ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "Metal compiler created empty library");
}
// Strip the source from the binary.
{
List<String> args{ "-sdk", sdk, "metal-dsymutil", "--remove-source", result_file->get_path_absolute() };
String r_pipe;
int exit_code;
Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true);
if (!r_pipe.is_empty()) {
print_line(r_pipe);
}
if (err != OK) {
ERR_PRINT(vformat("metal-dsymutil tool returned error code: %d", err));
}
if (exit_code != 0) {
ERR_PRINT(vformat("metal-dsymutil Compiler exited with error code: %d", exit_code));
}
int len = result_file->get_length();
ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "metal-dsymutil tool created empty library");
}
r_binary_data = result_file->get_buffer(result_file->get_length());
return OK;
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"
bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingDeviceCommons::ShaderStageSPIRVData> &p_spirv) {
using namespace spirv_cross;
using spirv_cross::CompilerMSL;
using spirv_cross::Resource;
if (export_mode) {
_initialize_toolchain_properties();
}
// initialize Metal-specific reflection data
shaders.resize(p_spirv.size());
mtl_shaders.resize(p_spirv.size());
mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size());
mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview);
mtl_reflection_data.profile = *device_profile;
// set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data
// including the last one, which is the size of reflection_binding_set_uniforms_count.
LocalVector<uint32_t> set_indexes;
uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1;
{
// calculate the starting offsets of each descriptor set in the binding set uniforms data
uint32_t size = reflection_binding_set_uniforms_count.size();
set_indexes.resize(set_indexes_size);
uint32_t offset = 0;
for (uint32_t i = 0; i < size; i++) {
set_indexes[i] = offset;
offset += reflection_binding_set_uniforms_count.get(i);
}
set_indexes[set_indexes_size - 1] = offset;
}
CompilerMSL::Options msl_options{};
// Determine Metal language version.
uint32_t msl_version = 0;
{
if (export_mode && compiler_props.is_valid()) {
// Use the properties determined by the toolchain and minimum OS version.
msl_version = compiler_props.metal_version;
mtl_reflection_data.os_min_version = compiler_props.os_version_min_required;
} else {
msl_version = make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
mtl_reflection_data.os_min_version = MinOsVersion();
}
uint32_t msl_ver_maj = 0;
uint32_t msl_ver_min = 0;
parse_msl_version(msl_version, msl_ver_maj, msl_ver_min);
msl_options.set_msl_version(msl_ver_maj, msl_ver_min);
mtl_reflection_data.msl_version = msl_version;
}
msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
if (device_profile->platform == MetalDeviceProfile::Platform::iOS) {
msl_options.ios_use_simdgroup_functions = device_profile->features.simdPermute;
msl_options.ios_support_base_vertex_instance = true;
}
bool disable_argument_buffers = false;
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
disable_argument_buffers = true;
}
if (device_profile->features.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && !disable_argument_buffers) {
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
msl_options.argument_buffers = true;
mtl_reflection_data.set_uses_argument_buffers(true);
} else {
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1;
// Tier 1 argument buffers don't support writable textures, so we disable them completely.
msl_options.argument_buffers = false;
mtl_reflection_data.set_uses_argument_buffers(false);
}
msl_options.force_active_argument_buffer_resources = true;
// We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding.
// msl_options.pad_argument_buffer_resources = true;
msl_options.texture_buffer_native = true; // Enable texture buffer support.
msl_options.use_framebuffer_fetch_subpasses = false;
msl_options.pad_fragment_output_components = true;
msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID;
msl_options.agx_manual_cube_grad_fixup = true;
if (reflection_data.has_multiview) {
msl_options.multiview = true;
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
if (msl_version >= make_msl_version(3, 2)) {
// All 3.2+ versions support device coherence, so we can disable texture fences.
msl_options.readwrite_texture_fences = false;
}
CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
#if DEV_ENABLED
options.emit_line_directives = true;
#endif
for (uint32_t i = 0; i < p_spirv.size(); i++) {
StageData &stage_data = mtl_shaders.write[i];
RD::ShaderStageSPIRVData const &v = p_spirv[i];
RD::ShaderStage stage = v.shader_stage;
char const *stage_name = RD::SHADER_STAGE_NAMES[stage];
uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr());
size_t word_count = v.spirv.size() / sizeof(uint32_t);
Parser parser(ir, word_count);
try {
parser.parse();
} catch (CompilerError &e) {
ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what());
}
CompilerMSL compiler(std::move(parser.get_parsed_ir()));
compiler.set_msl_options(msl_options);
compiler.set_common_options(options);
std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
ShaderResources resources = compiler.get_shader_resources();
std::string source;
try {
source = compiler.compile();
} catch (CompilerError &e) {
ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what());
}
ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage.");
SmallVector<EntryPoint> entry_pts_stages = compiler.get_entry_points_and_stages();
EntryPoint &entry_point_stage = entry_pts_stages.front();
SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model);
// Process specialization constants.
if (!compiler.get_specialization_constants().empty()) {
uint32_t size = reflection_specialization_data.size();
for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
uint32_t j = 0;
while (j < size) {
const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j];
if (res.constant_id == constant.constant_id) {
mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage;
// emulate labeled for loop and continue
goto outer_continue;
}
++j;
}
if (j == size) {
WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id));
}
outer_continue:;
}
}
// Process bindings.
uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size();
using BT = SPIRType::BaseType;
// Always clearer than a boolean.
enum class Writable {
No,
Maybe,
};
// Returns a std::optional containing the value of the
// decoration, if it exists.
auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
uint32_t res = -1;
if (compiler.has_decoration(id, decoration)) {
res = compiler.get_decoration(id, decoration);
}
return res;
};
auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector<Resource> &p_resources, Writable p_writable) {
for (Resource const &res : p_resources) {
uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
UniformData *found = nullptr;
if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) {
uint32_t begin = set_indexes[dset];
uint32_t end = set_indexes[dset + 1];
for (uint32_t j = begin; j < end; j++) {
const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j];
if (dbin == ref_bind.binding) {
found = &mtl_reflection_binding_set_uniforms_data.write[j];
break;
}
}
}
ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found");
bool is_active = active.find(res.id) != active.end();
if (is_active) {
found->active_stages |= 1 << stage;
}
BindingInfoData &primary = found->get_binding_for_stage(stage);
SPIRType const &a_type = compiler.get_type(res.type_id);
BT basetype = a_type.basetype;
switch (basetype) {
case BT::Struct: {
primary.data_type = MTLDataTypePointer;
} break;
case BT::Image:
case BT::SampledImage: {
primary.data_type = MTLDataTypeTexture;
} break;
case BT::Sampler: {
primary.data_type = MTLDataTypeSampler;
primary.array_length = 1;
for (uint32_t const &a : a_type.array) {
primary.array_length *= a;
}
} break;
default: {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType");
} break;
}
// Find array length of image.
if (basetype == BT::Image || basetype == BT::SampledImage) {
primary.array_length = 1;
for (uint32_t const &a : a_type.array) {
primary.array_length *= a;
}
primary.is_multisampled = a_type.image.ms;
SPIRType::ImageType const &image = a_type.image;
primary.image_format = image.format;
switch (image.dim) {
case spv::Dim1D: {
if (image.arrayed) {
primary.texture_type = MTLTextureType1DArray;
} else {
primary.texture_type = MTLTextureType1D;
}
} break;
case spv::DimSubpassData: {
[[fallthrough]];
}
case spv::Dim2D: {
if (image.arrayed && image.ms) {
primary.texture_type = MTLTextureType2DMultisampleArray;
} else if (image.arrayed) {
primary.texture_type = MTLTextureType2DArray;
} else if (image.ms) {
primary.texture_type = MTLTextureType2DMultisample;
} else {
primary.texture_type = MTLTextureType2D;
}
} break;
case spv::Dim3D: {
primary.texture_type = MTLTextureType3D;
} break;
case spv::DimCube: {
if (image.arrayed) {
primary.texture_type = MTLTextureTypeCube;
}
} break;
case spv::DimRect: {
} break;
case spv::DimBuffer: {
// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
primary.texture_type = MTLTextureTypeTextureBuffer;
} break;
case spv::DimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case spv::DimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.
} break;
}
}
// Update writable.
if (p_writable == Writable::Maybe) {
if (basetype == BT::Struct) {
Bitset flags = compiler.get_buffer_block_flags(res.id);
if (!flags.get(spv::DecorationNonWritable)) {
if (flags.get(spv::DecorationNonReadable)) {
primary.access = MTLBindingAccessWriteOnly;
} else {
primary.access = MTLBindingAccessReadWrite;
}
}
} else if (basetype == BT::Image) {
switch (a_type.image.access) {
case spv::AccessQualifierWriteOnly:
primary.access = MTLBindingAccessWriteOnly;
break;
case spv::AccessQualifierReadWrite:
primary.access = MTLBindingAccessReadWrite;
break;
case spv::AccessQualifierReadOnly:
break;
case spv::AccessQualifierMax:
[[fallthrough]];
default:
if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) {
if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) {
primary.access = MTLBindingAccessWriteOnly;
} else {
primary.access = MTLBindingAccessReadWrite;
}
}
break;
}
}
}
switch (primary.access) {
case MTLBindingAccessReadOnly:
primary.usage = MTLResourceUsageRead;
break;
case MTLBindingAccessWriteOnly:
primary.usage = MTLResourceUsageWrite;
break;
case MTLBindingAccessReadWrite:
primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite;
break;
}
primary.index = compiler.get_automatic_msl_resource_binding(res.id);
// A sampled image contains two bindings, the primary
// is to the image, and the secondary is to the associated sampler.
if (basetype == BT::SampledImage) {
uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
if (binding != (uint32_t)-1) {
BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
secondary.data_type = MTLDataTypeSampler;
secondary.index = binding;
secondary.access = MTLBindingAccessReadOnly;
}
}
// An image may have a secondary binding if it is used
// for atomic operations.
if (basetype == BT::Image) {
uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
if (binding != (uint32_t)-1) {
BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
secondary.data_type = MTLDataTypePointer;
secondary.index = binding;
secondary.access = MTLBindingAccessReadWrite;
}
}
}
return Error::OK;
};
if (!resources.uniform_buffers.empty()) {
Error err = descriptor_bindings(resources.uniform_buffers, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.storage_buffers.empty()) {
Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.storage_images.empty()) {
Error err = descriptor_bindings(resources.storage_images, Writable::Maybe);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.sampled_images.empty()) {
Error err = descriptor_bindings(resources.sampled_images, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.separate_images.empty()) {
Error err = descriptor_bindings(resources.separate_images, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.separate_samplers.empty()) {
Error err = descriptor_bindings(resources.separate_samplers, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.subpass_inputs.empty()) {
Error err = descriptor_bindings(resources.subpass_inputs, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.push_constant_buffers.empty()) {
for (Resource const &res : resources.push_constant_buffers) {
uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
if (binding != (uint32_t)-1) {
stage_data.push_constant_binding = binding;
}
}
}
ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported");
ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures not supported");
ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported");
if (!resources.stage_inputs.empty()) {
for (Resource const &res : resources.stage_inputs) {
uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
if (binding != (uint32_t)-1) {
stage_data.vertex_input_binding_mask |= 1 << binding;
}
}
}
stage_data.is_position_invariant = compiler.is_position_invariant();
stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve);
stage_data.hash = SHA256Digest(source.c_str(), source.length());
stage_data.source_size = source.length();
::Vector<uint8_t> binary_data;
binary_data.resize(stage_data.source_size);
memcpy(binary_data.ptrw(), source.c_str(), stage_data.source_size);
if (export_mode) {
if (compiler_props.is_valid()) {
// Try to compile the Metal source code.
::Vector<uint8_t> library_data;
Error compile_err = compile_metal_source(source.c_str(), stage_data, library_data);
if (compile_err == OK) {
// If we successfully compiled to a `.metallib`, there are greater restrictions on target platforms,
// so we must update the properties.
stage_data.library_size = library_data.size();
binary_data.resize(stage_data.source_size + stage_data.library_size);
memcpy(binary_data.ptrw() + stage_data.source_size, library_data.ptr(), stage_data.library_size);
}
} else {
WARN_PRINT_ONCE("Metal shader baking limited to SPIR-V: Unable to determine toolchain properties to compile .metallib");
}
}
uint32_t binary_data_size = binary_data.size();
Shader &shader = shaders.write[i];
shader.shader_stage = stage;
shader.code_decompressed_size = binary_data_size;
shader.code_compressed_bytes.resize(binary_data_size);
uint32_t compressed_size = 0;
bool compressed = compress_code(binary_data.ptr(), binary_data_size, shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i));
shader.code_compressed_bytes.resize(compressed_size);
}
return true;
}
#pragma clang diagnostic pop
uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
if (p_bytes != nullptr) {
*(HeaderData *)p_bytes = mtl_reflection_data;
}
return sizeof(HeaderData);
}
uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
if (p_bytes != nullptr) {
*(UniformData *)p_bytes = mtl_reflection_binding_set_uniforms_data[p_index];
}
return sizeof(UniformData);
}
uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
if (p_bytes != nullptr) {
*(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index];
}
return sizeof(SpecializationData);
}
uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
if (p_bytes != nullptr) {
*(StageData *)p_bytes = mtl_shaders[p_index];
}
return sizeof(StageData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
mtl_reflection_data = *(HeaderData *)p_bytes;
return sizeof(HeaderData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size());
return 0;
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
mtl_reflection_binding_set_uniforms_data.ptrw()[p_index] = *(UniformData *)p_bytes;
return sizeof(UniformData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) {
mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
return 0;
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes;
return sizeof(SpecializationData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) {
mtl_shaders.resize(shaders.size());
return 0;
}
uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
mtl_shaders.ptrw()[p_index] = *(StageData *)p_bytes;
return sizeof(StageData);
}
RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const {
MetalShaderReflection res;
res.specialization_constants = mtl_reflection_specialization_data;
uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size();
uint32_t start = 0;
res.uniform_sets.resize(uniform_set_count);
for (uint32_t i = 0; i < uniform_set_count; i++) {
Vector<UniformData> &set = res.uniform_sets.ptrw()[i];
uint32_t count = reflection_binding_set_uniforms_count.get(i);
set.resize(count);
memcpy(set.ptrw(), &mtl_reflection_binding_set_uniforms_data.ptr()[start], count * sizeof(UniformData));
start += count;
}
return res;
}
uint32_t RenderingShaderContainerMetal::_format() const {
return 0x42424242;
}
uint32_t RenderingShaderContainerMetal::_format_version() const {
return FORMAT_VERSION;
}
Ref<RenderingShaderContainer> RenderingShaderContainerFormatMetal::create_container() const {
Ref<RenderingShaderContainerMetal> result;
result.instantiate();
result->set_export_mode(export_mode);
result->set_device_profile(device_profile);
result->set_min_os_version(min_os_version);
return result;
}
RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatMetal::get_shader_language_version() const {
return SHADER_LANGUAGE_VULKAN_VERSION_1_1;
}
RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatMetal::get_shader_spirv_version() const {
return SHADER_SPIRV_VERSION_1_6;
}
RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export, const MinOsVersion p_min_os_version) :
export_mode(p_export), min_os_version(p_min_os_version), device_profile(p_device_profile) {
}
String MinOsVersion::to_compiler_os_version() const {
if (version == UINT32_MAX) {
return "";
}
uint32_t major = version / 10000;
uint32_t minor = (version % 10000) / 100;
return vformat("%d.%d", major, minor);
}
MinOsVersion::MinOsVersion(const String &p_version) {
int pos = p_version.find_char('.');
if (pos > 0) {
version = (uint32_t)(p_version.substr(0, pos).to_int() * 10000 +
p_version.substr(pos + 1).to_int() * 100);
} else {
version = (uint32_t)(p_version.to_int() * 10000);
}
if (version == 0) {
version = UINT32_MAX;
}
}

View File

@@ -0,0 +1,75 @@
/**************************************************************************/
/* sha256_digest.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import <CommonCrypto/CommonDigest.h>
#import <simd/simd.h>
#import <zlib.h>
#include "core/templates/local_vector.h"
struct SHA256Digest {
unsigned char data[CC_SHA256_DIGEST_LENGTH];
static constexpr size_t serialized_size() { return CC_SHA256_DIGEST_LENGTH; }
uint32_t hash() const {
uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH);
return c;
}
SHA256Digest() {
bzero(data, CC_SHA256_DIGEST_LENGTH);
}
SHA256Digest(const char *p_hash) {
memcpy(data, p_hash, CC_SHA256_DIGEST_LENGTH);
}
SHA256Digest(const char *p_data, size_t p_length) {
CC_SHA256(p_data, (CC_LONG)p_length, data);
}
_FORCE_INLINE_ uint32_t short_sha() const {
return __builtin_bswap32(*(uint32_t *)&data[0]);
}
LocalVector<uint8_t> serialize() const {
LocalVector<uint8_t> result;
result.resize(CC_SHA256_DIGEST_LENGTH);
memcpy(result.ptr(), data, CC_SHA256_DIGEST_LENGTH);
return result;
}
static SHA256Digest deserialize(LocalVector<uint8_t> p_ser) {
return SHA256Digest((const char *)p_ser.ptr());
}
};