Disable ubershaders on problematic Adreno compiler versions.

This commit is contained in:
Skyth
2026-05-21 14:13:48 +03:00
parent 3bfa50fd32
commit c6987624ef
15 changed files with 124 additions and 65 deletions
@@ -180,7 +180,6 @@ Error RenderingContextDriverD3D12::_initialize_devices() {
Device &device = driver_devices[i];
device.name = desc.Description;
device.vendor = desc.VendorId;
device.workarounds = Workarounds();
if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
device.type = DEVICE_TYPE_CPU;
@@ -70,7 +70,6 @@ Error RenderingContextDriverMetal::initialize() {
#endif
device.type = DEVICE_TYPE_INTEGRATED_GPU;
device.vendor = Vendor::VENDOR_APPLE;
device.workarounds = Workarounds();
MetalDeviceProperties props(metal_device);
int version = (int)props.features.highestFamily - (int)MTL::GPUFamilyApple1 + 1;
@@ -852,9 +852,6 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
driver_device.name = String::utf8(props.deviceName);
driver_device.vendor = props.vendorID;
driver_device.type = DeviceType(props.deviceType);
driver_device.workarounds = Workarounds();
_check_driver_workarounds(props, driver_device);
uint32_t queue_family_properties_count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, nullptr);
@@ -868,31 +865,6 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
return OK;
}
void RenderingContextDriverVulkan::_check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device) {
	// Decides whether the "avoid compute after draw" workaround is needed for the Adreno 6XX family.
	//
	// The Vulkan driver on these devices is known to crash when dynamic draw state is used in a command buffer
	// before a dispatch call is issued. Dynamic scissor and viewport are basic requirements for the engine (so that
	// this state is not baked into the PSO), which leaves resetting the command buffer as the only known fix.
	//
	// The render graph places no restriction on issuing compute work before draws within a frame, and compute work
	// may come to depend on rasterization in the future. The workaround therefore ends recording on the current
	// command buffer whenever a compute list is encountered after a draw list has been executed, then continues on
	// a new command buffer with the appropriate synchronization primitives inserted.
	//
	// This costs extra synchronization between the created command buffers and between the individual submissions.
	// That performance hit is accepted in order to support these devices without limiting the renderer's design.
	//
	// The bug was fixed in driver version 512.503.0, so the workaround is only enabled on older drivers.
	bool needs_workaround = false;

	const bool is_adreno_6xx_or_newer =
			r_device.vendor == Vendor::VENDOR_QUALCOMM &&
			p_device_properties.deviceID >= 0x6000000; // Adreno 6xx

	if (is_adreno_6xx_or_newer) {
		const bool driver_predates_fix = p_device_properties.driverVersion < VK_MAKE_VERSION(512, 503, 0);
		// Devices whose name contains "Turnip" are excluded from the workaround.
		const bool name_has_turnip = r_device.name.find("Turnip") >= 0;
		needs_workaround = driver_predates_fix && !name_has_turnip;
	}

	r_device.workarounds.avoid_compute_after_draw = needs_workaround;
}
bool RenderingContextDriverVulkan::_use_validation_layers() const {
return Engine::get_singleton()->is_validation_layers_enabled();
}
@@ -113,7 +113,6 @@ private:
Error _initialize_instance_extensions();
Error _initialize_instance();
Error _initialize_devices();
void _check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device);
// Static callbacks.
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data);
@@ -615,6 +615,8 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
}
}
_register_requested_device_extension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false);
uint32_t device_extension_count = 0;
VkResult err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, nullptr);
ERR_FAIL_COND_V_MSG(err != VK_SUCCESS, ERR_CANT_CREATE, vformat("Couldn't get Vulkan device extension count (VkResult error %d).", err));
@@ -684,6 +686,81 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
return OK;
}
// Evaluates every known driver bug workaround for the selected physical device.
//
// p_device_properties: core properties of the physical device (vendor, device ID, driver version, name).
// p_driver_properties: optional extended driver properties; may be nullptr when they could not be queried,
//                      in which case workarounds that depend on them are left disabled.
//
// Updates adreno_5xx_empty_descriptor_set_layout_workaround, driver_workarounds and
// linear_descriptor_pools_enabled. The latter is only ever turned off here, never on.
void RenderingDeviceDriverVulkan::_check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, const VkPhysicalDeviceDriverPropertiesKHR *p_driver_properties) {
	// Every workaround below targets Qualcomm (Adreno) drivers, so resolve the vendor once.
	const bool is_qualcomm = p_device_properties.vendorID == RenderingContextDriver::Vendor::VENDOR_QUALCOMM;

	// Workaround a driver bug on Adreno 5XX GPUs that causes a crash when
	// there are empty descriptor set layouts placed between non-empty ones.
	adreno_5xx_empty_descriptor_set_layout_workaround =
			is_qualcomm &&
			p_device_properties.deviceID >= 0x5000000 &&
			p_device_properties.deviceID < 0x6000000;

	// Workaround for the Adreno 6XX family of devices.
	//
	// There's a known issue with the Vulkan driver in this family of devices where it'll crash if a dynamic state for drawing is
	// used in a command buffer before a dispatch call is issued. As both dynamic scissor and viewport are basic requirements for
	// the engine to not bake this state into the PSO, the only known way to fix this issue is to reset the command buffer entirely.
	//
	// As the render graph has no built in limitations of whether it'll issue compute work before anything needs to draw on the
	// frame, and there's no guarantee that compute work will never be dependent on rasterization in the future, this workaround
	// will end recording on the current command buffer any time a compute list is encountered after a draw list was executed.
	// A new command buffer will be created afterwards and the appropriate synchronization primitives will be inserted.
	//
	// Executing this workaround has the added cost of synchronization between all the command buffers that are created as well as
	// all the individual submissions. This performance hit is accepted for the sake of being able to support these devices without
	// limiting the design of the renderer.
	//
	// This bug was fixed in driver version 512.503.0, so we only enable it on drivers older than this.
	//
	driver_workarounds.avoid_compute_after_draw =
			is_qualcomm &&
			p_device_properties.deviceID >= 0x6000000 && // Adreno 6xx
			p_device_properties.driverVersion < VK_MAKE_VERSION(512, 503, 0) &&
			strstr(p_device_properties.deviceName, "Turnip") == nullptr;

	// Workaround a driver bug on Adreno 730 GPUs that keeps leaking memory on each call to vkResetDescriptorPool,
	// which eventually runs out of memory. In such a case we should not be using linearly allocated pools.
	// Bug introduced in driver 512.597.0 and fixed in 512.671.0.
	// Confirmed by Qualcomm.
	if (linear_descriptor_pools_enabled) {
		const uint32_t reset_descriptor_pool_broken_driver_begin = VK_MAKE_VERSION(512u, 597u, 0u);
		const uint32_t reset_descriptor_pool_fixed_driver_begin = VK_MAKE_VERSION(512u, 671u, 0u);

		// Only Qualcomm drivers inside the affected range lose linear pools. Driver version numbers are
		// vendor specific, so devices from any other vendor must keep the feature enabled regardless of
		// what their driverVersion happens to be.
		const bool reset_descriptor_pool_leaks =
				is_qualcomm &&
				p_device_properties.driverVersion >= reset_descriptor_pool_broken_driver_begin &&
				p_device_properties.driverVersion <= reset_descriptor_pool_fixed_driver_begin;

		linear_descriptor_pools_enabled = !reset_descriptor_pool_leaks;
	}

	// Workaround for Adreno drivers where ubershaders with a lot of constant literals crash the compiler.
	// The compiler version is only reported through the extended driver properties; without them the
	// affected compiler cannot be identified and the workaround stays off.
	driver_workarounds.disable_ubershaders =
			is_qualcomm &&
			p_driver_properties != nullptr &&
			strstr(p_driver_properties->driverInfo, "Compiler Version: EV031.32.02.") != nullptr;
}
void RenderingDeviceDriverVulkan::_get_device_properties() {
const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
if (functions.GetPhysicalDeviceProperties2 != nullptr && enabled_device_extension_names.has(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME)) {
VkPhysicalDeviceProperties2KHR device_props = {};
device_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
VkPhysicalDeviceDriverPropertiesKHR driver_props = {};
driver_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
device_props.pNext = &driver_props;
functions.GetPhysicalDeviceProperties2(physical_device, &device_props);
physical_device_properties = device_props.properties;
_check_driver_workarounds(physical_device_properties, &driver_props);
} else {
vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties);
_check_driver_workarounds(physical_device_properties, nullptr);
}
}
Error RenderingDeviceDriverVulkan::_check_device_features() {
vkGetPhysicalDeviceFeatures(physical_device, &physical_device_features);
@@ -1742,25 +1819,6 @@ void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, u
Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t p_frame_count) {
context_device = context_driver->device_get(p_device_index);
physical_device = context_driver->physical_device_get(p_device_index);
vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties);
// Workaround a driver bug on Adreno 730 GPUs that keeps leaking memory on each call to vkResetDescriptorPool.
// Which eventually run out of memory. In such case we should not be using linear allocated pools
// Bug introduced in driver 512.597.0 and fixed in 512.671.0.
// Confirmed by Qualcomm.
if (linear_descriptor_pools_enabled) {
const uint32_t reset_descriptor_pool_broken_driver_begin = VK_MAKE_VERSION(512u, 597u, 0u);
const uint32_t reset_descriptor_pool_fixed_driver_begin = VK_MAKE_VERSION(512u, 671u, 0u);
linear_descriptor_pools_enabled = physical_device_properties.driverVersion < reset_descriptor_pool_broken_driver_begin || physical_device_properties.driverVersion > reset_descriptor_pool_fixed_driver_begin;
}
// Workaround a driver bug on Adreno 5XX GPUs that causes a crash when
// there are empty descriptor set layouts placed between non-empty ones.
adreno_5xx_empty_descriptor_set_layout_workaround =
physical_device_properties.vendorID == RenderingContextDriver::Vendor::VENDOR_QUALCOMM &&
physical_device_properties.deviceID >= 0x5000000 &&
physical_device_properties.deviceID < 0x6000000;
frame_count = p_frame_count;
// Copy the queue family properties the context already retrieved.
@@ -1773,6 +1831,8 @@ Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t
Error err = _initialize_device_extensions();
ERR_FAIL_COND_V_MSG(err != OK, err, "Couldn't initialize Vulkan device extensions. This may be caused by an incompatible or outdated graphics driver.");
_get_device_properties();
err = _check_device_features();
ERR_FAIL_COND_V_MSG(err != OK, err, "Couldn't initialize Vulkan device features. This may be caused by an incompatible or outdated graphics driver.");
@@ -7388,6 +7448,10 @@ bool RenderingDeviceDriverVulkan::is_composite_alpha_supported(CommandQueueID p_
return false;
}
// Returns a copy of the workaround flags stored in driver_workarounds.
RenderingDeviceDriver::DriverWorkarounds RenderingDeviceDriverVulkan::get_driver_workarounds() const {
return driver_workarounds;
}
/******************/
RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(RenderingContextDriverVulkan *p_context_driver) {
@@ -187,9 +187,12 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
};
PipelineStatistics pipeline_statistics;
DriverWorkarounds driver_workarounds;
void _register_requested_device_extension(const CharString &p_extension_name, bool p_required);
Error _initialize_device_extensions();
void _check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, const VkPhysicalDeviceDriverPropertiesKHR *p_driver_properties);
void _get_device_properties();
Error _check_device_features();
Error _check_device_capabilities();
void _choose_vrs_capabilities();
@@ -798,6 +801,8 @@ public:
virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
virtual DriverWorkarounds get_driver_workarounds() const override final;
private:
/*********************/
/**** BOOKKEEPING ****/
@@ -2564,7 +2564,7 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr
RID pipeline_rd;
RID vertex_array_rd;
RID index_array_rd;
const uint32_t ubershader_iterations = 2;
const uint32_t ubershader_iterations = (disable_ubershaders ? 1 : 2);
bool pipeline_valid = false;
while (pipeline_key.ubershader < ubershader_iterations) {
// Skeleton and blend shape.
@@ -2589,7 +2589,7 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr
if (shader != prev_shader || pipeline_hash != prev_pipeline_hash) {
RSE::PipelineSource pipeline_source = pipeline_key.ubershader ? RSE::PIPELINE_SOURCE_DRAW : RSE::PIPELINE_SOURCE_SPECIALIZATION;
pipeline_rd = shader->pipeline_hash_map.get_pipeline(pipeline_key, pipeline_hash, pipeline_key.ubershader, pipeline_source);
pipeline_rd = shader->pipeline_hash_map.get_pipeline(pipeline_key, pipeline_hash, pipeline_key.ubershader || disable_ubershaders, pipeline_source);
if (pipeline_rd.is_valid()) {
pipeline_valid = true;
@@ -3324,6 +3324,10 @@ static RD::FramebufferFormatID _get_shadow_atlas_framebuffer_format_for_pipeline
}
void RenderForwardMobile::_mesh_compile_pipeline_for_surface(SceneShaderForwardMobile::ShaderData *p_shader, void *p_mesh_surface, bool p_instanced_surface, RSE::PipelineSource p_source, SceneShaderForwardMobile::ShaderData::PipelineKey &r_pipeline_key, Vector<ShaderPipelinePair> *r_pipeline_pairs) {
if (disable_ubershaders) {
return;
}
RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
uint64_t input_mask = p_shader->get_vertex_input_mask(r_pipeline_key.version, true);
bool emulate_point_size = p_shader->uses_point_size && scene_shader.emulate_point_size;
@@ -3546,6 +3550,13 @@ void RenderForwardMobile::_update_shader_quality_settings() {
RenderForwardMobile::RenderForwardMobile() {
singleton = this;
disable_ubershaders = RD::get_singleton()->get_driver_workarounds().disable_ubershaders;
if (disable_ubershaders) {
print_verbose("Ubershaders: Disabled");
} else {
print_verbose("Ubershaders: Enabled");
}
sky.set_texture_format(_render_buffers_get_preferred_color_format());
String defines;
@@ -73,6 +73,7 @@ private:
/* Scene Shader */
SceneShaderForwardMobile scene_shader;
bool disable_ubershaders = false;
/* Render Buffer */
@@ -88,15 +88,10 @@ public:
DEVICE_TYPE_MAX = 0x5
};
// Per-device driver workaround flags. Every flag defaults to disabled.
struct Workarounds {
// When set, compute work recorded after a draw list must not share a command buffer with it.
bool avoid_compute_after_draw = false;
};
// Description of a device reported by a rendering context driver.
struct Device {
// Human-readable device name; stays "Unknown" until a driver fills it in.
String name = "Unknown";
// Vendor identifier; compared against the Vendor enumeration values.
uint32_t vendor = Vendor::VENDOR_UNKNOWN;
// Kind of device (e.g. CPU or integrated GPU).
DeviceType type = DEVICE_TYPE_OTHER;
// Driver workaround flags that apply to this device.
Workarounds workarounds;
};
virtual ~RenderingContextDriver();
+5 -1
View File
@@ -7841,6 +7841,10 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const {
return driver->get_pipeline_cache_uuid();
}
// Forwards to the underlying driver and returns the workaround flags it reports.
RenderingDevice::DriverWorkarounds RenderingDevice::get_driver_workarounds() const {
return driver->get_driver_workarounds();
}
void RenderingDevice::swap_buffers(bool p_present) {
ERR_RENDER_THREAD_GUARD();
@@ -8502,7 +8506,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
driver->command_buffer_begin(frames[0].command_buffer);
// Create draw graph and start it initialized as well.
draw_graph.initialize(driver, device, &_render_pass_create_from_graph, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.initialize(driver, &_render_pass_create_from_graph, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.begin();
for (uint32_t i = 0; i < frames.size(); i++) {
+2
View File
@@ -1948,6 +1948,8 @@ public:
String get_device_api_version() const;
String get_device_pipeline_cache_uuid() const;
DriverWorkarounds get_driver_workarounds() const;
uint64_t get_frames_drawn() const { return frames_drawn; }
bool is_composite_alpha_supported() const;
@@ -1049,6 +1049,12 @@ public:
SUBGROUP_QUAD_BIT = 128,
};
// Driver workarounds that require higher level code and cannot be solely implemented in RenderingDeviceDriver.
struct DriverWorkarounds {
// Read by RenderingDeviceGraph: when set, it tracks draw lists and applies its
// "avoid compute after draw" handling once a compute list follows one.
bool avoid_compute_after_draw = false;
// Read by RenderForwardMobile: when set, ubershader pipelines are not used.
// The Vulkan driver sets it for Adreno compiler versions that crash on ubershaders.
bool disable_ubershaders = false;
};
////////////////////////////////////////////
// PROTECTED STUFF
// Not exposed by RenderingDevice, but shared
@@ -950,6 +950,8 @@ public:
virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const { return false; }
virtual DriverWorkarounds get_driver_workarounds() const { return DriverWorkarounds(); }
/******************/
virtual ~RenderingDeviceDriver();
+5 -5
View File
@@ -1117,7 +1117,7 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
_run_raytracing_list_command(r_command_buffer, raytracing_list_command->instruction_data(), raytracing_list_command->instruction_data_size);
} break;
case RecordedCommand::TYPE_COMPUTE_LIST: {
if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) {
if (driver_workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) {
// Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information.
workarounds_state.draw_list_found = false;
@@ -1141,7 +1141,7 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
_run_compute_list_command(r_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size);
} break;
case RecordedCommand::TYPE_DRAW_LIST: {
if (device.workarounds.avoid_compute_after_draw) {
if (driver_workarounds.avoid_compute_after_draw) {
// Indicate that a draw list was encountered for the workaround.
workarounds_state.draw_list_found = true;
}
@@ -1712,13 +1712,13 @@ void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data
}
}
void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Device p_device, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame) {
void RenderingDeviceGraph::initialize(RDD *p_driver, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame) {
DEV_ASSERT(p_driver != nullptr);
DEV_ASSERT(p_render_pass_creation_function != nullptr);
DEV_ASSERT(p_frame_count > 0);
driver = p_driver;
device = p_device;
driver_workarounds = p_driver->get_driver_workarounds();
render_pass_creation_function = p_render_pass_creation_function;
frames.resize(p_frame_count);
@@ -2703,7 +2703,7 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD
int32_t current_label_level = -1;
_run_label_command_change(r_command_buffer, -1, -1, true, true, nullptr, 0, current_label_index, current_label_level);
if (device.workarounds.avoid_compute_after_draw) {
if (driver_workarounds.avoid_compute_after_draw) {
// Reset the state of the workaround.
workarounds_state.draw_list_found = false;
}
+2 -2
View File
@@ -811,7 +811,7 @@ private:
};
RDD *driver = nullptr;
RenderingContextDriver::Device device;
RDD::DriverWorkarounds driver_workarounds;
RenderPassCreationFunction render_pass_creation_function = nullptr;
int64_t tracking_frame = 0;
LocalVector<uint8_t> command_data;
@@ -887,7 +887,7 @@ private:
public:
RenderingDeviceGraph();
~RenderingDeviceGraph();
void initialize(RDD *p_driver, RenderingContextDriver::Device p_device, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame);
void initialize(RDD *p_driver, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame);
void finalize();
void begin();
void add_blas_build(RDD::AccelerationStructureID p_blas, RDD::BufferID p_scratch_buffer, ResourceTracker *p_dst_tracker, VectorView<ResourceTracker *> p_src_trackers);