Merge pull request #11789 from Kelebek1/spirv_shift_right
Manually robust on Maxwell and earlier
This commit is contained in:
commit
4b06bcc82c
@ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
|
|||||||
} else if (element_size > 1) {
|
} else if (element_size > 1) {
|
||||||
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
|
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
|
||||||
const Id shift{ctx.Const(log2_element_size)};
|
const Id shift{ctx.Const(log2_element_size)};
|
||||||
buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
|
buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift);
|
||||||
} else {
|
} else {
|
||||||
buffer_offset = ctx.Def(offset);
|
buffer_offset = ctx.Def(offset);
|
||||||
}
|
}
|
||||||
if (!binding.IsImmediate()) {
|
if (!binding.IsImmediate()) {
|
||||||
return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
|
return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
|
const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
|
||||||
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
|
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
|
||||||
return ctx.OpLoad(result_type, access_chain);
|
const Id val = ctx.OpLoad(result_type, access_chain);
|
||||||
|
|
||||||
|
if (offset.IsImmediate() || !ctx.profile.has_broken_robust) {
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto is_float = UniformDefinitions::IsFloat(member_ptr);
|
||||||
|
const auto num_elements = UniformDefinitions::NumElements(member_ptr);
|
||||||
|
const std::array zero_vec{
|
||||||
|
is_float ? ctx.Const(0.0f) : ctx.Const(0u),
|
||||||
|
is_float ? ctx.Const(0.0f) : ctx.Const(0u),
|
||||||
|
is_float ? ctx.Const(0.0f) : ctx.Const(0u),
|
||||||
|
is_float ? ctx.Const(0.0f) : ctx.Const(0u),
|
||||||
|
};
|
||||||
|
const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu));
|
||||||
|
const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements));
|
||||||
|
return ctx.OpSelect(result_type, cond, val, zero);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
|
Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
|
||||||
@ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde
|
|||||||
const u32 element{(offset.U32() / 4) % 4 + index_offset};
|
const u32 element{(offset.U32() / 4) % 4 + index_offset};
|
||||||
return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
|
return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
|
||||||
}
|
}
|
||||||
const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
|
const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
|
||||||
Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
|
Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
|
||||||
if (index_offset > 0) {
|
if (index_offset > 0) {
|
||||||
element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
|
element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
|
||||||
|
@ -64,6 +64,42 @@ struct UniformDefinitions {
|
|||||||
Id F32{};
|
Id F32{};
|
||||||
Id U32x2{};
|
Id U32x2{};
|
||||||
Id U32x4{};
|
Id U32x4{};
|
||||||
|
|
||||||
|
/// Returns the number of elements of the uniform type selected by member_ptr.
/// U32x4 yields 4, U32x2 yields 2, and U8, S8, U16, S16, U32 and F32 yield 1.
/// Any other member pointer trips ASSERT(false) and then falls back to 1.
constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) {
    // Multi-element members are resolved first; everything else known has one element.
    if (member_ptr == &UniformDefinitions::U32x4) {
        return 4;
    }
    if (member_ptr == &UniformDefinitions::U32x2) {
        return 2;
    }
    const bool is_single_element = member_ptr == &UniformDefinitions::U8 ||
                                   member_ptr == &UniformDefinitions::S8 ||
                                   member_ptr == &UniformDefinitions::U16 ||
                                   member_ptr == &UniformDefinitions::S16 ||
                                   member_ptr == &UniformDefinitions::U32 ||
                                   member_ptr == &UniformDefinitions::F32;
    if (!is_single_element) {
        // Unknown member: flag it, but keep returning a usable element count.
        ASSERT(false);
    }
    return 1;
}
|
||||||
|
|
||||||
|
/// Reports whether member_ptr selects the F32 member; every other member yields false.
constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) {
    return member_ptr == &UniformDefinitions::F32;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct StorageTypeDefinition {
|
struct StorageTypeDefinition {
|
||||||
|
@ -9,7 +9,6 @@ namespace Shader {
|
|||||||
|
|
||||||
struct Profile {
|
struct Profile {
|
||||||
u32 supported_spirv{0x00010000};
|
u32 supported_spirv{0x00010000};
|
||||||
|
|
||||||
bool unified_descriptor_binding{};
|
bool unified_descriptor_binding{};
|
||||||
bool support_descriptor_aliasing{};
|
bool support_descriptor_aliasing{};
|
||||||
bool support_int8{};
|
bool support_int8{};
|
||||||
@ -82,6 +81,9 @@ struct Profile {
|
|||||||
bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
|
bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
|
||||||
|
|
||||||
u32 gl_max_compute_smem_size{};
|
u32 gl_max_compute_smem_size{};
|
||||||
|
|
||||||
|
/// Maxwell and earlier NVIDIA architectures have broken robustness support, so out-of-range reads must be guarded manually
|
||||||
|
bool has_broken_robust{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
|||||||
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
|
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
|
||||||
.ignore_nan_fp_comparisons = false,
|
.ignore_nan_fp_comparisons = false,
|
||||||
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
|
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
|
||||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
|
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
|
||||||
|
.has_broken_robust =
|
||||||
|
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell,
|
||||||
|
};
|
||||||
|
|
||||||
host_info = Shader::HostTranslateInfo{
|
host_info = Shader::HostTranslateInfo{
|
||||||
.support_float64 = device.IsFloat64Supported(),
|
.support_float64 = device.IsFloat64Supported(),
|
||||||
.support_float16 = device.IsFloat16Supported(),
|
.support_float16 = device.IsFloat16Supported(),
|
||||||
|
@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
|
|||||||
|
|
||||||
} // namespace Alternatives
|
} // namespace Alternatives
|
||||||
|
|
||||||
enum class NvidiaArchitecture {
|
|
||||||
KeplerOrOlder,
|
|
||||||
Maxwell,
|
|
||||||
Pascal,
|
|
||||||
Volta,
|
|
||||||
Turing,
|
|
||||||
AmpereOrNewer,
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void SetNext(void**& next, T& data) {
|
void SetNext(void**& next, T& data) {
|
||||||
*next = &data;
|
*next = &data;
|
||||||
@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
|
|||||||
if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
|
if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
|
||||||
// Only Ampere and newer support this feature
|
// Only Ampere and newer support this feature
|
||||||
// TODO: Find a way to differentiate Ampere and Ada
|
// TODO: Find a way to differentiate Ampere and Ada
|
||||||
return NvidiaArchitecture::AmpereOrNewer;
|
return NvidiaArchitecture::Arch_AmpereOrNewer;
|
||||||
}
|
}
|
||||||
return NvidiaArchitecture::Turing;
|
return NvidiaArchitecture::Arch_Turing;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
|
if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
|
||||||
@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
|
|||||||
physical_properties.pNext = &advanced_blending_props;
|
physical_properties.pNext = &advanced_blending_props;
|
||||||
physical.GetProperties2(physical_properties);
|
physical.GetProperties2(physical_properties);
|
||||||
if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
|
if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
|
||||||
return NvidiaArchitecture::Maxwell;
|
return NvidiaArchitecture::Arch_Maxwell;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
|
if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
|
||||||
@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
|
|||||||
physical_properties.pNext = &conservative_raster_props;
|
physical_properties.pNext = &conservative_raster_props;
|
||||||
physical.GetProperties2(physical_properties);
|
physical.GetProperties2(physical_properties);
|
||||||
if (conservative_raster_props.degenerateLinesRasterized) {
|
if (conservative_raster_props.degenerateLinesRasterized) {
|
||||||
return NvidiaArchitecture::Volta;
|
return NvidiaArchitecture::Arch_Volta;
|
||||||
}
|
}
|
||||||
return NvidiaArchitecture::Pascal;
|
return NvidiaArchitecture::Arch_Pascal;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NvidiaArchitecture::KeplerOrOlder;
|
return NvidiaArchitecture::Arch_KeplerOrOlder;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<const char*> ExtensionListForVulkan(
|
std::vector<const char*> ExtensionListForVulkan(
|
||||||
@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||||||
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
|
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_nvidia) {
|
||||||
|
nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
|
||||||
|
}
|
||||||
|
|
||||||
SetupFamilies(surface);
|
SetupFamilies(surface);
|
||||||
const auto queue_cis = GetDeviceQueueCreateInfos();
|
const auto queue_cis = GetDeviceQueueCreateInfos();
|
||||||
|
|
||||||
@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||||||
|
|
||||||
if (is_nvidia) {
|
if (is_nvidia) {
|
||||||
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
|
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
|
||||||
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
|
const auto arch = GetNvidiaArch();
|
||||||
if (arch >= NvidiaArchitecture::AmpereOrNewer) {
|
if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) {
|
||||||
LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
|
LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
|
||||||
features.shader_float16_int8.shaderFloat16 = false;
|
features.shader_float16_int8.shaderFloat16 = false;
|
||||||
} else if (arch <= NvidiaArchitecture::Volta) {
|
} else if (arch <= NvidiaArchitecture::Arch_Volta) {
|
||||||
if (nv_major_version < 527) {
|
if (nv_major_version < 527) {
|
||||||
LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
|
LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
|
||||||
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||||
@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||||||
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||||
}
|
}
|
||||||
} else if (extensions.push_descriptor && is_nvidia) {
|
} else if (extensions.push_descriptor && is_nvidia) {
|
||||||
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
|
const auto arch = GetNvidiaArch();
|
||||||
if (arch <= NvidiaArchitecture::Pascal) {
|
if (arch <= NvidiaArchitecture::Arch_Pascal) {
|
||||||
LOG_WARNING(Render_Vulkan,
|
LOG_WARNING(Render_Vulkan,
|
||||||
"Pascal and older architectures have broken VK_KHR_push_descriptor");
|
"Pascal and older architectures have broken VK_KHR_push_descriptor");
|
||||||
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||||
|
@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer };
|
|||||||
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
|
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
|
||||||
const u32 GuestWarpSize = 32;
|
const u32 GuestWarpSize = 32;
|
||||||
|
|
||||||
|
/// NVIDIA GPU architecture generation as classified by GetNvidiaArchitecture.
/// Enumerators are declared from oldest to newest; callers compare values with
/// <= and >= (e.g. "Volta and older"), so the declaration order must be kept.
enum class NvidiaArchitecture {
|
||||||
|
Arch_KeplerOrOlder,
|
||||||
|
Arch_Maxwell,
|
||||||
|
Arch_Pascal,
|
||||||
|
Arch_Volta,
|
||||||
|
Arch_Turing,
|
||||||
|
Arch_AmpereOrNewer,
|
||||||
|
};
|
||||||
|
|
||||||
/// Handles data specific to a physical device.
|
/// Handles data specific to a physical device.
|
||||||
class Device {
|
class Device {
|
||||||
public:
|
public:
|
||||||
@ -670,6 +679,14 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsNvidia() const noexcept {
|
||||||
|
return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the cached NVIDIA architecture stored in nvidia_arch.
/// NOTE(review): the value looks meaningful only when IsNvidia() is true — confirm with callers.
NvidiaArchitecture GetNvidiaArch() const noexcept {
    return this->nvidia_arch;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Checks if the physical device is suitable and configures the object state
|
/// Checks if the physical device is suitable and configures the object state
|
||||||
/// with all necessary info about its properties.
|
/// with all necessary info about its properties.
|
||||||
@ -788,6 +805,7 @@ private:
|
|||||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||||
u32 sets_per_pool{}; ///< Sets per Description Pool
|
u32 sets_per_pool{}; ///< Sets per Description Pool
|
||||||
|
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||||
|
|
||||||
// Telemetry parameters
|
// Telemetry parameters
|
||||||
std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
|
std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user