shader/memory: Implement ATOM.ADD
ATOM operates atomically on global memory. For now, only ATOM.ADD is implemented, since that is what was found in commercial games. The decoder only accepts ATOM.ADD.S32 (every other type is reported as unimplemented), although ATOM.ADD.U32 shouldn't be any different. This change forces us to change the default type of SPIR-V storage buffers from float to uint. We could also alias the buffers, but for now it is simpler to just use uint. While we are at it, abstract the code to avoid repetition.
This commit is contained in:
parent
05df4a8c94
commit
d95d4ac843
@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
|
||||
Exch = 8,
|
||||
};
|
||||
|
||||
/// Operation selector for the ATOM instruction (atomic operation on global memory).
/// It is read through the `operation` bit field of the `atom` instruction union.
/// The memory decoder in this change only handles Add; any other value is reported
/// through UNIMPLEMENTED_IF_MSG.
enum class GlobalAtomicOp : u64 {
|
||||
Add = 0, // The only operation the decoder currently accepts
|
||||
Min = 1,
|
||||
Max = 2,
|
||||
Inc = 3,
|
||||
Dec = 4,
|
||||
And = 5,
|
||||
Or = 6,
|
||||
Xor = 7,
|
||||
Exch = 8,
|
||||
SafeAdd = 10, // Note: value 9 is not assigned in this enum
|
||||
};
|
||||
|
||||
/// Operand type selector for the ATOM instruction (atomic operation on global memory).
/// It is read through the `type` bit field of the `atom` instruction union.
/// The memory decoder in this change only accepts S32; any other value is reported
/// through UNIMPLEMENTED_IF_MSG.
enum class GlobalAtomicType : u64 {
|
||||
U32 = 0,
|
||||
S32 = 1, // The only type the decoder currently accepts
|
||||
U64 = 2,
|
||||
F32_FTZ_RN = 3, // NOTE(review): presumably f32, flush-to-zero, round-to-nearest - confirm
|
||||
F16x2_FTZ_RN = 4, // NOTE(review): presumably packed 2x f16 with the same modes - confirm
|
||||
S64 = 5,
|
||||
};
|
||||
|
||||
enum class UniformType : u64 {
|
||||
UnsignedByte = 0,
|
||||
SignedByte = 1,
|
||||
@ -957,6 +979,12 @@ union Instruction {
|
||||
BitField<46, 2, u64> cache_mode;
|
||||
} stg;
|
||||
|
||||
// Encoding fields of the ATOM instruction (atomic operation on global memory).
// NOTE(review): the per-field remarks assume BitField<Position, Bits, Type> - confirm
// against the BitField definition.
union {
|
||||
BitField<52, 4, GlobalAtomicOp> operation; // Which atomic operation to perform
|
||||
BitField<49, 3, GlobalAtomicType> type; // Operand type of the atomic operation
|
||||
BitField<28, 20, s64> offset; // Signed field (s64); not read by the decoder code shown in this change
|
||||
} atom;
|
||||
|
||||
union {
|
||||
BitField<52, 4, AtomicOp> operation;
|
||||
BitField<28, 2, AtomicType> type;
|
||||
@ -1690,6 +1718,7 @@ public:
|
||||
ST_S,
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
ATOM, // Atomic operation on global memory
|
||||
ATOMS, // Atomic operation on shared memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
TEX,
|
||||
@ -1994,6 +2023,7 @@ private:
|
||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
|
@ -1858,10 +1858,7 @@ private:
|
||||
|
||||
/// Formats a GLSL "atomic{opname}(target, value)" call and returns it as an
/// Expression of `type`. The value operand is operation[1], visited and converted
/// with As(type).
/// NOTE(review): this hunk shows the lines before and after the change merged
/// together (there are two "return {fmt::format(" openers). Going by the hunk header
/// (10 lines before, 7 after), the ASSERT, the `smem` lookup and the
/// "smem[{} >> 2]" format appear to be the removed form, and the generic
/// "atomic{}({}, {})" format using Visit(operation[0]).GetCode() the new one - confirm
/// against the real file before editing.
template <const std::string_view& opname, Type type>
|
||||
Expression Atomic(Operation operation) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
auto& smem = std::get<SmemNode>(*operation[0]);
|
||||
|
||||
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
|
||||
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
||||
Visit(operation[1]).As(type)),
|
||||
type};
|
||||
}
|
||||
|
@ -1123,15 +1123,7 @@ private:
|
||||
}
|
||||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
||||
|
||||
Id offset = OpISub(t_uint, real, base);
|
||||
offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
|
||||
return {OpLoad(t_float,
|
||||
OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
|
||||
Type::Float};
|
||||
return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||
@ -1142,10 +1134,7 @@ private:
|
||||
}
|
||||
|
||||
if (const auto smem = std::get_if<SmemNode>(&*node)) {
|
||||
Id address = AsUint(Visit(smem->GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
return {OpLoad(t_uint, pointer), Type::Uint};
|
||||
return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
|
||||
@ -1339,20 +1328,10 @@ private:
|
||||
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
|
||||
|
||||
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
Id address = AsUint(Visit(smem->GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
|
||||
target = {GetSharedMemoryPointer(*smem), Type::Uint};
|
||||
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
||||
const Id diff = OpISub(t_uint, real, base);
|
||||
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
||||
|
||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
||||
target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
|
||||
Type::Float};
|
||||
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
|
||||
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
@ -1804,11 +1783,16 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression UAtomicAdd(Operation operation) {
|
||||
const auto& smem = std::get<SmemNode>(*operation[0]);
|
||||
Id address = AsUint(Visit(smem.GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
Expression AtomicAdd(Operation operation) {
|
||||
Id pointer;
|
||||
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||
pointer = GetSharedMemoryPointer(*smem);
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||
pointer = GetGlobalMemoryPointer(*gmem);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
return {Constant(t_uint, 0), Type::Uint};
|
||||
}
|
||||
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = Constant(t_uint, 0U);
|
||||
@ -2243,6 +2227,22 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Builds a pointer to the uint element of a global memory buffer addressed by a
/// GmemNode.
/// @param gmem Node providing the real address, the base address and the descriptor
///             used to look up the buffer in global_buffers.
/// @return Id of an OpAccessChain (pointer type t_gmem_uint) into that buffer.
Id GetGlobalMemoryPointer(const GmemNode& gmem) {
|
||||
const Id real = AsUint(Visit(gmem.GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem.GetBaseAddress()));
|
||||
const Id diff = OpISub(t_uint, real, base); // Distance of the access from the buffer base
|
||||
// Shift right by 2 to turn the distance into an element index.
|
||||
// NOTE(review): presumably addresses are in bytes and elements are 4-byte uints - confirm.
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
||||
const Id buffer = global_buffers.at(gmem.GetDescriptor());
|
||||
// The leading constant 0 index selects the first member of the buffer object.
return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
|
||||
}
|
||||
|
||||
/// Builds a pointer to the uint element of shared memory addressed by a SmemNode.
/// Asserts that the current shader stage is Compute.
/// @param smem Node providing the shared memory address.
/// @return Id of an OpAccessChain (pointer type t_smem_uint) into shared_memory.
Id GetSharedMemoryPointer(const SmemNode& smem) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
Id address = AsUint(Visit(smem.GetAddress()));
|
||||
// Shift right by 2 to turn the address into an element index.
|
||||
// NOTE(review): presumably the address is in bytes and elements are 4-byte uints - confirm.
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
return OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
}
|
||||
|
||||
static constexpr std::array operation_decompilers = {
|
||||
&SPIRVDecompiler::Assign,
|
||||
|
||||
@ -2389,7 +2389,7 @@ private:
|
||||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
|
||||
&SPIRVDecompiler::UAtomicAdd,
|
||||
&SPIRVDecompiler::AtomicAdd,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::BranchIndirect,
|
||||
@ -2485,9 +2485,9 @@ private:
|
||||
|
||||
Id t_smem_uint{};
|
||||
|
||||
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
|
||||
const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
|
||||
const Id t_gmem_array =
|
||||
Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
||||
Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
||||
const Id t_gmem_struct = MemberDecorate(
|
||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||
|
@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
|
||||
using Tegra::Shader::AtomicOp;
|
||||
using Tegra::Shader::AtomicType;
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::GlobalAtomicOp;
|
||||
using Tegra::Shader::GlobalAtomicType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOM: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
|
||||
static_cast<int>(instr.atom.operation.Value()));
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
|
||||
static_cast<int>(instr.atom.type.Value()));
|
||||
|
||||
const auto [real_address, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, true, true);
|
||||
if (!real_address || !base_address) {
|
||||
// Tracking failed, skip atomic.
|
||||
break;
|
||||
}
|
||||
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOMS: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
|
||||
static_cast<int>(instr.atoms.operation.Value()));
|
||||
@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
Node memory = GetSharedMemory(std::move(address));
|
||||
Node data = GetRegister(instr.gpr20);
|
||||
|
||||
Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
|
||||
Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
|
@ -162,7 +162,7 @@ enum class OperationCode {
|
||||
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
||||
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
||||
|
||||
UAtomicAdd, /// (smem, uint) -> uint
|
||||
AtomicAdd, /// (memory, {u}int) -> {u}int
|
||||
|
||||
Branch, /// (uint branch_target) -> void
|
||||
BranchIndirect, /// (uint branch_target) -> void
|
||||
|
Loading…
x
Reference in New Issue
Block a user