Merge pull request #4391 from lioncash/nrvo
video_core: Allow copy elision to take place where applicable
This commit is contained in:
commit
f650cf8a9a
@ -1704,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string ARBDecompiler::HUnpack(Operation operation) {
|
std::string ARBDecompiler::HUnpack(Operation operation) {
|
||||||
const std::string operand = Visit(operation[0]);
|
std::string operand = Visit(operation[0]);
|
||||||
switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
|
switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
|
||||||
case Tegra::Shader::HalfType::H0_H1:
|
case Tegra::Shader::HalfType::H0_H1:
|
||||||
return operand;
|
return operand;
|
||||||
@ -2054,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) {
|
|||||||
|
|
||||||
std::string ARBDecompiler::YNegate(Operation) {
|
std::string ARBDecompiler::YNegate(Operation) {
|
||||||
LOG_WARNING(Render_OpenGL, "(STUBBED)");
|
LOG_WARNING(Render_OpenGL, "(STUBBED)");
|
||||||
const std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
AddLine("MOV.F {}, 1;", temporary);
|
AddLine("MOV.F {}, 1;", temporary);
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
@ -126,7 +126,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
|||||||
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
||||||
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
|
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
|
||||||
entry.graphics_info, entry.compute_info};
|
entry.graphics_info, entry.compute_info};
|
||||||
const auto registry = std::make_shared<Registry>(entry.type, info);
|
auto registry = std::make_shared<Registry>(entry.type, info);
|
||||||
for (const auto& [address, value] : entry.keys) {
|
for (const auto& [address, value] : entry.keys) {
|
||||||
const auto [buffer, offset] = address;
|
const auto [buffer, offset] = address;
|
||||||
registry->InsertKey(buffer, offset, value);
|
registry->InsertKey(buffer, offset, value);
|
||||||
|
@ -1919,7 +1919,7 @@ private:
|
|||||||
Expression Comparison(Operation operation) {
|
Expression Comparison(Operation operation) {
|
||||||
static_assert(!unordered || type == Type::Float);
|
static_assert(!unordered || type == Type::Float);
|
||||||
|
|
||||||
const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
|
Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
|
||||||
|
|
||||||
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
|
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
|
||||||
// GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
|
// GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
|
||||||
|
@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
|
|||||||
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
|
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
|
||||||
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
|
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
|
||||||
|
|
||||||
const Node value = [&]() {
|
const Node value = [&] {
|
||||||
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
|
Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
|
||||||
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
|
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
|
||||||
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
|
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
|
||||||
}
|
}
|
||||||
const Node shifted = [&]() {
|
const Node shifted = [&] {
|
||||||
switch (instr.iadd3.mode) {
|
switch (instr.iadd3.mode) {
|
||||||
case Tegra::Shader::IAdd3Mode::RightShift:
|
case Tegra::Shader::IAdd3Mode::RightShift:
|
||||||
// TODO(tech4me): According to
|
// TODO(tech4me): According to
|
||||||
|
@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
|
|||||||
return pc;
|
return pc;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
|
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
|
||||||
Tegra::Shader::VideoType type, u64 byte_height) {
|
u64 byte_height) {
|
||||||
if (!is_chunk) {
|
if (!is_chunk) {
|
||||||
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
|
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
|
||||||
}
|
}
|
||||||
const Node zero = Immediate(0);
|
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Tegra::Shader::VideoType::Size16_Low:
|
case VideoType::Size16_Low:
|
||||||
return BitfieldExtract(op, 0, 16);
|
return BitfieldExtract(op, 0, 16);
|
||||||
case Tegra::Shader::VideoType::Size16_High:
|
case VideoType::Size16_High:
|
||||||
return BitfieldExtract(op, 16, 16);
|
return BitfieldExtract(op, 16, 16);
|
||||||
case Tegra::Shader::VideoType::Size32:
|
case VideoType::Size32:
|
||||||
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
|
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
|
||||||
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
|
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
return zero;
|
return Immediate(0);
|
||||||
case Tegra::Shader::VideoType::Invalid:
|
case VideoType::Invalid:
|
||||||
UNREACHABLE_MSG("Invalid instruction encoding");
|
UNREACHABLE_MSG("Invalid instruction encoding");
|
||||||
return zero;
|
return Immediate(0);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return zero;
|
return Immediate(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
|
|||||||
SetTemporary(bb, 0, product);
|
SetTemporary(bb, 0, product);
|
||||||
product = GetTemporary(0);
|
product = GetTemporary(0);
|
||||||
|
|
||||||
const Node original_c = op_c;
|
Node original_c = op_c;
|
||||||
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
|
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
|
||||||
op_c = [&]() {
|
op_c = [&] {
|
||||||
switch (set_mode) {
|
switch (set_mode) {
|
||||||
case Tegra::Shader::XmadMode::None:
|
case Tegra::Shader::XmadMode::None:
|
||||||
return original_c;
|
return original_c;
|
||||||
case Tegra::Shader::XmadMode::CLo:
|
case Tegra::Shader::XmadMode::CLo:
|
||||||
return BitfieldExtract(original_c, 0, 16);
|
return BitfieldExtract(std::move(original_c), 0, 16);
|
||||||
case Tegra::Shader::XmadMode::CHi:
|
case Tegra::Shader::XmadMode::CHi:
|
||||||
return BitfieldExtract(original_c, 16, 16);
|
return BitfieldExtract(std::move(original_c), 16, 16);
|
||||||
case Tegra::Shader::XmadMode::CBcc: {
|
case Tegra::Shader::XmadMode::CBcc: {
|
||||||
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
|
Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
|
||||||
original_b, Immediate(16));
|
original_b, Immediate(16));
|
||||||
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
|
return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
|
||||||
|
std::move(shifted_b));
|
||||||
}
|
}
|
||||||
case Tegra::Shader::XmadMode::CSfu: {
|
case Tegra::Shader::XmadMode::CSfu: {
|
||||||
const Node comp_a =
|
const Node comp_a =
|
||||||
|
@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
|
|||||||
}
|
}
|
||||||
|
|
||||||
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
|
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
|
||||||
const Node node = MakeNode<InternalFlagNode>(flag);
|
Node node = MakeNode<InternalFlagNode>(flag);
|
||||||
if (negated) {
|
if (negated) {
|
||||||
return Operation(OperationCode::LogicalNegate, node);
|
return Operation(OperationCode::LogicalNegate, std::move(node));
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user