Merge pull request #4869 from bunnei/improve-gpu-sync
Improvements to GPU synchronization & various refactoring
This commit is contained in:
commit
6bbbbe8f85
@ -454,6 +454,8 @@ add_library(core STATIC
|
|||||||
hle/service/nvdrv/nvdrv.h
|
hle/service/nvdrv/nvdrv.h
|
||||||
hle/service/nvdrv/nvmemp.cpp
|
hle/service/nvdrv/nvmemp.cpp
|
||||||
hle/service/nvdrv/nvmemp.h
|
hle/service/nvdrv/nvmemp.h
|
||||||
|
hle/service/nvdrv/syncpoint_manager.cpp
|
||||||
|
hle/service/nvdrv/syncpoint_manager.h
|
||||||
hle/service/nvflinger/buffer_queue.cpp
|
hle/service/nvflinger/buffer_queue.cpp
|
||||||
hle/service/nvflinger/buffer_queue.h
|
hle/service/nvflinger/buffer_queue.h
|
||||||
hle/service/nvflinger/nvflinger.cpp
|
hle/service/nvflinger/nvflinger.cpp
|
||||||
|
@ -179,16 +179,18 @@ struct System::Impl {
|
|||||||
arp_manager.ResetAll();
|
arp_manager.ResetAll();
|
||||||
|
|
||||||
telemetry_session = std::make_unique<Core::TelemetrySession>();
|
telemetry_session = std::make_unique<Core::TelemetrySession>();
|
||||||
|
|
||||||
|
gpu_core = VideoCore::CreateGPU(emu_window, system);
|
||||||
|
if (!gpu_core) {
|
||||||
|
return ResultStatus::ErrorVideoCore;
|
||||||
|
}
|
||||||
|
|
||||||
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
|
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
|
||||||
|
|
||||||
Service::Init(service_manager, system);
|
Service::Init(service_manager, system);
|
||||||
GDBStub::DeferStart();
|
GDBStub::DeferStart();
|
||||||
|
|
||||||
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
|
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
|
||||||
gpu_core = VideoCore::CreateGPU(emu_window, system);
|
|
||||||
if (!gpu_core) {
|
|
||||||
return ResultStatus::ErrorVideoCore;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize time manager, which must happen after kernel is created
|
// Initialize time manager, which must happen after kernel is created
|
||||||
time_manager.Initialize();
|
time_manager.Initialize();
|
||||||
|
@ -15,8 +15,9 @@
|
|||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
|
nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
|
||||||
: nvdevice(system), events_interface{events_interface} {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
|
||||||
nvhost_ctrl::~nvhost_ctrl() = default;
|
nvhost_ctrl::~nvhost_ctrl() = default;
|
||||||
|
|
||||||
u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
|
|||||||
return NvResult::BadParameter;
|
return NvResult::BadParameter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
|
||||||
|
params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
|
||||||
|
std::memcpy(output.data(), &params, sizeof(params));
|
||||||
|
return NvResult::Success;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
|
||||||
|
syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
|
||||||
|
params.value = new_value;
|
||||||
|
std::memcpy(output.data(), &params, sizeof(params));
|
||||||
|
return NvResult::Success;
|
||||||
|
}
|
||||||
|
|
||||||
auto event = events_interface.events[event_id];
|
auto event = events_interface.events[event_id];
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
|
|
||||||
// This is mostly to take into account unimplemented features. As synced
|
// This is mostly to take into account unimplemented features. As synced
|
||||||
// gpu is always synced.
|
// gpu is always synced.
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
event.writable->Signal();
|
event.event.writable->Signal();
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
auto lock = gpu.LockSync();
|
auto lock = gpu.LockSync();
|
||||||
const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
|
const u32 current_syncpoint_value = event.fence.value;
|
||||||
const s32 diff = current_syncpoint_value - params.threshold;
|
const s32 diff = current_syncpoint_value - params.threshold;
|
||||||
if (diff >= 0) {
|
if (diff >= 0) {
|
||||||
event.writable->Signal();
|
event.event.writable->Signal();
|
||||||
params.value = current_syncpoint_value;
|
params.value = current_syncpoint_value;
|
||||||
std::memcpy(output.data(), &params, sizeof(params));
|
std::memcpy(output.data(), &params, sizeof(params));
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
|
|||||||
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
|
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
|
||||||
}
|
}
|
||||||
params.value |= event_id;
|
params.value |= event_id;
|
||||||
event.writable->Clear();
|
event.event.writable->Clear();
|
||||||
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
|
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
|
||||||
if (!is_async && ctrl.fresh_call) {
|
if (!is_async && ctrl.fresh_call) {
|
||||||
ctrl.must_delay = true;
|
ctrl.must_delay = true;
|
||||||
@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
|
|||||||
u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||||
IocCtrlEventSignalParams params{};
|
IocCtrlEventSignalParams params{};
|
||||||
std::memcpy(&params, input.data(), sizeof(params));
|
std::memcpy(&params, input.data(), sizeof(params));
|
||||||
|
|
||||||
u32 event_id = params.event_id & 0x00FF;
|
u32 event_id = params.event_id & 0x00FF;
|
||||||
LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
|
LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
|
||||||
|
|
||||||
if (event_id >= MaxNvEvents) {
|
if (event_id >= MaxNvEvents) {
|
||||||
return NvResult::BadParameter;
|
return NvResult::BadParameter;
|
||||||
}
|
}
|
||||||
if (events_interface.status[event_id] == EventState::Waiting) {
|
if (events_interface.status[event_id] == EventState::Waiting) {
|
||||||
events_interface.LiberateEvent(event_id);
|
events_interface.LiberateEvent(event_id);
|
||||||
events_interface.events[event_id].writable->Signal();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
|
||||||
|
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
|
|||||||
|
|
||||||
class nvhost_ctrl final : public nvdevice {
|
class nvhost_ctrl final : public nvdevice {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
|
explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_ctrl() override;
|
~nvhost_ctrl() override;
|
||||||
|
|
||||||
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
@ -145,6 +146,7 @@ private:
|
|||||||
u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
|
||||||
EventInterface& events_interface;
|
EventInterface& events_interface;
|
||||||
|
SyncpointManager& syncpoint_manager;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Service::Nvidia::Devices
|
} // namespace Service::Nvidia::Devices
|
||||||
|
@ -7,14 +7,20 @@
|
|||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
|
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
|
||||||
|
channel_fence.id = syncpoint_manager.AllocateSyncpoint();
|
||||||
|
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
|
||||||
|
}
|
||||||
|
|
||||||
nvhost_gpu::~nvhost_gpu() = default;
|
nvhost_gpu::~nvhost_gpu() = default;
|
||||||
|
|
||||||
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
|
|||||||
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
|
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
|
||||||
params.unk3);
|
params.unk3);
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
|
||||||
params.fence_out.id = assigned_syncpoints;
|
|
||||||
params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
|
params.fence_out = channel_fence;
|
||||||
assigned_syncpoints++;
|
|
||||||
std::memcpy(output.data(), &params, output.size());
|
std::memcpy(output.data(), &params, output.size());
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -145,37 +151,98 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assembles the command list used to wait on a fence: a FenceValue method carrying
/// fence.value, followed by a FenceAction method carrying an Acquire operation for fence.id.
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
    const auto value_method = Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
                                                        Tegra::SubmissionMode::Increasing);
    const Tegra::CommandHeader value_argument{fence.value};
    const auto action_method = Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                         Tegra::SubmissionMode::Increasing);
    const auto action_argument =
        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id);

    return {value_method, value_argument, action_method, action_argument};
}
|
||||||
|
|
||||||
|
/// Assembles a command list that begins with a FenceValue method and a zero-initialized
/// argument, followed by `add_increment` pairs of (FenceAction method, Increment operation
/// for fence.id).
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
    std::vector<Tegra::CommandHeader> commands;
    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
                                                 Tegra::SubmissionMode::Increasing));
    commands.push_back(Tegra::CommandHeader{});

    // One method header plus one Increment action per requested increment.
    for (u32 remaining = add_increment; remaining != 0; --remaining) {
        commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                     Tegra::SubmissionMode::Increasing));
        commands.push_back(
            Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
    }

    return commands;
}
|
||||||
|
|
||||||
|
/// Assembles a command list that begins with a WaitForInterrupt method and a zero-initialized
/// argument, followed by everything BuildIncrementCommandList produces for the same arguments.
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
                                                                          u32 add_increment) {
    std::vector<Tegra::CommandHeader> commands;
    commands.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
                                                 Tegra::SubmissionMode::Increasing));
    commands.push_back(Tegra::CommandHeader{});

    // Append the plain increment sequence after the wait-for-interrupt prefix.
    for (const Tegra::CommandHeader& header : BuildIncrementCommandList(fence, add_increment)) {
        commands.push_back(header);
    }

    return commands;
}
|
||||||
|
|
||||||
|
/// Common submission path called by SubmitGPFIFO and KickoffPB.
///
/// Sets params.fence_out.id to the channel's syncpoint, optionally pushes a wait on the incoming
/// fence, computes the outgoing fence value from the syncpoint manager, pushes the caller's
/// entries, optionally pushes trailing syncpoint increments, and copies params to output.
///
/// @param params  Submission parameters; fence_out is overwritten with the resulting fence.
/// @param output  Receives a copy of params. sizeof(IoctlSubmitGpfifo) bytes are written, so the
///                buffer must be at least that large.
/// @param entries Command lists to push to the GPU; moved from by this call.
/// @returns Always 0.
u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
                                 Tegra::CommandList&& entries) {
    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
              params.num_entries, params.flags.raw);

    auto& gpu = system.GPU();

    // The returned fence always refers to this channel's syncpoint.
    params.fence_out.id = channel_fence.id;

    // Queue a wait only if one was requested and the threshold has not been reached yet.
    if (params.flags.add_wait.Value() &&
        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
    }

    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
        // With the increment flag set, the incoming fence value is added on top of the
        // increments implied by add_increment.
        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
            params.fence_out.id, params.AddIncrementValue() + increment_value);
    } else {
        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
    }

    entries.RefreshIntegrityChecks(gpu);
    gpu.PushGPUEntries(std::move(entries));

    // Trailing increments are pushed after the caller's entries, with or without a preceding
    // wait-for-interrupt depending on suppress_wfi.
    if (params.flags.add_increment.Value()) {
        if (params.flags.suppress_wfi.Value()) {
            gpu.PushGPUEntries(Tegra::CommandList{
                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
        } else {
            gpu.PushGPUEntries(Tegra::CommandList{
                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
        }
    }

    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
    return 0;
}
|
||||||
|
|
||||||
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||||
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
IoctlSubmitGpfifo params{};
|
IoctlSubmitGpfifo params{};
|
||||||
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
||||||
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
|
|
||||||
params.num_entries, params.flags.raw);
|
|
||||||
|
|
||||||
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
|
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader),
|
|
||||||
"Incorrect input size");
|
|
||||||
|
|
||||||
Tegra::CommandList entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
|
|
||||||
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
|
return SubmitGPFIFOImpl(params, output, std::move(entries));
|
||||||
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
|
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
|
||||||
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
|
|
||||||
if (params.flags.increment.Value()) {
|
|
||||||
params.fence_out.value += current_syncpoint_value;
|
|
||||||
} else {
|
|
||||||
params.fence_out.value = current_syncpoint_value;
|
|
||||||
}
|
|
||||||
gpu.PushGPUEntries(std::move(entries));
|
|
||||||
|
|
||||||
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
|||||||
}
|
}
|
||||||
IoctlSubmitGpfifo params{};
|
IoctlSubmitGpfifo params{};
|
||||||
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
|
||||||
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
|
|
||||||
params.num_entries, params.flags.raw);
|
|
||||||
|
|
||||||
Tegra::CommandList entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
if (version == IoctlVersion::Version2) {
|
if (version == IoctlVersion::Version2) {
|
||||||
std::memcpy(entries.data(), input2.data(),
|
std::memcpy(entries.command_lists.data(), input2.data(),
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
} else {
|
} else {
|
||||||
system.Memory().ReadBlock(params.address, entries.data(),
|
system.Memory().ReadBlock(params.address, entries.command_lists.data(),
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
|
|
||||||
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
|
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
return SubmitGPFIFOImpl(params, output, std::move(entries));
|
||||||
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
|
|
||||||
if (params.flags.increment.Value()) {
|
|
||||||
params.fence_out.value += current_syncpoint_value;
|
|
||||||
} else {
|
|
||||||
params.fence_out.value = current_syncpoint_value;
|
|
||||||
}
|
|
||||||
gpu.PushGPUEntries(std::move(entries));
|
|
||||||
|
|
||||||
std::memcpy(output.data(), &params, output.size());
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
|
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||||
|
@ -11,6 +11,11 @@
|
|||||||
#include "common/swap.h"
|
#include "common/swap.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||||
#include "core/hle/service/nvdrv/nvdata.h"
|
#include "core/hle/service/nvdrv/nvdata.h"
|
||||||
|
#include "video_core/dma_pusher.h"
|
||||||
|
|
||||||
|
namespace Service::Nvidia {
|
||||||
|
class SyncpointManager;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
|
|||||||
|
|
||||||
class nvhost_gpu final : public nvdevice {
|
class nvhost_gpu final : public nvdevice {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
|
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_gpu() override;
|
~nvhost_gpu() override;
|
||||||
|
|
||||||
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
|
||||||
@ -162,10 +168,15 @@ private:
|
|||||||
u32_le raw;
|
u32_le raw;
|
||||||
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
|
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
|
||||||
BitField<1, 1, u32_le> add_increment; // append an increment to the list
|
BitField<1, 1, u32_le> add_increment; // append an increment to the list
|
||||||
BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
|
BitField<2, 1, u32_le> new_hw_format; // mostly ignored
|
||||||
|
BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
|
||||||
BitField<8, 1, u32_le> increment; // increment the returned fence
|
BitField<8, 1, u32_le> increment; // increment the returned fence
|
||||||
} flags;
|
} flags;
|
||||||
Fence fence_out; // returned new fence object for others to wait on
|
Fence fence_out; // returned new fence object for others to wait on
|
||||||
|
|
||||||
|
u32 AddIncrementValue() const {
    // add_increment is a single-bit field, so this yields 2 when it is set and 0 otherwise.
    const auto add_increment_bit = flags.add_increment.Value();
    return add_increment_bit << 1;
}
|
||||||
};
|
};
|
||||||
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
|
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
|
||||||
"IoctlSubmitGpfifo is incorrect size");
|
"IoctlSubmitGpfifo is incorrect size");
|
||||||
@ -190,6 +201,8 @@ private:
|
|||||||
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
|
||||||
|
Tegra::CommandList&& entries);
|
||||||
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
|
||||||
const std::vector<u8>& input2, IoctlVersion version);
|
const std::vector<u8>& input2, IoctlVersion version);
|
||||||
@ -198,7 +211,8 @@ private:
|
|||||||
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
|
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
|
||||||
|
|
||||||
std::shared_ptr<nvmap> nvmap_dev;
|
std::shared_ptr<nvmap> nvmap_dev;
|
||||||
u32 assigned_syncpoints{};
|
SyncpointManager& syncpoint_manager;
|
||||||
|
Fence channel_fence;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Service::Nvidia::Devices
|
} // namespace Service::Nvidia::Devices
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "core/hle/service/nvdrv/interface.h"
|
#include "core/hle/service/nvdrv/interface.h"
|
||||||
#include "core/hle/service/nvdrv/nvdrv.h"
|
#include "core/hle/service/nvdrv/nvdrv.h"
|
||||||
#include "core/hle/service/nvdrv/nvmemp.h"
|
#include "core/hle/service/nvdrv/nvmemp.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
#include "core/hle/service/nvflinger/nvflinger.h"
|
#include "core/hle/service/nvflinger/nvflinger.h"
|
||||||
|
|
||||||
namespace Service::Nvidia {
|
namespace Service::Nvidia {
|
||||||
@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
|
|||||||
nvflinger.SetNVDrvInstance(module_);
|
nvflinger.SetNVDrvInstance(module_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Module::Module(Core::System& system) {
|
Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
|
||||||
auto& kernel = system.Kernel();
|
auto& kernel = system.Kernel();
|
||||||
for (u32 i = 0; i < MaxNvEvents; i++) {
|
for (u32 i = 0; i < MaxNvEvents; i++) {
|
||||||
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
|
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
|
||||||
events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
|
events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
|
||||||
events_interface.status[i] = EventState::Free;
|
events_interface.status[i] = EventState::Free;
|
||||||
events_interface.registered[i] = false;
|
events_interface.registered[i] = false;
|
||||||
}
|
}
|
||||||
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
|
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
|
||||||
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
|
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
|
||||||
devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
|
devices["/dev/nvhost-gpu"] =
|
||||||
|
std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
|
||||||
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
|
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
|
||||||
devices["/dev/nvmap"] = nvmap_dev;
|
devices["/dev/nvmap"] = nvmap_dev;
|
||||||
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
||||||
devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
|
devices["/dev/nvhost-ctrl"] =
|
||||||
|
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
|
||||||
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
|
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
|
||||||
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
|
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
|
||||||
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
|
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
|
||||||
@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
|
|||||||
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
|
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
|
||||||
events_interface.assigned_value[i] == value) {
|
events_interface.assigned_value[i] == value) {
|
||||||
events_interface.LiberateEvent(i);
|
events_interface.LiberateEvent(i);
|
||||||
events_interface.events[i].writable->Signal();
|
events_interface.events[i].event.writable->Signal();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
|
std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
|
||||||
return events_interface.events[event_id].readable;
|
return events_interface.events[event_id].event.readable;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
|
std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
|
||||||
return events_interface.events[event_id].writable;
|
return events_interface.events[event_id].event.writable;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Service::Nvidia
|
} // namespace Service::Nvidia
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "core/hle/kernel/writable_event.h"
|
#include "core/hle/kernel/writable_event.h"
|
||||||
#include "core/hle/service/nvdrv/nvdata.h"
|
#include "core/hle/service/nvdrv/nvdata.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
#include "core/hle/service/service.h"
|
#include "core/hle/service/service.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
@ -22,15 +23,23 @@ class NVFlinger;
|
|||||||
|
|
||||||
namespace Service::Nvidia {
|
namespace Service::Nvidia {
|
||||||
|
|
||||||
|
class SyncpointManager;
|
||||||
|
|
||||||
namespace Devices {
|
namespace Devices {
|
||||||
class nvdevice;
|
class nvdevice;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents an Nvidia event
|
||||||
|
struct NvEvent {
|
||||||
|
// Kernel event pair; callers signal/clear `event.writable` and hand out `event.readable`.
Kernel::EventPair event;
|
||||||
|
// Fence tied to this event (value-initialized here); nvhost_ctrl reads its id and value.
Fence fence{};
|
||||||
|
};
|
||||||
|
|
||||||
struct EventInterface {
|
struct EventInterface {
|
||||||
// Mask representing currently busy events
|
// Mask representing currently busy events
|
||||||
u64 events_mask{};
|
u64 events_mask{};
|
||||||
// Each kernel event associated to an NV event
|
// Each kernel event associated to an NV event
|
||||||
std::array<Kernel::EventPair, MaxNvEvents> events;
|
std::array<NvEvent, MaxNvEvents> events;
|
||||||
// The status of the current NVEvent
|
// The status of the current NVEvent
|
||||||
std::array<EventState, MaxNvEvents> status{};
|
std::array<EventState, MaxNvEvents> status{};
|
||||||
// Tells if an NVEvent is registered or not
|
// Tells if an NVEvent is registered or not
|
||||||
@ -119,6 +128,9 @@ public:
|
|||||||
std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
|
std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Manages syncpoints on the host
|
||||||
|
SyncpointManager syncpoint_manager;
|
||||||
|
|
||||||
/// Id to use for the next open file descriptor.
|
/// Id to use for the next open file descriptor.
|
||||||
u32 next_fd = 1;
|
u32 next_fd = 1;
|
||||||
|
|
||||||
|
39
src/core/hle/service/nvdrv/syncpoint_manager.cpp
Normal file
39
src/core/hle/service/nvdrv/syncpoint_manager.cpp
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
// Copyright 2020 yuzu emulator team
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
|
||||||
|
namespace Service::Nvidia {
|
||||||
|
|
||||||
|
// Initializes the `gpu` member, which RefreshSyncpoint() queries for current syncpoint values.
SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
|
||||||
|
|
||||||
|
// Uses the compiler-generated destructor; it is defined out of line in this translation unit.
SyncpointManager::~SyncpointManager() = default;
|
||||||
|
|
||||||
|
u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
    // Replace the cached lower bound with the value the GPU currently reports, then return it.
    auto& syncpoint = syncpoints[syncpoint_id];
    syncpoint.min = gpu.GetSyncpointValue(syncpoint_id);
    return syncpoint.min.load(std::memory_order_relaxed);
}
|
||||||
|
|
||||||
|
u32 SyncpointManager::AllocateSyncpoint() {
    // The search starts at ID 1, so syncpoint 0 is never handed out by this function.
    for (u32 candidate = 1; candidate < MaxSyncPoints; ++candidate) {
        auto& slot = syncpoints[candidate];
        if (slot.is_allocated) {
            continue;
        }
        slot.is_allocated = true;
        return candidate;
    }

    // Every slot is taken: report it and fall back to a value-initialized ID (0).
    UNREACHABLE_MSG("No more available syncpoints!");
    return {};
}
|
||||||
|
|
||||||
|
// Raises the upper bound (max) of the given syncpoint by `value` and returns the upper bound as
// read back afterwards.
u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
    // A single atomic add stores the same result as `value` separate +1 increments (u32
    // wrap-around included), but its cost does not scale with `value`. That matters because
    // nvhost_gpu::SubmitGPFIFOImpl can fold a fence value taken from the ioctl input into it.
    syncpoints[syncpoint_id].max.fetch_add(value, std::memory_order_relaxed);

    return GetSyncpointMax(syncpoint_id);
}
|
||||||
|
|
||||||
|
} // namespace Service::Nvidia
|
85
src/core/hle/service/nvdrv/syncpoint_manager.h
Normal file
85
src/core/hle/service/nvdrv/syncpoint_manager.h
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
// Copyright 2020 yuzu emulator team
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "core/hle/service/nvdrv/nvdata.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
class GPU;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Service::Nvidia {
|
||||||
|
|
||||||
|
class SyncpointManager final {
|
||||||
|
public:
|
||||||
|
explicit SyncpointManager(Tegra::GPU& gpu);
|
||||||
|
~SyncpointManager();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the specified syncpoint is expired for the given value.
|
||||||
|
* @param syncpoint_id Syncpoint ID to check.
|
||||||
|
* @param value Value to check against the specified syncpoint.
|
||||||
|
* @returns True if the specified syncpoint is expired for the given value, otherwise False.
|
||||||
|
*/
|
||||||
|
bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
|
||||||
|
return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the lower bound for the specified syncpoint.
|
||||||
|
* @param syncpoint_id Syncpoint ID to get the lower bound for.
|
||||||
|
* @returns The lower bound for the specified syncpoint.
|
||||||
|
*/
|
||||||
|
u32 GetSyncpointMin(u32 syncpoint_id) const {
|
||||||
|
return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the uper bound for the specified syncpoint.
|
||||||
|
* @param syncpoint_id Syncpoint ID to get the upper bound for.
|
||||||
|
* @returns The upper bound for the specified syncpoint.
|
||||||
|
*/
|
||||||
|
u32 GetSyncpointMax(u32 syncpoint_id) const {
|
||||||
|
return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Refreshes the minimum value for the specified syncpoint.
|
||||||
|
* @param syncpoint_id Syncpoint ID to be refreshed.
|
||||||
|
* @returns The new syncpoint minimum value.
|
||||||
|
*/
|
||||||
|
u32 RefreshSyncpoint(u32 syncpoint_id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocates a new syncoint.
|
||||||
|
* @returns The syncpoint ID for the newly allocated syncpoint.
|
||||||
|
*/
|
||||||
|
u32 AllocateSyncpoint();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increases the maximum value for the specified syncpoint.
|
||||||
|
* @param syncpoint_id Syncpoint ID to be increased.
|
||||||
|
* @param value Value to increase the specified syncpoint by.
|
||||||
|
* @returns The new syncpoint maximum value.
|
||||||
|
*/
|
||||||
|
u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Syncpoint {
|
||||||
|
std::atomic<u32> min;
|
||||||
|
std::atomic<u32> max;
|
||||||
|
std::atomic<bool> is_allocated;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::array<Syncpoint, MaxSyncPoints> syncpoints{};
|
||||||
|
|
||||||
|
Tegra::GPU& gpu;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Service::Nvidia
|
@ -242,6 +242,10 @@ void NVFlinger::Compose() {
|
|||||||
|
|
||||||
const auto& igbp_buffer = buffer->get().igbp_buffer;
|
const auto& igbp_buffer = buffer->get().igbp_buffer;
|
||||||
|
|
||||||
|
if (!system.IsPoweredOn()) {
|
||||||
|
return; // We are likely shutting down
|
||||||
|
}
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
const auto& multi_fence = buffer->get().multi_fence;
|
const auto& multi_fence = buffer->get().multi_fence;
|
||||||
guard->unlock();
|
guard->unlock();
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/cityhash.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
@ -12,6 +13,20 @@
|
|||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
|
// Recomputes one CityHash64 per entry of command_lists, hashing the command words
// currently stored in GPU memory at each entry's address. The results are written to
// command_list_hashes at the matching index.
void CommandList::RefreshIntegrityChecks(GPU& gpu) {
    const std::size_t list_count = command_lists.size();
    command_list_hashes.resize(list_count);

    for (std::size_t i = 0; i < list_count; ++i) {
        const CommandListHeader header = command_lists[i];
        const auto byte_count = header.size * sizeof(u32);

        // A fresh, zero-filled buffer is used for every entry.
        std::vector<CommandHeader> words(header.size);
        gpu.MemoryManager().ReadBlockUnsafe(header.addr, words.data(), byte_count);

        command_list_hashes[i] =
            Common::CityHash64(reinterpret_cast<char*>(words.data()), byte_count);
    }
}
|
||||||
|
|
||||||
DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
|
DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
|
||||||
|
|
||||||
DmaPusher::~DmaPusher() = default;
|
DmaPusher::~DmaPusher() = default;
|
||||||
@ -45,18 +60,28 @@ bool DmaPusher::Step() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const CommandList& command_list{dma_pushbuffer.front()};
|
CommandList& command_list{dma_pushbuffer.front()};
|
||||||
ASSERT_OR_EXECUTE(!command_list.empty(), {
|
|
||||||
|
ASSERT_OR_EXECUTE(
|
||||||
|
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
|
||||||
// Somehow the command_list is empty, in order to avoid a crash
|
// Somehow the command_list is empty, in order to avoid a crash
|
||||||
// We ignore it and assume its size is 0.
|
// We ignore it and assume its size is 0.
|
||||||
dma_pushbuffer.pop();
|
dma_pushbuffer.pop();
|
||||||
dma_pushbuffer_subindex = 0;
|
dma_pushbuffer_subindex = 0;
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
|
|
||||||
|
if (command_list.prefetch_command_list.size()) {
|
||||||
|
// Prefetched command list from nvdrv, used for things like synchronization
|
||||||
|
command_headers = std::move(command_list.prefetch_command_list);
|
||||||
|
dma_pushbuffer.pop();
|
||||||
|
} else {
|
||||||
|
const CommandListHeader command_list_header{
|
||||||
|
command_list.command_lists[dma_pushbuffer_subindex]};
|
||||||
|
const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
|
||||||
const GPUVAddr dma_get = command_list_header.addr;
|
const GPUVAddr dma_get = command_list_header.addr;
|
||||||
|
|
||||||
if (dma_pushbuffer_subindex >= command_list.size()) {
|
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||||
// We've gone through the current list, remove it from the queue
|
// We've gone through the current list, remove it from the queue
|
||||||
dma_pushbuffer.pop();
|
dma_pushbuffer.pop();
|
||||||
dma_pushbuffer_subindex = 0;
|
dma_pushbuffer_subindex = 0;
|
||||||
@ -71,6 +96,15 @@ bool DmaPusher::Step() {
|
|||||||
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
|
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||||
command_list_header.size * sizeof(u32));
|
command_list_header.size * sizeof(u32));
|
||||||
|
|
||||||
|
// Integrity check
|
||||||
|
const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
|
||||||
|
command_list_header.size * sizeof(u32));
|
||||||
|
if (new_hash != next_hash) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
|
||||||
|
dma_pushbuffer.pop();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (std::size_t index = 0; index < command_headers.size();) {
|
for (std::size_t index = 0; index < command_headers.size();) {
|
||||||
const CommandHeader& command_header = command_headers[index];
|
const CommandHeader& command_header = command_headers[index];
|
||||||
|
|
||||||
|
@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
|
|||||||
IncreaseOnce = 5
|
IncreaseOnce = 5
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
||||||
|
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
|
||||||
|
// So the values you see in docs might be multiplied by 4.
|
||||||
|
enum class BufferMethods : u32 {
|
||||||
|
BindObject = 0x0,
|
||||||
|
Nop = 0x2,
|
||||||
|
SemaphoreAddressHigh = 0x4,
|
||||||
|
SemaphoreAddressLow = 0x5,
|
||||||
|
SemaphoreSequence = 0x6,
|
||||||
|
SemaphoreTrigger = 0x7,
|
||||||
|
NotifyIntr = 0x8,
|
||||||
|
WrcacheFlush = 0x9,
|
||||||
|
Unk28 = 0xA,
|
||||||
|
UnkCacheFlush = 0xB,
|
||||||
|
RefCnt = 0x14,
|
||||||
|
SemaphoreAcquire = 0x1A,
|
||||||
|
SemaphoreRelease = 0x1B,
|
||||||
|
FenceValue = 0x1C,
|
||||||
|
FenceAction = 0x1D,
|
||||||
|
WaitForInterrupt = 0x1E,
|
||||||
|
Unk7c = 0x1F,
|
||||||
|
Yield = 0x20,
|
||||||
|
NonPullerMethods = 0x40,
|
||||||
|
};
|
||||||
|
|
||||||
struct CommandListHeader {
|
struct CommandListHeader {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
@ -49,9 +74,29 @@ union CommandHeader {
|
|||||||
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
|
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
|
||||||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
|
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
|
||||||
|
|
||||||
|
// Assembles a CommandHeader from its three fields: the method number, the argument
// count and the submission mode. All remaining bits come from value-initialization.
static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
                                                  SubmissionMode mode) {
    CommandHeader header{};

    const u32 method_index = static_cast<u32>(method);
    header.method.Assign(method_index);
    header.arg_count.Assign(arg_count);
    header.mode.Assign(mode);

    return header;
}
|
||||||
|
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
using CommandList = std::vector<Tegra::CommandListHeader>;
|
struct CommandList final {
|
||||||
|
CommandList() = default;
|
||||||
|
explicit CommandList(std::size_t size) : command_lists(size) {}
|
||||||
|
explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
|
||||||
|
: prefetch_command_list{std::move(prefetch_command_list)} {}
|
||||||
|
|
||||||
|
void RefreshIntegrityChecks(GPU& gpu);
|
||||||
|
|
||||||
|
std::vector<Tegra::CommandListHeader> command_lists;
|
||||||
|
std::vector<u64> command_list_hashes;
|
||||||
|
std::vector<Tegra::CommandHeader> prefetch_command_list;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
|
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
|
||||||
@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
|
|||||||
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
|
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
|
||||||
* details on this implementation.
|
* details on this implementation.
|
||||||
*/
|
*/
|
||||||
class DmaPusher {
|
class DmaPusher final {
|
||||||
public:
|
public:
|
||||||
explicit DmaPusher(Core::System& system, GPU& gpu);
|
explicit DmaPusher(Core::System& system, GPU& gpu);
|
||||||
~DmaPusher();
|
~DmaPusher();
|
||||||
|
@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
|
|||||||
void GPU::OnCommandListEnd() {
|
void GPU::OnCommandListEnd() {
|
||||||
renderer->Rasterizer().ReleaseFences();
|
renderer->Rasterizer().ReleaseFences();
|
||||||
}
|
}
|
||||||
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
|
||||||
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
|
|
||||||
// So the values you see in docs might be multiplied by 4.
|
|
||||||
enum class BufferMethods {
|
|
||||||
BindObject = 0x0,
|
|
||||||
Nop = 0x2,
|
|
||||||
SemaphoreAddressHigh = 0x4,
|
|
||||||
SemaphoreAddressLow = 0x5,
|
|
||||||
SemaphoreSequence = 0x6,
|
|
||||||
SemaphoreTrigger = 0x7,
|
|
||||||
NotifyIntr = 0x8,
|
|
||||||
WrcacheFlush = 0x9,
|
|
||||||
Unk28 = 0xA,
|
|
||||||
UnkCacheFlush = 0xB,
|
|
||||||
RefCnt = 0x14,
|
|
||||||
SemaphoreAcquire = 0x1A,
|
|
||||||
SemaphoreRelease = 0x1B,
|
|
||||||
FenceValue = 0x1C,
|
|
||||||
FenceAction = 0x1D,
|
|
||||||
Unk78 = 0x1E,
|
|
||||||
Unk7c = 0x1F,
|
|
||||||
Yield = 0x20,
|
|
||||||
NonPullerMethods = 0x40,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class GpuSemaphoreOperation {
|
enum class GpuSemaphoreOperation {
|
||||||
AcquireEqual = 0x1,
|
AcquireEqual = 0x1,
|
||||||
@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
|
|||||||
case BufferMethods::UnkCacheFlush:
|
case BufferMethods::UnkCacheFlush:
|
||||||
case BufferMethods::WrcacheFlush:
|
case BufferMethods::WrcacheFlush:
|
||||||
case BufferMethods::FenceValue:
|
case BufferMethods::FenceValue:
|
||||||
|
break;
|
||||||
case BufferMethods::FenceAction:
|
case BufferMethods::FenceAction:
|
||||||
|
ProcessFenceActionMethod();
|
||||||
|
break;
|
||||||
|
case BufferMethods::WaitForInterrupt:
|
||||||
|
ProcessWaitForInterruptMethod();
|
||||||
break;
|
break;
|
||||||
case BufferMethods::SemaphoreTrigger: {
|
case BufferMethods::SemaphoreTrigger: {
|
||||||
ProcessSemaphoreTriggerMethod();
|
ProcessSemaphoreTriggerMethod();
|
||||||
@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPU::ProcessFenceActionMethod() {
|
||||||
|
switch (regs.fence_action.op) {
|
||||||
|
case FenceOperation::Acquire:
|
||||||
|
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||||
|
break;
|
||||||
|
case FenceOperation::Increment:
|
||||||
|
IncrementSyncPoint(regs.fence_action.syncpoint_id);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented operation {}",
|
||||||
|
static_cast<u32>(regs.fence_action.op.Value()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler for the WaitForInterrupt puller method. Currently a stub: it only emits a
// warning to the log and performs no waiting.
void GPU::ProcessWaitForInterruptMethod() {
    // TODO(bunnei): Implement the actual wait.
    LOG_WARNING(HW_GPU, "(STUBBED) called");
}
|
||||||
|
|
||||||
void GPU::ProcessSemaphoreTriggerMethod() {
|
void GPU::ProcessSemaphoreTriggerMethod() {
|
||||||
const auto semaphoreOperationMask = 0xF;
|
const auto semaphoreOperationMask = 0xF;
|
||||||
const auto op =
|
const auto op =
|
||||||
|
@ -263,6 +263,24 @@ public:
|
|||||||
return use_nvdec;
|
return use_nvdec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum class FenceOperation : u32 {
|
||||||
|
Acquire = 0,
|
||||||
|
Increment = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
union FenceAction {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 1, FenceOperation> op;
|
||||||
|
BitField<8, 24, u32> syncpoint_id;
|
||||||
|
|
||||||
|
static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
|
||||||
|
FenceAction result{};
|
||||||
|
result.op.Assign(op);
|
||||||
|
result.syncpoint_id.Assign(syncpoint_id);
|
||||||
|
return {result.raw};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct Regs {
|
struct Regs {
|
||||||
static constexpr size_t NUM_REGS = 0x40;
|
static constexpr size_t NUM_REGS = 0x40;
|
||||||
|
|
||||||
@ -291,10 +309,7 @@ public:
|
|||||||
u32 semaphore_acquire;
|
u32 semaphore_acquire;
|
||||||
u32 semaphore_release;
|
u32 semaphore_release;
|
||||||
u32 fence_value;
|
u32 fence_value;
|
||||||
union {
|
FenceAction fence_action;
|
||||||
BitField<4, 4, u32> operation;
|
|
||||||
BitField<8, 8, u32> id;
|
|
||||||
} fence_action;
|
|
||||||
INSERT_UNION_PADDING_WORDS(0xE2);
|
INSERT_UNION_PADDING_WORDS(0xE2);
|
||||||
|
|
||||||
// Puller state
|
// Puller state
|
||||||
@ -342,6 +357,8 @@ protected:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void ProcessBindMethod(const MethodCall& method_call);
|
void ProcessBindMethod(const MethodCall& method_call);
|
||||||
|
void ProcessFenceActionMethod();
|
||||||
|
void ProcessWaitForInterruptMethod();
|
||||||
void ProcessSemaphoreTriggerMethod();
|
void ProcessSemaphoreTriggerMethod();
|
||||||
void ProcessSemaphoreRelease();
|
void ProcessSemaphoreRelease();
|
||||||
void ProcessSemaphoreAcquire();
|
void ProcessSemaphoreAcquire();
|
||||||
|
Loading…
Reference in New Issue
Block a user