yuzu/src/video_core/gpu.cpp

440 lines
15 KiB
C++
Raw Normal View History

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
2020-02-10 17:32:51 +03:00
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
2020-04-20 20:20:52 +03:00
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
2020-07-10 06:36:38 +03:00
#include "video_core/shader_notify.h"
#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
: system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
auto& rasterizer{renderer->Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
2020-07-10 06:36:38 +03:00
shader_notify = std::make_unique<VideoCore::ShaderNotify>();
}
GPU::~GPU() = default;
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
const Engines::Maxwell3D& GPU::Maxwell3D() const {
return *maxwell_3d;
}
Engines::KeplerCompute& GPU::KeplerCompute() {
return *kepler_compute;
}
const Engines::KeplerCompute& GPU::KeplerCompute() const {
return *kepler_compute;
}
MemoryManager& GPU::MemoryManager() {
return *memory_manager;
}
const MemoryManager& GPU::MemoryManager() const {
return *memory_manager;
}
DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}
const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
2019-06-18 23:58:29 +03:00
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();
while (it != syncpt_interrupts[syncpoint_id].end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
it = syncpt_interrupts[syncpoint_id].erase(it);
continue;
}
it++;
}
}
}
u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
return syncpoints[syncpoint_id].load();
}
void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
auto& interrupt = syncpt_interrupts[syncpoint_id];
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
syncpt_interrupts[syncpoint_id].emplace_back(value);
}
bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
2019-06-18 23:58:29 +03:00
std::lock_guard lock{sync_mutex};
auto& interrupt = syncpt_interrupts[syncpoint_id];
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
if (iter == interrupt.end()) {
return false;
2019-06-08 04:13:20 +03:00
}
interrupt.erase(iter);
return true;
2019-06-08 04:13:20 +03:00
}
2020-04-16 19:29:53 +03:00
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
std::unique_lock lck{flush_request_mutex};
const u64 fence = ++last_flush_fence;
flush_requests.emplace_back(fence, addr, size);
return fence;
}
void GPU::TickWork() {
std::unique_lock lck{flush_request_mutex};
while (!flush_requests.empty()) {
auto& request = flush_requests.front();
const u64 fence = request.fence;
2020-04-16 19:29:53 +03:00
const VAddr addr = request.addr;
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
renderer->Rasterizer().FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
}
2020-02-10 17:32:51 +03:00
u64 GPU::GetTicks() const {
2020-02-14 01:16:07 +03:00
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
configuration: implement per-game configurations (#4098) * Switch game settings to use a pointer In order to add full per-game settings, we need to be able to tell yuzu to switch to using either the global or game configuration. Using a pointer makes it easier to switch. * configuration: add new UI without changing existing funcitonality The new UI also adds General, System, Graphics, Advanced Graphics, and Audio tabs, but as yet they do nothing. This commit keeps yuzu to the same functionality as originally branched. * configuration: Rename files These weren't included in the last commit. Now they are. * configuration: setup global configuration checkbox Global config checkbox now enables/disables the appropriate tabs in the game properties dialog. The use global configuration setting is now saved to the config, defaulting to true. This also addresses some changes requested in the PR. * configuration: swap to per-game config memory for properties dialog Does not set memory going in-game. Swaps to game values when opening the properties dialog, then swaps back when closing it. Uses a `memcpy` to swap. Also implements saving config files, limited to certain groups of configurations so as to not risk setting unsafe configurations. * configuration: change config interfaces to use config-specific pointers When a game is booted, we need to be able to open the configuration dialogs without changing the settings pointer in the game's emualtion. A new pointer specific to just the configuration dialogs can be used to separate changes to just those config dialogs without affecting the emulation. * configuration: boot a game using per-game settings Swaps values where needed to boot a game. * configuration: user correct config during emulation Creates a new pointer specifically for modifying the configuration while emulation is in progress. Both the regular configuration dialog and the game properties dialog now use the pointer Settings::config_values to focus edits to the correct struct. * settings: split Settings::values into two different structs By splitting the settings into two mutually exclusive structs, it becomes easier, as a developer, to determine how to use the Settings structs after per-game configurations is merged. Other benefits include only duplicating the required settings in memory. * settings: move use_docked_mode to Controls group `use_docked_mode` is set in the input settings and cannot be accessed from the system settings. Grouping it with system settings causes it to be saved with per-game settings, which may make transferring configs more difficult later on, especially since docked mode cannot be set from within the game properties dialog. * configuration: Fix the other yuzu executables and a regression In main.cpp, we have to get the title ID before the ROM is loaded, else the renderer will reflect only the global settings and now the user's game specific settings. * settings: use a template to duplicate memory for each setting Replaces the type of each variable in the Settings::Values struct with a new class that allows basic data reading and writing. The new struct Settings::Setting duplicates the data in memory and can manage global overrides per each setting. * configuration: correct add-ons config and swap settings when apropriate Any add-ons interaction happens directly through the global values struct. Swapping bewteen structs now also includes copying the necessary global configs that cannot be changed nor saved in per-game settings. General and System config menus now update based on whether it is viewing the global or per-game settings. * settings: restore old values struct No longer needed with the Settings::Setting class template. * configuration: implement hierarchical game properties dialog This sets the apropriate global or local data in each setting. * clang format * clang format take 2 can the docker container save this? * address comments and style issues * config: read and write settings with global awareness Adds new functions to read and write settings while keeping the global state in focus. Files now generated per-game are much smaller since often they only need address the global state. * settings: restore global state when necessary Upon closing a game or the game properties dialog, we need to restore all global settings to the original global state so that we can properly open the configuration dialog or boot a different game. * configuration: guard setting values incorrectly This disables setting values while a game is running if the setting is overwritten by a per game setting. * config: don't write local settings in the global config Simple guards to prevent writing the wrong settings in the wrong files. * configuration: add comments, assume less, and clang format No longer assumes that a disabled UI element means the global state is turned off, instead opting to directly answer that question. Still however assumes a game is running if it is in that state. * configuration: fix a logic error Should not be negated * restore settings' global state regardless of accept/cancel Fixes loading a properties dialog and causing the global config dialog to show local settings. * fix more logic errors Fixed the frame limit would set the global setting from the game properties dialog. Also strengthened the Settings::Setting member variables and simplified the logic in config reading (ReadSettingGlobal). * fix another logic error In my efforts to guard RestoreGlobalState, I accidentally negated the IsPowered condition. * configure_audio: set toggle_stretched_audio to tristate * fixed custom rtc and rng seed overwriting the global value * clang format * rebased * clang format take 4 * address my own review Basically revert unintended changes * settings: literal instead of casting "No need to cast, use 1U instead" Thanks, Morph! Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com> * Revert "settings: literal instead of casting " This reverts commit 95e992a87c898f3e882ffdb415bb0ef9f80f613f. * main: fix status buttons reporting wrong settings after stop emulation * settings: Log UseDockedMode in the Controls group This should have happened when use_docked_mode was moved over to the controls group internally. This just reflects this in the log. * main: load settings if the file has a title id In other words, don't exit if the loader has trouble getting a title id. * use a zero * settings: initalize resolution factor with constructor instead of casting * Revert "settings: initalize resolution factor with constructor instead of casting" This reverts commit 54c35ecb46a29953842614620f9b7de1aa9d5dc8. * configure_graphics: guard device selector when Vulkan is global Prevents the user from editing the device selector if Vulkan is the global renderer backend. Also resets the vulkan_device variable when the users switches back-and-forth between global and Vulkan. * address reviewer concerns Changes function variables to const wherever they don't need to be changed. Sets Settings::Setting to final as it should not be inherited from. Sets ConfigurationShared::use_global_text to static. Co-Authored-By: VolcaEM <volcaem@users.noreply.github.com> * main: load per-game settings after LoadROM This prevents `Restart Emulation` from restoring the global settings *after* the per-game settings were applied. Thanks to BSoDGamingYT for finding this bug. * Revert "main: load per-game settings after LoadROM" This reverts commit 9d0d48c52d2dcf3bfb1806cc8fa7d5a271a8a804. * main: only restore global settings when necessary Loading the per-game settings cannot happen after the ROM is loaded, so we have to specify when to restore the global state. Again thanks to BSoD for finding the bug. * configuration_shared: address reviewer concerns except operator overrides Dropping operator override usage in next commit. Co-Authored-By: LC <lioncash@users.noreply.github.com> * settings: Drop operator overrides from Setting template Requires using GetValue and SetValue explicitly. Also reverts a change that broke title ID formatting in the game properties dialog. * complete rebase * configuration_shared: translate "Use global configuration" Uses ConfigurePerGame to do so, since its usage, at least as of now, corresponds with ConfigurationShared. * configure_per_game: address reviewer concern As far as I understand, it prevents the program from unnecessarily copying strings. Co-Authored-By: LC <lioncash@users.noreply.github.com> Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com> Co-authored-by: VolcaEM <volcaem@users.noreply.github.com> Co-authored-by: LC <lioncash@users.noreply.github.com>
2020-07-10 05:42:09 +03:00
if (Settings::values.use_fast_gpu_time.GetValue()) {
2020-04-20 20:20:52 +03:00
nanoseconds /= 256;
}
2020-02-14 01:16:07 +03:00
const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
2020-02-10 17:32:51 +03:00
}
void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands();
}
void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}
2020-02-16 23:24:37 +03:00
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
2020-02-16 23:24:37 +03:00
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
UnkCacheFlush = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
FenceValue = 0x1C,
FenceAction = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
};
void GPU::CallMethod(const MethodCall& method_call) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
method_call.subchannel);
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call.method)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
}
}
2020-04-20 20:42:14 +03:00
void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
ASSERT(subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method)) {
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
2020-04-20 20:42:14 +03:00
CallPullerMethod(
{method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
}
}
}
bool GPU::ExecuteMethodOnEngine(u32 method) {
const auto buffer_method = static_cast<BufferMethods>(method);
return buffer_method >= BufferMethods::NonPullerMethods;
}
void GPU::CallPullerMethod(const MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
const auto method = static_cast<BufferMethods>(method_call.method);
switch (method) {
case BufferMethods::BindObject: {
ProcessBindMethod(method_call);
break;
}
case BufferMethods::Nop:
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
case BufferMethods::RefCnt:
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
case BufferMethods::FenceAction:
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
break;
}
case BufferMethods::NotifyIntr: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
break;
}
case BufferMethods::Unk28: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
break;
}
case BufferMethods::SemaphoreAcquire: {
ProcessSemaphoreAcquire();
break;
}
case BufferMethods::SemaphoreRelease: {
ProcessSemaphoreRelease();
break;
}
case BufferMethods::Yield: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
static_cast<u32>(method));
break;
}
}
void GPU::CallEngineMethod(const MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::MAXWELL_B:
maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::KEPLER_COMPUTE_B:
2020-04-28 20:53:47 +03:00
kepler_compute->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
2020-04-28 20:53:47 +03:00
kepler_memory->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
2020-04-20 20:42:14 +03:00
void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
const EngineID engine = bound_engines[subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_B:
maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_COMPUTE_B:
kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
void GPU::ProcessBindMethod(const MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
const auto engine_id = static_cast<EngineID>(method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_B:
dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
break;
case EngineID::KEPLER_COMPUTE_B:
dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_DMA_COPY_A:
dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
}
}
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
2020-02-10 17:32:51 +03:00
block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
void GPU::ProcessSemaphoreRelease() {
memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
} // namespace Tegra