// yuzu/src/video_core/dma_pusher.cpp

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace Tegra {
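
// Hashes every command list in this CommandList so that Step() can later verify that the
// command data in guest memory was not modified between submission and execution.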
void CommandList::RefreshIntegrityChecks(GPU& gpu) {
    command_list_hashes.resize(command_lists.size());

    for (std::size_t index = 0; index < command_lists.size(); ++index) {
        const CommandListHeader command_list_header = command_lists[index];
        std::vector<CommandHeader> command_headers(command_list_header.size);
        gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
                                            command_list_header.size * sizeof(u32));
        command_list_hashes[index] =
            Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
                               command_list_header.size * sizeof(u32));
    }
}

DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}

DmaPusher::~DmaPusher() = default;

MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
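
// Drains the pending pushbuffer queue, stepping through one command list at a time until the
// queue is exhausted or the emulated system is powered off.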
void DmaPusher::DispatchCalls() {
    MICROPROFILE_SCOPE(DispatchCalls);

    gpu.SyncGuestHost();
    // On entering GPU code, assume all memory may be touched by the ARM core.
    gpu.Maxwell3D().OnMemoryWrite();

    dma_pushbuffer_subindex = 0;

    dma_state.is_last_call = true;

    while (system.IsPoweredOn()) {
        if (!Step()) {
            break;
        }
    }
    gpu.FlushCommands();
    gpu.SyncGuestHost();
    gpu.OnCommandListEnd();
}
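
// Processes the command list at the front of the queue: fetches its command headers from GPU
// memory, validates them against the hash computed at submission time, and decodes each header,
// forwarding methods to the puller or to the engine bound to the target subchannel.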
bool DmaPusher::Step() {
    if (!ib_enable || dma_pushbuffer.empty()) {
        // Pushbuffer empty and IB empty or nonexistent - nothing to do
        return false;
    }

    CommandList& command_list{dma_pushbuffer.front()};

    ASSERT_OR_EXECUTE(
        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
            // The command_list is somehow empty; to avoid a crash, ignore it and treat its
            // size as 0.
            dma_pushbuffer.pop();
            dma_pushbuffer_subindex = 0;
            return true;
        });

    if (command_list.prefetch_command_list.size()) {
        // Prefetched command list from nvdrv, used for things like synchronization
        command_headers = std::move(command_list.prefetch_command_list);
        dma_pushbuffer.pop();
    } else {
        const CommandListHeader command_list_header{
            command_list.command_lists[dma_pushbuffer_subindex]};
        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
        const GPUVAddr dma_get = command_list_header.addr;

        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
            // We've gone through the current list, remove it from the queue
            dma_pushbuffer.pop();
            dma_pushbuffer_subindex = 0;
        }

        if (command_list_header.size == 0) {
            return true;
        }

        // Push buffer non-empty, read a word
        command_headers.resize(command_list_header.size);
        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                            command_list_header.size * sizeof(u32));

        // Integrity check
        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
                                                command_list_header.size * sizeof(u32));
        if (new_hash != next_hash) {
            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
            dma_pushbuffer.pop();
            return true;
        }
    }
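
    // Decode the fetched headers: each header either supplies data words for the method that is
    // currently being written (method_count != 0) or starts a new method using one of the
    // submission modes below.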
    for (std::size_t index = 0; index < command_headers.size();) {
        const CommandHeader& command_header = command_headers[index];

        if (dma_state.method_count) {
            // Data word of methods command
            if (dma_state.non_incrementing) {
                const u32 max_write = static_cast<u32>(
                    std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) -
                    index);
                CallMultiMethod(&command_header.argument, max_write);
                dma_state.method_count -= max_write;
                dma_state.is_last_call = true;
                index += max_write;
                continue;
            } else {
                dma_state.is_last_call = dma_state.method_count <= 1;
                CallMethod(command_header.argument);
            }

            if (!dma_state.non_incrementing) {
                dma_state.method++;
            }

            if (dma_increment_once) {
                dma_state.non_incrementing = true;
            }

            dma_state.method_count--;
        } else {
            // No command active - this is the first word of a new one
            switch (command_header.mode) {
            case SubmissionMode::Increasing:
                SetState(command_header);
                dma_state.non_incrementing = false;
                dma_increment_once = false;
                break;
            case SubmissionMode::NonIncreasing:
                SetState(command_header);
                dma_state.non_incrementing = true;
                dma_increment_once = false;
                break;
            case SubmissionMode::Inline:
                dma_state.method = command_header.method;
                dma_state.subchannel = command_header.subchannel;
                CallMethod(command_header.arg_count);
                dma_state.non_incrementing = true;
                dma_increment_once = false;
                break;
            case SubmissionMode::IncreaseOnce:
                SetState(command_header);
                dma_state.non_incrementing = false;
                dma_increment_once = true;
                break;
            default:
                break;
            }
        }
        index++;
    }

    return true;
}
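
// Latches the method, subchannel, and method count of a new command header into the pusher's
// current state.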
void DmaPusher::SetState(const CommandHeader& command_header) {
    dma_state.method = command_header.method;
    dma_state.subchannel = command_header.subchannel;
    dma_state.method_count = command_header.method_count;
}
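
// Routes a single-word method call: methods below non_puller_methods are handled by the GPU
// puller itself, everything else goes to the engine bound to the current subchannel.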
void DmaPusher::CallMethod(u32 argument) const {
    if (dma_state.method < non_puller_methods) {
        gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
    } else {
        subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
                                                      dma_state.is_last_call);
    }
}
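
// Same routing as CallMethod, but forwards a contiguous run of arguments (used for
// non-incrementing submissions) in a single call.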
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
    if (dma_state.method < non_puller_methods) {
        gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
                            dma_state.method_count);
    } else {
        subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
                                                           num_methods, dma_state.method_count);
    }
}

} // namespace Tegra