From f350c3d74ea7880fc6d21f7f638b0d4a70a3246b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 1 Jan 2022 22:03:37 +0100
Subject: [PATCH] Texture cache: Fix the remaining issues with memory management
 and unmapping.

---
 .../service/nvdrv/devices/nvhost_as_gpu.cpp   |  3 +++
 src/video_core/control/channel_state_cache.h  | 10 +++++++
 src/video_core/gpu.cpp                        |  8 ++++++
 src/video_core/gpu.h                          |  2 ++
 src/video_core/memory_manager.cpp             | 11 +++++++-
 src/video_core/rasterizer_interface.h         |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  4 +--
 .../renderer_opengl/gl_rasterizer.h           |  2 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |  4 +--
 .../renderer_vulkan/vk_rasterizer.h           |  2 +-
 src/video_core/texture_cache/texture_cache.h  | 27 ++++++++++++++-----
 .../texture_cache/texture_cache_base.h        |  4 +--
 12 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 344ddfc90..db2a6c3b2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -16,6 +16,7 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "video_core/control/channel_state.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
@@ -24,6 +25,7 @@ namespace Service::Nvidia::Devices {
 nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
     : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
       gmmu{} {}
+
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -132,6 +134,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
 
     gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;
 
     return NvResult::Success;
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 9b1d7362b..31d80e8b7 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -3,6 +3,7 @@
 #include <deque>
 #include <limits>
 #include <mutex>
+#include <optional>
 #include <unordered_map>
 
 #include "common/common_types.h"
@@ -59,6 +60,15 @@ public:
         return ref->second.gpu_memory;
     }
 
+    /// Returns the storage_id stored for `id` in address_spaces, or std::nullopt if absent.
+    std::optional<size_t> getStorageID(size_t id) const {
+        std::scoped_lock lock{config_mutex};
+        if (const auto entry = address_spaces.find(id); entry != address_spaces.end()) {
+            return entry->second.storage_id;
+        }
+        return std::nullopt;
+    }
+
 protected:
     static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8c0ff0094..eebd7f3ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -73,6 +73,10 @@ struct GPU::Impl {
         rasterizer->InitializeChannel(to_init);
     }
 
+    void InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+        memory_manager.BindRasterizer(rasterizer); // Bind the rasterizer to this address space
+    }
+
     void ReleaseChannel(Control::ChannelState& to_release) {
         UNIMPLEMENTED();
     }
@@ -452,6 +456,10 @@ void GPU::ReleaseChannel(Control::ChannelState& to_release) {
     impl->ReleaseChannel(to_release);
 }
 
+void GPU::InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+    impl->InitAddressSpace(memory_manager); // Forwards to GPU::Impl::InitAddressSpace
+}
+
 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
     impl->BindRenderer(std::move(renderer));
 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 74d55e074..7e84b0d2f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -118,6 +118,8 @@ public:
 
     void ReleaseChannel(Control::ChannelState& to_release);
 
+    void InitAddressSpace(Tegra::MemoryManager& memory_manager);
+
     /// Request a host GPU memory flush from the CPU.
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
 
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d4c0dca78..b36067613 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -59,10 +59,19 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
     }
     for (u64 offset{}; offset < size; offset += page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
         SetEntry(current_gpu_addr, entry_type);
+        if (current_entry_type != entry_type) {
+            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+        }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
             const auto index = PageEntryIndex(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL);
+            if (current_entry_type == entry_type && sub_value != page_table[index]) {
+                rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
+                                             page_size);
+            }
             page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
         }
         remaining_size -= page_size;
@@ -168,7 +177,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t s
     const size_t page_last{(addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
         const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr && *page_addr != 0) {
+        if (page_addr) {
             return page_addr;
         }
         ++page_index;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 8dacb2626..5362aafb6 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -95,7 +95,7 @@ public:
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
     /// Remap GPU memory range. This means underneath backing memory changed
-    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4eb9bd116..f745fbf56 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -379,10 +379,10 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 39b20cfb2..d469075a1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -82,7 +82,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7e0805544..50fdf5e18 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -441,10 +441,10 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 642fe6576..c836158b8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -78,7 +78,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 5ef07d18f..a483892e4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -480,12 +480,20 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
     std::vector<ImageId> deleted_images;
-    ForEachImageInRegionGPU(gpu_addr, size,
+    ForEachImageInRegionGPU(as_id, gpu_addr, size,
                             [&](ImageId id, Image&) { deleted_images.push_back(id); });
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            continue; // Already flagged: skip it, but keep handling the remaining images
+        }
+        image.flags |= ImageFlagBits::CpuModified;
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, id);
+        }
+        /*
         if (True(image.flags & ImageFlagBits::Remapped)) {
             continue;
         }
@@ -493,6 +501,7 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
         if (True(image.flags & ImageFlagBits::Tracked)) {
             UntrackImage(image, id);
         }
+        */
     }
 }
 
@@ -1322,13 +1331,19 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size,
+                                              Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = channel_state->gpu_page_table->find(page);
-        if (it == channel_state->gpu_page_table->end()) {
+    auto storage_id = getStorageID(as_id);
+    if (!storage_id) {
+        return;
+    }
+    auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+    ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
+        const auto it = gpu_page_table.find(page);
+        if (it == gpu_page_table.end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index b24968b03..2f4db5047 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -173,7 +173,7 @@ public:
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
     /// Remove images in a region
-    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
 
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
@@ -309,7 +309,7 @@ private:
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
     template <typename Func>
-    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+    void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     template <typename Func>
     void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);