From 2ec7fcecb7d1f0bc8f943a3f7cb4d2e215bc4e76 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 03:17:02 +0100
Subject: [PATCH 01/10] Vulkan: implement D24S8 <-> RGBA8 conversions.

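---
 Review notes (placed below the "---" separator, so they are not part of the
 commit message):
 * blit_image.cpp, ConvertDepthStencil(): `layout` is initialised from
   *one_texture_pipeline_layout, but it is then used to bind a descriptor set
   committed from two_textures_descriptor_allocator and to push constants,
   while the pipeline itself is created by ConvertPipelineEx(..., false),
   which selects *two_textures_pipeline_layout. The layout used for binding
   should be *two_textures_pipeline_layout.
 * blit_image.cpp, ConvertDepthStencil(): the recorded lambda captures
   up_scale and down_shift but never reads them.
 * convert_d24s8_to_abgr8.frag: textureLod() is called with the integer pixel
   coordinate derived from gl_FragCoord. textureLod() on a sampler2D takes
   normalized float coordinates, so this only samples the intended texel if
   the sampler uses unnormalized coordinates (not visible in this patch).
   convert_abgr8_to_d24s8.frag uses texelFetch(tex, coord, 0) for the same
   purpose; doing the same here looks like the intent.
 * blit_image.cpp, ConvertPipelineEx(): the colour-writing D24S8 -> ABGR8
   pipeline is created with PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO.
   Presumably that state describes zero colour attachments; please confirm it
   is valid for a render pass with a colour target.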
 src/video_core/host_shaders/CMakeLists.txt    |  2 +
 .../host_shaders/convert_abgr8_to_d24s8.frag  | 17 ++++
 .../host_shaders/convert_d24s8_to_abgr8.frag  | 21 ++++
 src/video_core/renderer_vulkan/blit_image.cpp | 98 +++++++++++++++++++
 src/video_core/renderer_vulkan/blit_image.h   | 16 +++
 .../renderer_vulkan/vk_texture_cache.cpp      | 12 +++
 6 files changed, 166 insertions(+)
 create mode 100644 src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
 create mode 100644 src/video_core/host_shaders/convert_d24s8_to_abgr8.frag

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index d779a967a..fd3e41434 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -10,6 +10,8 @@ set(SHADER_FILES
     astc_decoder.comp
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
+    convert_abgr8_to_d24s8.frag
+    convert_d24s8_to_abgr8.frag
     convert_depth_to_float.frag
     convert_float_to_depth.frag
     full_screen_triangle.vert
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
new file mode 100644
index 000000000..f7657e50a
--- /dev/null
+++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
@@ -0,0 +1,17 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+// #extension GL_ARB_shader_stencil_export : require
+
+layout(binding = 0) uniform sampler2D color_texture;
+
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy);
+    uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f));
+    uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b;
+
+    gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f);
+    // gl_FragStencilRefARB = int(color.a);
+}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
new file mode 100644
index 000000000..ff3bf8209
--- /dev/null
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy);
+    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+
+    color.r = float(depth >> 16) / (exp2(8) - 1.0);
+    color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0);
+    color.b = float(depth & 0x00FF) / (exp2(8) - 1.0);
+    color.a = float(stencil) / (exp2(8) - 1.0);
+}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b3884a4f5..01535d0c0 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,6 +4,8 @@
 
 #include <algorithm>
 
+#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
+#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
 #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
 #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
 #include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
@@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
       blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
       convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
+      convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
+      convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
       linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
       nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
     if (device.IsExtShaderStencilExportSupported()) {
@@ -448,6 +452,23 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
     Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
 }
 
+void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
+                                          const ImageView& src_image_view, u32 up_scale,
+                                          u32 down_shift) {
+    ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
+                      convert_abgr8_to_d24s8_frag, true);
+    Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale,
+            down_shift);
+}
+
+void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
+                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+    ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
+                      convert_d24s8_to_abgr8_frag, false);
+    ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale,
+                        down_shift);
+}
+
 void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                               const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -495,6 +516,54 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
     scheduler.InvalidateState();
 }
 
+void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
+                                          ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+    const VkPipelineLayout layout = *one_texture_pipeline_layout;
+    const VkImageView src_depth_view = src_image_view.DepthView();
+    const VkImageView src_stencil_view = src_image_view.StencilView();
+    const VkSampler sampler = *nearest_sampler;
+    const VkExtent2D extent{
+        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
+        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
+    };
+    scheduler.RequestRenderpass(dst_framebuffer);
+    scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale,
+                      down_shift, this](vk::CommandBuffer cmdbuf) {
+        const VkOffset2D offset{
+            .x = 0,
+            .y = 0,
+        };
+        const VkViewport viewport{
+            .x = 0.0f,
+            .y = 0.0f,
+            .width = static_cast<float>(extent.width),
+            .height = static_cast<float>(extent.height),
+            .minDepth = 0.0f,
+            .maxDepth = 0.0f,
+        };
+        const VkRect2D scissor{
+            .offset = offset,
+            .extent = extent,
+        };
+        const PushConstants push_constants{
+            .tex_scale = {viewport.width, viewport.height},
+            .tex_offset = {0.0f, 0.0f},
+        };
+        const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
+        UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
+                                       src_stencil_view);
+        // TODO: Barriers
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+                                  nullptr);
+        cmdbuf.SetViewport(0, viewport);
+        cmdbuf.SetScissor(0, scissor);
+        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
+        cmdbuf.Draw(3, 1, 0, 0);
+    });
+    scheduler.InvalidateState();
+}
+
 VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
     const auto it = std::ranges::find(blit_color_keys, key);
     if (it != blit_color_keys.end()) {
@@ -636,4 +705,33 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend
     });
 }
 
+void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                                        vk::ShaderModule& module, bool single_texture) {
+    if (pipeline) {
+        return;
+    }
+    const std::array stages = MakeStages(*full_screen_vert, *module);
+    pipeline = device.GetLogical().CreateGraphicsPipeline({
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(stages.size()),
+        .pStages = stages.data(),
+        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .pTessellationState = nullptr,
+        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
+        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout,
+        .renderPass = renderpass,
+        .subpass = 0,
+        .basePipelineHandle = VK_NULL_HANDLE,
+        .basePipelineIndex = 0,
+    });
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index d77f76678..f754a7294 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -56,10 +56,19 @@ public:
     void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
                          u32 up_scale, u32 down_shift);
 
+    void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                             u32 up_scale, u32 down_shift);
+
+    void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
+                             u32 up_scale, u32 down_shift);
+
 private:
     void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                  const ImageView& src_image_view, u32 up_scale, u32 down_shift);
 
+    void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
+                             ImageView& src_image_view, u32 up_scale, u32 down_shift);
+
     [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
 
     [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
@@ -68,6 +77,9 @@ private:
 
     void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
 
+    void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                           vk::ShaderModule& module, bool single_texture);
+
     const Device& device;
     VKScheduler& scheduler;
     StateTracker& state_tracker;
@@ -83,6 +95,8 @@ private:
     vk::ShaderModule blit_depth_stencil_frag;
     vk::ShaderModule convert_depth_to_float_frag;
     vk::ShaderModule convert_float_to_depth_frag;
+    vk::ShaderModule convert_abgr8_to_d24s8_frag;
+    vk::ShaderModule convert_d24s8_to_abgr8_frag;
     vk::Sampler linear_sampler;
     vk::Sampler nearest_sampler;
 
@@ -94,6 +108,8 @@ private:
     vk::Pipeline convert_r32_to_d32_pipeline;
     vk::Pipeline convert_d16_to_r16_pipeline;
     vk::Pipeline convert_r16_to_d16_pipeline;
+    vk::Pipeline convert_abgr8_to_d24s8_pipeline;
+    vk::Pipeline convert_d24s8_to_abgr8_pipeline;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 407fd2a15..6dfd45f31 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -881,6 +881,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
             return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
         }
         break;
+    case PixelFormat::A8B8G8R8_UNORM:
+    case PixelFormat::B8G8R8A8_UNORM:
+        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+            return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift);
+        }
+        break;
     case PixelFormat::R32_FLOAT:
         if (src_view.format == PixelFormat::D32_FLOAT) {
             return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
@@ -891,6 +897,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
             return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
         }
         break;
+    case PixelFormat::S8_UINT_D24_UNORM:
+        if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
+            src_view.format == PixelFormat::B8G8R8A8_UNORM) {
+            return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift);
+        }
+        break;
     case PixelFormat::D32_FLOAT:
         if (src_view.format == PixelFormat::R32_FLOAT) {
             return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);

From b130f648d7c629411c487722f864c6bafcd2562c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 03:17:54 +0100
Subject: [PATCH 02/10] TextureCache: Fix regression caused by ART and improve
 blit detection algorithm to be smarter.

---
 src/video_core/texture_cache/texture_cache.h |  9 +++----
 src/video_core/texture_cache/util.cpp        | 28 +++++++++++++++++---
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 241f71a91..5ade3ce55 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -475,6 +475,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
     const BlitImages images = GetBlitImages(dst, src);
     const ImageId dst_id = images.dst_id;
     const ImageId src_id = images.src_id;
+
     PrepareImage(src_id, false, false);
     PrepareImage(dst_id, true, false);
 
@@ -1094,12 +1095,8 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
         if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
             continue;
         }
-        if (!dst_id) {
-            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
-        }
-        if (!src_id) {
-            src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
-        }
+        src_id = FindOrInsertImage(src_info, src_addr);
+        dst_id = FindOrInsertImage(dst_info, dst_addr);
     } while (has_deleted_images);
     return BlitImages{
         .dst_id = dst_id,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index ddc9fb13a..8f9eb387c 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1151,17 +1151,37 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
 
 void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
                       const ImageBase* src) {
-    if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+    if (src) {
         src_info.format = src->info.format;
+        src_info.num_samples = src->info.num_samples;
+        src_info.size = src->info.size;
     }
-    if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+    if (dst) {
         dst_info.format = dst->info.format;
+        dst_info.num_samples = dst->info.num_samples;
+        dst_info.size = dst->info.size;
     }
     if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
-        dst_info.format = src->info.format;
+        if (dst) {
+            src_info.format = dst_info.format;
+        } else {
+            dst_info.format = src->info.format;
+        }
     }
     if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
-        src_info.format = dst->info.format;
+        if (src) {
+            if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
+                dst_info.format = src->info.format;
+            }
+        } else {
+            src_info.format = dst->info.format;
+        }
+    }
+    if (src_info.num_samples > 1) {
+        dst_info.format = src_info.format;
+    }
+    if (dst_info.num_samples > 1) {
+        src_info.format = dst_info.format;
     }
 }
 

From 0ff228405faae92a39167b9aec072e14744eae35 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 05:46:57 +0100
Subject: [PATCH 03/10] TextureCache: force same image format when resolving an
 image.

---
 src/video_core/texture_cache/texture_cache.h | 10 ++++++++--
 src/video_core/texture_cache/types.h         |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 5ade3ce55..06257f064 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -759,7 +759,8 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
             return ImageId{};
         }
     }
-    const bool broken_views = runtime.HasBrokenTextureViewFormats();
+    const bool broken_views =
+        runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews);
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
@@ -1096,7 +1097,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
             continue;
         }
         src_id = FindOrInsertImage(src_info, src_addr);
-        dst_id = FindOrInsertImage(dst_info, dst_addr);
+        RelaxedOptions dst_options{};
+        if (src_info.num_samples > 1) {
+            // it's a resolve, we must enforce the same format.
+            dst_options = RelaxedOptions::ForceBrokenViews;
+        }
+        dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options);
     } while (has_deleted_images);
     return BlitImages{
         .dst_id = dst_id,
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 5c274abdf..5ac27b3a7 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 {
     Size = 1 << 0,
     Format = 1 << 1,
     Samples = 1 << 2,
+    ForceBrokenViews = 1 << 3,
 };
 DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
 

From b805c7bf058c6da04620cf75880509bdf6d5986c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 06:27:44 +0100
Subject: [PATCH 04/10] TextureCache: Implement additional D24S8 conversions.

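---
 Review notes (placed below the "---" separator, so they are not part of the
 commit message):
 * convert_d24s8_to_r16g16.frag: color.r is float(depth >> 16) and color.g is
   float((depth >> 16) & 0x00FF), i.e. both channels carry the same top 8
   bits of the 24-bit depth value and the low 16 bits are never written.
   One of the two was presumably meant to be (depth & 0xFFFF).
 * convert_d24s8_to_b10g11r11.frag: the two 11-bit fields are masked with
   0x00FF, which keeps only 8 bits; 0x07FF would match the exp2(11) - 1.0
   divisor. Also, depth >> 22 leaves only 2 significant bits of a 24-bit
   value but is divided by exp2(10) - 1.0.
 * Both shaders fetch `stencil` but never use it.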
 src/video_core/host_shaders/CMakeLists.txt    |  2 ++
 .../convert_d24s8_to_b10g11r11.frag           | 21 ++++++++++++++++++
 .../host_shaders/convert_d24s8_to_r16g16.frag | 21 ++++++++++++++++++
 src/video_core/renderer_vulkan/blit_image.cpp | 22 +++++++++++++++++++
 src/video_core/renderer_vulkan/blit_image.h   | 10 +++++++++
 .../renderer_vulkan/vk_texture_cache.cpp      | 10 +++++++++
 6 files changed, 86 insertions(+)
 create mode 100644 src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
 create mode 100644 src/video_core/host_shaders/convert_d24s8_to_r16g16.frag

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index fd3e41434..87042195a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,6 +12,8 @@ set(SHADER_FILES
     block_linear_unswizzle_3d.comp
     convert_abgr8_to_d24s8.frag
     convert_d24s8_to_abgr8.frag
+    convert_d24s8_to_b10g11r11.frag
+    convert_d24s8_to_r16g16.frag
     convert_depth_to_float.frag
     convert_float_to_depth.frag
     full_screen_triangle.vert
diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
new file mode 100644
index 000000000..c743d3a13
--- /dev/null
+++ b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy);
+    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+
+    color.b = float(depth >> 22) / (exp2(10) - 1.0);
+    color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0);
+    color.r = float(depth & 0x00FF) / (exp2(11) - 1.0);
+    color.a = 1.0f;
+}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
new file mode 100644
index 000000000..2a9443d3d
--- /dev/null
+++ b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy);
+    uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+    uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+
+    color.r = float(depth >> 16) / (exp2(16) - 1.0);
+    color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0);
+    color.b = 0.0f;
+    color.a = 1.0f;
+}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 01535d0c0..12b28aadd 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -6,6 +6,8 @@
 
 #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
 #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
+#include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h"
+#include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h"
 #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
 #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
 #include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
@@ -358,6 +360,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
       convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
       convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
+      convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)),
+      convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)),
       linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
       nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
     if (device.IsExtShaderStencilExportSupported()) {
@@ -469,6 +473,24 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
                         down_shift);
 }
 
+void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer,
+                                              ImageView& src_image_view, u32 up_scale,
+                                              u32 down_shift) {
+    ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(),
+                      convert_d24s8_to_b10g11r11_frag, false);
+    ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view,
+                        up_scale, down_shift);
+}
+
+void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer,
+                                           ImageView& src_image_view, u32 up_scale,
+                                           u32 down_shift) {
+    ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(),
+                      convert_d24s8_to_r16g16_frag, false);
+    ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view,
+                        up_scale, down_shift);
+}
+
 void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                               const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index f754a7294..10d24c4b7 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -62,6 +62,12 @@ public:
     void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
                              u32 up_scale, u32 down_shift);
 
+    void ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
+                                 u32 up_scale, u32 down_shift);
+
+    void ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
+                              u32 up_scale, u32 down_shift);
+
 private:
     void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                  const ImageView& src_image_view, u32 up_scale, u32 down_shift);
@@ -97,6 +103,8 @@ private:
     vk::ShaderModule convert_float_to_depth_frag;
     vk::ShaderModule convert_abgr8_to_d24s8_frag;
     vk::ShaderModule convert_d24s8_to_abgr8_frag;
+    vk::ShaderModule convert_d24s8_to_b10g11r11_frag;
+    vk::ShaderModule convert_d24s8_to_r16g16_frag;
     vk::Sampler linear_sampler;
     vk::Sampler nearest_sampler;
 
@@ -110,6 +118,8 @@ private:
     vk::Pipeline convert_r16_to_d16_pipeline;
     vk::Pipeline convert_abgr8_to_d24s8_pipeline;
     vk::Pipeline convert_d24s8_to_abgr8_pipeline;
+    vk::Pipeline convert_d24s8_to_b10g11r11_pipeline;
+    vk::Pipeline convert_d24s8_to_r16g16_pipeline;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 6dfd45f31..fd6064271 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -887,6 +887,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
             return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift);
         }
         break;
+    case PixelFormat::B10G11R11_FLOAT:
+        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+            return blit_image_helper.ConvertD24S8ToB10G11R11(dst, src_view, up_scale, down_shift);
+        }
+        break;
+    case PixelFormat::R16G16_UNORM:
+        if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+            return blit_image_helper.ConvertD24S8ToR16G16(dst, src_view, up_scale, down_shift);
+        }
+        break;
     case PixelFormat::R32_FLOAT:
         if (src_view.format == PixelFormat::D32_FLOAT) {
             return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);

From 6f896d1fae3d244f83450a485d15e7cebe79abaa Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 22:23:48 +0100
Subject: [PATCH 05/10] TextureCache: Further fixes on resolve algorithm.

---
 src/video_core/texture_cache/texture_cache.h |  8 +++----
 src/video_core/texture_cache/util.cpp        | 25 ++++++++++----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 06257f064..4188f93c5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1096,13 +1096,13 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
         if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
             continue;
         }
-        src_id = FindOrInsertImage(src_info, src_addr);
-        RelaxedOptions dst_options{};
+        RelaxedOptions find_options{};
         if (src_info.num_samples > 1) {
             // it's a resolve, we must enforce the same format.
-            dst_options = RelaxedOptions::ForceBrokenViews;
+            find_options = RelaxedOptions::ForceBrokenViews;
         }
-        dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options);
+        src_id = FindOrInsertImage(src_info, src_addr, find_options);
+        dst_id = FindOrInsertImage(dst_info, dst_addr, find_options);
     } while (has_deleted_images);
     return BlitImages{
         .dst_id = dst_id,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 8f9eb387c..e4d82631e 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1151,19 +1151,25 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
 
 void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
                       const ImageBase* src) {
+    bool is_resolve = false;
+    const auto original_src_format = src_info.format;
+    const auto original_dst_format = dst_info.format;
     if (src) {
-        src_info.format = src->info.format;
+        if (GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+            src_info.format = src->info.format;
+        }
+        is_resolve = src->info.num_samples > 1;
         src_info.num_samples = src->info.num_samples;
         src_info.size = src->info.size;
     }
-    if (dst) {
+    if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
         dst_info.format = dst->info.format;
-        dst_info.num_samples = dst->info.num_samples;
-        dst_info.size = dst->info.size;
     }
     if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
         if (dst) {
-            src_info.format = dst_info.format;
+            if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) {
+                src_info.format = original_src_format;
+            }
         } else {
             dst_info.format = src->info.format;
         }
@@ -1171,18 +1177,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase*
     if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
         if (src) {
             if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) {
-                dst_info.format = src->info.format;
+                dst_info.format = original_dst_format;
             }
         } else {
             src_info.format = dst->info.format;
         }
     }
-    if (src_info.num_samples > 1) {
-        dst_info.format = src_info.format;
-    }
-    if (dst_info.num_samples > 1) {
-        src_info.format = dst_info.format;
-    }
+    ASSERT(!is_resolve || dst_info.format == src_info.format);
 }
 
 u32 MapSizeBytes(const ImageBase& image) {

From 1d5e6a51d7f66cf089d541a009c84c373fd5c6ab Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 19 Nov 2021 23:22:44 +0100
Subject: [PATCH 06/10] TextureCache: Add B10G11R11 to D24S8 converter.

---
 src/video_core/host_shaders/CMakeLists.txt    |  1 +
 .../convert_b10g11r11_to_d24s8.frag           | 19 ++++++
 src/video_core/renderer_vulkan/blit_image.cpp | 62 +++++++++++++++----
 src/video_core/renderer_vulkan/blit_image.h   | 12 +++-
 .../renderer_vulkan/vk_texture_cache.cpp      |  3 +
 5 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 87042195a..a2e046f12 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SHADER_FILES
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
     convert_abgr8_to_d24s8.frag
+    convert_b10g11r11_to_d24s8.frag
     convert_d24s8_to_abgr8.frag
     convert_d24s8_to_b10g11r11.frag
     convert_d24s8_to_r16g16.frag
diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
new file mode 100644
index 000000000..b7358c15c
--- /dev/null
+++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+// #extension GL_ARB_shader_stencil_export : require
+
+layout(binding = 0) uniform sampler2D color_texture;
+
+void main() {
+    // Integer texel coordinate of the fragment being shaded.
+    ivec2 texel = ivec2(gl_FragCoord.xy);
+    // Scale each channel to its bit width (R/G: 11 bits, B: 10 bits) and truncate to integers.
+    uvec3 bits = uvec3(texelFetch(color_texture, texel, 0).rgb * vec3(2047.0f, 2047.0f, 1023.0f));
+    // Reassemble the 32-bit word: R in bits 0-10, G in bits 11-21, B in bits 22-31.
+    uint depth_stencil_unorm = (bits.b << 22) | (bits.g << 11) | bits.r;
+    gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
+    // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 12b28aadd..e70459de5 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 
 #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
+#include "video_core/host_shaders/convert_b10g11r11_to_d24s8_frag_spv.h"
 #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
 #include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h"
 #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h"
@@ -359,6 +360,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
       convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
       convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
+      convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)),
       convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
       convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)),
       convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)),
@@ -459,16 +461,25 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
 void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
                                           const ImageView& src_image_view, u32 up_scale,
                                           u32 down_shift) {
-    ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
-                      convert_abgr8_to_d24s8_frag, true);
+    ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
+                                 convert_abgr8_to_d24s8_frag, true);
     Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale,
             down_shift);
 }
 
+void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer,
+                                              const ImageView& src_image_view, u32 up_scale,
+                                              u32 down_shift) {
+    vk::Pipeline& pipeline = convert_b10g11r11_to_d24s8_pipeline; // cached member, built once
+    const auto renderpass = dst_framebuffer->RenderPass();
+    ConvertPipelineDepthTargetEx(pipeline, renderpass, convert_b10g11r11_to_d24s8_frag, true);
+    Convert(*pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+}
+
 void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
                                           ImageView& src_image_view, u32 up_scale, u32 down_shift) {
-    ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
-                      convert_d24s8_to_abgr8_frag, false);
+    ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
+                                 convert_d24s8_to_abgr8_frag, false);
     ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale,
                         down_shift);
 }
@@ -476,8 +487,8 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
 void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer,
                                               ImageView& src_image_view, u32 up_scale,
                                               u32 down_shift) {
-    ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(),
-                      convert_d24s8_to_b10g11r11_frag, false);
+    ConvertPipelineColorTargetEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(),
+                                 convert_d24s8_to_b10g11r11_frag, false);
     ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view,
                         up_scale, down_shift);
 }
@@ -485,8 +496,8 @@ void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer
 void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer,
                                            ImageView& src_image_view, u32 up_scale,
                                            u32 down_shift) {
-    ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(),
-                      convert_d24s8_to_r16g16_frag, false);
+    ConvertPipelineColorTargetEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(),
+                                 convert_d24s8_to_r16g16_frag, false);
     ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view,
                         up_scale, down_shift);
 }
@@ -540,7 +551,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
 
 void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                                           ImageView& src_image_view, u32 up_scale, u32 down_shift) {
-    const VkPipelineLayout layout = *one_texture_pipeline_layout;
+    const VkPipelineLayout layout = *two_textures_pipeline_layout;
     const VkImageView src_depth_view = src_image_view.DepthView();
     const VkImageView src_stencil_view = src_image_view.StencilView();
     const VkSampler sampler = *nearest_sampler;
@@ -727,8 +738,37 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend
     });
 }
 
-void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
-                                        vk::ShaderModule& module, bool single_texture) {
+void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                                                   vk::ShaderModule& module, bool single_texture) {
+    if (pipeline) { // lazily built: an existing pipeline is left untouched
+        return;
+    }
+    const std::array stages = MakeStages(*full_screen_vert, *module); // vertex + given fragment
+    pipeline = device.GetLogical().CreateGraphicsPipeline({
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(stages.size()),
+        .pStages = stages.data(),
+        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .pTessellationState = nullptr,
+        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .pDepthStencilState = nullptr, // no depth-stencil state; only color blend state is set
+        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
+        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        // single_texture picks the one-texture layout, otherwise the two-texture layout is used
+        .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout,
+        .renderPass = renderpass,
+        .subpass = 0,
+        .basePipelineHandle = VK_NULL_HANDLE, // not derived from another pipeline
+        .basePipelineIndex = 0,
+    });
+}
+
+void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                                                   vk::ShaderModule& module, bool single_texture) {
     if (pipeline) {
         return;
     }
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 10d24c4b7..607964b5e 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -59,6 +59,9 @@ public:
     void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
                              u32 up_scale, u32 down_shift);
 
+    void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer,
+                                 const ImageView& src_image_view, u32 up_scale, u32 down_shift);
+
     void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
                              u32 up_scale, u32 down_shift);
 
@@ -83,8 +86,11 @@ private:
 
     void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
 
-    void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
-                           vk::ShaderModule& module, bool single_texture);
+    void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                                      vk::ShaderModule& module, bool single_texture);
+
+    void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
+                                      vk::ShaderModule& module, bool single_texture);
 
     const Device& device;
     VKScheduler& scheduler;
@@ -102,6 +108,7 @@ private:
     vk::ShaderModule convert_depth_to_float_frag;
     vk::ShaderModule convert_float_to_depth_frag;
     vk::ShaderModule convert_abgr8_to_d24s8_frag;
+    vk::ShaderModule convert_b10g11r11_to_d24s8_frag;
     vk::ShaderModule convert_d24s8_to_abgr8_frag;
     vk::ShaderModule convert_d24s8_to_b10g11r11_frag;
     vk::ShaderModule convert_d24s8_to_r16g16_frag;
@@ -117,6 +124,7 @@ private:
     vk::Pipeline convert_d16_to_r16_pipeline;
     vk::Pipeline convert_r16_to_d16_pipeline;
     vk::Pipeline convert_abgr8_to_d24s8_pipeline;
+    vk::Pipeline convert_b10g11r11_to_d24s8_pipeline;
     vk::Pipeline convert_d24s8_to_abgr8_pipeline;
     vk::Pipeline convert_d24s8_to_b10g11r11_pipeline;
     vk::Pipeline convert_d24s8_to_r16g16_pipeline;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index fd6064271..28a659c0e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -912,6 +912,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
             src_view.format == PixelFormat::B8G8R8A8_UNORM) {
             return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift);
         }
+        if (src_view.format == PixelFormat::B10G11R11_FLOAT) {
+            return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift);
+        }
         break;
     case PixelFormat::D32_FLOAT:
         if (src_view.format == PixelFormat::R32_FLOAT) {

From e02cff2f69f9a90777f87f85f290f83fc04c16ec Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 20 Nov 2021 00:02:12 +0100
Subject: [PATCH 07/10] TextureCache: Add R16G16 to D24S8 converter.

---
 src/video_core/host_shaders/CMakeLists.txt     |  1 +
 .../host_shaders/convert_r16g16_to_d24s8.frag  | 18 ++++++++++++++++++
 src/video_core/renderer_vulkan/blit_image.cpp  | 11 +++++++++++
 src/video_core/renderer_vulkan/blit_image.h    |  5 +++++
 .../renderer_vulkan/vk_texture_cache.cpp       |  3 +++
 5 files changed, 38 insertions(+)
 create mode 100644 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index a2e046f12..1c91999d7 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -17,6 +17,7 @@ set(SHADER_FILES
     convert_d24s8_to_r16g16.frag
     convert_depth_to_float.frag
     convert_float_to_depth.frag
+    convert_r16g16_to_d24s8.frag
     full_screen_triangle.vert
     fxaa.frag
     fxaa.vert
diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
new file mode 100644
index 000000000..7b1b914f6
--- /dev/null
+++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
@@ -0,0 +1,18 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+// #extension GL_ARB_shader_stencil_export : require
+
+layout(binding = 0) uniform sampler2D color_texture;
+
+void main() {
+    ivec2 coord = ivec2(gl_FragCoord.xy); // integer texel coordinate of this fragment
+    vec4 color = texelFetch(color_texture, coord, 0).rgba;
+    // R fills bits 0-15 and G bits 16-31 so neither channel is lost (both were shifted by 16).
+    uint depth_stencil_unorm = uint(color.r * (exp2(16) - 1.0f))
+                      | (uint(color.g * (exp2(16) - 1.0f)) << 16);
+    gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); // upper 24 bits
+    // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index e70459de5..28b631f73 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -11,6 +11,7 @@
 #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h"
 #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
 #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
+#include "video_core/host_shaders/convert_r16g16_to_d24s8_frag_spv.h"
 #include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
 #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
 #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
@@ -361,6 +362,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
       convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
       convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
       convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)),
+      convert_r16g16_to_d24s8_frag(BuildShader(device, CONVERT_R16G16_TO_D24S8_FRAG_SPV)),
       convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
       convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)),
       convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)),
@@ -476,6 +478,15 @@ void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer
             down_shift);
 }
 
+void BlitImageHelper::ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer,
+                                           const ImageView& src_image_view, u32 up_scale,
+                                           u32 down_shift) {
+    vk::Pipeline& pipeline = convert_r16g16_to_d24s8_pipeline; // cached member, built once
+    const auto renderpass = dst_framebuffer->RenderPass();
+    ConvertPipelineDepthTargetEx(pipeline, renderpass, convert_r16g16_to_d24s8_frag, true);
+    Convert(*pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+}
+
 void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
                                           ImageView& src_image_view, u32 up_scale, u32 down_shift) {
     ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 607964b5e..cec095341 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -62,6 +62,9 @@ public:
     void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer,
                                  const ImageView& src_image_view, u32 up_scale, u32 down_shift);
 
+    void ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+                              u32 up_scale, u32 down_shift);
+
     void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
                              u32 up_scale, u32 down_shift);
 
@@ -109,6 +112,7 @@ private:
     vk::ShaderModule convert_float_to_depth_frag;
     vk::ShaderModule convert_abgr8_to_d24s8_frag;
     vk::ShaderModule convert_b10g11r11_to_d24s8_frag;
+    vk::ShaderModule convert_r16g16_to_d24s8_frag;
     vk::ShaderModule convert_d24s8_to_abgr8_frag;
     vk::ShaderModule convert_d24s8_to_b10g11r11_frag;
     vk::ShaderModule convert_d24s8_to_r16g16_frag;
@@ -125,6 +129,7 @@ private:
     vk::Pipeline convert_r16_to_d16_pipeline;
     vk::Pipeline convert_abgr8_to_d24s8_pipeline;
     vk::Pipeline convert_b10g11r11_to_d24s8_pipeline;
+    vk::Pipeline convert_r16g16_to_d24s8_pipeline;
     vk::Pipeline convert_d24s8_to_abgr8_pipeline;
     vk::Pipeline convert_d24s8_to_b10g11r11_pipeline;
     vk::Pipeline convert_d24s8_to_r16g16_pipeline;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 28a659c0e..af1a11059 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -915,6 +915,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
         if (src_view.format == PixelFormat::B10G11R11_FLOAT) {
             return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift);
         }
+        if (src_view.format == PixelFormat::R16G16_UNORM) {
+            return blit_image_helper.ConvertR16G16ToD24S8(dst, src_view, up_scale, down_shift);
+        }
         break;
     case PixelFormat::D32_FLOAT:
         if (src_view.format == PixelFormat::R32_FLOAT) {

From 0857f82913d0bcf2de4721233f74cd40ecddcdae Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 20 Nov 2021 06:15:29 +0100
Subject: [PATCH 08/10] TextureCache: Implement buffer copies on Vulkan.

---
 .../renderer_opengl/gl_texture_cache.cpp      |   4 +-
 .../renderer_opengl/gl_texture_cache.h        |   7 +-
 .../renderer_vulkan/vk_texture_cache.cpp      | 174 ++++++++++++++++++
 .../renderer_vulkan/vk_texture_cache.h        |  11 +-
 src/video_core/texture_cache/texture_cache.h  |   4 +-
 .../texture_cache/texture_cache_base.h        |   2 -
 6 files changed, 193 insertions(+), 9 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 6956535e5..e70bbec81 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -526,8 +526,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
     }
 }
 
-void TextureCacheRuntime::ConvertImage(Image& dst, Image& src,
-                                       std::span<const VideoCommon::ImageCopy> copies) {
+void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
+                                           std::span<const VideoCommon::ImageCopy> copies) {
     LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
     format_conversion_pass.ConvertImage(dst, src, copies);
 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 578f8d523..ad5157d66 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -84,9 +84,13 @@ public:
 
     u64 GetDeviceLocalMemory() const;
 
+    bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
+        return true; // unconditional: neither image is inspected by this implementation
+    }
+
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
-    void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
         UNIMPLEMENTED();
@@ -338,7 +342,6 @@ struct TextureCacheParams {
     static constexpr bool FRAMEBUFFER_BLITS = true;
     static constexpr bool HAS_EMULATED_COPIES = true;
     static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
-    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;
 
     using Runtime = OpenGL::TextureCacheRuntime;
     using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index af1a11059..02215cfc2 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -308,6 +308,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     };
 }
 
+[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src,
+                                                    VkImageAspectFlags aspect_mask) noexcept {
+    const auto& subresource = is_src ? copy.src_subresource : copy.dst_subresource;
+    return VkBufferImageCopy{
+        .bufferOffset = 0,      // every region starts at the beginning of the buffer
+        .bufferRowLength = 0,   // 0: rows are tightly packed according to imageExtent
+        .bufferImageHeight = 0, // 0: slices are tightly packed according to imageExtent
+        .imageSubresource = MakeImageSubresourceLayers(subresource, aspect_mask),
+        .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset),
+        .imageExtent = MakeExtent3D(copy.extent),
+    };
+}
+
 [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
     std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
     std::vector<VkBufferCopy> result(copies.size());
@@ -754,6 +767,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
     return staging_buffer_pool.Request(size, MemoryUsage::Download);
 }
 
+bool TextureCacheRuntime::ShouldReinterpret(Image& dst, [[maybe_unused]] Image& src) {
+    // Only depth-stencil destinations are candidates; `src` is not consulted here.
+    const auto dst_type = VideoCore::Surface::GetFormatType(dst.info.format);
+    const bool is_depth_stencil = dst_type == VideoCore::Surface::SurfaceType::DepthStencil;
+    // Reinterpret exactly when the device reports no shader stencil export support.
+    return is_depth_stencil && !device.IsExtShaderStencilExportSupported();
+}
+
+[[nodiscard]] size_t NextPow2(size_t value) {
+    return std::bit_ceil(value); // smallest power of two >= value; well-defined 1 for value == 0
+}
+
+VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
+    const size_t last_byte = needed_size > 0 ? needed_size - 1 : 0; // clamp: 0 - 1 would wrap
+    const auto level = (8 * sizeof(size_t)) - std::countl_zero(last_byte); // ceil(log2(size))
+    if (buffer_commits[level]) { // this size class was already created; reuse it
+        return *buffers[level];
+    }
+    VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                               VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+                               VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+    buffers[level] = device.GetLogical().CreateBuffer({
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = size_t{1} << level, // capacity of slot `level` is 2^level bytes
+        .usage = flags,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    buffer_commits[level] = std::make_unique<MemoryCommit>(
+        memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
+    return *buffers[level];
+}
+
+void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
+                                           std::span<const VideoCommon::ImageCopy> copies) {
+    // Moves texel data from src to dst through a temporary buffer: image -> buffer -> image.
+    std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
+    std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
+    const VkImageAspectFlags src_aspect_mask = src.AspectMask();
+    const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
+    std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) {
+        return MakeBufferImageCopy(copy, true, src_aspect_mask);
+    });
+    std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) {
+        return MakeBufferImageCopy(copy, false, dst_aspect_mask);
+    });
+    const u32 img_bpp = BytesPerBlock(src.info.format);
+    size_t total_size = 0; // NOTE(review): all regions use bufferOffset 0; confirm for >1 copy
+    for (const auto& copy : copies) {
+        total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp;
+    }
+    const VkBuffer copy_buffer = GetTemporaryBuffer(total_size);
+    const VkImage dst_image = dst.Handle();
+    const VkImage src_image = src.Handle();
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask,
+                      vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) {
+        RangedBarrierRange dst_range;
+        RangedBarrierRange src_range;
+        for (const VkBufferImageCopy& copy : vk_in_copies) {
+            src_range.AddLayers(copy.imageSubresource);
+        }
+        for (const VkBufferImageCopy& copy : vk_out_copies) {
+            dst_range.AddLayers(copy.imageSubresource);
+        }
+        static constexpr VkMemoryBarrier READ_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+        };
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        };
+        const std::array pre_barriers{ // src: GENERAL -> TRANSFER_SRC_OPTIMAL
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
+            },
+        };
+        const std::array middle_in_barrier{ // src: TRANSFER_SRC_OPTIMAL -> GENERAL
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = 0,
+                .dstAccessMask = 0,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
+            },
+        };
+        const std::array middle_out_barrier{ // dst: GENERAL -> TRANSFER_DST_OPTIMAL
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
+            },
+        };
+        const std::array post_barriers{ // dst: TRANSFER_DST_OPTIMAL -> GENERAL
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
+            },
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, {}, {}, pre_barriers);
+        // Stage 1: src image -> buffer, then stage 2: buffer -> dst image.
+        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
+                                 vk_in_copies);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, WRITE_BARRIER, nullptr, middle_in_barrier);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, READ_BARRIER, {}, middle_out_barrier);
+        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, // layout set by middle_out_barrier
+                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_out_copies);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, {}, {}, post_barriers);
+    });
+}
+
 void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                                     const Region2D& dst_region, const Region2D& src_region,
                                     Tegra::Engines::Fermi2D::Filter filter,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f5f8f9a74..44e9dcee4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -61,6 +61,10 @@ public:
 
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
+    bool ShouldReinterpret(Image& dst, Image& src);
+
+    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
 
     bool CanAccelerateImageUpload(Image&) const noexcept {
@@ -82,6 +86,8 @@ public:
         return true;
     }
 
+    [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
+
     const Device& device;
     VKScheduler& scheduler;
     MemoryAllocator& memory_allocator;
@@ -90,6 +96,10 @@ public:
     ASTCDecoderPass& astc_decoder_pass;
     RenderPassCache& render_pass_cache;
     const Settings::ResolutionScalingInfo& resolution;
+
+    constexpr static size_t indexing_slots = 8 * sizeof(size_t);
+    std::array<vk::Buffer, indexing_slots> buffers{};
+    std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
 };
 
 class Image : public VideoCommon::ImageBase {
@@ -316,7 +326,6 @@ struct TextureCacheParams {
     static constexpr bool FRAMEBUFFER_BLITS = false;
     static constexpr bool HAS_EMULATED_COPIES = false;
     static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
-    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;
 
     using Runtime = Vulkan::TextureCacheRuntime;
     using Image = Vulkan::Image;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 4188f93c5..44a0d42ba 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1762,8 +1762,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
     }
     UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
     UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
-    if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) {
-        return runtime.ConvertImage(dst, src, copies);
+    if (runtime.ShouldReinterpret(dst, src)) {
+        return runtime.ReinterpretImage(dst, src, copies);
     }
     for (const ImageCopy& copy : copies) {
         UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index a9504c0e8..643ad811c 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -59,8 +59,6 @@ class TextureCache {
     static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
     /// True when the API can provide info about the memory of the device.
     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
-    /// True when the API provides utilities for pixel format conversions.
-    static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;
 
     static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
     static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;

From 4ca6e9a9e23b96d259cc1f1ba50b9464a8fa12e8 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 20 Nov 2021 06:17:01 +0100
Subject: [PATCH 09/10] TextureCache: Ensure full conversions on depth/stencil
 write shaders.

---
 src/video_core/host_shaders/convert_abgr8_to_d24s8.frag     | 4 ++--
 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag | 4 ++--
 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag    | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
index f7657e50a..4e4ab6a26 100644
--- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #version 450
-// #extension GL_ARB_shader_stencil_export : require
+#extension GL_ARB_shader_stencil_export : require
 
 layout(binding = 0) uniform sampler2D color_texture;
 
@@ -13,5 +13,5 @@ void main() {
     uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b;
 
     gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f);
-    // gl_FragStencilRefARB = int(color.a);
+    gl_FragStencilRefARB = int(color.a);
 }
diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
index b7358c15c..2999a84cf 100644
--- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #version 450
-// #extension GL_ARB_shader_stencil_export : require
+#extension GL_ARB_shader_stencil_export : require
 
 layout(binding = 0) uniform sampler2D color_texture;
 
@@ -15,5 +15,5 @@ void main() {
                       | (uint(color.r * (exp2(11) - 1.0f)));
 
     gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
-    // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+    gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
 }
diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
index 7b1b914f6..3df70575e 100644
--- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
+++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #version 450
-// #extension GL_ARB_shader_stencil_export : require
+#extension GL_ARB_shader_stencil_export : require
 
 layout(binding = 0) uniform sampler2D color_texture;
 
@@ -14,5 +14,5 @@ void main() {
                       | (uint(color.g * (exp2(16) - 1.0f)) << 16);
 
     gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
-    // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
+    gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
 }

From da2fe8190518d3266df7f4a48f9b651eaea84d4b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 20 Nov 2021 14:46:19 +0100
Subject: [PATCH 10/10] TextureCache: Refactor and fix Linux compilation.

---
 src/common/bit_util.h                               | 7 +++++++
 src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 ++----
 src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 ++-----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 64520ca4e..eef8c1c5a 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -7,6 +7,7 @@
 #include <bit>
 #include <climits>
 #include <cstddef>
+#include <type_traits>
 
 #include "common/common_types.h"
 
@@ -44,4 +45,14 @@ template <typename T>
     return static_cast<u32>(log2_f + static_cast<u64>((value ^ (1ULL << log2_f)) != 0ULL));
 }
 
+/// Rounds value up to the nearest power of two and returns it as T.
+/// Values of 0 and 1 both yield 1. The value is widened to u64 before the bit math so that
+/// types narrower than int are not thrown off by integer promotion.
+/// value must be non-negative and the rounded result must be representable in T.
+template <typename T>
+requires std::is_integral_v<T>
+[[nodiscard]] T NextPow2(T value) {
+    return static_cast<T>(std::bit_ceil(static_cast<u64>(value)));
+}
+
 } // namespace Common
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e70bbec81..ecb215a7d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -9,6 +9,7 @@
 
 #include <glad/glad.h>
 
+#include "common/bit_util.h"
 #include "common/literals.h"
 #include "common/settings.h"
 #include "video_core/renderer_opengl/gl_device.h"
@@ -397,9 +398,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
     return GL_R32UI;
 }
 
-[[nodiscard]] u32 NextPow2(u32 value) {
-    return 1U << (32U - std::countl_zero(value - 1U));
-}
 } // Anonymous namespace
 
 ImageBufferMap::~ImageBufferMap() {
@@ -1308,7 +1306,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
         const u32 copy_size = region.width * region.height * region.depth * img_bpp;
         if (pbo_size < copy_size) {
             intermediate_pbo.Create();
-            pbo_size = NextPow2(copy_size);
+            pbo_size = Common::NextPow2(copy_size);
             glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY);
         }
         // Copy from source to PBO
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 02215cfc2..f194110e5 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/bit_cast.h"
+#include "common/bit_util.h"
 #include "common/settings.h"
 
 #include "video_core/engines/fermi_2d.h"
@@ -775,16 +776,12 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
     return false;
 }
 
-[[nodiscard]] size_t NextPow2(size_t value) {
-    return static_cast<size_t>(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U)));
-}
-
 VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
     const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
     if (buffer_commits[level]) {
         return *buffers[level];
     }
-    const auto new_size = NextPow2(needed_size);
+    const auto new_size = Common::NextPow2(needed_size);
     VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
                                VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;