Merge pull request #761 from bunnei/improve-raster-cache
Improvements to rasterizer cache
This commit is contained in:
commit
4cd5df95d6
@ -487,7 +487,12 @@ public:
|
||||
};
|
||||
} rt_control;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2B);
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
u32 zeta_width;
|
||||
u32 zeta_height;
|
||||
|
||||
INSERT_PADDING_WORDS(0x27);
|
||||
|
||||
u32 depth_test_enable;
|
||||
|
||||
@ -540,7 +545,11 @@ public:
|
||||
|
||||
u32 vb_element_base;
|
||||
|
||||
INSERT_PADDING_WORDS(0x49);
|
||||
INSERT_PADDING_WORDS(0x40);
|
||||
|
||||
u32 zeta_enable;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
struct {
|
||||
u32 tsc_address_high;
|
||||
@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);
|
||||
ASSERT_REG_POSITION(zeta, 0x3F8);
|
||||
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
|
||||
ASSERT_REG_POSITION(rt_control, 0x487);
|
||||
ASSERT_REG_POSITION(zeta_width, 0x48a);
|
||||
ASSERT_REG_POSITION(zeta_height, 0x48b);
|
||||
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
|
||||
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
|
||||
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
|
||||
@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);
|
||||
ASSERT_REG_POSITION(stencil, 0x4E0);
|
||||
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
|
||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
||||
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
||||
ASSERT_REG_POSITION(tsc, 0x557);
|
||||
ASSERT_REG_POSITION(tic, 0x55D);
|
||||
ASSERT_REG_POSITION(stencil_two_side, 0x565);
|
||||
|
@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {
|
||||
}
|
||||
if (regs.clear_buffers.Z) {
|
||||
clear_mask |= GL_DEPTH_BUFFER_BIT;
|
||||
use_depth_fb = true;
|
||||
use_depth_fb = regs.zeta_enable != 0;
|
||||
|
||||
// Always enable the depth write when clearing the depth buffer. The depth write mask is
|
||||
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
|
||||
@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {
|
||||
glClear(clear_mask);
|
||||
|
||||
// Mark framebuffer surfaces as dirty
|
||||
if (Settings::values.use_accurate_framebuffers) {
|
||||
if (dirty_color_surface != nullptr) {
|
||||
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
|
||||
res_cache.FlushSurface(dirty_color_surface);
|
||||
}
|
||||
if (dirty_depth_surface != nullptr) {
|
||||
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
|
||||
res_cache.FlushSurface(dirty_depth_surface);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
ScopeAcquireGLContext acquire_context;
|
||||
|
||||
auto [dirty_color_surface, dirty_depth_surface] =
|
||||
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
|
||||
ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);
|
||||
|
||||
SyncDepthTestState();
|
||||
SyncBlendState();
|
||||
@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
state.Apply();
|
||||
|
||||
// Mark framebuffer surfaces as dirty
|
||||
if (Settings::values.use_accurate_framebuffers) {
|
||||
if (dirty_color_surface != nullptr) {
|
||||
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
|
||||
res_cache.FlushSurface(dirty_color_surface);
|
||||
}
|
||||
if (dirty_depth_surface != nullptr) {
|
||||
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
|
||||
res_cache.FlushSurface(dirty_depth_surface);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,8 +65,8 @@ struct FormatTuple {
|
||||
return params;
|
||||
}
|
||||
|
||||
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
|
||||
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
|
||||
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
|
||||
Tegra::GPUVAddr zeta_address,
|
||||
Tegra::DepthFormat format) {
|
||||
|
||||
SurfaceParams params{};
|
||||
@ -77,9 +77,9 @@ struct FormatTuple {
|
||||
params.component_type = ComponentTypeFromDepthFormat(format);
|
||||
params.type = GetFormatType(params.pixel_format);
|
||||
params.size_in_bytes = params.SizeInBytes();
|
||||
params.width = config.width;
|
||||
params.height = config.height;
|
||||
params.unaligned_height = config.height;
|
||||
params.width = zeta_width;
|
||||
params.height = zeta_height;
|
||||
params.unaligned_height = zeta_height;
|
||||
params.size_in_bytes = params.SizeInBytes();
|
||||
return params;
|
||||
}
|
||||
@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
|
||||
cur_state.Apply();
|
||||
}
|
||||
|
||||
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
|
||||
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
|
||||
GLuint read_fb_handle, GLuint draw_fb_handle) {
|
||||
OpenGLState prev_state{OpenGLState::GetCurState()};
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
OpenGLState state;
|
||||
state.draw.read_framebuffer = read_fb_handle;
|
||||
state.draw.draw_framebuffer = draw_fb_handle;
|
||||
state.Apply();
|
||||
|
||||
u32 buffers{};
|
||||
|
||||
if (type == SurfaceType::ColorTexture) {
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
|
||||
0);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
|
||||
0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
buffers = GL_COLOR_BUFFER_BIT;
|
||||
} else if (type == SurfaceType::Depth) {
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
buffers = GL_DEPTH_BUFFER_BIT;
|
||||
} else if (type == SurfaceType::DepthStencil) {
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
||||
src_tex, 0);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
||||
dst_tex, 0);
|
||||
|
||||
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
}
|
||||
|
||||
glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
|
||||
dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
|
||||
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
|
||||
texture.Create();
|
||||
const auto& rect{params.GetRect()};
|
||||
@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
|
||||
}
|
||||
|
||||
if (using_depth_fb) {
|
||||
depth_params =
|
||||
SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
|
||||
depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
|
||||
regs.zeta.Address(), regs.zeta.format);
|
||||
}
|
||||
|
||||
MathUtil::Rectangle<u32> color_rect{};
|
||||
@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
|
||||
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
|
||||
if (Settings::values.use_accurate_framebuffers) {
|
||||
// If enabled, always flush dirty surfaces
|
||||
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
|
||||
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||
surface->FlushGLBuffer();
|
||||
} else {
|
||||
// Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
|
||||
// and flushes are very slow and do not seem to improve accuracy
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
|
||||
}
|
||||
}
|
||||
|
||||
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
|
||||
@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
|
||||
if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
|
||||
return {};
|
||||
|
||||
// Check for an exact match in existing surfaces
|
||||
const auto& surface_key{SurfaceKey::Create(params)};
|
||||
const auto& search{surface_cache.find(surface_key)};
|
||||
// Look up surface in the cache based on address
|
||||
const auto& search{surface_cache.find(params.addr)};
|
||||
Surface surface;
|
||||
if (search != surface_cache.end()) {
|
||||
surface = search->second;
|
||||
if (Settings::values.use_accurate_framebuffers) {
|
||||
// Reload the surface from Switch memory
|
||||
LoadSurface(surface);
|
||||
}
|
||||
// If use_accurate_framebuffers is enabled, always load from memory
|
||||
FlushSurface(surface);
|
||||
UnregisterSurface(surface);
|
||||
} else if (surface->GetSurfaceParams() != params) {
|
||||
// If surface parameters changed, recreate the surface from the old one
|
||||
return RecreateSurface(surface, params);
|
||||
} else {
|
||||
// Use the cached surface as-is
|
||||
return surface;
|
||||
}
|
||||
}
|
||||
|
||||
// No surface found - create a new one
|
||||
surface = std::make_shared<CachedSurface>(params);
|
||||
RegisterSurface(surface);
|
||||
LoadSurface(surface);
|
||||
}
|
||||
|
||||
return surface;
|
||||
}
|
||||
|
||||
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
|
||||
const SurfaceParams& new_params) {
|
||||
// Verify surface is compatible for blitting
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
ASSERT(params.type == new_params.type);
|
||||
ASSERT(params.pixel_format == new_params.pixel_format);
|
||||
ASSERT(params.component_type == new_params.component_type);
|
||||
|
||||
// Create a new surface with the new parameters, and blit the previous surface to it
|
||||
Surface new_surface{std::make_shared<CachedSurface>(new_params)};
|
||||
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
|
||||
new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle,
|
||||
draw_framebuffer.handle);
|
||||
|
||||
// Update cache accordingly
|
||||
UnregisterSurface(surface);
|
||||
RegisterSurface(new_surface);
|
||||
|
||||
return new_surface;
|
||||
}
|
||||
|
||||
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
|
||||
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
|
||||
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
|
||||
@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)
|
||||
|
||||
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
const auto& surface_key{SurfaceKey::Create(params)};
|
||||
const auto& search{surface_cache.find(surface_key)};
|
||||
const auto& search{surface_cache.find(params.addr)};
|
||||
|
||||
if (search != surface_cache.end()) {
|
||||
// Registered already
|
||||
return;
|
||||
}
|
||||
|
||||
surface_cache[surface_key] = surface;
|
||||
surface_cache[params.addr] = surface;
|
||||
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
const auto& surface_key{SurfaceKey::Create(params)};
|
||||
const auto& search{surface_cache.find(surface_key)};
|
||||
const auto& search{surface_cache.find(params.addr)};
|
||||
|
||||
if (search == surface_cache.end()) {
|
||||
// Unregistered already
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <vector>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
@ -137,6 +136,7 @@ struct SurfaceParams {
|
||||
ASSERT(static_cast<size_t>(format) < bpp_table.size());
|
||||
return bpp_table[static_cast<size_t>(format)];
|
||||
}
|
||||
|
||||
u32 GetFormatBpp() const {
|
||||
return GetFormatBpp(pixel_format);
|
||||
}
|
||||
@ -365,9 +365,21 @@ struct SurfaceParams {
|
||||
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
|
||||
|
||||
/// Creates SurfaceParams for a depth buffer configuration
|
||||
static SurfaceParams CreateForDepthBuffer(
|
||||
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
|
||||
Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
|
||||
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
|
||||
Tegra::GPUVAddr zeta_address,
|
||||
Tegra::DepthFormat format);
|
||||
|
||||
bool operator==(const SurfaceParams& other) const {
|
||||
return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width,
|
||||
height, unaligned_height, size_in_bytes) ==
|
||||
std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format,
|
||||
other.component_type, other.type, other.width, other.height,
|
||||
other.unaligned_height, other.size_in_bytes);
|
||||
}
|
||||
|
||||
bool operator!=(const SurfaceParams& other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
Tegra::GPUVAddr addr;
|
||||
bool is_tiled;
|
||||
@ -381,24 +393,6 @@ struct SurfaceParams {
|
||||
size_t size_in_bytes;
|
||||
};
|
||||
|
||||
/// Hashable variation of SurfaceParams, used for a key in the surface cache
|
||||
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
|
||||
static SurfaceKey Create(const SurfaceParams& params) {
|
||||
SurfaceKey res;
|
||||
res.state = params;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<SurfaceKey> {
|
||||
size_t operator()(const SurfaceKey& k) const {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
class CachedSurface final {
|
||||
public:
|
||||
CachedSurface(const SurfaceParams& params);
|
||||
@ -444,8 +438,8 @@ public:
|
||||
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
|
||||
const MathUtil::Rectangle<s32>& viewport);
|
||||
|
||||
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
|
||||
void MarkSurfaceAsDirty(const Surface& surface);
|
||||
/// Flushes the surface to Switch memory
|
||||
void FlushSurface(const Surface& surface);
|
||||
|
||||
/// Tries to find a framebuffer GPU address based on the provided CPU address
|
||||
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
|
||||
@ -460,6 +454,9 @@ private:
|
||||
void LoadSurface(const Surface& surface);
|
||||
Surface GetSurface(const SurfaceParams& params);
|
||||
|
||||
/// Recreates a surface with new parameters
|
||||
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
|
||||
|
||||
/// Register surface into the cache
|
||||
void RegisterSurface(const Surface& surface);
|
||||
|
||||
@ -469,7 +466,7 @@ private:
|
||||
/// Increase/decrease the number of surface in pages touching the specified region
|
||||
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
|
||||
|
||||
std::unordered_map<SurfaceKey, Surface> surface_cache;
|
||||
std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
|
||||
PageMap cached_pages;
|
||||
|
||||
OGLFramebuffer read_framebuffer;
|
||||
|
Loading…
Reference in New Issue
Block a user