Merge pull request #5741 from ReinUsesLisp/new-bufcache
video_core: Reimplement the buffer cache
This commit is contained in:
commit
d3c7a7e7cf
@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
|
||||
}
|
||||
|
||||
TEST_CASE("BufferBase: Cached write") {
|
||||
RasterizerInterface rasterizer;
|
||||
BufferBase buffer(rasterizer, c, WORD);
|
||||
buffer.UnmarkRegionAsCpuModified(c, WORD);
|
||||
buffer.CachedCpuWrite(c + PAGE, PAGE);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.MarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("BufferBase: Multiple cached write") {
|
||||
RasterizerInterface rasterizer;
|
||||
BufferBase buffer(rasterizer, c, WORD);
|
||||
buffer.UnmarkRegionAsCpuModified(c, WORD);
|
||||
buffer.CachedCpuWrite(c + PAGE, PAGE);
|
||||
buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
|
||||
buffer.MarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("BufferBase: Cached write unmarked") {
|
||||
RasterizerInterface rasterizer;
|
||||
BufferBase buffer(rasterizer, c, WORD);
|
||||
buffer.UnmarkRegionAsCpuModified(c, WORD);
|
||||
buffer.CachedCpuWrite(c + PAGE, PAGE);
|
||||
buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.MarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("BufferBase: Cached write iterated") {
|
||||
RasterizerInterface rasterizer;
|
||||
BufferBase buffer(rasterizer, c, WORD);
|
||||
buffer.UnmarkRegionAsCpuModified(c, WORD);
|
||||
buffer.CachedCpuWrite(c + PAGE, PAGE);
|
||||
int num = 0;
|
||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
REQUIRE(num == 0);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
buffer.MarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("BufferBase: Cached write downloads") {
|
||||
RasterizerInterface rasterizer;
|
||||
BufferBase buffer(rasterizer, c, WORD);
|
||||
buffer.UnmarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 64);
|
||||
buffer.CachedCpuWrite(c + PAGE, PAGE);
|
||||
REQUIRE(rasterizer.Count() == 63);
|
||||
buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
|
||||
int num = 0;
|
||||
buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
REQUIRE(num == 0);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||
buffer.MarkRegionAsCpuModified(c, WORD);
|
||||
REQUIRE(rasterizer.Count() == 0);
|
||||
}
|
||||
|
@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
|
||||
|
||||
add_library(video_core STATIC
|
||||
buffer_cache/buffer_base.h
|
||||
buffer_cache/buffer_block.h
|
||||
buffer_cache/buffer_cache.cpp
|
||||
buffer_cache/buffer_cache.h
|
||||
buffer_cache/map_interval.cpp
|
||||
buffer_cache/map_interval.h
|
||||
cdma_pusher.cpp
|
||||
cdma_pusher.h
|
||||
command_classes/codecs/codec.cpp
|
||||
@ -152,8 +150,6 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_staging_buffer_pool.h
|
||||
renderer_vulkan/vk_state_tracker.cpp
|
||||
renderer_vulkan/vk_state_tracker.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h
|
||||
renderer_vulkan/vk_swapchain.cpp
|
||||
renderer_vulkan/vk_swapchain.h
|
||||
renderer_vulkan/vk_texture_cache.cpp
|
||||
|
@ -19,6 +19,7 @@ namespace VideoCommon {
|
||||
|
||||
enum class BufferFlagBits {
|
||||
Picked = 1 << 0,
|
||||
CachedWrites = 1 << 1,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
|
||||
|
||||
@ -40,7 +41,7 @@ class BufferBase {
|
||||
static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
||||
|
||||
/// Vector tracking modified pages tightly packed with small vector optimization
|
||||
union WrittenWords {
|
||||
union WordsArray {
|
||||
/// Returns the pointer to the words state
|
||||
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
|
||||
return is_short ? &stack : heap;
|
||||
@ -55,49 +56,59 @@ class BufferBase {
|
||||
u64* heap; ///< Not-small buffers pointer to the storage
|
||||
};
|
||||
|
||||
struct GpuCpuWords {
|
||||
explicit GpuCpuWords() = default;
|
||||
explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
|
||||
struct Words {
|
||||
explicit Words() = default;
|
||||
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
|
||||
if (IsShort()) {
|
||||
cpu.stack = ~u64{0};
|
||||
gpu.stack = 0;
|
||||
cached_cpu.stack = 0;
|
||||
untracked.stack = ~u64{0};
|
||||
} else {
|
||||
// Share allocation between CPU and GPU pages and set their default values
|
||||
const size_t num_words = NumWords();
|
||||
u64* const alloc = new u64[num_words * 2];
|
||||
u64* const alloc = new u64[num_words * 4];
|
||||
cpu.heap = alloc;
|
||||
gpu.heap = alloc + num_words;
|
||||
cached_cpu.heap = alloc + num_words * 2;
|
||||
untracked.heap = alloc + num_words * 3;
|
||||
std::fill_n(cpu.heap, num_words, ~u64{0});
|
||||
std::fill_n(gpu.heap, num_words, 0);
|
||||
std::fill_n(cached_cpu.heap, num_words, 0);
|
||||
std::fill_n(untracked.heap, num_words, ~u64{0});
|
||||
}
|
||||
// Clean up tailing bits
|
||||
const u64 last_local_page =
|
||||
Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
|
||||
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
|
||||
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
|
||||
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
|
||||
u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1];
|
||||
last_word = (last_word << shift) >> shift;
|
||||
const u64 last_word = (~u64{0} << shift) >> shift;
|
||||
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
|
||||
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
|
||||
}
|
||||
|
||||
~GpuCpuWords() {
|
||||
~Words() {
|
||||
Release();
|
||||
}
|
||||
|
||||
GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
|
||||
Words& operator=(Words&& rhs) noexcept {
|
||||
Release();
|
||||
size_bytes = rhs.size_bytes;
|
||||
cpu = rhs.cpu;
|
||||
gpu = rhs.gpu;
|
||||
cached_cpu = rhs.cached_cpu;
|
||||
untracked = rhs.untracked;
|
||||
rhs.cpu.heap = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
GpuCpuWords(GpuCpuWords&& rhs) noexcept
|
||||
: size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
|
||||
Words(Words&& rhs) noexcept
|
||||
: size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
|
||||
cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
|
||||
rhs.cpu.heap = nullptr;
|
||||
}
|
||||
|
||||
GpuCpuWords& operator=(const GpuCpuWords&) = delete;
|
||||
GpuCpuWords(const GpuCpuWords&) = delete;
|
||||
Words& operator=(const Words&) = delete;
|
||||
Words(const Words&) = delete;
|
||||
|
||||
/// Returns true when the buffer fits in the small vector optimization
|
||||
[[nodiscard]] bool IsShort() const noexcept {
|
||||
@ -118,8 +129,17 @@ class BufferBase {
|
||||
}
|
||||
|
||||
u64 size_bytes = 0;
|
||||
WrittenWords cpu;
|
||||
WrittenWords gpu;
|
||||
WordsArray cpu;
|
||||
WordsArray gpu;
|
||||
WordsArray cached_cpu;
|
||||
WordsArray untracked;
|
||||
};
|
||||
|
||||
enum class Type {
|
||||
CPU,
|
||||
GPU,
|
||||
CachedCPU,
|
||||
Untracked,
|
||||
};
|
||||
|
||||
public:
|
||||
@ -132,68 +152,93 @@ public:
|
||||
BufferBase& operator=(const BufferBase&) = delete;
|
||||
BufferBase(const BufferBase&) = delete;
|
||||
|
||||
BufferBase& operator=(BufferBase&&) = default;
|
||||
BufferBase(BufferBase&&) = default;
|
||||
|
||||
/// Returns the inclusive CPU modified range in a begin end pair
|
||||
[[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
|
||||
u64 query_size) const noexcept {
|
||||
const u64 offset = query_cpu_addr - cpu_addr;
|
||||
return ModifiedRegion<false>(offset, query_size);
|
||||
return ModifiedRegion<Type::CPU>(offset, query_size);
|
||||
}
|
||||
|
||||
/// Returns the inclusive GPU modified range in a begin end pair
|
||||
[[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
|
||||
u64 query_size) const noexcept {
|
||||
const u64 offset = query_cpu_addr - cpu_addr;
|
||||
return ModifiedRegion<true>(offset, query_size);
|
||||
return ModifiedRegion<Type::GPU>(offset, query_size);
|
||||
}
|
||||
|
||||
/// Returns true if a region has been modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
|
||||
const u64 offset = query_cpu_addr - cpu_addr;
|
||||
return IsRegionModified<false>(offset, query_size);
|
||||
return IsRegionModified<Type::CPU>(offset, query_size);
|
||||
}
|
||||
|
||||
/// Returns true if a region has been modified from the GPU
|
||||
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
|
||||
const u64 offset = query_cpu_addr - cpu_addr;
|
||||
return IsRegionModified<true>(offset, query_size);
|
||||
return IsRegionModified<Type::GPU>(offset, query_size);
|
||||
}
|
||||
|
||||
/// Mark region as CPU modified, notifying the rasterizer about this change
|
||||
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
|
||||
ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
|
||||
ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
|
||||
}
|
||||
|
||||
/// Unmark region as CPU modified, notifying the rasterizer about this change
|
||||
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
|
||||
ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
|
||||
ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
|
||||
}
|
||||
|
||||
/// Mark region as modified from the host GPU
|
||||
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
|
||||
ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
|
||||
ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
|
||||
}
|
||||
|
||||
/// Unmark region as modified from the host GPU
|
||||
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
|
||||
ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
|
||||
ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
|
||||
}
|
||||
|
||||
/// Mark region as modified from the CPU
|
||||
/// but don't mark it as modified until FlusHCachedWrites is called.
|
||||
void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
|
||||
flags |= BufferFlagBits::CachedWrites;
|
||||
ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
|
||||
}
|
||||
|
||||
/// Flushes cached CPU writes, and notify the rasterizer about the deltas
|
||||
void FlushCachedWrites() noexcept {
|
||||
flags &= ~BufferFlagBits::CachedWrites;
|
||||
const u64 num_words = NumWords();
|
||||
const u64* const cached_words = Array<Type::CachedCPU>();
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const cpu_words = Array<Type::CPU>();
|
||||
for (u64 word_index = 0; word_index < num_words; ++word_index) {
|
||||
const u64 cached_bits = cached_words[word_index];
|
||||
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
|
||||
untracked_words[word_index] |= cached_bits;
|
||||
cpu_words[word_index] |= cached_bits;
|
||||
}
|
||||
}
|
||||
|
||||
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
||||
template <typename Func>
|
||||
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
|
||||
ForEachModifiedRange<false, true>(query_cpu_range, size, func);
|
||||
ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
|
||||
}
|
||||
|
||||
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
||||
template <typename Func>
|
||||
void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
|
||||
ForEachModifiedRange<true, false>(query_cpu_range, size, func);
|
||||
ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
|
||||
}
|
||||
|
||||
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
||||
template <typename Func>
|
||||
void ForEachDownloadRange(Func&& func) {
|
||||
ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
|
||||
ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
|
||||
}
|
||||
|
||||
/// Mark buffer as picked
|
||||
@ -206,6 +251,16 @@ public:
|
||||
flags &= ~BufferFlagBits::Picked;
|
||||
}
|
||||
|
||||
/// Increases the likeliness of this being a stream buffer
|
||||
void IncreaseStreamScore(int score) noexcept {
|
||||
stream_score += score;
|
||||
}
|
||||
|
||||
/// Returns the likeliness of this being a stream buffer
|
||||
[[nodiscard]] int StreamScore() const noexcept {
|
||||
return stream_score;
|
||||
}
|
||||
|
||||
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
||||
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
|
||||
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
|
||||
@ -216,6 +271,11 @@ public:
|
||||
return True(flags & BufferFlagBits::Picked);
|
||||
}
|
||||
|
||||
/// Returns true when the buffer has pending cached writes
|
||||
[[nodiscard]] bool HasCachedWrites() const noexcept {
|
||||
return True(flags & BufferFlagBits::CachedWrites);
|
||||
}
|
||||
|
||||
/// Returns the base CPU address of the buffer
|
||||
[[nodiscard]] VAddr CpuAddr() const noexcept {
|
||||
return cpu_addr;
|
||||
@ -233,26 +293,48 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
template <Type type>
|
||||
u64* Array() noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return words.cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return words.gpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::CachedCPU) {
|
||||
return words.cached_cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return words.untracked.Pointer(IsShort());
|
||||
}
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
const u64* Array() const noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return words.cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return words.gpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::CachedCPU) {
|
||||
return words.cached_cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return words.untracked.Pointer(IsShort());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the state of a range of pages
|
||||
*
|
||||
* @param written_words Pages to be marked or unmarked as modified
|
||||
* @param dirty_addr Base address to mark or unmark as modified
|
||||
* @param size Size in bytes to mark or unmark as modified
|
||||
*
|
||||
* @tparam enable True when the bits will be set to one, false for zero
|
||||
* @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
|
||||
*/
|
||||
template <bool enable, bool notify_rasterizer>
|
||||
void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
|
||||
s64 size) noexcept(!notify_rasterizer) {
|
||||
template <Type type, bool enable>
|
||||
void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
|
||||
const s64 difference = dirty_addr - cpu_addr;
|
||||
const u64 offset = std::max<s64>(difference, 0);
|
||||
size += std::min<s64>(difference, 0);
|
||||
if (offset >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
u64* const state_words = written_words.Pointer(IsShort());
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 offset_end = std::min(offset + size, SizeBytes());
|
||||
const u64 begin_page_index = offset / BYTES_PER_PAGE;
|
||||
const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
|
||||
@ -268,13 +350,19 @@ private:
|
||||
u64 bits = ~u64{0};
|
||||
bits = (bits >> right_offset) << right_offset;
|
||||
bits = (bits << left_offset) >> left_offset;
|
||||
if constexpr (notify_rasterizer) {
|
||||
NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
|
||||
}
|
||||
if constexpr (enable) {
|
||||
state_words[word_index] |= bits;
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
untracked_words[word_index] |= bits;
|
||||
}
|
||||
} else {
|
||||
state_words[word_index] &= ~bits;
|
||||
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
|
||||
untracked_words[word_index] &= ~bits;
|
||||
}
|
||||
}
|
||||
page_index = 0;
|
||||
++word_index;
|
||||
@ -291,7 +379,7 @@ private:
|
||||
* @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
|
||||
*/
|
||||
template <bool add_to_rasterizer>
|
||||
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
|
||||
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||
u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
|
||||
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
||||
while (changed_bits != 0) {
|
||||
@ -315,21 +403,20 @@ private:
|
||||
* @param query_cpu_range Base CPU address to loop over
|
||||
* @param size Size in bytes of the CPU range to loop over
|
||||
* @param func Function to call for each turned off region
|
||||
*
|
||||
* @tparam gpu True for host GPU pages, false for CPU pages
|
||||
* @tparam notify_rasterizer True when the rasterizer should be notified about state changes
|
||||
*/
|
||||
template <bool gpu, bool notify_rasterizer, typename Func>
|
||||
template <Type type, typename Func>
|
||||
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const s64 difference = query_cpu_range - cpu_addr;
|
||||
const u64 query_begin = std::max<s64>(difference, 0);
|
||||
size += std::min<s64>(difference, 0);
|
||||
if (query_begin >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
const u64* const cpu_words = words.cpu.Pointer(IsShort());
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||
u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
|
||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||
u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
|
||||
|
||||
@ -345,7 +432,8 @@ private:
|
||||
const u64 word_index_end = std::distance(state_words, last_modified_word);
|
||||
|
||||
const unsigned local_page_begin = std::countr_zero(*first_modified_word);
|
||||
const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
|
||||
const unsigned local_page_end =
|
||||
static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
|
||||
const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
|
||||
const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
|
||||
const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
|
||||
@ -371,11 +459,13 @@ private:
|
||||
const u64 current_word = state_words[word_index] & bits;
|
||||
state_words[word_index] &= ~bits;
|
||||
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
|
||||
if constexpr (notify_rasterizer) {
|
||||
NotifyRasterizer<true>(word_index, word, ~u64{0});
|
||||
if constexpr (type == Type::CPU) {
|
||||
const u64 current_bits = untracked_words[word_index] & bits;
|
||||
untracked_words[word_index] &= ~bits;
|
||||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||
}
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
||||
u64 page = page_begin;
|
||||
page_begin = 0;
|
||||
|
||||
@ -416,17 +506,20 @@ private:
|
||||
* @param offset Offset in bytes from the start of the buffer
|
||||
* @param size Size in bytes of the region to query for modifications
|
||||
*/
|
||||
template <bool gpu>
|
||||
template <Type type>
|
||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
const u64* const cpu_words = words.cpu.Pointer(IsShort());
|
||||
const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
|
||||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||
const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
@ -445,13 +538,13 @@ private:
|
||||
*
|
||||
* @param offset Offset in bytes from the start of the buffer
|
||||
* @param size Size in bytes of the region to query for modifications
|
||||
*
|
||||
* @tparam gpu True to query GPU modified pages, false for CPU pages
|
||||
*/
|
||||
template <bool gpu>
|
||||
template <Type type>
|
||||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||
const u64* const cpu_words = words.cpu.Pointer(IsShort());
|
||||
const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
|
||||
@ -460,7 +553,8 @@ private:
|
||||
u64 begin = std::numeric_limits<u64>::max();
|
||||
u64 end = 0;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||
const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
@ -488,8 +582,9 @@ private:
|
||||
|
||||
RasterizerInterface* rasterizer = nullptr;
|
||||
VAddr cpu_addr = 0;
|
||||
GpuCpuWords words;
|
||||
Words words;
|
||||
BufferFlagBits flags{};
|
||||
int stream_score = 0;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@ -1,62 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class BufferBlock {
|
||||
public:
|
||||
[[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
|
||||
return (cpu_addr < end) && (cpu_addr_end > start);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
|
||||
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::size_t Offset(VAddr in_addr) const {
|
||||
return static_cast<std::size_t>(in_addr - cpu_addr);
|
||||
}
|
||||
|
||||
[[nodiscard]] VAddr CpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
[[nodiscard]] VAddr CpuAddrEnd() const {
|
||||
return cpu_addr_end;
|
||||
}
|
||||
|
||||
void SetCpuAddr(VAddr new_addr) {
|
||||
cpu_addr = new_addr;
|
||||
cpu_addr_end = new_addr + size;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::size_t Size() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
[[nodiscard]] u64 Epoch() const {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
void SetEpoch(u64 new_epoch) {
|
||||
epoch = new_epoch;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
|
||||
SetCpuAddr(cpu_addr_);
|
||||
}
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
VAddr cpu_addr_end{};
|
||||
std::size_t size{};
|
||||
u64 epoch{};
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
13
src/video_core/buffer_cache/buffer_cache.cpp
Normal file
13
src/video_core/buffer_cache/buffer_cache.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/microprofile.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
|
||||
MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
|
||||
MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
|
||||
|
||||
} // namespace VideoCommon
|
File diff suppressed because it is too large
Load Diff
@ -1,33 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/buffer_cache/map_interval.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
MapIntervalAllocator::MapIntervalAllocator() {
|
||||
FillFreeList(first_chunk);
|
||||
}
|
||||
|
||||
MapIntervalAllocator::~MapIntervalAllocator() = default;
|
||||
|
||||
void MapIntervalAllocator::AllocateNewChunk() {
|
||||
*new_chunk = std::make_unique<Chunk>();
|
||||
FillFreeList(**new_chunk);
|
||||
new_chunk = &(*new_chunk)->next;
|
||||
}
|
||||
|
||||
void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
|
||||
const std::size_t old_size = free_list.size();
|
||||
free_list.resize(old_size + chunk.data.size());
|
||||
std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
|
||||
[](MapInterval& interval) { return &interval; });
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
@ -1,93 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/intrusive/set_hook.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
|
||||
MapInterval() = default;
|
||||
|
||||
/*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
|
||||
|
||||
explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
|
||||
: start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
|
||||
|
||||
bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
|
||||
return start <= other_start && other_end <= end;
|
||||
}
|
||||
|
||||
bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
|
||||
return start < other_end && other_start < end;
|
||||
}
|
||||
|
||||
void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
|
||||
is_modified = is_modified_;
|
||||
ticks = ticks_;
|
||||
}
|
||||
|
||||
boost::intrusive::set_member_hook<> member_hook_;
|
||||
VAddr start = 0;
|
||||
VAddr end = 0;
|
||||
GPUVAddr gpu_addr = 0;
|
||||
u64 ticks = 0;
|
||||
bool is_written = false;
|
||||
bool is_modified = false;
|
||||
bool is_registered = false;
|
||||
bool is_memory_marked = false;
|
||||
bool is_sync_pending = false;
|
||||
};
|
||||
|
||||
struct MapIntervalCompare {
|
||||
constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
|
||||
return lhs.start < rhs.start;
|
||||
}
|
||||
};
|
||||
|
||||
class MapIntervalAllocator {
|
||||
public:
|
||||
MapIntervalAllocator();
|
||||
~MapIntervalAllocator();
|
||||
|
||||
MapInterval* Allocate() {
|
||||
if (free_list.empty()) {
|
||||
AllocateNewChunk();
|
||||
}
|
||||
MapInterval* const interval = free_list.back();
|
||||
free_list.pop_back();
|
||||
return interval;
|
||||
}
|
||||
|
||||
void Release(MapInterval* interval) {
|
||||
free_list.push_back(interval);
|
||||
}
|
||||
|
||||
private:
|
||||
struct Chunk {
|
||||
std::unique_ptr<Chunk> next;
|
||||
std::array<MapInterval, 0x8000> data;
|
||||
};
|
||||
|
||||
void AllocateNewChunk();
|
||||
|
||||
void FillFreeList(Chunk& chunk);
|
||||
|
||||
std::vector<MapInterval*> free_list;
|
||||
|
||||
Chunk first_chunk;
|
||||
|
||||
std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
@ -110,12 +110,10 @@ void Vic::Execute() {
|
||||
converted_frame_buffer.get(), block_height, 0, 0);
|
||||
|
||||
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
|
||||
gpu.Maxwell3D().OnMemoryWrite();
|
||||
} else {
|
||||
// send pitch linear frame
|
||||
gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
|
||||
linear_size);
|
||||
gpu.Maxwell3D().OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -163,7 +161,6 @@ void Vic::Execute() {
|
||||
}
|
||||
gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
|
||||
chroma_buffer.size());
|
||||
gpu.Maxwell3D().OnMemoryWrite();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -12,13 +12,30 @@
|
||||
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
|
||||
|
||||
namespace VideoCommon::Dirty {
|
||||
|
||||
namespace {
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
|
||||
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
|
||||
void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
|
||||
static constexpr std::size_t num_array = 3;
|
||||
for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
|
||||
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
|
||||
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
|
||||
|
||||
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
|
||||
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
|
||||
}
|
||||
}
|
||||
|
||||
void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
|
||||
FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
|
||||
}
|
||||
|
||||
void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
|
||||
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
|
||||
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
|
||||
}
|
||||
|
||||
void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
|
||||
static constexpr std::size_t num_per_rt = NUM(rt[0]);
|
||||
static constexpr std::size_t begin = OFF(rt);
|
||||
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
|
||||
@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
|
||||
FillBlock(table, OFF(zeta), NUM(zeta), flag);
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
|
||||
SetupDirtyVertexBuffers(tables);
|
||||
SetupIndexBuffer(tables);
|
||||
SetupDirtyDescriptors(tables);
|
||||
SetupDirtyRenderTargets(tables);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Dirty
|
||||
|
@ -30,6 +30,12 @@ enum : u8 {
|
||||
ColorBuffer7,
|
||||
ZetaBuffer,
|
||||
|
||||
VertexBuffers,
|
||||
VertexBuffer0,
|
||||
VertexBuffer31 = VertexBuffer0 + 31,
|
||||
|
||||
IndexBuffer,
|
||||
|
||||
LastCommonEntry,
|
||||
};
|
||||
|
||||
@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
|
||||
FillBlock(tables[1], begin, num, index_b);
|
||||
}
|
||||
|
||||
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
|
||||
void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
|
||||
|
||||
} // namespace VideoCommon::Dirty
|
||||
|
@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
|
||||
MICROPROFILE_SCOPE(DispatchCalls);
|
||||
|
||||
gpu.SyncGuestHost();
|
||||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||
gpu.Maxwell3D().OnMemoryWrite();
|
||||
|
||||
dma_pushbuffer_subindex = 0;
|
||||
|
||||
|
@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
~Fermi2D();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
||||
|
@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ public:
|
||||
~KeplerCompute();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
static constexpr std::size_t NumConstBuffers = 8;
|
||||
|
||||
|
@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
upload_state.ProcessData(method_argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||
|
||||
Maxwell3D::~Maxwell3D() = default;
|
||||
|
||||
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void Maxwell3D::InitializeRegisterDefaults() {
|
||||
@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
|
||||
case MAXWELL3D_REG_INDEX(data_upload):
|
||||
upload_state.ProcessData(argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
OnMemoryWrite();
|
||||
}
|
||||
return;
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
||||
void Maxwell3D::ProcessCBBind(size_t stage_index) {
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||
auto& shader = state.shader_stages[stage_index];
|
||||
auto& bind_data = regs.cb_bind[stage_index];
|
||||
|
||||
ASSERT(bind_data.index < Regs::MaxConstBuffers);
|
||||
auto& buffer = shader.const_buffers[bind_data.index];
|
||||
|
||||
const auto& bind_data = regs.cb_bind[stage_index];
|
||||
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
|
||||
buffer.enabled = bind_data.valid.Value() != 0;
|
||||
buffer.address = regs.const_buffer.BufferAddress();
|
||||
buffer.size = regs.const_buffer.cb_size;
|
||||
|
||||
const bool is_enabled = bind_data.valid.Value() != 0;
|
||||
const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
|
||||
const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
|
||||
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBData(u32 value) {
|
||||
@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
|
||||
|
||||
const u32 id = cb_data_state.id;
|
||||
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
|
||||
OnMemoryWrite();
|
||||
|
||||
cb_data_state.id = null_cb_data;
|
||||
cb_data_state.current = null_cb_data;
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
~Maxwell3D();
|
||||
|
||||
/// Binds a rasterizer to this engine.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
/// Register structure of the Maxwell3D engine.
|
||||
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
|
||||
@ -1314,8 +1314,7 @@ public:
|
||||
|
||||
GPUVAddr LimitAddress() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
|
||||
limit_low) +
|
||||
1;
|
||||
limit_low);
|
||||
}
|
||||
} vertex_array_limit[NumVertexArrays];
|
||||
|
||||
@ -1403,6 +1402,7 @@ public:
|
||||
};
|
||||
|
||||
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
|
||||
|
||||
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
|
||||
};
|
||||
|
||||
@ -1452,11 +1452,6 @@ public:
|
||||
return *rasterizer;
|
||||
}
|
||||
|
||||
/// Notify a memory write has happened.
|
||||
void OnMemoryWrite() {
|
||||
dirty.flags |= dirty.on_write_stores;
|
||||
}
|
||||
|
||||
enum class MMEDrawMode : u32 {
|
||||
Undefined,
|
||||
Array,
|
||||
@ -1478,7 +1473,6 @@ public:
|
||||
using Tables = std::array<Table, 2>;
|
||||
|
||||
Flags flags;
|
||||
Flags on_write_stores;
|
||||
Tables tables{};
|
||||
} dirty;
|
||||
|
||||
@ -1541,7 +1535,7 @@ private:
|
||||
void FinishCBData();
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(std::size_t stage_index);
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
|
||||
void DrawArrays();
|
||||
|
@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
|
||||
return;
|
||||
}
|
||||
|
||||
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||
system.GPU().Maxwell3D().OnMemoryWrite();
|
||||
|
||||
if (is_src_pitch && is_dst_pitch) {
|
||||
CopyPitchToPitch();
|
||||
} else {
|
||||
|
@ -143,22 +143,26 @@ private:
|
||||
}
|
||||
|
||||
bool ShouldWait() const {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
|
||||
query_cache.ShouldWaitAsyncFlushes();
|
||||
}
|
||||
|
||||
bool ShouldFlush() const {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
|
||||
query_cache.HasUncommittedFlushes();
|
||||
}
|
||||
|
||||
void PopAsyncFlushes() {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.PopAsyncFlushes();
|
||||
buffer_cache.PopAsyncFlushes();
|
||||
query_cache.PopAsyncFlushes();
|
||||
}
|
||||
|
||||
void CommitAsyncFlushes() {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.CommitAsyncFlushes();
|
||||
buffer_cache.CommitAsyncFlushes();
|
||||
query_cache.CommitAsyncFlushes();
|
||||
|
@ -44,8 +44,8 @@ GPU::~GPU() = default;
|
||||
|
||||
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
|
||||
renderer = std::move(renderer_);
|
||||
rasterizer = renderer->ReadRasterizer();
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
|
||||
memory_manager->BindRasterizer(rasterizer);
|
||||
maxwell_3d->BindRasterizer(rasterizer);
|
||||
fermi_2d->BindRasterizer(rasterizer);
|
||||
@ -171,7 +171,7 @@ void GPU::TickWork() {
|
||||
const std::size_t size = request.size;
|
||||
flush_requests.pop_front();
|
||||
flush_request_mutex.unlock();
|
||||
renderer->Rasterizer().FlushRegion(addr, size);
|
||||
rasterizer->FlushRegion(addr, size);
|
||||
current_flush_fence.store(fence);
|
||||
flush_request_mutex.lock();
|
||||
}
|
||||
@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
|
||||
}
|
||||
|
||||
void GPU::FlushCommands() {
|
||||
renderer->Rasterizer().FlushCommands();
|
||||
rasterizer->FlushCommands();
|
||||
}
|
||||
|
||||
void GPU::SyncGuestHost() {
|
||||
renderer->Rasterizer().SyncGuestHost();
|
||||
rasterizer->SyncGuestHost();
|
||||
}
|
||||
|
||||
enum class GpuSemaphoreOperation {
|
||||
|
@ -366,6 +366,7 @@ protected:
|
||||
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
|
||||
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
|
||||
std::unique_ptr<VideoCore::RendererBase> renderer;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
const bool use_nvdec;
|
||||
|
||||
private:
|
||||
|
@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
||||
}
|
||||
|
||||
auto current_context = context.Acquire();
|
||||
VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
|
||||
|
||||
CommandDataContainer next;
|
||||
while (state.is_running) {
|
||||
@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
||||
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
||||
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
|
||||
renderer.Rasterizer().ReleaseFences();
|
||||
rasterizer->ReleaseFences();
|
||||
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
|
||||
system.GPU().TickWork();
|
||||
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||
renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
|
||||
rasterizer->FlushRegion(flush->addr, flush->size);
|
||||
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||
renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
|
||||
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
||||
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
||||
return;
|
||||
} else {
|
||||
@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
|
||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||
Core::Frontend::GraphicsContext& context,
|
||||
Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
|
||||
rasterizer = renderer.ReadRasterizer();
|
||||
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
|
||||
std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
|
||||
}
|
||||
@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
|
||||
rasterizer->OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
|
||||
rasterizer->OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::WaitIdle() const {
|
||||
|
@ -27,6 +27,7 @@ class System;
|
||||
} // namespace Core
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
@ -151,11 +152,12 @@ private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
u64 PushCommand(CommandData&& command_data);
|
||||
|
||||
SynchState state;
|
||||
Core::System& system;
|
||||
std::thread thread;
|
||||
std::thread::id thread_id;
|
||||
const bool is_async;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
SynchState state;
|
||||
std::thread thread;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
|
@ -12,7 +12,6 @@ set(SHADER_FILES
|
||||
vulkan_blit_depth_stencil.frag
|
||||
vulkan_present.frag
|
||||
vulkan_present.vert
|
||||
vulkan_quad_array.comp
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_uint8.comp
|
||||
)
|
||||
|
@ -1,28 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (local_size_x = 1024) in;
|
||||
|
||||
layout (std430, set = 0, binding = 0) buffer OutputBuffer {
|
||||
uint output_indexes[];
|
||||
};
|
||||
|
||||
layout (push_constant) uniform PushConstants {
|
||||
uint first;
|
||||
};
|
||||
|
||||
void main() {
|
||||
uint primitive = gl_GlobalInvocationID.x;
|
||||
if (primitive * 6 >= output_indexes.length()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
|
||||
for (uint vertex = 0; vertex < 6; ++vertex) {
|
||||
uint index = first + primitive * 4 + quad_map[vertex];
|
||||
output_indexes[primitive * 6 + vertex] = index;
|
||||
}
|
||||
}
|
@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
|
||||
uint16_t output_indexes[];
|
||||
};
|
||||
|
||||
uint AssembleIndex(uint id) {
|
||||
// Most primitive restart indices are 0xFF
|
||||
// Hardcode this to 0xFF for now
|
||||
uint index = uint(input_indexes[id]);
|
||||
return index == 0xFF ? 0xFFFF : index;
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint id = gl_GlobalInvocationID.x;
|
||||
if (id < input_indexes.length()) {
|
||||
output_indexes[id] = uint16_t(input_indexes[id]);
|
||||
output_indexes[id] = uint16_t(AssembleIndex(id));
|
||||
}
|
||||
}
|
||||
|
@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
|
||||
|
||||
MemoryManager::~MemoryManager() = default;
|
||||
|
||||
void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
rasterizer = &rasterizer_;
|
||||
void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
~MemoryManager();
|
||||
|
||||
/// Binds a renderer to the memory manager.
|
||||
void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
|
||||
|
||||
@ -157,6 +157,8 @@ private:
|
||||
|
||||
using MapRange = std::pair<GPUVAddr, size_t>;
|
||||
std::vector<MapRange> map_ranges;
|
||||
|
||||
std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/gpu.h"
|
||||
@ -49,6 +50,10 @@ public:
|
||||
/// Records a GPU query and caches it
|
||||
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
||||
|
||||
/// Signal an uniform buffer binding
|
||||
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||
u32 size) = 0;
|
||||
|
||||
/// Signal a GPU based semaphore as a fence
|
||||
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
|
||||
|
||||
|
@ -37,15 +37,11 @@ public:
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context);
|
||||
virtual ~RendererBase();
|
||||
|
||||
/// Initialize the renderer
|
||||
[[nodiscard]] virtual bool Init() = 0;
|
||||
|
||||
/// Shutdown the renderer
|
||||
virtual void ShutDown() = 0;
|
||||
|
||||
/// Finalize rendering the guest frame and draw into the presentation texture
|
||||
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
|
||||
|
||||
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
|
||||
|
||||
// Getter/setter functions:
|
||||
// ------------------------
|
||||
|
||||
@ -57,14 +53,6 @@ public:
|
||||
return m_current_frame;
|
||||
}
|
||||
|
||||
[[nodiscard]] RasterizerInterface& Rasterizer() {
|
||||
return *rasterizer;
|
||||
}
|
||||
|
||||
[[nodiscard]] const RasterizerInterface& Rasterizer() const {
|
||||
return *rasterizer;
|
||||
}
|
||||
|
||||
[[nodiscard]] Core::Frontend::GraphicsContext& Context() {
|
||||
return *context;
|
||||
}
|
||||
@ -98,7 +86,6 @@ public:
|
||||
|
||||
protected:
|
||||
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
|
||||
std::unique_ptr<RasterizerInterface> rasterizer;
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context;
|
||||
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
|
||||
int m_current_frame = 0; ///< Current frame, should be set by the renderer
|
||||
|
@ -2,98 +2,208 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
GLsizei length;
|
||||
GLsizei padding;
|
||||
};
|
||||
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
constexpr std::array PROGRAM_LUT{
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
|
||||
: BufferBlock{cpu_addr_, size_} {
|
||||
gl_buffer.Create();
|
||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
|
||||
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
|
||||
buffer.Create();
|
||||
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
|
||||
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
if (runtime.has_unified_vertex_buffers) {
|
||||
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
|
||||
}
|
||||
}
|
||||
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
|
||||
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data_size), data);
|
||||
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
|
||||
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
|
||||
const GLintptr gl_offset = static_cast<GLintptr>(offset);
|
||||
if (read_buffer.handle == 0) {
|
||||
read_buffer.Create();
|
||||
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
|
||||
GL_STREAM_READ);
|
||||
}
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
|
||||
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
|
||||
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
|
||||
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
|
||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size) {
|
||||
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
|
||||
}
|
||||
|
||||
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, OGLStreamBuffer& stream_buffer_,
|
||||
StateTracker& state_tracker)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
|
||||
if (!device.HasFastBufferSubData()) {
|
||||
void Buffer::MakeResident(GLenum access) noexcept {
|
||||
// Abuse GLenum's order to exit early
|
||||
// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
|
||||
if (access <= current_residency_access || buffer.handle == 0) {
|
||||
return;
|
||||
}
|
||||
if (std::exchange(current_residency_access, access) != GL_NONE) {
|
||||
// If the buffer is already resident, remove its residency before promoting it
|
||||
glMakeNamedBufferNonResidentNV(buffer.handle);
|
||||
}
|
||||
glMakeNamedBufferResidentNV(buffer.handle, access);
|
||||
}
|
||||
|
||||
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
||||
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
for (const GLuint cbuf : cbufs) {
|
||||
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
||||
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||
use_assembly_shaders{device.UseAssemblyShaders()},
|
||||
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
|
||||
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
|
||||
GLint gl_max_attributes;
|
||||
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
|
||||
max_attributes = static_cast<u32>(gl_max_attributes);
|
||||
for (auto& stage_uniforms : fast_uniforms) {
|
||||
for (OGLBuffer& buffer : stage_uniforms) {
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
for (auto& stage_uniforms : copy_uniforms) {
|
||||
for (OGLBuffer& buffer : stage_uniforms) {
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
}
|
||||
for (OGLBuffer& buffer : copy_compute_uniforms) {
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
}
|
||||
|
||||
OGLBufferCache::~OGLBufferCache() {
|
||||
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(
|
||||
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
|
||||
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, cpu_addr, size);
|
||||
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
|
||||
if (has_unified_vertex_buffers) {
|
||||
buffer.MakeResident(GL_READ_ONLY);
|
||||
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
|
||||
index_buffer_offset = offset;
|
||||
}
|
||||
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
return {0, 0, 0};
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
|
||||
u32 stride) {
|
||||
if (index >= max_attributes) {
|
||||
return;
|
||||
}
|
||||
if (has_unified_vertex_buffers) {
|
||||
buffer.MakeResident(GL_READ_ONLY);
|
||||
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
|
||||
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizei>(stride));
|
||||
}
|
||||
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||
std::size_t size) {
|
||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||
u32 offset, u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
GLuint handle;
|
||||
if (offset != 0) {
|
||||
handle = copy_uniforms[stage][binding_index].handle;
|
||||
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
|
||||
} else {
|
||||
handle = buffer.Handle();
|
||||
}
|
||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||
return {cbuf, 0, 0};
|
||||
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
||||
u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
GLuint handle;
|
||||
if (offset != 0) {
|
||||
handle = copy_compute_uniforms[binding_index].handle;
|
||||
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
|
||||
} else {
|
||||
handle = buffer.Handle();
|
||||
}
|
||||
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||
u32 offset, u32 size, bool is_written) {
|
||||
if (use_assembly_shaders) {
|
||||
const BindlessSSBO ssbo{
|
||||
.address = buffer.HostGpuAddr() + offset,
|
||||
.length = static_cast<GLsizei>(size),
|
||||
.padding = 0,
|
||||
};
|
||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
||||
reinterpret_cast<const GLuint*>(&ssbo));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
|
||||
u32 size, bool is_written) {
|
||||
if (use_assembly_shaders) {
|
||||
const BindlessSSBO ssbo{
|
||||
.address = buffer.HostGpuAddr() + offset,
|
||||
.length = static_cast<GLsizei>(size),
|
||||
.padding = 0,
|
||||
};
|
||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
|
||||
reinterpret_cast<const GLuint*>(&ssbo));
|
||||
} else if (size == 0) {
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
|
||||
} else {
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
|
||||
u32 size) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -5,79 +5,157 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
class StateTracker;
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
|
||||
~Buffer();
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
|
||||
u64 size_bytes);
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
|
||||
|
||||
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
|
||||
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
|
||||
|
||||
void Download(std::size_t offset, std::size_t data_size, u8* data);
|
||||
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
|
||||
|
||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size);
|
||||
void MakeResident(GLenum access) noexcept;
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return gl_buffer.handle;
|
||||
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
||||
return address;
|
||||
}
|
||||
|
||||
u64 Address() const noexcept {
|
||||
return gpu_address;
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return buffer.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
OGLBuffer read_buffer;
|
||||
u64 gpu_address = 0;
|
||||
GLuint64EXT address = 0;
|
||||
OGLBuffer buffer;
|
||||
GLenum current_residency_access = GL_NONE;
|
||||
};
|
||||
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
public:
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, OGLStreamBuffer& stream_buffer,
|
||||
StateTracker& state_tracker);
|
||||
~OGLBufferCache();
|
||||
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||
explicit BufferCacheRuntime(const Device& device_);
|
||||
|
||||
void Acquire() noexcept {
|
||||
cbuf_cursor = 0;
|
||||
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
|
||||
bool is_written);
|
||||
|
||||
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
|
||||
bool is_written);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
const GLuint handle = fast_uniforms[stage][binding_index].handle;
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
|
||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
||||
fast_uniforms[stage][binding_index].handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
|
||||
if (use_assembly_shaders) {
|
||||
glProgramBufferParametersIuivNV(
|
||||
PABO_LUT[stage], binding_index, 0,
|
||||
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
|
||||
reinterpret_cast<const GLuint*>(data.data()));
|
||||
} else {
|
||||
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
}
|
||||
|
||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
|
||||
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
return mapped_span;
|
||||
}
|
||||
|
||||
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
|
||||
return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
|
||||
std::size_t cbuf_cursor = 0;
|
||||
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||
bool has_fast_buffer_sub_data = false;
|
||||
bool use_assembly_shaders = false;
|
||||
bool has_unified_vertex_buffers = false;
|
||||
|
||||
u32 max_attributes = 0;
|
||||
|
||||
std::optional<StreamBuffer> stream_buffer;
|
||||
|
||||
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||
VideoCommon::NUM_STAGES>
|
||||
fast_uniforms;
|
||||
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||
VideoCommon::NUM_STAGES>
|
||||
copy_uniforms;
|
||||
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
|
||||
|
||||
u32 index_buffer_offset = 0;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
using Runtime = OpenGL::BufferCacheRuntime;
|
||||
using Buffer = OpenGL::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = true;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
||||
static constexpr bool USE_MEMORY_MAPS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -21,9 +21,7 @@
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
@ -197,11 +195,13 @@ bool IsASTCSupported() {
|
||||
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device()
|
||||
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||
Device::Device() {
|
||||
if (!GLAD_GL_VERSION_4_6) {
|
||||
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
|
||||
throw std::runtime_error{"Insufficient version"};
|
||||
}
|
||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||
const std::vector extensions = GetExtensions();
|
||||
@ -217,6 +217,9 @@ Device::Device()
|
||||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
max_uniform_buffers = BuildMaxUniformBuffers();
|
||||
base_bindings = BuildBaseBindings();
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
|
@ -10,11 +10,9 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
static constexpr u32 EmulationUniformBlockBinding = 0;
|
||||
|
||||
class Device final {
|
||||
class Device {
|
||||
public:
|
||||
struct BaseBindings final {
|
||||
struct BaseBindings {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
|
@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
|
||||
|
||||
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::GPU& gpu_, TextureCache& texture_cache_,
|
||||
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
BufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
|
||||
|
@ -32,14 +32,13 @@ private:
|
||||
};
|
||||
|
||||
using Fence = std::shared_ptr<GLInnerFence>;
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
|
||||
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
|
||||
|
||||
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_);
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
QueryCache& query_cache);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
|
@ -44,28 +44,14 @@ using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||
constexpr size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||
|
||||
struct TextureHandle {
|
||||
constexpr TextureHandle(u32 data, bool via_header_index) {
|
||||
@ -101,20 +87,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry) {
|
||||
if (!entry.IsIndirect()) {
|
||||
return entry.GetSize();
|
||||
}
|
||||
if (buffer.size > Maxwell::MaxConstBufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
|
||||
Maxwell::MaxConstBufferSize);
|
||||
return Maxwell::MaxConstBufferSize;
|
||||
}
|
||||
|
||||
return buffer.size;
|
||||
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
@ -147,14 +119,6 @@ void oglEnable(GLenum cap, bool state) {
|
||||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
|
||||
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
|
||||
if (num_ssbos == 0) {
|
||||
return;
|
||||
}
|
||||
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
|
||||
reinterpret_cast<const GLuint*>(ssbos));
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::Buffer;
|
||||
@ -201,44 +165,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
|
||||
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
stream_buffer(device, state_tracker),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
query_cache(*this, maxwell3d, gpu_memory),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
async_shaders(emu_window_) {
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
for (const GLuint cbuf : staging_cbufs) {
|
||||
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
|
||||
nullptr, 0);
|
||||
}
|
||||
}
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
async_shaders.AllocateWorkers();
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() {
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
}
|
||||
}
|
||||
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
void RasterizerOpenGL::SetupVertexFormat() {
|
||||
void RasterizerOpenGL::SyncVertexFormats() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexFormats]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::VertexFormats] = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
|
||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
|
||||
// the first 16 vertex attributes always, as we don't know which ones are actually used until
|
||||
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
|
||||
@ -274,55 +222,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexBuffer() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexBuffers]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::VertexBuffers] = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
|
||||
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
const auto& regs = maxwell3d.regs;
|
||||
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
|
||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||
continue;
|
||||
}
|
||||
flags[Dirty::VertexBuffer0 + index] = false;
|
||||
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
if (!vertex_array.IsEnabled()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const GPUVAddr start = vertex_array.StartAddress();
|
||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||
ASSERT(end >= start);
|
||||
|
||||
const GLuint gl_index = static_cast<GLuint>(index);
|
||||
const u64 size = end - start;
|
||||
if (size == 0) {
|
||||
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||
if (use_unified_memory) {
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const auto info = buffer_cache.UploadMemory(start, size);
|
||||
if (use_unified_memory) {
|
||||
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
|
||||
info.address + info.offset, size);
|
||||
} else {
|
||||
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexInstances() {
|
||||
void RasterizerOpenGL::SyncVertexInstances() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexInstances]) {
|
||||
return;
|
||||
@ -343,17 +243,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
|
||||
}
|
||||
}
|
||||
|
||||
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const std::size_t size = CalculateIndexBufferSize();
|
||||
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
|
||||
return info.offset;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
|
||||
u32 clip_distances = 0;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
|
||||
@ -410,11 +300,19 @@ void RasterizerOpenGL::SetupShaders() {
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
|
||||
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
|
||||
buffer.cbuf_offset, buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||
@ -430,43 +328,26 @@ void RasterizerOpenGL::SetupShaders() {
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (shader) {
|
||||
const auto base = device.GetBaseBindings(stage);
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
buffer_cache.BindHostStageBuffers(stage);
|
||||
const auto& base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
std::size_t size = 0;
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (!regs.vertex_array[index].IsEnabled())
|
||||
continue;
|
||||
|
||||
const GPUVAddr start = regs.vertex_array[index].StartAddress();
|
||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||
|
||||
size += end - start;
|
||||
ASSERT(end >= start);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
|
||||
return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
|
||||
static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
|
||||
@ -475,6 +356,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Clears);
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
return;
|
||||
}
|
||||
@ -525,11 +407,9 @@ void RasterizerOpenGL::Clear() {
|
||||
}
|
||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
}
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
@ -541,7 +421,6 @@ void RasterizerOpenGL::Clear() {
|
||||
} else if (use_stencil) {
|
||||
glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
|
||||
}
|
||||
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
||||
@ -550,75 +429,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
SyncViewport();
|
||||
SyncRasterizeEnable();
|
||||
SyncPolygonModes();
|
||||
SyncColorMask();
|
||||
SyncFragmentColorClampState();
|
||||
SyncMultiSampleState();
|
||||
SyncDepthTestState();
|
||||
SyncDepthClamp();
|
||||
SyncStencilTestState();
|
||||
SyncBlendState();
|
||||
SyncLogicOpState();
|
||||
SyncCullMode();
|
||||
SyncPrimitiveRestart();
|
||||
SyncScissorTest();
|
||||
SyncPointState();
|
||||
SyncLineState();
|
||||
SyncPolygonOffset();
|
||||
SyncAlphaTest();
|
||||
SyncFramebufferSRGB();
|
||||
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
std::size_t buffer_size = CalculateVertexArraysSize();
|
||||
|
||||
// Add space for index buffer
|
||||
if (is_indexed) {
|
||||
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
|
||||
}
|
||||
|
||||
// Uniform space for the 5 shader stages
|
||||
buffer_size =
|
||||
Common::AlignUp<std::size_t>(buffer_size, 4) +
|
||||
(sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
|
||||
|
||||
// Add space for at least 18 constant buffers
|
||||
buffer_size += Maxwell::MaxConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
|
||||
// Upload vertex and index data.
|
||||
SetupVertexBuffer();
|
||||
SetupVertexInstances();
|
||||
GLintptr index_buffer_offset = 0;
|
||||
if (is_indexed) {
|
||||
index_buffer_offset = SetupIndexBuffer();
|
||||
}
|
||||
|
||||
// Setup emulation uniform buffer.
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
MaxwellUniformData ubo;
|
||||
ubo.SetFromRegs(maxwell3d);
|
||||
const auto info =
|
||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
|
||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
}
|
||||
SyncState();
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
SetupShaders();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
SetupShaders(is_indexed);
|
||||
|
||||
// Signal the buffer cache that we are not going to upload more things.
|
||||
buffer_cache.Unmap();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
@ -632,7 +448,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
if (is_indexed) {
|
||||
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
|
||||
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
|
||||
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
|
||||
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
|
||||
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
|
||||
glDrawElements(primitive_mode, num_vertices, format, offset);
|
||||
@ -672,22 +488,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
BindComputeTextures(kernel);
|
||||
|
||||
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
SetupComputeGlobalMemory(kernel);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
const auto& entries = kernel->GetEntries();
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : entries.global_memory_entries) {
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
|
||||
buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||
@ -703,6 +519,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||
query_cache.Query(gpu_addr, type, timestamp);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||
u32 size) {
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAll() {}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
@ -711,19 +533,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.MustFlushRegion(addr, size);
|
||||
return buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.MustFlushRegion(addr, size);
|
||||
buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
@ -732,11 +558,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
@ -745,26 +574,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGuestHost() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
buffer_cache.SyncGuestHost();
|
||||
shader_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.FlushCachedWrites();
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UnmapMemory(addr, size);
|
||||
}
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
@ -799,14 +637,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::WaitForIdle() {
|
||||
// Place a barrier on everything that is not framebuffer related.
|
||||
// This is related to another flag that is not currently implemented.
|
||||
glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
|
||||
GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
|
||||
GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
|
||||
GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
|
||||
GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
|
||||
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
|
||||
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
@ -831,18 +662,21 @@ void RasterizerOpenGL::TickFrame() {
|
||||
num_queued_commands = 0;
|
||||
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
@ -854,7 +688,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
|
||||
if (!image_view) {
|
||||
return false;
|
||||
@ -921,166 +755,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = maxwell3d.state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
const auto& entries = shader->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||
|
||||
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const u32 index = entry.GetIndex();
|
||||
const auto& buffer = shader_stage.const_buffers[index];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||
entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
const auto& entries = kernel->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
Tegra::Engines::ConstBufferInfo buffer;
|
||||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
|
||||
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
|
||||
// UBO alignment requirements.
|
||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||
|
||||
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
|
||||
|
||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||
const GPUVAddr gpu_addr = buffer.address;
|
||||
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UNIMPLEMENTED_IF(use_unified);
|
||||
if (info.offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
|
||||
info.handle = staging_cbuf;
|
||||
info.offset = 0;
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_unified) {
|
||||
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
|
||||
unified_offset, size);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array TARGET_LUT = {
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||
|
||||
std::array<BindlessSSBO, 32> ssbos;
|
||||
ASSERT(entries.size() < ssbos.size());
|
||||
|
||||
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||
for (const auto& entry : entries) {
|
||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||
++binding;
|
||||
}
|
||||
if (assembly_shaders) {
|
||||
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
||||
const auto& entries{kernel->GetEntries().global_memory_entries};
|
||||
|
||||
std::array<BindlessSSBO, 32> ssbos;
|
||||
ASSERT(entries.size() < ssbos.size());
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : entries) {
|
||||
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||
++binding;
|
||||
}
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
|
||||
const size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||
if (device.UseAssemblyShaders()) {
|
||||
*ssbo = BindlessSSBO{
|
||||
.address = static_cast<GLuint64EXT>(info.address + info.offset),
|
||||
.length = static_cast<GLsizei>(size),
|
||||
.padding = 0,
|
||||
};
|
||||
} else {
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
@ -1128,6 +802,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncState() {
|
||||
SyncViewport();
|
||||
SyncRasterizeEnable();
|
||||
SyncPolygonModes();
|
||||
SyncColorMask();
|
||||
SyncFragmentColorClampState();
|
||||
SyncMultiSampleState();
|
||||
SyncDepthTestState();
|
||||
SyncDepthClamp();
|
||||
SyncStencilTestState();
|
||||
SyncBlendState();
|
||||
SyncLogicOpState();
|
||||
SyncCullMode();
|
||||
SyncPrimitiveRestart();
|
||||
SyncScissorTest();
|
||||
SyncPointState();
|
||||
SyncLineState();
|
||||
SyncPolygonOffset();
|
||||
SyncAlphaTest();
|
||||
SyncFramebufferSRGB();
|
||||
SyncVertexFormats();
|
||||
SyncVertexInstances();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
@ -1163,9 +861,11 @@ void RasterizerOpenGL::SyncViewport() {
|
||||
if (regs.screen_y_control.y_negate != 0) {
|
||||
flip_y = !flip_y;
|
||||
}
|
||||
glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
|
||||
regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
|
||||
: GL_NEGATIVE_ONE_TO_ONE);
|
||||
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
|
||||
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
|
||||
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
|
||||
state_tracker.ClipControl(origin, depth);
|
||||
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
|
||||
}
|
||||
|
||||
if (dirty_viewport) {
|
||||
@ -1649,36 +1349,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
SyncTransformFeedback();
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||
const auto& binding = regs.tfb_bindings[index];
|
||||
if (!binding.buffer_enable) {
|
||||
if (enabled_transform_feedback_buffers[index]) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
|
||||
0);
|
||||
}
|
||||
enabled_transform_feedback_buffers[index] = false;
|
||||
continue;
|
||||
}
|
||||
enabled_transform_feedback_buffers[index] = true;
|
||||
|
||||
auto& tfb_buffer = transform_feedback_buffers[index];
|
||||
tfb_buffer.Create();
|
||||
|
||||
const GLuint handle = tfb_buffer.handle;
|
||||
const std::size_t size = binding.buffer_size;
|
||||
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
|
||||
|
||||
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||
@ -1692,23 +1369,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
glEndTransformFeedback();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||
const auto& binding = regs.tfb_bindings[index];
|
||||
if (!binding.buffer_enable) {
|
||||
continue;
|
||||
}
|
||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||
|
||||
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||
const GPUVAddr gpu_addr = binding.Address();
|
||||
const std::size_t size = binding.buffer_size;
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -30,7 +30,6 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
@ -72,6 +71,7 @@ public:
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
@ -119,27 +119,6 @@ private:
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(Shader* kernel);
|
||||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
@ -152,6 +131,9 @@ private:
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs state to match guest's
|
||||
void SyncState();
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport();
|
||||
|
||||
@ -215,6 +197,12 @@ private:
|
||||
/// Syncs the framebuffer sRGB state to match the guest state
|
||||
void SyncFramebufferSRGB();
|
||||
|
||||
/// Syncs vertex formats to match the guest state
|
||||
void SyncVertexFormats();
|
||||
|
||||
/// Syncs vertex instances to match the guest state
|
||||
void SyncVertexInstances();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
@ -225,19 +213,7 @@ private:
|
||||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
||||
/// Updates the current vertex format
|
||||
void SetupVertexFormat();
|
||||
|
||||
void SetupVertexBuffer();
|
||||
void SetupVertexInstances();
|
||||
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
void SetupShaders();
|
||||
void SetupShaders(bool is_indexed);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
@ -249,12 +225,12 @@ private:
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLStreamBuffer stream_buffer;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
QueryCache query_cache;
|
||||
OGLBufferCache buffer_cache;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
@ -262,20 +238,8 @@ private:
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles;
|
||||
std::array<GLuint, MAX_IMAGES> image_handles;
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
transform_feedback_buffers;
|
||||
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
enabled_transform_feedback_buffers;
|
||||
|
||||
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles{};
|
||||
std::array<GLuint, MAX_IMAGES> image_handles{};
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
@ -171,12 +171,6 @@ void OGLBuffer::Release() {
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
|
||||
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
|
||||
|
||||
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
void OGLSync::Create() {
|
||||
if (handle != 0)
|
||||
return;
|
||||
|
@ -234,9 +234,6 @@ public:
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
// Converts the buffer into a stream copy buffer with a fixed size
|
||||
void MakeStreamCopy(std::size_t buffer_size);
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
|
@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
|
||||
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
|
||||
|
||||
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
||||
constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
|
||||
#define ftou floatBitsToUint
|
||||
#define itof intBitsToFloat
|
||||
#define utof uintBitsToFloat
|
||||
@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
|
||||
|
||||
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
|
||||
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
|
||||
|
||||
layout (std140, binding = {}) uniform vs_config {{
|
||||
float y_direction;
|
||||
}};
|
||||
)";
|
||||
|
||||
class ShaderWriter final {
|
||||
@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||
}
|
||||
|
||||
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
|
||||
// We waste one UBO for emulation
|
||||
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
|
||||
return num_ubos > num_available_ubos;
|
||||
}
|
||||
|
||||
struct GenericVaryingDescription {
|
||||
std::string name;
|
||||
u8 first_element = 0;
|
||||
@ -420,9 +409,8 @@ public:
|
||||
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
|
||||
ShaderType stage_, std::string_view identifier_,
|
||||
std::string_view suffix_)
|
||||
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
|
||||
suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
|
||||
UseUnifiedUniforms(device_, ir_, stage_)} {
|
||||
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
|
||||
identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
|
||||
if (stage != ShaderType::Compute) {
|
||||
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||
}
|
||||
@ -516,7 +504,8 @@ private:
|
||||
if (!identifier.empty()) {
|
||||
code.AddLine("// {}", identifier);
|
||||
}
|
||||
code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
|
||||
const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
|
||||
code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
|
||||
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
|
||||
if (device.HasShaderBallot()) {
|
||||
code.AddLine("#extension GL_ARB_shader_ballot : require");
|
||||
@ -542,7 +531,7 @@ private:
|
||||
|
||||
code.AddNewLine();
|
||||
|
||||
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
|
||||
code.AddLine(COMMON_DECLARATIONS);
|
||||
}
|
||||
|
||||
void DeclareVertex() {
|
||||
@ -865,17 +854,6 @@ private:
|
||||
}
|
||||
|
||||
void DeclareConstantBuffers() {
|
||||
if (use_unified_uniforms) {
|
||||
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
|
||||
static_cast<u32>(ir.GetGlobalMemory().size());
|
||||
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
|
||||
binding);
|
||||
code.AddLine(" uint cbufs[];");
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
return;
|
||||
}
|
||||
|
||||
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
for (const auto& [index, info] : ir.GetConstantBuffers()) {
|
||||
const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
|
||||
@ -1081,29 +1059,17 @@ private:
|
||||
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
||||
const Node offset = cbuf->GetOffset();
|
||||
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
|
||||
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
// Direct access
|
||||
const u32 offset_imm = immediate->GetValue();
|
||||
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
|
||||
Type::Uint};
|
||||
} else {
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
Type::Uint};
|
||||
}
|
||||
}
|
||||
|
||||
// Indirect access
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
|
||||
Visit(offset).AsUint()),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
|
||||
@ -2293,7 +2259,6 @@ private:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (header.ps.omap.depth) {
|
||||
// The depth output is always 2 registers after the last color output, and current_reg
|
||||
// already contains one past the last color register.
|
||||
@ -2337,7 +2302,8 @@ private:
|
||||
}
|
||||
|
||||
Expression YNegate(Operation operation) {
|
||||
return {"y_direction", Type::Float};
|
||||
// Y_NEGATE is mapped to this uniform value
|
||||
return {"gl_FrontMaterial.ambient.a", Type::Float};
|
||||
}
|
||||
|
||||
template <u32 element>
|
||||
@ -2787,7 +2753,6 @@ private:
|
||||
const std::string_view identifier;
|
||||
const std::string_view suffix;
|
||||
const Header header;
|
||||
const bool use_unified_uniforms;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
ShaderWriter code;
|
||||
@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
|
||||
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
|
||||
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
|
||||
}
|
||||
for (const auto& buffer : entries.const_buffers) {
|
||||
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
|
||||
}
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ struct ShaderEntries {
|
||||
std::vector<ImageEntry> images;
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
bool use_unified_uniforms{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
|
@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
|
||||
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
|
||||
}
|
||||
|
||||
void SetupDirtyVertexArrays(Tables& tables) {
|
||||
static constexpr std::size_t num_array = 3;
|
||||
void SetupDirtyVertexInstances(Tables& tables) {
|
||||
static constexpr std::size_t instance_base_offset = 3;
|
||||
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
|
||||
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
|
||||
|
||||
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
|
||||
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
|
||||
|
||||
const std::size_t instance_array_offset = array_offset + instance_base_offset;
|
||||
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
|
||||
tables[1][instance_array_offset] = VertexInstances;
|
||||
@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
|
||||
auto& dirty = gpu.Maxwell3D().dirty;
|
||||
auto& tables = dirty.tables;
|
||||
SetupDirtyRenderTargets(tables);
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyColorMasks(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyVertexArrays(tables);
|
||||
SetupDirtyVertexInstances(tables);
|
||||
SetupDirtyVertexFormat(tables);
|
||||
SetupDirtyShaders(tables);
|
||||
SetupDirtyPolygonModes(tables);
|
||||
@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
|
||||
SetupDirtyClipControl(tables);
|
||||
SetupDirtyDepthClampEnabled(tables);
|
||||
SetupDirtyMisc(tables);
|
||||
|
||||
auto& store = dirty.on_write_stores;
|
||||
store[VertexBuffers] = true;
|
||||
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
store[VertexBuffer0 + i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
void StateTracker::InvalidateStreamBuffer() {
|
||||
flags[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -28,10 +28,6 @@ enum : u8 {
|
||||
VertexFormat0,
|
||||
VertexFormat31 = VertexFormat0 + 31,
|
||||
|
||||
VertexBuffers,
|
||||
VertexBuffer0,
|
||||
VertexBuffer31 = VertexBuffer0 + 31,
|
||||
|
||||
VertexInstances,
|
||||
VertexInstance0,
|
||||
VertexInstance31 = VertexInstance0 + 31,
|
||||
@ -92,8 +88,6 @@ class StateTracker {
|
||||
public:
|
||||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateStreamBuffer();
|
||||
|
||||
void BindIndexBuffer(GLuint new_index_buffer) {
|
||||
if (index_buffer == new_index_buffer) {
|
||||
return;
|
||||
@ -110,13 +104,32 @@ public:
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
|
||||
}
|
||||
|
||||
void ClipControl(GLenum new_origin, GLenum new_depth) {
|
||||
if (new_origin == origin && new_depth == depth) {
|
||||
return;
|
||||
}
|
||||
origin = new_origin;
|
||||
depth = new_depth;
|
||||
glClipControl(origin, depth);
|
||||
}
|
||||
|
||||
void SetYNegate(bool new_y_negate) {
|
||||
if (new_y_negate == y_negate) {
|
||||
return;
|
||||
}
|
||||
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
|
||||
y_negate = new_y_negate;
|
||||
const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
|
||||
glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
|
||||
}
|
||||
|
||||
void NotifyScreenDrawVertexArray() {
|
||||
flags[OpenGL::Dirty::VertexFormats] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexBuffers] = true;
|
||||
flags[OpenGL::Dirty::VertexBuffer0] = true;
|
||||
flags[VideoCommon::Dirty::VertexBuffers] = true;
|
||||
flags[VideoCommon::Dirty::VertexBuffer0] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexInstances] = true;
|
||||
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
|
||||
@ -202,6 +215,9 @@ private:
|
||||
|
||||
GLuint framebuffer = 0;
|
||||
GLuint index_buffer = 0;
|
||||
GLenum origin = GL_LOWER_LEFT;
|
||||
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
|
||||
bool y_negate = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -1,70 +1,64 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
|
||||
: state_tracker{state_tracker_} {
|
||||
gl_buffer.Create();
|
||||
|
||||
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
|
||||
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||
StreamBuffer::StreamBuffer() {
|
||||
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
||||
buffer.Create();
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
|
||||
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
|
||||
mapped_pointer =
|
||||
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
|
||||
for (OGLSync& sync : fences) {
|
||||
sync.Create();
|
||||
}
|
||||
}
|
||||
|
||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
glUnmapNamedBuffer(gl_buffer.handle);
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= BUFFER_SIZE);
|
||||
ASSERT(alignment <= BUFFER_SIZE);
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
|
||||
ASSERT(size < REGION_SIZE);
|
||||
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
|
||||
++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = iterator;
|
||||
|
||||
if (buffer_pos + size > BUFFER_SIZE) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
glInvalidateBufferData(gl_buffer.handle);
|
||||
state_tracker.InvalidateStreamBuffer();
|
||||
|
||||
buffer_pos = 0;
|
||||
for (size_t region = Region(free_iterator) + 1,
|
||||
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
|
||||
region < region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
|
||||
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
|
||||
if (size > 0) {
|
||||
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
|
||||
if (iterator + size > free_iterator) {
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
if (iterator + size > STREAM_BUFFER_SIZE) {
|
||||
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
buffer_pos += size;
|
||||
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return {std::span(mapped_pointer + offset, size), offset};
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -1,9 +1,12 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
@ -13,48 +16,35 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
class StreamBuffer {
|
||||
static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
|
||||
static constexpr size_t MAX_ALIGNMENT = 256;
|
||||
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
|
||||
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
|
||||
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
|
||||
~OGLStreamBuffer();
|
||||
explicit StreamBuffer();
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, and the offset within the buffer.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return gpu_address;
|
||||
}
|
||||
|
||||
GLsizeiptr Size() const noexcept {
|
||||
return BUFFER_SIZE;
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return buffer.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
[[nodiscard]] static size_t Region(size_t offset) noexcept {
|
||||
return offset / REGION_SIZE;
|
||||
}
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
GLuint64EXT gpu_address = 0;
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
size_t iterator = 0;
|
||||
size_t used_iterator = 0;
|
||||
size_t free_iterator = 0;
|
||||
u8* mapped_pointer = nullptr;
|
||||
OGLBuffer buffer;
|
||||
std::array<OGLSync, NUM_SYNCS> fences;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
|
||||
: span(map, size), sync{sync_}, handle{handle_} {}
|
||||
|
||||
ImageBufferMap::~ImageBufferMap() {
|
||||
if (sync) {
|
||||
sync->Create();
|
||||
@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
|
||||
glFinish();
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
|
||||
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return upload_buffers.RequestMap(size, true);
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
|
||||
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return download_buffers.RequestMap(size, false);
|
||||
}
|
||||
|
||||
@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
|
||||
size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
switch (image.info.type) {
|
||||
case ImageType::e2D:
|
||||
return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
|
||||
return util_shaders.BlockLinearUpload2D(image, map, swizzles);
|
||||
case ImageType::e3D:
|
||||
return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
|
||||
return util_shaders.BlockLinearUpload3D(image, map, swizzles);
|
||||
case ImageType::Linear:
|
||||
return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
|
||||
return util_shaders.PitchUpload(image, map, swizzles);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
|
||||
bool insert_fence) {
|
||||
const size_t index = RequestBuffer(requested_size);
|
||||
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
|
||||
return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
|
||||
return ImageBufferMap{
|
||||
.mapped_span = std::span(maps[index], requested_size),
|
||||
.sync = sync,
|
||||
.buffer = buffers[index].handle,
|
||||
};
|
||||
}
|
||||
|
||||
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
|
||||
@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
||||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
|
||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
|
||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
|
||||
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
|
||||
@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
current_image_height = copy.buffer_image_height;
|
||||
glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
|
||||
}
|
||||
CopyBufferToImage(copy, buffer_offset);
|
||||
CopyBufferToImage(copy, map.offset);
|
||||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
|
||||
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
|
||||
copy.dst_offset, copy.size);
|
||||
}
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::DownloadMemory(ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||
|
||||
u32 current_row_length = std::numeric_limits<u32>::max();
|
||||
@ -759,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
current_image_height = copy.buffer_image_height;
|
||||
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
|
||||
}
|
||||
CopyImageToBuffer(copy, buffer_offset);
|
||||
CopyImageToBuffer(copy, map.offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
|
||||
class ImageBufferMap {
|
||||
public:
|
||||
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
|
||||
struct ImageBufferMap {
|
||||
~ImageBufferMap();
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
private:
|
||||
std::span<u8> span;
|
||||
std::span<u8> mapped_span;
|
||||
size_t offset = 0;
|
||||
OGLSync* sync;
|
||||
GLuint handle;
|
||||
GLuint buffer;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
@ -69,9 +59,9 @@ public:
|
||||
|
||||
void Finish();
|
||||
|
||||
ImageBufferMap MapUploadBuffer(size_t size);
|
||||
ImageBufferMap UploadStagingBuffer(size_t size);
|
||||
|
||||
ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
ImageBufferMap DownloadStagingBuffer(size_t size);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
@ -89,7 +79,7 @@ public:
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void InsertUploadMemoryBarrier();
|
||||
@ -148,14 +138,12 @@ public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return texture.handle;
|
||||
|
@ -29,9 +29,7 @@
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr GLint PositionLocation = 0;
|
||||
constexpr GLint TexCoordLocation = 1;
|
||||
constexpr GLint ModelViewMatrixLocation = 0;
|
||||
@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
|
||||
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
|
||||
program_manager{device},
|
||||
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
AddTelemetryFields();
|
||||
InitOpenGLObjects();
|
||||
}
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
|
||||
++m_current_frame;
|
||||
|
||||
rasterizer->TickFrame();
|
||||
rasterizer.TickFrame();
|
||||
|
||||
context->SwapBuffers();
|
||||
render_window.OnFrameDisplayed();
|
||||
@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
|
||||
framebuffer_crop_rect = framebuffer.crop_rect;
|
||||
|
||||
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
|
||||
if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
|
||||
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
|
||||
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||
@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
|
||||
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
|
||||
}
|
||||
|
||||
void RendererOpenGL::CreateRasterizer() {
|
||||
if (rasterizer) {
|
||||
return;
|
||||
}
|
||||
rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
|
||||
screen_info, program_manager, state_tracker);
|
||||
}
|
||||
|
||||
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer) {
|
||||
texture.width = framebuffer.width;
|
||||
@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||
|
||||
program_manager.BindHostPipeline(pipeline.handle);
|
||||
|
||||
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
glEnable(GL_CULL_FACE);
|
||||
if (screen_info.display_srgb) {
|
||||
glEnable(GL_FRAMEBUFFER_SRGB);
|
||||
@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||
glCullFace(GL_BACK);
|
||||
glFrontFace(GL_CW);
|
||||
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
|
||||
static_cast<GLfloat>(layout.height));
|
||||
glDepthRangeIndexed(0, 0.0, 0.0);
|
||||
@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
bool RendererOpenGL::Init() {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
AddTelemetryFields();
|
||||
|
||||
if (!GLAD_GL_VERSION_4_6) {
|
||||
return false;
|
||||
}
|
||||
|
||||
InitOpenGLObjects();
|
||||
CreateRasterizer();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void RendererOpenGL::ShutDown() {}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
@ -63,18 +64,18 @@ public:
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererOpenGL() override;
|
||||
|
||||
bool Init() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Initializes the OpenGL state and creates persistent objects.
|
||||
void InitOpenGLObjects();
|
||||
|
||||
void AddTelemetryFields();
|
||||
|
||||
void CreateRasterizer();
|
||||
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
@ -98,8 +99,10 @@ private:
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
const Device device;
|
||||
StateTracker state_tracker{gpu};
|
||||
Device device;
|
||||
StateTracker state_tracker;
|
||||
ProgramManager program_manager;
|
||||
RasterizerOpenGL rasterizer;
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
@ -115,9 +118,6 @@ private:
|
||||
/// Display information for Switch screen
|
||||
ScreenInfo screen_info;
|
||||
|
||||
/// Global dummy shader pipeline
|
||||
ProgramManager program_manager;
|
||||
|
||||
/// OpenGL framebuffer data
|
||||
std::vector<u8> gl_framebuffer_data;
|
||||
|
||||
|
@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
|
||||
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||
@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
|
||||
|
||||
@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glUniform1ui(8, params.block_depth);
|
||||
glUniform1ui(9, params.block_depth_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||
@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
|
||||
"Non-power of two images are not implemented");
|
||||
|
||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
||||
@ -167,13 +167,13 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
|
@ -15,21 +15,22 @@
|
||||
namespace OpenGL {
|
||||
|
||||
class Image;
|
||||
class ImageBufferMap;
|
||||
class ProgramManager;
|
||||
|
||||
struct ImageBufferMap;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void CopyBC4(Image& dst_image, Image& src_image,
|
||||
|
@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||
return {};
|
||||
}
|
||||
|
||||
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
switch (index_format) {
|
||||
case Maxwell::IndexFormat::UnsignedByte:
|
||||
if (!device.IsExtIndexTypeUint8Supported()) {
|
||||
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
case Maxwell::IndexFormat::UnsignedShort:
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
|
@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
|
||||
|
||||
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
|
||||
|
||||
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
|
||||
|
||||
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
|
||||
|
||||
|
@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
|
||||
return separated_extensions;
|
||||
}
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface) {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[device_index], dld);
|
||||
return Device(*instance, physical_device, surface, dld);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
cpu_memory{cpu_memory_}, gpu{gpu_} {}
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
|
||||
: RendererBase(emu_window, std::move(context_)),
|
||||
telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_),
|
||||
gpu(gpu_),
|
||||
library(OpenLibrary()),
|
||||
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug)),
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)),
|
||||
memory_allocator(device, false),
|
||||
state_tracker(gpu),
|
||||
scheduler(device, state_tracker),
|
||||
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
|
||||
render_window.GetFramebufferLayout().height, false),
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
|
||||
memory_allocator, state_tracker, scheduler) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
|
||||
}
|
||||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
ShutDown();
|
||||
void(device.GetLogical().WaitIdle());
|
||||
}
|
||||
|
||||
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
|
||||
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
|
||||
const bool use_accelerated =
|
||||
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
|
||||
swapchain->Create(layout.width, layout.height, is_srgb);
|
||||
blit_screen->Recreate();
|
||||
if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
|
||||
swapchain.Create(layout.width, layout.height, is_srgb);
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
|
||||
scheduler->WaitWorker();
|
||||
scheduler.WaitWorker();
|
||||
|
||||
swapchain->AcquireNextImage();
|
||||
const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
|
||||
swapchain.AcquireNextImage();
|
||||
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
|
||||
|
||||
scheduler->Flush(render_semaphore);
|
||||
scheduler.Flush(render_semaphore);
|
||||
|
||||
if (swapchain->Present(render_semaphore)) {
|
||||
blit_screen->Recreate();
|
||||
if (swapchain.Present(render_semaphore)) {
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
|
||||
rasterizer->TickFrame();
|
||||
rasterizer.TickFrame();
|
||||
}
|
||||
|
||||
render_window.OnFrameDisplayed();
|
||||
}
|
||||
|
||||
bool RendererVulkan::Init() try {
|
||||
library = OpenLibrary();
|
||||
instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug);
|
||||
if (Settings::values.renderer_debug) {
|
||||
debug_callback = CreateDebugCallback(instance);
|
||||
}
|
||||
surface = CreateSurface(instance, render_window);
|
||||
|
||||
InitializeDevice();
|
||||
Report();
|
||||
|
||||
memory_allocator = std::make_unique<MemoryAllocator>(*device);
|
||||
|
||||
state_tracker = std::make_unique<StateTracker>(gpu);
|
||||
|
||||
scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
|
||||
|
||||
const auto& framebuffer = render_window.GetFramebufferLayout();
|
||||
swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
|
||||
swapchain->Create(framebuffer.width, framebuffer.height, false);
|
||||
|
||||
rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
|
||||
cpu_memory, screen_info, *device,
|
||||
*memory_allocator, *state_tracker, *scheduler);
|
||||
|
||||
blit_screen =
|
||||
std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
|
||||
*memory_allocator, *swapchain, *scheduler, screen_info);
|
||||
return true;
|
||||
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
void RendererVulkan::ShutDown() {
|
||||
if (!device) {
|
||||
return;
|
||||
}
|
||||
if (const auto& dev = device->GetLogical()) {
|
||||
dev.WaitIdle();
|
||||
}
|
||||
rasterizer.reset();
|
||||
blit_screen.reset();
|
||||
scheduler.reset();
|
||||
swapchain.reset();
|
||||
memory_allocator.reset();
|
||||
device.reset();
|
||||
}
|
||||
|
||||
void RendererVulkan::InitializeDevice() {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
|
||||
device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
|
||||
}
|
||||
|
||||
void RendererVulkan::Report() const {
|
||||
const std::string vendor_name{device->GetVendorName()};
|
||||
const std::string model_name{device->GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(*device);
|
||||
const std::string vendor_name{device.GetVendorName()};
|
||||
const std::string model_name{device.GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(device);
|
||||
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
|
||||
|
||||
const std::string api_version = GetReadableVersion(device->ApiVersion());
|
||||
const std::string api_version = GetReadableVersion(device.ApiVersion());
|
||||
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
|
||||
@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
std::vector<std::string> RendererVulkan::EnumerateDevices() try {
|
||||
vk::InstanceDispatch dld;
|
||||
const Common::DynamicLibrary library = OpenLibrary();
|
||||
const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
|
||||
const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
|
||||
std::vector<std::string> names;
|
||||
names.reserve(physical_devices.size());
|
||||
for (const VkPhysicalDevice device : physical_devices) {
|
||||
names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
|
||||
}
|
||||
return names;
|
||||
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -9,8 +9,14 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/dynamic_library.h"
|
||||
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
@ -27,20 +33,6 @@ class GPU;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
class MemoryAllocator;
|
||||
class VKBlitScreen;
|
||||
class VKSwapchain;
|
||||
class VKScheduler;
|
||||
|
||||
struct VKScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class RendererVulkan final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
|
||||
@ -49,15 +41,13 @@ public:
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererVulkan() override;
|
||||
|
||||
bool Init() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
static std::vector<std::string> EnumerateDevices();
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
private:
|
||||
void InitializeDevice();
|
||||
|
||||
void Report() const;
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
@ -68,18 +58,18 @@ private:
|
||||
vk::InstanceDispatch dld;
|
||||
|
||||
vk::Instance instance;
|
||||
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
VKScreenInfo screen_info;
|
||||
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
std::unique_ptr<Device> device;
|
||||
std::unique_ptr<MemoryAllocator> memory_allocator;
|
||||
std::unique_ptr<StateTracker> state_tracker;
|
||||
std::unique_ptr<VKScheduler> scheduler;
|
||||
std::unique_ptr<VKSwapchain> swapchain;
|
||||
std::unique_ptr<VKBlitScreen> blit_screen;
|
||||
Device device;
|
||||
MemoryAllocator memory_allocator;
|
||||
StateTracker state_tracker;
|
||||
VKScheduler scheduler;
|
||||
VKSwapchain swapchain;
|
||||
VKBlitScreen blit_screen;
|
||||
RasterizerVulkan rasterizer;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
|
||||
};
|
||||
|
||||
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
|
||||
Core::Frontend::EmuWindow& render_window_,
|
||||
VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Core::Frontend::EmuWindow& render_window_, const Device& device_,
|
||||
MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
|
||||
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
|
||||
: cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
|
||||
device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
|
||||
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
|
||||
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
|
||||
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
|
||||
resource_ticks.resize(image_count);
|
||||
|
||||
CreateStaticResources();
|
||||
@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
SetUniformData(data, framebuffer);
|
||||
SetVertexData(data, framebuffer);
|
||||
|
||||
const std::span<u8> map = buffer_commit.Map();
|
||||
std::memcpy(map.data(), &data, sizeof(data));
|
||||
const std::span<u8> mapped_span = buffer_commit.Map();
|
||||
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
||||
|
||||
if (!use_accelerated) {
|
||||
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
|
||||
@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
|
||||
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
|
||||
const size_t size_bytes = GetSizeInBytes(framebuffer);
|
||||
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
cmdbuf.Draw(4, 1, 0, 0);
|
||||
cmdbuf.EndRenderPass();
|
||||
});
|
||||
|
||||
return *semaphores[image_index];
|
||||
}
|
||||
|
||||
|
@ -38,12 +38,18 @@ class RasterizerVulkan;
|
||||
class VKScheduler;
|
||||
class VKSwapchain;
|
||||
|
||||
class VKBlitScreen final {
|
||||
struct VKScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class VKBlitScreen {
|
||||
public:
|
||||
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
|
||||
Core::Frontend::EmuWindow& render_window,
|
||||
VideoCore::RasterizerInterface& rasterizer, const Device& device,
|
||||
MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
|
||||
Core::Frontend::EmuWindow& render_window, const Device& device,
|
||||
MemoryAllocator& memory_manager, VKSwapchain& swapchain,
|
||||
VKScheduler& scheduler, const VKScreenInfo& screen_info);
|
||||
~VKBlitScreen();
|
||||
|
||||
@ -84,7 +90,6 @@ private:
|
||||
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Core::Frontend::EmuWindow& render_window;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
VKSwapchain& swapchain;
|
||||
|
@ -3,188 +3,308 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "core/core.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
|
||||
return VkBufferCopy{
|
||||
.srcOffset = copy.src_offset,
|
||||
.dstOffset = copy.dst_offset,
|
||||
.size = copy.size,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr VkBufferUsageFlags BUFFER_USAGE =
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
|
||||
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
}
|
||||
if (num_elements <= 0xffff) {
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT32;
|
||||
}
|
||||
|
||||
constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
|
||||
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
|
||||
constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
|
||||
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
|
||||
|
||||
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
|
||||
size_t BytesPerIndex(VkIndexType index_type) {
|
||||
switch (index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
return 1;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
return 2;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
return 4;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid index type={}", index_type);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||
std::ranges::transform(indices, indices.begin(),
|
||||
[quad, first](u32 index) { return first + index + quad * 4; });
|
||||
return indices;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
|
||||
StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
|
||||
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
|
||||
staging_pool_} {
|
||||
buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
|
||||
buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = static_cast<VkDeviceSize>(size_),
|
||||
.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.size = SizeBytes(),
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
||||
}
|
||||
|
||||
Buffer::~Buffer() = default;
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKDescriptorPool& descriptor_pool)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
|
||||
|
||||
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
|
||||
const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
|
||||
std::memcpy(staging.mapped_span.data(), data, data_size);
|
||||
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
const VkBuffer handle = Handle();
|
||||
scheduler.Record([staging = staging.buffer, handle, offset, data_size,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
|
||||
VK_ACCESS_HOST_WRITE_BIT |
|
||||
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
|
||||
u32 base_vertex, u32 num_indices, VkBuffer buffer,
|
||||
u32 offset, [[maybe_unused]] u32 size) {
|
||||
VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
|
||||
VkDeviceSize vk_offset = offset;
|
||||
VkBuffer vk_buffer = buffer;
|
||||
if (topology == PrimitiveTopology::Quads) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT32;
|
||||
std::tie(vk_buffer, vk_offset) =
|
||||
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
|
||||
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT16;
|
||||
std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
|
||||
}
|
||||
if (vk_buffer == VK_NULL_HANDLE) {
|
||||
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
|
||||
ReserveNullIndexBuffer();
|
||||
vk_buffer = *null_index_buffer;
|
||||
}
|
||||
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
|
||||
ReserveQuadArrayLUT(first + count, true);
|
||||
|
||||
// The LUT has the indices 0, 1, 2, and 3 copied as an array
|
||||
// To apply these 'first' offsets we can apply an offset based on the modulus.
|
||||
const VkIndexType index_type = quad_array_lut_index_type;
|
||||
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
|
||||
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
|
||||
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
|
||||
u32 stride) {
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
|
||||
const VkDeviceSize vk_stride = stride;
|
||||
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
|
||||
});
|
||||
} else {
|
||||
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffer(index, buffer, offset);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
|
||||
u32 size) {
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
// Already logged in the rasterizer
|
||||
return;
|
||||
}
|
||||
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = size;
|
||||
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
|
||||
if (num_indices <= current_num_indices) {
|
||||
return;
|
||||
}
|
||||
if (wait_for_idle) {
|
||||
scheduler.Finish();
|
||||
}
|
||||
current_num_indices = num_indices;
|
||||
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
|
||||
|
||||
const u32 num_quads = num_indices / 4;
|
||||
const u32 num_triangle_indices = num_quads * 6;
|
||||
const u32 num_first_offset_copies = 4;
|
||||
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
|
||||
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
|
||||
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size_bytes,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
quad_array_lut.SetObjectNameEXT("Quad LUT");
|
||||
}
|
||||
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
|
||||
|
||||
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
|
||||
u8* staging_data = staging.mapped_span.data();
|
||||
const size_t quad_size = bytes_per_index * 6;
|
||||
for (u32 first = 0; first < num_first_offset_copies; ++first) {
|
||||
for (u32 quad = 0; quad < num_quads; ++quad) {
|
||||
switch (quad_array_lut_index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
staging_data += quad_size;
|
||||
}
|
||||
}
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
|
||||
dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferCopy copy{
|
||||
.srcOffset = src_offset,
|
||||
.dstOffset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
const VkBufferMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
.buffer = dst_buffer,
|
||||
.offset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
|
||||
write_barrier);
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
const VkBuffer handle = Handle();
|
||||
scheduler.Record(
|
||||
[staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferMemoryBarrier barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
void BufferCacheRuntime::ReserveNullIndexBuffer() {
|
||||
if (null_index_buffer) {
|
||||
return;
|
||||
}
|
||||
null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
};
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
||||
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
|
||||
.flags = 0,
|
||||
.size = 4,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
scheduler.Finish();
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
null_index_buffer.SetObjectNameEXT("Null index buffer");
|
||||
}
|
||||
null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
|
||||
|
||||
std::memcpy(data, staging.mapped_span.data(), data_size);
|
||||
}
|
||||
|
||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size) {
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
const VkBuffer dst_buffer = Handle();
|
||||
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
|
||||
copy_size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
|
||||
|
||||
std::array<VkBufferMemoryBarrier, 2> barriers;
|
||||
barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barriers[0].pNext = nullptr;
|
||||
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||
barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].buffer = src_buffer;
|
||||
barriers[0].offset = src_offset;
|
||||
barriers[0].size = copy_size;
|
||||
barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barriers[1].pNext = nullptr;
|
||||
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
||||
barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[1].buffer = dst_buffer;
|
||||
barriers[1].offset = dst_offset;
|
||||
barriers[1].size = copy_size;
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
||||
barriers, {});
|
||||
scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
|
||||
});
|
||||
}
|
||||
|
||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
|
||||
StagingBufferPool& staging_pool_)
|
||||
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
|
||||
cpu_memory_, stream_buffer_},
|
||||
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_} {}
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
|
||||
size);
|
||||
}
|
||||
|
||||
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||
size = std::max(size, std::size_t(4));
|
||||
const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
||||
});
|
||||
return {empty.buffer, 0, 0};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -4,69 +4,124 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKDescriptorPool;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
class Buffer final : public VideoCommon::BufferBlock {
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
|
||||
StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
|
||||
~Buffer();
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
|
||||
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_);
|
||||
|
||||
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
|
||||
|
||||
void Download(std::size_t offset, std::size_t data_size, u8* data);
|
||||
|
||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size);
|
||||
|
||||
VkBuffer Handle() const {
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return 0;
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
};
|
||||
|
||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
|
||||
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
|
||||
|
||||
public:
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, MemoryAllocator& memory_allocator,
|
||||
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
|
||||
StagingBufferPool& staging_pool);
|
||||
~VKBufferCache();
|
||||
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
|
||||
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKDescriptorPool& descriptor_pool);
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||
void Finish();
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
|
||||
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void BindQuadArrayIndexBuffer(u32 first, u32 count);
|
||||
|
||||
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
|
||||
[[maybe_unused]] u32 binding_index, u32 size) {
|
||||
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
|
||||
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
|
||||
return ref.mapped_span;
|
||||
}
|
||||
|
||||
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
|
||||
[[maybe_unused]] bool is_written) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
private:
|
||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
|
||||
|
||||
void ReserveNullIndexBuffer();
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
vk::Buffer quad_array_lut;
|
||||
MemoryCommit quad_array_lut_commit;
|
||||
VkIndexType quad_array_lut_index_type{};
|
||||
u32 current_num_indices = 0;
|
||||
|
||||
vk::Buffer null_index_buffer;
|
||||
MemoryCommit null_index_buffer_commit;
|
||||
|
||||
Uint8Pass uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
using Runtime = Vulkan::BufferCacheRuntime;
|
||||
using Buffer = Vulkan::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = false;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
@ -22,30 +22,7 @@
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
|
||||
return {
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
};
|
||||
}
|
||||
|
||||
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
|
||||
return {
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = 0,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
}
|
||||
|
||||
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
||||
return {
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
|
||||
return set;
|
||||
}
|
||||
|
||||
QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
|
||||
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
|
||||
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
QuadArrayPass::~QuadArrayPass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
|
||||
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
|
||||
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
ASSERT(num_vertices % 4 == 0);
|
||||
const u32 num_quads = num_vertices / 4;
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
|
||||
num_quads, first, set](vk::CommandBuffer cmdbuf) {
|
||||
constexpr u32 dispatch_size = 1024;
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
|
||||
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
|
||||
cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
|
||||
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.buffer = buffer;
|
||||
barrier.offset = 0;
|
||||
barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
}
|
||||
|
||||
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
|
||||
|
||||
Uint8Pass::~Uint8Pass() = default;
|
||||
|
||||
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u64 src_offset) {
|
||||
std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset) {
|
||||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
|
||||
num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
constexpr u32 dispatch_size = 1024;
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
|
||||
cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
|
||||
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.buffer = buffer;
|
||||
barrier.offset = 0;
|
||||
barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
|
||||
@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
|
||||
|
||||
QuadIndexedPass::~QuadIndexedPass() = default;
|
||||
|
||||
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
|
||||
std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
|
||||
VkBuffer src_buffer, u64 src_offset) {
|
||||
VkBuffer src_buffer, u32 src_offset) {
|
||||
const u32 index_shift = [index_format] {
|
||||
switch (index_format) {
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
|
||||
@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
|
||||
const u32 num_tri_vertices = (num_vertices / 4) * 6;
|
||||
|
||||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
|
||||
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 dispatch_size = 1024;
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
const std::array push_constants = {base_vertex, index_shift};
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
|
||||
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
&push_constants);
|
||||
cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
|
||||
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.buffer = buffer;
|
||||
barrier.offset = 0;
|
||||
barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -41,22 +41,6 @@ private:
|
||||
vk::ShaderModule module;
|
||||
};
|
||||
|
||||
class QuadArrayPass final : public VKComputePass {
|
||||
public:
|
||||
explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadArrayPass();
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class Uint8Pass final : public VKComputePass {
|
||||
public:
|
||||
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
|
||||
@ -64,7 +48,10 @@ public:
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~Uint8Pass();
|
||||
|
||||
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
|
||||
/// Assemble uint8 indices into an uint16 index buffer
|
||||
/// Returns a pair with the staging buffer, and the offset where the assembled data is
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
@ -80,9 +67,9 @@ public:
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadIndexedPass();
|
||||
|
||||
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
|
||||
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
|
||||
u64 src_offset);
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
|
||||
u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
|
@ -45,8 +45,8 @@ void InnerFence::Wait() {
|
||||
}
|
||||
|
||||
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
VKQueryCache& query_cache_, const Device& device_,
|
||||
VKScheduler& scheduler_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
scheduler{scheduler_} {}
|
||||
|
@ -22,7 +22,6 @@ class RasterizerInterface;
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKBufferCache;
|
||||
class VKQueryCache;
|
||||
class VKScheduler;
|
||||
|
||||
@ -45,14 +44,14 @@ private:
|
||||
using Fence = std::shared_ptr<InnerFence>;
|
||||
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
|
||||
VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
|
||||
|
||||
class VKFenceManager final : public GenericFenceManager {
|
||||
public:
|
||||
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
VKScheduler& scheduler_);
|
||||
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
VKQueryCache& query_cache, const Device& device,
|
||||
VKScheduler& scheduler);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
@ -24,7 +22,6 @@
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
|
||||
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
|
||||
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
|
||||
|
||||
namespace {
|
||||
struct DrawParams {
|
||||
u32 base_instance;
|
||||
u32 num_instances;
|
||||
u32 base_vertex;
|
||||
u32 num_vertices;
|
||||
bool is_indexed;
|
||||
};
|
||||
|
||||
constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
|
||||
|
||||
@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
|
||||
const float width = src.scale_x * 2.0f;
|
||||
const float height = src.scale_y * 2.0f;
|
||||
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
|
||||
|
||||
VkViewport viewport{
|
||||
.x = src.translate_x - src.scale_x,
|
||||
.y = src.translate_y - src.scale_y,
|
||||
@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
|
||||
.minDepth = src.translate_z - src.scale_z * reduce_z,
|
||||
.maxDepth = src.translate_z + src.scale_z,
|
||||
};
|
||||
|
||||
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
|
||||
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
|
||||
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
return viewport;
|
||||
}
|
||||
|
||||
@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
|
||||
std::array<VkDeviceSize, N> expanded;
|
||||
std::copy(strides.begin(), strides.end(), expanded.begin());
|
||||
return expanded;
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::e2D;
|
||||
@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
class BufferBindings final {
|
||||
public:
|
||||
void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
|
||||
vertex.buffers[vertex.num_buffers] = buffer;
|
||||
vertex.offsets[vertex.num_buffers] = offset;
|
||||
vertex.sizes[vertex.num_buffers] = size;
|
||||
vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
|
||||
++vertex.num_buffers;
|
||||
}
|
||||
|
||||
void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
|
||||
index.buffer = buffer;
|
||||
index.offset = offset;
|
||||
index.type = type;
|
||||
}
|
||||
|
||||
void Bind(const Device& device, VKScheduler& scheduler) const {
|
||||
// Use this large switch case to avoid dispatching more memory in the record lambda than
|
||||
// what we need. It looks horrible, but it's the best we can do on standard C++.
|
||||
switch (vertex.num_buffers) {
|
||||
case 0:
|
||||
return BindStatic<0>(device, scheduler);
|
||||
case 1:
|
||||
return BindStatic<1>(device, scheduler);
|
||||
case 2:
|
||||
return BindStatic<2>(device, scheduler);
|
||||
case 3:
|
||||
return BindStatic<3>(device, scheduler);
|
||||
case 4:
|
||||
return BindStatic<4>(device, scheduler);
|
||||
case 5:
|
||||
return BindStatic<5>(device, scheduler);
|
||||
case 6:
|
||||
return BindStatic<6>(device, scheduler);
|
||||
case 7:
|
||||
return BindStatic<7>(device, scheduler);
|
||||
case 8:
|
||||
return BindStatic<8>(device, scheduler);
|
||||
case 9:
|
||||
return BindStatic<9>(device, scheduler);
|
||||
case 10:
|
||||
return BindStatic<10>(device, scheduler);
|
||||
case 11:
|
||||
return BindStatic<11>(device, scheduler);
|
||||
case 12:
|
||||
return BindStatic<12>(device, scheduler);
|
||||
case 13:
|
||||
return BindStatic<13>(device, scheduler);
|
||||
case 14:
|
||||
return BindStatic<14>(device, scheduler);
|
||||
case 15:
|
||||
return BindStatic<15>(device, scheduler);
|
||||
case 16:
|
||||
return BindStatic<16>(device, scheduler);
|
||||
case 17:
|
||||
return BindStatic<17>(device, scheduler);
|
||||
case 18:
|
||||
return BindStatic<18>(device, scheduler);
|
||||
case 19:
|
||||
return BindStatic<19>(device, scheduler);
|
||||
case 20:
|
||||
return BindStatic<20>(device, scheduler);
|
||||
case 21:
|
||||
return BindStatic<21>(device, scheduler);
|
||||
case 22:
|
||||
return BindStatic<22>(device, scheduler);
|
||||
case 23:
|
||||
return BindStatic<23>(device, scheduler);
|
||||
case 24:
|
||||
return BindStatic<24>(device, scheduler);
|
||||
case 25:
|
||||
return BindStatic<25>(device, scheduler);
|
||||
case 26:
|
||||
return BindStatic<26>(device, scheduler);
|
||||
case 27:
|
||||
return BindStatic<27>(device, scheduler);
|
||||
case 28:
|
||||
return BindStatic<28>(device, scheduler);
|
||||
case 29:
|
||||
return BindStatic<29>(device, scheduler);
|
||||
case 30:
|
||||
return BindStatic<30>(device, scheduler);
|
||||
case 31:
|
||||
return BindStatic<31>(device, scheduler);
|
||||
case 32:
|
||||
return BindStatic<32>(device, scheduler);
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
private:
|
||||
// Some of these fields are intentionally left uninitialized to avoid initializing them twice.
|
||||
struct {
|
||||
size_t num_buffers = 0;
|
||||
std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
|
||||
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
|
||||
std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
|
||||
std::array<u16, Maxwell::NumVertexArrays> strides;
|
||||
} vertex;
|
||||
|
||||
struct {
|
||||
VkBuffer buffer = nullptr;
|
||||
VkDeviceSize offset;
|
||||
VkIndexType type;
|
||||
} index;
|
||||
|
||||
template <size_t N>
|
||||
void BindStatic(const Device& device, VKScheduler& scheduler) const {
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
if (index.buffer) {
|
||||
BindStatic<N, true, true>(scheduler);
|
||||
} else {
|
||||
BindStatic<N, false, true>(scheduler);
|
||||
}
|
||||
} else {
|
||||
if (index.buffer) {
|
||||
BindStatic<N, true, false>(scheduler);
|
||||
} else {
|
||||
BindStatic<N, false, false>(scheduler);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
|
||||
void BindStatic(VKScheduler& scheduler) const {
|
||||
static_assert(N <= Maxwell::NumVertexArrays);
|
||||
if constexpr (N == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::array<VkBuffer, N> buffers;
|
||||
std::array<VkDeviceSize, N> offsets;
|
||||
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
|
||||
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
|
||||
|
||||
if constexpr (has_extended_dynamic_state) {
|
||||
// With extended dynamic states we can specify the length and stride of a vertex buffer
|
||||
std::array<VkDeviceSize, N> sizes;
|
||||
std::array<u16, N> strides;
|
||||
std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
|
||||
std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
|
||||
|
||||
if constexpr (is_indexed) {
|
||||
scheduler.Record(
|
||||
[buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
|
||||
cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
|
||||
offsets.data(), sizes.data(),
|
||||
ExpandStrides(strides).data());
|
||||
});
|
||||
} else {
|
||||
scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
|
||||
offsets.data(), sizes.data(),
|
||||
ExpandStrides(strides).data());
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (is_indexed) {
|
||||
// Indexed draw
|
||||
scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
|
||||
cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
|
||||
});
|
||||
} else {
|
||||
// Array draw
|
||||
scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
|
||||
if (is_indexed) {
|
||||
cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
|
||||
} else {
|
||||
cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
|
||||
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
|
||||
bool is_indexed) {
|
||||
DrawParams params{
|
||||
.base_instance = regs.vb_base_instance,
|
||||
.num_instances = is_instanced ? num_instances : 1,
|
||||
.base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
|
||||
.num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
|
||||
.is_indexed = is_indexed,
|
||||
};
|
||||
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
|
||||
// 6 triangle vertices per quad, base vertex is part of the index
|
||||
// See BindQuadArrayIndexBuffer for more details
|
||||
params.num_vertices = (params.num_vertices / 4) * 6;
|
||||
params.base_vertex = 0;
|
||||
params.is_indexed = true;
|
||||
}
|
||||
return params;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
|
||||
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
|
||||
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
|
||||
state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
|
||||
state_tracker{state_tracker_}, scheduler{scheduler_},
|
||||
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
|
||||
update_descriptor_queue(device, scheduler),
|
||||
blit_image(device, scheduler, state_tracker, descriptor_pool),
|
||||
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
update_descriptor_queue, descriptor_pool),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
|
||||
descriptor_pool, update_descriptor_queue),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
|
||||
stream_buffer, staging_pool),
|
||||
query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
|
||||
fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||
wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
|
||||
scheduler.SetQueryCache(query_cache);
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
GraphicsPipelineCacheKey key;
|
||||
key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
|
||||
|
||||
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
|
||||
BufferBindings buffer_bindings;
|
||||
const DrawParameters draw_params =
|
||||
SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
const auto shaders = pipeline_cache.GetShaders();
|
||||
key.shaders = GetShaderAddresses(shaders);
|
||||
SetupShaderDescriptors(shaders);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
SetupShaderDescriptors(shaders, is_indexed);
|
||||
|
||||
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
|
||||
key.renderpass = framebuffer->RenderPass();
|
||||
@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
return;
|
||||
}
|
||||
|
||||
buffer_bindings.Bind(device, scheduler);
|
||||
|
||||
BeginTransformFeedback();
|
||||
|
||||
scheduler.RequestRenderpass(framebuffer);
|
||||
scheduler.BindGraphicsPipeline(pipeline->GetHandle());
|
||||
UpdateDynamicStates();
|
||||
|
||||
const auto pipeline_layout = pipeline->GetLayout();
|
||||
const auto descriptor_set = pipeline->CommitDescriptorSet();
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const u32 num_instances = maxwell3d.mme_draw.instance_count;
|
||||
const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
|
||||
const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
|
||||
const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
|
||||
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (descriptor_set) {
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
|
||||
DESCRIPTOR_SET, descriptor_set, {});
|
||||
DESCRIPTOR_SET, descriptor_set, nullptr);
|
||||
}
|
||||
if (draw_params.is_indexed) {
|
||||
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
} else {
|
||||
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
}
|
||||
draw_params.Draw(cmdbuf);
|
||||
});
|
||||
|
||||
EndTransformFeedback();
|
||||
@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
|
||||
return;
|
||||
}
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
|
||||
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||
@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
|
||||
if (use_stencil) {
|
||||
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
}
|
||||
|
||||
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
|
||||
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
|
||||
VkClearAttachment attachment;
|
||||
@ -580,8 +399,7 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
auto& pipeline = pipeline_cache.GetComputePipeline({
|
||||
.shader = code_addr,
|
||||
.shared_memory_size = launch_desc.shared_alloc,
|
||||
.workgroup_size =
|
||||
{
|
||||
.workgroup_size{
|
||||
launch_desc.block_dim_x,
|
||||
launch_desc.block_dim_y,
|
||||
launch_desc.block_dim_z,
|
||||
@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
|
||||
const auto& entries = pipeline.GetEntries();
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : entries.global_buffers) {
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
|
||||
buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
SetupComputeUniformTexels(entries);
|
||||
SetupComputeTextures(entries);
|
||||
SetupComputeStorageTexels(entries);
|
||||
@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.Map(CalculateComputeStreamBufferSize());
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
|
||||
SetupComputeConstBuffers(entries);
|
||||
SetupComputeGlobalBuffers(entries);
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
ImageViewId* image_view_id_ptr = image_view_ids.data();
|
||||
VkSampler* sampler_ptr = sampler_handles.data();
|
||||
PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
|
||||
sampler_ptr);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
|
||||
const VkPipeline pipeline_handle = pipeline.GetHandle();
|
||||
const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
|
||||
const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
|
||||
@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||
query_cache.Query(gpu_addr, type, timestamp);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||
u32 size) {
|
||||
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushAll() {}
|
||||
|
||||
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
|
||||
@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
|
||||
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.MustFlushRegion(addr, size);
|
||||
return buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.MustFlushRegion(addr, size);
|
||||
buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
||||
@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
pipeline_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.OnCPUWrite(addr, size);
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncGuestHost() {
|
||||
buffer_cache.SyncGuestHost();
|
||||
pipeline_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.FlushCachedWrites();
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UnmapMemory(addr, size);
|
||||
}
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
|
||||
draw_counter = 0;
|
||||
update_descriptor_queue.TickFrame();
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
staging_pool.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
if (!framebuffer_addr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
|
||||
if (!image_view) {
|
||||
return false;
|
||||
}
|
||||
|
||||
screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
|
||||
screen_info.width = image_view->size.width;
|
||||
screen_info.height = image_view->size.height;
|
||||
@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
|
||||
draw_counter = 0;
|
||||
}
|
||||
|
||||
RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
|
||||
BufferBindings& buffer_bindings,
|
||||
bool is_indexed,
|
||||
bool is_instanced) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Geometry);
|
||||
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
SetupVertexArrays(buffer_bindings);
|
||||
|
||||
const u32 base_instance = regs.vb_base_instance;
|
||||
const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
|
||||
const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
|
||||
const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
|
||||
|
||||
DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
|
||||
SetupIndexBuffer(buffer_bindings, params, is_indexed);
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupShaderDescriptors(
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
const auto& entries = shader->GetEntries();
|
||||
const ShaderEntries& entries = shader->GetEntries();
|
||||
SetupGraphicsUniformTexels(entries, stage);
|
||||
SetupGraphicsTextures(entries, stage);
|
||||
SetupGraphicsStorageTexels(entries, stage);
|
||||
SetupGraphicsImages(entries, stage);
|
||||
|
||||
buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : entries.global_buffers) {
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
|
||||
buffer.cbuf_offset, buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
}
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
|
||||
ImageViewId* image_view_id_ptr = image_view_ids.data();
|
||||
@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
const auto& entries = shader->GetEntries();
|
||||
SetupGraphicsConstBuffers(entries, stage);
|
||||
SetupGraphicsGlobalBuffers(entries, stage);
|
||||
PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
|
||||
sampler_ptr);
|
||||
buffer_cache.BindHostStageBuffers(stage);
|
||||
PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
|
||||
image_view_id_ptr, sampler_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
|
||||
UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
|
||||
UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
|
||||
UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
|
||||
|
||||
const auto& binding = regs.tfb_bindings[0];
|
||||
UNIMPLEMENTED_IF(binding.buffer_enable == 0);
|
||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||
|
||||
const GPUVAddr gpu_addr = binding.Address();
|
||||
const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||
|
||||
scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
||||
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||
});
|
||||
scheduler.Record(
|
||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::EndTransformFeedback() {
|
||||
@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.Record(
|
||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
if (!vertex_array.IsEnabled()) {
|
||||
continue;
|
||||
}
|
||||
const GPUVAddr start{vertex_array.StartAddress()};
|
||||
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
|
||||
|
||||
ASSERT(end >= start);
|
||||
const size_t size = end - start;
|
||||
if (size == 0) {
|
||||
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
|
||||
continue;
|
||||
}
|
||||
const auto info = buffer_cache.UploadMemory(start, size);
|
||||
buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
|
||||
bool is_indexed) {
|
||||
if (params.num_vertices == 0) {
|
||||
return;
|
||||
}
|
||||
const auto& regs = maxwell3d.regs;
|
||||
switch (regs.draw.topology) {
|
||||
case Maxwell::PrimitiveTopology::Quads: {
|
||||
if (!params.is_indexed) {
|
||||
const auto [buffer, offset] =
|
||||
quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
|
||||
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
|
||||
params.base_vertex = 0;
|
||||
params.num_vertices = params.num_vertices * 6 / 4;
|
||||
params.is_indexed = true;
|
||||
break;
|
||||
}
|
||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||
VkBuffer buffer = info.handle;
|
||||
u64 offset = info.offset;
|
||||
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
||||
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
||||
|
||||
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
|
||||
params.num_vertices = (params.num_vertices / 4) * 6;
|
||||
params.base_vertex = 0;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (!is_indexed) {
|
||||
break;
|
||||
}
|
||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||
VkBuffer buffer = info.handle;
|
||||
u64 offset = info.offset;
|
||||
|
||||
auto format = regs.index_array.format;
|
||||
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
||||
if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
|
||||
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
|
||||
format = Maxwell::IndexFormat::UnsignedShort;
|
||||
}
|
||||
|
||||
buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
|
||||
const auto& shader_stage = maxwell3d.state.shader_stages[stage];
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage]};
|
||||
|
||||
for (const auto& entry : entries.global_buffers) {
|
||||
const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
|
||||
SetupGlobalBuffer(entry, addr);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : entries.samplers) {
|
||||
@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : entries.images) {
|
||||
@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
const Tegra::Engines::ConstBufferInfo info{
|
||||
.address = config.Address(),
|
||||
.size = config.size,
|
||||
.enabled = mask[entry.GetIndex()],
|
||||
};
|
||||
SetupConstBuffer(entry, info);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
|
||||
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
||||
for (const auto& entry : entries.global_buffers) {
|
||||
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
|
||||
SetupGlobalBuffer(entry, addr);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
const TextureHandle handle =
|
||||
@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : entries.samplers) {
|
||||
for (size_t index = 0; index < entry.size; ++index) {
|
||||
@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
const TextureHandle handle =
|
||||
@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : entries.images) {
|
||||
const TextureHandle handle =
|
||||
@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
|
||||
return;
|
||||
}
|
||||
// Align the size to avoid bad std140 interactions
|
||||
const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
||||
ASSERT(size <= MaxConstbufferSize);
|
||||
|
||||
const u64 alignment = device.GetUniformBufferAlignment();
|
||||
const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
|
||||
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
||||
const u64 actual_addr = gpu_memory.Read<u64>(address);
|
||||
const u32 size = gpu_memory.Read<u32>(address + 8);
|
||||
|
||||
if (size == 0) {
|
||||
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
|
||||
// because Vulkan doesn't like empty buffers.
|
||||
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
||||
// default buffer.
|
||||
static constexpr size_t dummy_size = 4;
|
||||
const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
|
||||
update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto info = buffer_cache.UploadMemory(
|
||||
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
||||
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!state_tracker.TouchViewports()) {
|
||||
return;
|
||||
@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
|
||||
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
|
||||
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
|
||||
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
|
||||
GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
|
||||
GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
|
||||
};
|
||||
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
|
||||
}
|
||||
|
||||
@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
|
||||
if (!state_tracker.TouchScissors()) {
|
||||
return;
|
||||
}
|
||||
const std::array scissors = {
|
||||
const std::array scissors{
|
||||
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
|
||||
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
|
||||
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
|
||||
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
|
||||
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
|
||||
GetScissorState(regs, 15)};
|
||||
GetScissorState(regs, 15),
|
||||
};
|
||||
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
|
||||
}
|
||||
|
||||
@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
|
||||
});
|
||||
}
|
||||
|
||||
size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
|
||||
size_t size = CalculateVertexArraysSize();
|
||||
if (is_indexed) {
|
||||
size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
|
||||
}
|
||||
size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
|
||||
return Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
}
|
||||
|
||||
size_t RasterizerVulkan::CalculateVertexArraysSize() const {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
size_t size = 0;
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
// This implementation assumes that all attributes are used in the shader.
|
||||
const GPUVAddr start{regs.vertex_array[index].StartAddress()};
|
||||
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
|
||||
DEBUG_ASSERT(end >= start);
|
||||
|
||||
size += (end - start) * regs.vertex_array[index].enable;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t RasterizerVulkan::CalculateIndexBufferSize() const {
|
||||
return static_cast<size_t>(maxwell3d.regs.index_array.count) *
|
||||
static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
|
||||
}
|
||||
|
||||
size_t RasterizerVulkan::CalculateConstBufferSize(
|
||||
const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
|
||||
if (entry.IsIndirect()) {
|
||||
// Buffer is accessed indirectly, so upload the entire thing
|
||||
return buffer.size;
|
||||
} else {
|
||||
// Buffer is accessed directly, upload just what we use
|
||||
return entry.GetSize();
|
||||
}
|
||||
}
|
||||
|
||||
VkBuffer RasterizerVulkan::DefaultBuffer() {
|
||||
if (default_buffer) {
|
||||
return *default_buffer;
|
||||
}
|
||||
default_buffer = device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = DEFAULT_BUFFER_SIZE,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
|
||||
});
|
||||
return *default_buffer;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -18,14 +18,12 @@
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
@ -49,7 +47,6 @@ namespace Vulkan {
|
||||
struct VKScreenInfo;
|
||||
|
||||
class StateTracker;
|
||||
class BufferBindings;
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
@ -65,6 +62,7 @@ public:
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
@ -107,24 +105,11 @@ private:
|
||||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
struct DrawParameters {
|
||||
void Draw(vk::CommandBuffer cmdbuf) const;
|
||||
|
||||
u32 base_instance = 0;
|
||||
u32 num_instances = 0;
|
||||
u32 base_vertex = 0;
|
||||
u32 num_vertices = 0;
|
||||
bool is_indexed = 0;
|
||||
};
|
||||
|
||||
void FlushWork();
|
||||
|
||||
/// Setups geometry buffers and state.
|
||||
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
|
||||
bool is_indexed, bool is_instanced);
|
||||
|
||||
/// Setup descriptors in the graphics pipeline.
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
|
||||
bool is_indexed);
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
@ -132,16 +117,6 @@ private:
|
||||
|
||||
void EndTransformFeedback();
|
||||
|
||||
void SetupVertexArrays(BufferBindings& buffer_bindings);
|
||||
|
||||
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
|
||||
|
||||
/// Setup constant buffers in the graphics pipeline.
|
||||
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup global buffers in the graphics pipeline.
|
||||
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup uniform texels in the graphics pipeline.
|
||||
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
@ -154,12 +129,6 @@ private:
|
||||
/// Setup images in the graphics pipeline.
|
||||
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup constant buffers in the compute pipeline.
|
||||
void SetupComputeConstBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup global buffers in the compute pipeline.
|
||||
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup texel buffers in the compute pipeline.
|
||||
void SetupComputeUniformTexels(const ShaderEntries& entries);
|
||||
|
||||
@ -172,11 +141,6 @@ private:
|
||||
/// Setup images in the compute pipeline.
|
||||
void SetupComputeImages(const ShaderEntries& entries);
|
||||
|
||||
void SetupConstBuffer(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer);
|
||||
|
||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
@ -193,19 +157,6 @@ private:
|
||||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
|
||||
|
||||
size_t CalculateComputeStreamBufferSize() const;
|
||||
|
||||
size_t CalculateVertexArraysSize() const;
|
||||
|
||||
size_t CalculateIndexBufferSize() const;
|
||||
|
||||
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer) const;
|
||||
|
||||
VkBuffer DefaultBuffer();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
@ -217,24 +168,19 @@ private:
|
||||
StateTracker& state_tracker;
|
||||
VKScheduler& scheduler;
|
||||
|
||||
VKStreamBuffer stream_buffer;
|
||||
StagingBufferPool staging_pool;
|
||||
VKDescriptorPool descriptor_pool;
|
||||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
QuadArrayPass quad_array_pass;
|
||||
QuadIndexedPass quad_indexed_pass;
|
||||
Uint8Pass uint8_pass;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
VKPipelineCache pipeline_cache;
|
||||
VKBufferCache buffer_cache;
|
||||
VKQueryCache query_cache;
|
||||
VKFenceManager fence_manager;
|
||||
|
||||
vk::Buffer default_buffer;
|
||||
MemoryCommit default_buffer_commit;
|
||||
vk::Event wfi_event;
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
|
@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
|
||||
worker_thread.join();
|
||||
}
|
||||
|
||||
u64 VKScheduler::CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
bool VKScheduler::IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
void VKScheduler::Wait(u64 tick) {
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
void VKScheduler::Flush(VkSemaphore semaphore) {
|
||||
SubmitExecution(semaphore);
|
||||
AllocateNewContext();
|
||||
@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
|
||||
vk::Span(barriers.data(), num_images));
|
||||
});
|
||||
state.renderpass = nullptr;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@ -21,7 +22,6 @@ namespace Vulkan {
|
||||
class CommandPool;
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class MasterSemaphore;
|
||||
class StateTracker;
|
||||
class VKQueryCache;
|
||||
|
||||
@ -32,15 +32,6 @@ public:
|
||||
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
|
||||
~VKScheduler();
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept;
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept;
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(VkSemaphore semaphore = nullptr);
|
||||
|
||||
@ -82,6 +73,21 @@ public:
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick) {
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
/// Returns the master timeline semaphore.
|
||||
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
|
||||
return *master_semaphore;
|
||||
|
@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
|
||||
}
|
||||
for (const auto& [base, usage] : ir.GetGlobalMemory()) {
|
||||
entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written);
|
||||
entries.global_buffers.emplace_back(GlobalBufferEntry{
|
||||
.cbuf_index = base.cbuf_index,
|
||||
.cbuf_offset = base.cbuf_offset,
|
||||
.is_written = usage.is_written,
|
||||
});
|
||||
}
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
if (sampler.is_buffer) {
|
||||
@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
entries.attributes.insert(GetGenericAttributeLocation(attribute));
|
||||
}
|
||||
}
|
||||
for (const auto& buffer : entries.const_buffers) {
|
||||
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
|
||||
}
|
||||
entries.clip_distances = ir.GetClipDistances();
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.uses_warps = ir.UsesWarps();
|
||||
|
@ -39,24 +39,7 @@ private:
|
||||
u32 index{};
|
||||
};
|
||||
|
||||
class GlobalBufferEntry {
|
||||
public:
|
||||
constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
|
||||
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
|
||||
|
||||
constexpr u32 GetCbufIndex() const {
|
||||
return cbuf_index;
|
||||
}
|
||||
|
||||
constexpr u32 GetCbufOffset() const {
|
||||
return cbuf_offset;
|
||||
}
|
||||
|
||||
constexpr bool IsWritten() const {
|
||||
return is_written;
|
||||
}
|
||||
|
||||
private:
|
||||
struct GlobalBufferEntry {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
bool is_written{};
|
||||
@ -78,6 +61,7 @@ struct ShaderEntries {
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
bool uses_warps{};
|
||||
};
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
@ -17,18 +18,119 @@
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
// Maximum potential alignment of a Vulkan buffer
|
||||
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
|
||||
// Maximum size to put elements in the stream buffer
|
||||
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
|
||||
// Stream buffer size in bytes
|
||||
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
|
||||
|
||||
constexpr VkMemoryPropertyFlags HOST_FLAGS =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
|
||||
|
||||
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
|
||||
return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
|
||||
}
|
||||
|
||||
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
VkMemoryPropertyFlags flags) noexcept {
|
||||
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
|
||||
if (((type_mask >> type_index) & 1) == 0) {
|
||||
// Memory type is incompatible
|
||||
continue;
|
||||
}
|
||||
const VkMemoryType& memory_type = props.memoryTypes[type_index];
|
||||
if ((memory_type.propertyFlags & flags) != flags) {
|
||||
// Memory type doesn't have the flags we want
|
||||
continue;
|
||||
}
|
||||
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
|
||||
// Memory heap is not suitable for streaming
|
||||
continue;
|
||||
}
|
||||
// Success!
|
||||
return type_index;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
|
||||
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
|
||||
std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
// Otherwise try without the DEVICE_LOCAL_BIT
|
||||
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
// This should never happen, and in case it does, signal it as an out of memory situation
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
size_t Region(size_t iterator) noexcept {
|
||||
return iterator / REGION_SIZE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
|
||||
const vk::Device& dev = device.GetLogical();
|
||||
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = STREAM_BUFFER_SIZE,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_buffer.SetObjectNameEXT("Stream Buffer");
|
||||
}
|
||||
VkMemoryDedicatedRequirements dedicated_reqs{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
|
||||
.pNext = nullptr,
|
||||
.prefersDedicatedAllocation = VK_FALSE,
|
||||
.requiresDedicatedAllocation = VK_FALSE,
|
||||
};
|
||||
const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
|
||||
const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
|
||||
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
|
||||
const VkMemoryDedicatedAllocateInfo dedicated_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.image = nullptr,
|
||||
.buffer = *stream_buffer,
|
||||
};
|
||||
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
|
||||
stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = make_dedicated ? &dedicated_info : nullptr,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
|
||||
}
|
||||
stream_buffer.BindMemory(*stream_memory, 0);
|
||||
stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
StagingBufferPool::~StagingBufferPool() = default;
|
||||
|
||||
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
|
||||
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
|
||||
return *ref;
|
||||
if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
|
||||
return GetStreamBuffer(size);
|
||||
}
|
||||
return CreateStagingBuffer(size, usage);
|
||||
return GetStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
void StagingBufferPool::TickFrame() {
|
||||
@ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() {
|
||||
ReleaseCache(MemoryUsage::Download);
|
||||
}
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
|
||||
if (AreRegionsActive(Region(free_iterator) + 1,
|
||||
std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
const u64 current_tick = scheduler.CurrentTick();
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
|
||||
current_tick);
|
||||
used_iterator = iterator;
|
||||
free_iterator = std::max(free_iterator, iterator + size);
|
||||
|
||||
if (iterator + size > STREAM_BUFFER_SIZE) {
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
|
||||
current_tick);
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
if (AreRegionsActive(0, Region(size) + 1)) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return StagingBufferRef{
|
||||
.buffer = *stream_buffer,
|
||||
.offset = static_cast<VkDeviceSize>(offset),
|
||||
.mapped_span = std::span<u8>(stream_pointer + offset, size),
|
||||
};
|
||||
}
|
||||
|
||||
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
|
||||
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
|
||||
[this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); });
|
||||
};
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
|
||||
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
|
||||
return *ref;
|
||||
}
|
||||
return CreateStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
|
||||
MemoryUsage usage) {
|
||||
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
|
||||
|
@ -19,11 +19,14 @@ class VKScheduler;
|
||||
|
||||
struct StagingBufferRef {
|
||||
VkBuffer buffer;
|
||||
VkDeviceSize offset;
|
||||
std::span<u8> mapped_span;
|
||||
};
|
||||
|
||||
class StagingBufferPool {
|
||||
public:
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
|
||||
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
|
||||
VKScheduler& scheduler);
|
||||
~StagingBufferPool();
|
||||
@ -33,6 +36,11 @@ public:
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
struct StreamBufferCommit {
|
||||
size_t upper_bound;
|
||||
u64 tick;
|
||||
};
|
||||
|
||||
struct StagingBuffer {
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
@ -42,6 +50,7 @@ private:
|
||||
StagingBufferRef Ref() const noexcept {
|
||||
return {
|
||||
.buffer = *buffer,
|
||||
.offset = 0,
|
||||
.mapped_span = mapped_span,
|
||||
};
|
||||
}
|
||||
@ -56,6 +65,12 @@ private:
|
||||
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
|
||||
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
|
||||
|
||||
StagingBufferRef GetStreamBuffer(size_t size);
|
||||
|
||||
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
|
||||
|
||||
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
|
||||
@ -70,6 +85,15 @@ private:
|
||||
MemoryAllocator& memory_allocator;
|
||||
VKScheduler& scheduler;
|
||||
|
||||
vk::Buffer stream_buffer;
|
||||
vk::DeviceMemory stream_memory;
|
||||
u8* stream_pointer = nullptr;
|
||||
|
||||
size_t iterator = 0;
|
||||
size_t used_iterator = 0;
|
||||
size_t free_iterator = 0;
|
||||
std::array<u64, NUM_SYNCS> sync_ticks{};
|
||||
|
||||
StagingBuffersCache device_local_cache;
|
||||
StagingBuffersCache upload_cache;
|
||||
StagingBuffersCache download_cache;
|
||||
|
@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
|
||||
using Flags = Maxwell3D::DirtyState::Flags;
|
||||
|
||||
Flags MakeInvalidationFlags() {
|
||||
static constexpr std::array INVALIDATION_FLAGS{
|
||||
static constexpr int INVALIDATION_FLAGS[]{
|
||||
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
|
||||
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
|
||||
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
|
||||
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
|
||||
};
|
||||
Flags flags{};
|
||||
for (const int flag : INVALIDATION_FLAGS) {
|
||||
flags[flag] = true;
|
||||
}
|
||||
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu)
|
||||
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
|
||||
auto& tables = gpu.Maxwell3D().dirty.tables;
|
||||
SetupDirtyRenderTargets(tables);
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyDepthBias(tables);
|
||||
|
@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {}
|
||||
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
|
||||
u32 width, u32 height, bool srgb)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {
|
||||
Create(width, height, srgb);
|
||||
}
|
||||
|
||||
VKSwapchain::~VKSwapchain() = default;
|
||||
|
||||
|
@ -20,7 +20,8 @@ class VKScheduler;
|
||||
|
||||
class VKSwapchain {
|
||||
public:
|
||||
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
|
||||
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
|
||||
u32 width, u32 height, bool srgb);
|
||||
~VKSwapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
|
@ -426,21 +426,23 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
|
||||
VkImageAspectFlags aspect_mask, bool is_initialized,
|
||||
std::span<const VkBufferImageCopy> copies) {
|
||||
static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = ACCESS_FLAGS,
|
||||
.srcAccessMask = WRITE_ACCESS_FLAGS,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
@ -452,14 +454,13 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = ACCESS_FLAGS,
|
||||
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
|
||||
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
|
||||
return {
|
||||
.handle = staging_ref.buffer,
|
||||
.span = staging_ref.mapped_span,
|
||||
};
|
||||
StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
|
||||
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
return {
|
||||
.handle = staging_ref.buffer,
|
||||
.span = staging_ref.mapped_span,
|
||||
};
|
||||
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
VkImageMemoryBarrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
||||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const BufferImageCopy> copies) {
|
||||
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||
// TODO: Move this to another API
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
|
||||
const VkBuffer src_buffer = map.handle;
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
||||
const VkBuffer src_buffer = map.buffer;
|
||||
const VkImage vk_image = *image;
|
||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||
const bool is_initialized = std::exchange(initialized, true);
|
||||
@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
});
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
// TODO: Move this to another API
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
|
||||
const VkBuffer src_buffer = map.handle;
|
||||
std::vector vk_copies = TransformBufferCopies(copies, map.offset);
|
||||
const VkBuffer src_buffer = map.buffer;
|
||||
const VkBuffer dst_buffer = *buffer;
|
||||
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
});
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const BufferImageCopy> copies) {
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
|
||||
scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
|
||||
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
||||
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
|
||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier image_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkMemoryBarrier memory_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
||||
.pAttachments = attachments.data(),
|
||||
.width = key.size.width,
|
||||
.height = key.size.height,
|
||||
.layers = static_cast<u32>(num_layers),
|
||||
.layers = static_cast<u32>(std::max(num_layers, 1)),
|
||||
});
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <compare>
|
||||
#include <span>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ImageBufferMap {
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
VkBuffer handle;
|
||||
std::span<u8> span;
|
||||
};
|
||||
|
||||
struct TextureCacheRuntime {
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
@ -76,9 +64,9 @@ struct TextureCacheRuntime {
|
||||
|
||||
void Finish();
|
||||
|
||||
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
@ -94,7 +82,7 @@ struct TextureCacheRuntime {
|
||||
return false;
|
||||
}
|
||||
|
||||
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
|
||||
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
||||
std::span<const VideoCommon::SwizzleParameters>) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@ -112,13 +100,12 @@ public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UploadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void DownloadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] VkImage Handle() const noexcept {
|
||||
|
@ -9,16 +9,7 @@
|
||||
#include <shared_mutex>
|
||||
#include <thread>
|
||||
|
||||
// This header includes both Vulkan and OpenGL headers, this has to be fixed
|
||||
// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
|
||||
// Forcefully include glad early and undefine macros
|
||||
#include <glad/glad.h>
|
||||
#ifdef CreateEvent
|
||||
#undef CreateEvent
|
||||
#endif
|
||||
#ifdef CreateSemaphore
|
||||
#undef CreateSemaphore
|
||||
#endif
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
|
@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
case SystemVariable::InvocationId:
|
||||
return Operation(OperationCode::InvocationId);
|
||||
case SystemVariable::Ydirection:
|
||||
uses_y_negate = true;
|
||||
return Operation(OperationCode::YNegate);
|
||||
case SystemVariable::InvocationInfo:
|
||||
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
|
||||
|
@ -139,6 +139,10 @@ public:
|
||||
return uses_legacy_varyings;
|
||||
}
|
||||
|
||||
bool UsesYNegate() const {
|
||||
return uses_y_negate;
|
||||
}
|
||||
|
||||
bool UsesWarps() const {
|
||||
return uses_warps;
|
||||
}
|
||||
@ -465,6 +469,7 @@ private:
|
||||
bool uses_instance_id{};
|
||||
bool uses_vertex_id{};
|
||||
bool uses_legacy_varyings{};
|
||||
bool uses_y_negate{};
|
||||
bool uses_warps{};
|
||||
bool uses_indexed_samplers{};
|
||||
|
||||
|
@ -103,9 +103,6 @@ public:
|
||||
/// Notify the cache that a new frame has been queued
|
||||
void TickFrame();
|
||||
|
||||
/// Return an unique mutually exclusive lock for the cache
|
||||
[[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
|
||||
|
||||
/// Return a constant reference to the given image view id
|
||||
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
||||
|
||||
@ -179,6 +176,8 @@ public:
|
||||
/// Return true when a CPU region is modified from the GPU
|
||||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
||||
|
||||
std::mutex mutex;
|
||||
|
||||
private:
|
||||
/// Iterate over all page indices in a range
|
||||
template <typename Func>
|
||||
@ -212,8 +211,8 @@ private:
|
||||
void RefreshContents(Image& image);
|
||||
|
||||
/// Upload data from guest to an image
|
||||
template <typename MapBuffer>
|
||||
void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
|
||||
template <typename StagingBuffer>
|
||||
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
|
||||
|
||||
/// Find or create an image view from a guest descriptor
|
||||
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
|
||||
@ -325,8 +324,6 @@ private:
|
||||
|
||||
RenderTargets render_targets;
|
||||
|
||||
std::mutex mutex;
|
||||
|
||||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
||||
@ -385,11 +382,6 @@ void TextureCache<P>::TickFrame() {
|
||||
++frame_tick;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
|
||||
return std::unique_lock{mutex};
|
||||
}
|
||||
|
||||
template <class P>
|
||||
const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
|
||||
return slot_image_views[id];
|
||||
@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||
});
|
||||
for (const ImageId image_id : images) {
|
||||
Image& image = slot_images[image_id];
|
||||
auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
|
||||
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, 0, copies);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
}
|
||||
|
||||
@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() {
|
||||
for (const ImageId image_id : download_ids) {
|
||||
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
|
||||
}
|
||||
auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
|
||||
size_t buffer_offset = 0;
|
||||
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||
const size_t original_offset = download_map.offset;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
Image& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(download_map, buffer_offset, copies);
|
||||
buffer_offset += image.unswizzled_size_bytes;
|
||||
image.DownloadMemory(download_map, copies);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
}
|
||||
// Wait for downloads to finish
|
||||
runtime.Finish();
|
||||
|
||||
buffer_offset = 0;
|
||||
const std::span<u8> download_span = download_map.Span();
|
||||
download_map.offset = original_offset;
|
||||
std::span<u8> download_span = download_map.mapped_span;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
const ImageBase& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
|
||||
buffer_offset += image.unswizzled_size_bytes;
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
download_span = download_span.subspan(image.unswizzled_size_bytes);
|
||||
}
|
||||
committed_downloads.pop();
|
||||
}
|
||||
@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||
return;
|
||||
}
|
||||
auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
|
||||
UploadImageContents(image, map, 0);
|
||||
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
||||
UploadImageContents(image, staging);
|
||||
runtime.InsertUploadMemoryBarrier();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
template <typename MapBuffer>
|
||||
void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
|
||||
const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
|
||||
template <typename StagingBuffer>
|
||||
void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
|
||||
const std::span<u8> mapped_span = staging.mapped_span;
|
||||
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||
|
||||
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
const auto uploads = FullUploadSwizzles(image.info);
|
||||
runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
|
||||
runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
} else if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
|
||||
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
|
||||
image.UploadMemory(map, buffer_offset, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
} else if (image.info.type == ImageType::Buffer) {
|
||||
const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
|
||||
image.UploadMemory(map, buffer_offset, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
} else {
|
||||
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
image.UploadMemory(map, buffer_offset, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,19 +38,18 @@ namespace VideoCore {
|
||||
|
||||
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
|
||||
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
|
||||
std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
|
||||
system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
|
||||
|
||||
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
|
||||
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
|
||||
auto context = emu_window.CreateSharedContext();
|
||||
const auto scope = context->Acquire();
|
||||
|
||||
auto scope = context->Acquire();
|
||||
try {
|
||||
auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
|
||||
if (!renderer->Init()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
gpu->BindRenderer(std::move(renderer));
|
||||
return gpu;
|
||||
} catch (const std::runtime_error& exception) {
|
||||
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
|
||||
|
@ -18,27 +18,22 @@
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8_UINT{
|
||||
constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VkFormat{},
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
constexpr std::array Depth16UnormS8_UINT{
|
||||
constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VkFormat{},
|
||||
VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
constexpr std::array REQUIRED_EXTENSIONS{
|
||||
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
|
||||
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
|
||||
VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
|
||||
@ -52,6 +47,12 @@ constexpr std::array REQUIRED_EXTENSIONS{
|
||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
|
||||
#ifdef _WIN32
|
||||
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
@ -63,9 +64,9 @@ void SetNext(void**& next, T& data) {
|
||||
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
|
||||
switch (format) {
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
return Alternatives::Depth24UnormS8_UINT.data();
|
||||
return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
|
||||
case VK_FORMAT_D16_UNORM_S8_UINT:
|
||||
return Alternatives::Depth16UnormS8_UINT.data();
|
||||
return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@ -195,23 +196,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const vk::InstanceDispatch& dld_)
|
||||
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
|
||||
format_properties{GetFormatProperties(physical)} {
|
||||
CheckSuitability();
|
||||
CheckSuitability(surface != nullptr);
|
||||
SetupFamilies(surface);
|
||||
SetupFeatures();
|
||||
|
||||
const auto queue_cis = GetDeviceQueueCreateInfos();
|
||||
const std::vector extensions = LoadExtensions();
|
||||
const std::vector extensions = LoadExtensions(surface != nullptr);
|
||||
|
||||
VkPhysicalDeviceFeatures2 features2{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
|
||||
.pNext = nullptr,
|
||||
.features{},
|
||||
};
|
||||
const void* first_next = &features2;
|
||||
void** next = &features2.pNext;
|
||||
|
||||
features2.features = {
|
||||
.robustBufferAccess = false,
|
||||
.features{
|
||||
.robustBufferAccess = true,
|
||||
.fullDrawIndexUint32 = false,
|
||||
.imageCubeArray = true,
|
||||
.independentBlend = true,
|
||||
@ -266,7 +262,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
.sparseResidencyAliased = false,
|
||||
.variableMultisampleRate = false,
|
||||
.inheritedQueries = false,
|
||||
},
|
||||
};
|
||||
const void* first_next = &features2;
|
||||
void** next = &features2.pNext;
|
||||
|
||||
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
|
||||
.pNext = nullptr,
|
||||
@ -384,7 +384,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
robustness2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
|
||||
.pNext = nullptr,
|
||||
.robustBufferAccess2 = false,
|
||||
.robustBufferAccess2 = true,
|
||||
.robustImageAccess2 = true,
|
||||
.nullDescriptor = true,
|
||||
};
|
||||
@ -535,16 +535,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
|
||||
return (supported_usage & wanted_usage) == wanted_usage;
|
||||
}
|
||||
|
||||
void Device::CheckSuitability() const {
|
||||
void Device::CheckSuitability(bool requires_swapchain) const {
|
||||
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
|
||||
bool has_swapchain = false;
|
||||
for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
|
||||
for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
|
||||
const std::string_view name{property.extensionName};
|
||||
for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
|
||||
if (available_extensions[i]) {
|
||||
continue;
|
||||
}
|
||||
const std::string_view name{property.extensionName};
|
||||
available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
|
||||
}
|
||||
has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
|
||||
}
|
||||
for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
|
||||
if (available_extensions[i]) {
|
||||
@ -553,6 +555,11 @@ void Device::CheckSuitability() const {
|
||||
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
|
||||
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
|
||||
}
|
||||
if (requires_swapchain && !has_swapchain) {
|
||||
LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
|
||||
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
|
||||
}
|
||||
|
||||
struct LimitTuple {
|
||||
u32 minimum;
|
||||
u32 value;
|
||||
@ -574,7 +581,9 @@ void Device::CheckSuitability() const {
|
||||
}
|
||||
const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
|
||||
const std::array feature_report{
|
||||
std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
|
||||
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
|
||||
std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
|
||||
std::make_pair(features.imageCubeArray, "imageCubeArray"),
|
||||
std::make_pair(features.independentBlend, "independentBlend"),
|
||||
std::make_pair(features.depthClamp, "depthClamp"),
|
||||
@ -599,10 +608,13 @@ void Device::CheckSuitability() const {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> Device::LoadExtensions() {
|
||||
std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
|
||||
extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
|
||||
extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
|
||||
if (requires_surface) {
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
bool has_khr_shader_float16_int8{};
|
||||
bool has_ext_subgroup_size_control{};
|
||||
@ -743,7 +755,8 @@ std::vector<const char*> Device::LoadExtensions() {
|
||||
robustness2.pNext = nullptr;
|
||||
features.pNext = &robustness2;
|
||||
physical.GetFeatures2KHR(features);
|
||||
if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
|
||||
if (robustness2.nullDescriptor && robustness2.robustBufferAccess2 &&
|
||||
robustness2.robustImageAccess2) {
|
||||
extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
ext_robustness2 = true;
|
||||
}
|
||||
|
@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer };
|
||||
const u32 GuestWarpSize = 32;
|
||||
|
||||
/// Handles data specific to a physical device.
|
||||
class Device final {
|
||||
class Device {
|
||||
public:
|
||||
explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
|
||||
const vk::InstanceDispatch& dld);
|
||||
@ -227,10 +227,10 @@ public:
|
||||
|
||||
private:
|
||||
/// Checks if the physical device is suitable.
|
||||
void CheckSuitability() const;
|
||||
void CheckSuitability(bool requires_swapchain) const;
|
||||
|
||||
/// Loads extensions into a vector and stores available ones in this object.
|
||||
std::vector<const char*> LoadExtensions();
|
||||
std::vector<const char*> LoadExtensions(bool requires_surface);
|
||||
|
||||
/// Sets up queue families.
|
||||
void SetupFamilies(VkSurfaceKHR surface);
|
||||
|
@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <future>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
|
||||
VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
|
||||
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
|
||||
}
|
||||
vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld);
|
||||
vk::Instance instance =
|
||||
std::async([&] {
|
||||
return vk::Instance::Create(required_version, layers, extensions, dld);
|
||||
}).get();
|
||||
if (!vk::Load(*instance, dld)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
@ -55,10 +57,24 @@ struct Range {
|
||||
|
||||
class MemoryAllocation {
|
||||
public:
|
||||
explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
|
||||
VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
|
||||
: device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
|
||||
property_flags{properties}, shifted_memory_type{1U << type} {}
|
||||
explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
|
||||
u64 allocation_size_, u32 type)
|
||||
: memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
|
||||
shifted_memory_type{1U << type} {}
|
||||
|
||||
#if defined(_WIN32) || defined(__linux__)
|
||||
~MemoryAllocation() {
|
||||
if (owning_opengl_handle != 0) {
|
||||
glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
|
||||
MemoryAllocation(const MemoryAllocation&) = delete;
|
||||
|
||||
MemoryAllocation& operator=(MemoryAllocation&&) = delete;
|
||||
MemoryAllocation(MemoryAllocation&&) = delete;
|
||||
|
||||
[[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
|
||||
const std::optional<u64> alloc = FindFreeRegion(size, alignment);
|
||||
@ -88,6 +104,31 @@ public:
|
||||
return memory_mapped_span;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
[[nodiscard]] u32 ExportOpenGLHandle() {
|
||||
if (!owning_opengl_handle) {
|
||||
glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
|
||||
glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
|
||||
GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
|
||||
memory.GetMemoryWin32HandleKHR());
|
||||
}
|
||||
return owning_opengl_handle;
|
||||
}
|
||||
#elif __linux__
|
||||
[[nodiscard]] u32 ExportOpenGLHandle() {
|
||||
if (!owning_opengl_handle) {
|
||||
glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
|
||||
glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
|
||||
memory.GetMemoryFdKHR());
|
||||
}
|
||||
return owning_opengl_handle;
|
||||
}
|
||||
#else
|
||||
[[nodiscard]] u32 ExportOpenGLHandle() {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Returns whether this allocation is compatible with the arguments.
|
||||
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
|
||||
return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
|
||||
@ -118,13 +159,15 @@ private:
|
||||
return candidate;
|
||||
}
|
||||
|
||||
const Device& device; ///< Vulkan device.
|
||||
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
||||
const u64 allocation_size; ///< Size of this allocation.
|
||||
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
|
||||
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
|
||||
std::vector<Range> commits; ///< All commit ranges done from this allocation.
|
||||
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
|
||||
#if defined(_WIN32) || defined(__linux__)
|
||||
u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
|
||||
#endif
|
||||
};
|
||||
|
||||
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
|
||||
@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() {
|
||||
return span;
|
||||
}
|
||||
|
||||
u32 MemoryCommit::ExportOpenGLHandle() const {
|
||||
return allocation->ExportOpenGLHandle();
|
||||
}
|
||||
|
||||
void MemoryCommit::Release() {
|
||||
if (allocation) {
|
||||
allocation->Free(begin);
|
||||
}
|
||||
}
|
||||
|
||||
MemoryAllocator::MemoryAllocator(const Device& device_)
|
||||
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
|
||||
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
|
||||
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
|
||||
export_allocations{export_allocations_} {}
|
||||
|
||||
MemoryAllocator::~MemoryAllocator() = default;
|
||||
|
||||
@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
|
||||
|
||||
void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
|
||||
const u32 type = FindType(flags, type_mask).value();
|
||||
const VkExportMemoryAllocateInfo export_allocate_info{
|
||||
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
#ifdef _WIN32
|
||||
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
|
||||
#elif __linux__
|
||||
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
#else
|
||||
.handleTypes = 0,
|
||||
#endif
|
||||
};
|
||||
vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.pNext = export_allocations ? &export_allocate_info : nullptr,
|
||||
.allocationSize = size,
|
||||
.memoryTypeIndex = type,
|
||||
});
|
||||
allocations.push_back(
|
||||
std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
|
||||
allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
|
||||
}
|
||||
|
||||
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
|
||||
|
@ -43,6 +43,9 @@ public:
|
||||
/// It will map the backing allocation if it hasn't been mapped before.
|
||||
std::span<u8> Map();
|
||||
|
||||
/// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
|
||||
u32 ExportOpenGLHandle() const;
|
||||
|
||||
/// Returns the Vulkan memory handler.
|
||||
VkDeviceMemory Memory() const {
|
||||
return memory;
|
||||
@ -67,7 +70,15 @@ private:
|
||||
/// Allocates and releases memory allocations on demand.
|
||||
class MemoryAllocator {
|
||||
public:
|
||||
explicit MemoryAllocator(const Device& device_);
|
||||
/**
|
||||
* Construct memory allocator
|
||||
*
|
||||
* @param device_ Device to allocate from
|
||||
* @param export_allocations_ True when allocations have to be exported
|
||||
*
|
||||
* @throw vk::Exception on failure
|
||||
*/
|
||||
explicit MemoryAllocator(const Device& device_, bool export_allocations_);
|
||||
~MemoryAllocator();
|
||||
|
||||
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
|
||||
@ -108,6 +119,7 @@ private:
|
||||
|
||||
const Device& device; ///< Device handle.
|
||||
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
|
||||
const bool export_allocations; ///< True when memory allocations have to be exported.
|
||||
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
|
||||
};
|
||||
|
||||
|
@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
X(vkFreeCommandBuffers);
|
||||
X(vkFreeDescriptorSets);
|
||||
X(vkFreeMemory);
|
||||
X(vkGetBufferMemoryRequirements);
|
||||
X(vkGetBufferMemoryRequirements2);
|
||||
X(vkGetDeviceQueue);
|
||||
X(vkGetEventStatus);
|
||||
X(vkGetFenceStatus);
|
||||
X(vkGetImageMemoryRequirements);
|
||||
X(vkGetMemoryFdKHR);
|
||||
#ifdef _WIN32
|
||||
X(vkGetMemoryWin32HandleKHR);
|
||||
#endif
|
||||
X(vkGetQueryPoolResults);
|
||||
X(vkGetSemaphoreCounterValueKHR);
|
||||
X(vkMapMemory);
|
||||
@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const {
|
||||
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
|
||||
}
|
||||
|
||||
int DeviceMemory::GetMemoryFdKHR() const {
|
||||
const VkMemoryGetFdInfoKHR get_fd_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.memory = handle,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
|
||||
};
|
||||
int fd;
|
||||
Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
|
||||
return fd;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
|
||||
const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.memory = handle,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
|
||||
};
|
||||
HANDLE win32_handle;
|
||||
Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
|
||||
return win32_handle;
|
||||
}
|
||||
#endif
|
||||
|
||||
void DeviceMemory::SetObjectNameEXT(const char* name) const {
|
||||
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
|
||||
}
|
||||
@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
|
||||
return DeviceMemory(memory, handle, *dld);
|
||||
}
|
||||
|
||||
VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
|
||||
VkMemoryRequirements requirements;
|
||||
dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
|
||||
return requirements;
|
||||
VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
|
||||
void* pnext) const noexcept {
|
||||
const VkBufferMemoryRequirementsInfo2 info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
|
||||
.pNext = nullptr,
|
||||
.buffer = buffer,
|
||||
};
|
||||
VkMemoryRequirements2 requirements{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
|
||||
.pNext = pnext,
|
||||
.memoryRequirements{},
|
||||
};
|
||||
dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
|
||||
return requirements.memoryRequirements;
|
||||
}
|
||||
|
||||
VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
|
||||
|
@ -15,8 +15,19 @@
|
||||
#include <vector>
|
||||
|
||||
#define VK_NO_PROTOTYPES
|
||||
#ifdef _WIN32
|
||||
#define VK_USE_PLATFORM_WIN32_KHR
|
||||
#endif
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
// Sanitize macros
|
||||
#ifdef CreateEvent
|
||||
#undef CreateEvent
|
||||
#endif
|
||||
#ifdef CreateSemaphore
|
||||
#undef CreateSemaphore
|
||||
#endif
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
@ -174,7 +185,7 @@ struct InstanceDispatch {
|
||||
};
|
||||
|
||||
/// Table holding Vulkan device function pointers.
|
||||
struct DeviceDispatch : public InstanceDispatch {
|
||||
struct DeviceDispatch : InstanceDispatch {
|
||||
PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
|
||||
PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
|
||||
PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
|
||||
@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||
PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
|
||||
PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
|
||||
PFN_vkFreeMemory vkFreeMemory{};
|
||||
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{};
|
||||
PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
|
||||
PFN_vkGetDeviceQueue vkGetDeviceQueue{};
|
||||
PFN_vkGetEventStatus vkGetEventStatus{};
|
||||
PFN_vkGetFenceStatus vkGetFenceStatus{};
|
||||
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
|
||||
PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
|
||||
#ifdef _WIN32
|
||||
PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
|
||||
#endif
|
||||
PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
|
||||
PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
|
||||
PFN_vkMapMemory vkMapMemory{};
|
||||
@ -344,6 +359,9 @@ public:
|
||||
/// Construct an empty handle.
|
||||
Handle() = default;
|
||||
|
||||
/// Construct an empty handle.
|
||||
Handle(std::nullptr_t) {}
|
||||
|
||||
/// Copying Vulkan objects is not supported and will never be.
|
||||
Handle(const Handle&) = delete;
|
||||
Handle& operator=(const Handle&) = delete;
|
||||
@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
|
||||
using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
|
||||
|
||||
public:
|
||||
int GetMemoryFdKHR() const;
|
||||
|
||||
#ifdef _WIN32
|
||||
HANDLE GetMemoryWin32HandleKHR() const;
|
||||
#endif
|
||||
|
||||
/// Set object name.
|
||||
void SetObjectNameEXT(const char* name) const;
|
||||
|
||||
@ -847,7 +871,8 @@ public:
|
||||
|
||||
DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
|
||||
|
||||
VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
|
||||
VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
|
||||
void* pnext = nullptr) const noexcept;
|
||||
|
||||
VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
|
||||
|
||||
@ -1031,6 +1056,12 @@ public:
|
||||
PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {});
|
||||
}
|
||||
|
||||
void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
|
||||
VkDependencyFlags dependency_flags,
|
||||
const VkMemoryBarrier& memory_barrier) const noexcept {
|
||||
PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
|
||||
}
|
||||
|
||||
void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
|
||||
VkDependencyFlags dependency_flags,
|
||||
const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
|
||||
|
@ -64,7 +64,7 @@ void EmuThread::run() {
|
||||
|
||||
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
|
||||
|
||||
system.Renderer().Rasterizer().LoadDiskResources(
|
||||
system.Renderer().ReadRasterizer()->LoadDiskResources(
|
||||
system.CurrentProcess()->GetTitleID(), stop_run,
|
||||
[this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
|
||||
emit LoadProgress(stage, value, total);
|
||||
|
@ -782,14 +782,14 @@ void Config::ReadRendererValues() {
|
||||
ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
|
||||
ReadSettingGlobal(Settings::values.use_disk_shader_cache,
|
||||
QStringLiteral("use_disk_shader_cache"), true);
|
||||
ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0);
|
||||
ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
|
||||
ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
|
||||
QStringLiteral("use_asynchronous_gpu_emulation"), true);
|
||||
ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
|
||||
true);
|
||||
ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
|
||||
ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
|
||||
true);
|
||||
false);
|
||||
ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
|
||||
QStringLiteral("use_asynchronous_shaders"), false);
|
||||
ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
|
||||
@ -1351,14 +1351,14 @@ void Config::SaveRendererValues() {
|
||||
Settings::values.use_disk_shader_cache, true);
|
||||
WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
|
||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
|
||||
Settings::values.gpu_accuracy.UsingGlobal(), 0);
|
||||
Settings::values.gpu_accuracy.UsingGlobal(), 1);
|
||||
WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
|
||||
Settings::values.use_asynchronous_gpu_emulation, true);
|
||||
WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
|
||||
true);
|
||||
WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
||||
WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
|
||||
Settings::values.use_assembly_shaders, true);
|
||||
Settings::values.use_assembly_shaders, false);
|
||||
WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
|
||||
Settings::values.use_asynchronous_shaders, false);
|
||||
WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
|
||||
|
@ -2,6 +2,9 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Include this early to include Vulkan headers how we want to
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
#include <QColorDialog>
|
||||
#include <QComboBox>
|
||||
#include <QVulkanInstance>
|
||||
@ -11,7 +14,8 @@
|
||||
#include "core/core.h"
|
||||
#include "core/settings.h"
|
||||
#include "ui_configure_graphics.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "yuzu/configuration/configuration_shared.h"
|
||||
#include "yuzu/configuration/configure_graphics.h"
|
||||
|
||||
@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
|
||||
ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
|
||||
}
|
||||
|
||||
void ConfigureGraphics::RetrieveVulkanDevices() {
|
||||
void ConfigureGraphics::RetrieveVulkanDevices() try {
|
||||
using namespace Vulkan;
|
||||
|
||||
vk::InstanceDispatch dld;
|
||||
const Common::DynamicLibrary library = OpenLibrary();
|
||||
const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
|
||||
const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
|
||||
|
||||
vulkan_devices.clear();
|
||||
for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
|
||||
vulkan_devices.reserve(physical_devices.size());
|
||||
for (const VkPhysicalDevice device : physical_devices) {
|
||||
const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
|
||||
vulkan_devices.push_back(QString::fromStdString(name));
|
||||
}
|
||||
|
||||
} catch (const Vulkan::vk::Exception& exception) {
|
||||
LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
|
||||
}
|
||||
|
||||
Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
|
||||
|
@ -388,7 +388,7 @@ void Config::ReadValues() {
|
||||
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
|
||||
Settings::values.use_disk_shader_cache.SetValue(
|
||||
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
|
||||
const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
|
||||
const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
|
||||
Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
|
||||
Settings::values.use_asynchronous_gpu_emulation.SetValue(
|
||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
|
||||
|
@ -215,7 +215,7 @@ int main(int argc, char** argv) {
|
||||
// Core is loaded, start the GPU (makes the GPU contexts current to this thread)
|
||||
system.GPU().Start();
|
||||
|
||||
system.Renderer().Rasterizer().LoadDiskResources(
|
||||
system.Renderer().ReadRasterizer()->LoadDiskResources(
|
||||
system.CurrentProcess()->GetTitleID(), false,
|
||||
[](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user