diff options
| author | 2020-01-03 16:16:29 -0400 | |
|---|---|---|
| committer | 2020-01-24 16:43:29 -0400 | |
| commit | c921e496eb47de49a4d6ce62527581b966dca259 (patch) | |
| tree | 788c71599f0abf53b479bd3f2f3ea730fc9c35c4 /src | |
| parent | Merge pull request #3273 from FernandoS27/txd-array (diff) | |
| download | yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.gz yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.xz yuzu-c921e496eb47de49a4d6ce62527581b966dca259.zip | |
GPU: Implement guest driver profile and deduce texture handler sizes.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/engines/const_buffer_engine_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
| -rw-r--r-- | src/video_core/guest_driver.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/guest_driver.h | 37 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 8 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 8 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 1 |
13 files changed, 127 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ccfed4f2e..04a25da4f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -29,6 +29,8 @@ add_library(video_core STATIC | |||
| 29 | gpu_synch.h | 29 | gpu_synch.h |
| 30 | gpu_thread.cpp | 30 | gpu_thread.cpp |
| 31 | gpu_thread.h | 31 | gpu_thread.h |
| 32 | guest_driver.cpp | ||
| 33 | guest_driver.h | ||
| 32 | macro_interpreter.cpp | 34 | macro_interpreter.cpp |
| 33 | macro_interpreter.h | 35 | macro_interpreter.h |
| 34 | memory_manager.cpp | 36 | memory_manager.cpp |
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..c29156e34 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/shader_bytecode.h" | 10 | #include "video_core/engines/shader_bytecode.h" |
| 11 | #include "video_core/engines/shader_type.h" | 11 | #include "video_core/engines/shader_type.h" |
| 12 | #include "video_core/guest_driver.h" | ||
| 12 | #include "video_core/textures/texture.h" | 13 | #include "video_core/textures/texture.h" |
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| @@ -106,6 +107,8 @@ public: | |||
| 106 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 107 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 107 | u64 offset) const = 0; | 108 | u64 offset) const = 0; |
| 108 | virtual u32 GetBoundBuffer() const = 0; | 109 | virtual u32 GetBoundBuffer() const = 0; |
| 110 | |||
| 111 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | ||
| 109 | }; | 112 | }; |
| 110 | 113 | ||
| 111 | } // namespace Tegra::Engines | 114 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..f177ae938 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 94 | return result; | 94 | return result; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { /* Exposes the guest driver profile through the compute engine. */ | ||
| 98 | return rasterizer.AccessGuestDriverProfile(); /* forwarded: the profile object is a member of the rasterizer interface */ | ||
| 99 | } | ||
| 100 | |||
| 97 | void KeplerCompute::ProcessLaunch() { | 101 | void KeplerCompute::ProcessLaunch() { |
| 98 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 102 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 99 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 103 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..99c82a9af 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -218,6 +218,8 @@ public: | |||
| 218 | return regs.tex_cb_index; | 218 | return regs.tex_cb_index; |
| 219 | } | 219 | } |
| 220 | 220 | ||
| 221 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 222 | |||
| 221 | private: | 223 | private: |
| 222 | Core::System& system; | 224 | Core::System& system; |
| 223 | VideoCore::RasterizerInterface& rasterizer; | 225 | VideoCore::RasterizerInterface& rasterizer; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..8167864c0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 784 | return result; | 784 | return result; |
| 785 | } | 785 | } |
| 786 | 786 | ||
| 787 | VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { /* Exposes the guest driver profile through the 3D engine. */ | ||
| 788 | return rasterizer.AccessGuestDriverProfile(); /* forwarded to the rasterizer's accessor of the same name */ | ||
| 789 | } | ||
| 790 | |||
| 787 | } // namespace Tegra::Engines | 791 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..08ef95410 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1306,6 +1306,8 @@ public: | |||
| 1306 | return regs.tex_cb_index; | 1306 | return regs.tex_cb_index; |
| 1307 | } | 1307 | } |
| 1308 | 1308 | ||
| 1309 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 1310 | |||
| 1309 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | 1311 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |
| 1310 | /// we've seen used. | 1312 | /// we've seen used. |
| 1311 | using MacroMemory = std::array<u32, 0x40000>; | 1313 | using MacroMemory = std::array<u32, 0x40000>; |
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..b1ac254ff --- /dev/null +++ b/src/video_core/guest_driver.cpp | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/guest_driver.h" | ||
| 6 | |||
| 7 | namespace VideoCore { | ||
| 8 | |||
| 9 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { /* Tries to deduce the texture handler size from the smallest gap between distinct entries of bound_offsets. */ | ||
| 10 | if (texture_handler_size_deduced) { /* an earlier call already succeeded; keep that result */ | ||
| 11 | return; | ||
| 12 | } | ||
| 13 | std::size_t size = bound_offsets.size(); | ||
| 14 | if (size < 2) { /* a gap needs at least two offsets */ | ||
| 15 | return; | ||
| 16 | } | ||
| 17 | std::sort(bound_offsets.begin(), bound_offsets.end(), | ||
| 18 | [](const u32& a, const u32& b) { return a < b; }); /* ascending order, so the smallest gap is between neighbours */ | ||
| 19 | u32 min_val = 0xFFFFFFFF; // start at the highest possible 32-bit value so any real gap is smaller | ||
| 20 | for (std::size_t i = 1; i < size; i++) { | ||
| 21 | if (bound_offsets[i] == bound_offsets[i - 1]) { /* duplicates would give a zero gap; skip them */ | ||
| 22 | continue; | ||
| 23 | } | ||
| 24 | const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; | ||
| 25 | min_val = std::min(min_val, new_min); | ||
| 26 | } | ||
| 27 | if (min_val > 2) { /* gap too large to conclude anything; also taken when every offset was identical */ | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | texture_handler_size_deduced = true; | ||
| 31 | texture_handler_size = sizeof(u32) * min_val; /* gap scaled by sizeof(u32); presumably offsets count 32-bit words - TODO confirm */ | ||
| 32 | } | ||
| 33 | |||
| 34 | } // namespace VideoCore | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..f64f043af --- /dev/null +++ b/src/video_core/guest_driver.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace VideoCore { | ||
| 12 | |||
| 13 | /** | ||
| 14 | * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to collect | ||
| 15 | * the information needed by HLE methods that cannot be avoided, such as shader tracking. | ||
| 16 | */ | ||
| 17 | class GuestDriverProfile { | ||
| 18 | public: | ||
| 19 | u32 GetTextureHandlerSize() const { /* current handler size; stays at the default until a deduction succeeds */ | ||
| 20 | return texture_handler_size; | ||
| 21 | } | ||
| 22 | |||
| 23 | bool TextureHandlerSizeKnown() const { /* true once DeduceTextureHandlerSize has stored a result */ | ||
| 24 | return texture_handler_size_deduced; | ||
| 25 | } | ||
| 26 | |||
| 27 | void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | ||
| 28 | |||
| 29 | private: | ||
| 30 | // Default handler size in bytes. This matches the Vulkan and OpenGL standards, but Nvidia GPUs | ||
| 31 | // can easily use 4 bytes instead, so certain drivers may shrink the size. | ||
| 32 | static constexpr u32 default_texture_handler_size = 8; | ||
| 33 | u32 texture_handler_size{default_texture_handler_size}; | ||
| 34 | bool texture_handler_size_deduced{}; | ||
| 35 | }; | ||
| 36 | |||
| 37 | } // namespace VideoCore | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..149f79af3 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 12 | #include "video_core/guest_driver.h" | ||
| 12 | 13 | ||
| 13 | namespace Tegra { | 14 | namespace Tegra { |
| 14 | class MemoryManager; | 15 | class MemoryManager; |
| @@ -78,5 +79,12 @@ public: | |||
| 78 | /// Initialize disk cached resources for the game being emulated | 79 | /// Initialize disk cached resources for the game being emulated |
| 79 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | 80 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |
| 80 | const DiskResourceLoadCallback& callback = {}) {} | 81 | const DiskResourceLoadCallback& callback = {}) {} |
| 82 | |||
| 83 | GuestDriverProfile& AccessGuestDriverProfile() { /* Returns a mutable reference to the profile stored in this interface. */ | ||
| 84 | return guest_driver_profile; | ||
| 85 | } | ||
| 86 | |||
| 87 | private: | ||
| 88 | GuestDriverProfile guest_driver_profile{}; | ||
| 81 | }; | 89 | }; |
| 82 | } // namespace VideoCore | 90 | } // namespace VideoCore |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..78d9d7037 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | 11 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 12 | #include "video_core/engines/shader_type.h" | 12 | #include "video_core/engines/shader_type.h" |
| 13 | #include "video_core/guest_driver.h" | ||
| 13 | 14 | ||
| 14 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 15 | 16 | ||
| @@ -71,6 +72,13 @@ public: | |||
| 71 | return bindless_samplers; | 72 | return bindless_samplers; |
| 72 | } | 73 | } |
| 73 | 74 | ||
| 75 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() { /* Returns the engine's guest driver profile, or nullptr when no engine is set. */ | ||
| 76 | if (engine) { | ||
| 77 | return &(engine->AccessGuestDriverProfile()); /* the engine hands back a reference; expose it as a pointer */ | ||
| 78 | } | ||
| 79 | return nullptr; | ||
| 80 | } | ||
| 81 | |||
| 74 | private: | 82 | private: |
| 75 | const Tegra::Engines::ShaderType stage; | 83 | const Tegra::Engines::ShaderType stage; |
| 76 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | 84 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..aed35a9b8 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 315 | return pc + 1; | 315 | return pc + 1; |
| 316 | } | 316 | } |
| 317 | 317 | ||
| 318 | void ShaderIR::PostDecode() { | ||
| 319 | // After decoding, try to deduce the texture handler size unless it is already known | ||
| 320 | auto* gpu_driver = locker.AccessGuestDriverProfile(); | ||
| 321 | if (gpu_driver) { /* the locker may return a null profile pointer */ | ||
| 322 | if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) { | ||
| 323 | u32 count{}; | ||
| 324 | std::vector<u32> bound_offsets; | ||
| 325 | for (const auto& sampler : used_samplers) { | ||
| 326 | if (sampler.IsBindless()) { /* only non-bindless samplers contribute an offset */ | ||
| 327 | continue; | ||
| 328 | } | ||
| 329 | count++; | ||
| 330 | bound_offsets.emplace_back(sampler.GetOffset()); | ||
| 331 | } | ||
| 332 | if (count > 1) { /* the deduction compares offsets, so it needs at least two of them */ | ||
| 333 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 334 | } | ||
| 335 | } | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 318 | } // namespace VideoCommon::Shader | 339 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..a186e22b2 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet | |||
| 27 | ConstBufferLocker& locker) | 27 | ConstBufferLocker& locker) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |
| 29 | Decode(); | 29 | Decode(); |
| 30 | PostDecode(); | ||
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | ShaderIR::~ShaderIR() = default; | 33 | ShaderIR::~ShaderIR() = default; |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..92c24247d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -191,6 +191,7 @@ private: | |||
| 191 | }; | 191 | }; |
| 192 | 192 | ||
| 193 | void Decode(); | 193 | void Decode(); |
| 194 | void PostDecode(); | ||
| 194 | 195 | ||
| 195 | NodeBlock DecodeRange(u32 begin, u32 end); | 196 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 196 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | 197 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); |