summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2020-01-03 16:16:29 -0400
committerGravatar FernandoS272020-01-24 16:43:29 -0400
commitc921e496eb47de49a4d6ce62527581b966dca259 (patch)
tree788c71599f0abf53b479bd3f2f3ea730fc9c35c4 /src
parentMerge pull request #3273 from FernandoS27/txd-array (diff)
downloadyuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.gz
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.xz
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.zip
GPU: Implement guest driver profile and deduce texture handler sizes.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h3
-rw-r--r--src/video_core/engines/kepler_compute.cpp4
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/guest_driver.cpp34
-rw-r--r--src/video_core/guest_driver.h37
-rw-r--r--src/video_core/rasterizer_interface.h8
-rw-r--r--src/video_core/shader/const_buffer_locker.h8
-rw-r--r--src/video_core/shader/decode.cpp21
-rw-r--r--src/video_core/shader/shader_ir.cpp1
-rw-r--r--src/video_core/shader/shader_ir.h1
13 files changed, 127 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..04a25da4f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
29 gpu_synch.h 29 gpu_synch.h
30 gpu_thread.cpp 30 gpu_thread.cpp
31 gpu_thread.h 31 gpu_thread.h
32 guest_driver.cpp
33 guest_driver.h
32 macro_interpreter.cpp 34 macro_interpreter.cpp
33 macro_interpreter.h 35 macro_interpreter.h
34 memory_manager.cpp 36 memory_manager.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..c29156e34 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h" 10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/engines/shader_type.h" 11#include "video_core/engines/shader_type.h"
12#include "video_core/guest_driver.h"
12#include "video_core/textures/texture.h" 13#include "video_core/textures/texture.h"
13 14
14namespace Tegra::Engines { 15namespace Tegra::Engines {
@@ -106,6 +107,8 @@ public:
106 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 107 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
107 u64 offset) const = 0; 108 u64 offset) const = 0;
108 virtual u32 GetBoundBuffer() const = 0; 109 virtual u32 GetBoundBuffer() const = 0;
110
111 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
109}; 112};
110 113
111} // namespace Tegra::Engines 114} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..f177ae938 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
94 return result; 94 return result;
95} 95}
96 96
97VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
98 return rasterizer.AccessGuestDriverProfile();
99}
100
97void KeplerCompute::ProcessLaunch() { 101void KeplerCompute::ProcessLaunch() {
98 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 102 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
99 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 103 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..99c82a9af 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,8 @@ public:
218 return regs.tex_cb_index; 218 return regs.tex_cb_index;
219 } 219 }
220 220
221 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
222
221private: 223private:
222 Core::System& system; 224 Core::System& system;
223 VideoCore::RasterizerInterface& rasterizer; 225 VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..8167864c0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
784 return result; 784 return result;
785} 785}
786 786
787VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
788 return rasterizer.AccessGuestDriverProfile();
789}
790
787} // namespace Tegra::Engines 791} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..08ef95410 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,8 @@ public:
1306 return regs.tex_cb_index; 1306 return regs.tex_cb_index;
1307 } 1307 }
1308 1308
1309 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
1310
1309 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than 1311 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
1310 /// we've seen used. 1312 /// we've seen used.
1311 using MacroMemory = std::array<u32, 0x40000>; 1313 using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..b1ac254ff
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,34 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <algorithm>
#include <limits>

#include "video_core/guest_driver.h"
6
7namespace VideoCore {
8
9void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
10 if (texture_handler_size_deduced) {
11 return;
12 }
13 std::size_t size = bound_offsets.size();
14 if (size < 2) {
15 return;
16 }
17 std::sort(bound_offsets.begin(), bound_offsets.end(),
18 [](const u32& a, const u32& b) { return a < b; });
19 u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
20 for (std::size_t i = 1; i < size; i++) {
21 if (bound_offsets[i] == bound_offsets[i - 1]) {
22 continue;
23 }
24 const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
25 min_val = std::min(min_val, new_min);
26 }
27 if (min_val > 2) {
28 return;
29 }
30 texture_handler_size_deduced = true;
31 texture_handler_size = sizeof(u32) * min_val;
32}
33
34} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..f64f043af
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,37 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace VideoCore {
12
13/**
14 * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
15 * information necessary for impossible to avoid HLE methods like shader tracks.
16 */
17class GuestDriverProfile {
18public:
19 u32 GetTextureHandlerSize() const {
20 return texture_handler_size;
21 }
22
23 bool TextureHandlerSizeKnown() const {
24 return texture_handler_size_deduced;
25 }
26
27 void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
28
29private:
30 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
31 // use 4 bytes instead. Thus, certain drivers may squish the size.
32 static constexpr u32 default_texture_handler_size = 8;
33 u32 texture_handler_size{default_texture_handler_size};
34 bool texture_handler_size_deduced{};
35};
36
37} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..149f79af3 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
11#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/guest_driver.h"
12 13
13namespace Tegra { 14namespace Tegra {
14class MemoryManager; 15class MemoryManager;
@@ -78,5 +79,12 @@ public:
78 /// Initialize disk cached resources for the game being emulated 79 /// Initialize disk cached resources for the game being emulated
79 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 80 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
80 const DiskResourceLoadCallback& callback = {}) {} 81 const DiskResourceLoadCallback& callback = {}) {}
82
83 GuestDriverProfile& AccessGuestDriverProfile() {
84 return guest_driver_profile;
85 }
86
87private:
88 GuestDriverProfile guest_driver_profile{};
81}; 89};
82} // namespace VideoCore 90} // namespace VideoCore
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..78d9d7037 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
10#include "common/hash.h" 10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h" 11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h" 12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
13 14
14namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
15 16
@@ -71,6 +72,13 @@ public:
71 return bindless_samplers; 72 return bindless_samplers;
72 } 73 }
73 74
75 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
76 if (engine) {
77 return &(engine->AccessGuestDriverProfile());
78 }
79 return nullptr;
80 }
81
74private: 82private:
75 const Tegra::Engines::ShaderType stage; 83 const Tegra::Engines::ShaderType stage;
76 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; 84 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..aed35a9b8 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
315 return pc + 1; 315 return pc + 1;
316} 316}
317 317
318void ShaderIR::PostDecode() {
319 // Deduce texture handler size if needed
320 auto* gpu_driver = locker.AccessGuestDriverProfile();
321 if (gpu_driver) {
322 if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
323 u32 count{};
324 std::vector<u32> bound_offsets;
325 for (const auto& sampler : used_samplers) {
326 if (sampler.IsBindless()) {
327 continue;
328 }
329 count++;
330 bound_offsets.emplace_back(sampler.GetOffset());
331 }
332 if (count > 1) {
333 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
334 }
335 }
336 }
337}
338
318} // namespace VideoCommon::Shader 339} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..a186e22b2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
27 ConstBufferLocker& locker) 27 ConstBufferLocker& locker)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
29 Decode(); 29 Decode();
30 PostDecode();
30} 31}
31 32
32ShaderIR::~ShaderIR() = default; 33ShaderIR::~ShaderIR() = default;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..92c24247d 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -191,6 +191,7 @@ private:
191 }; 191 };
192 192
193 void Decode(); 193 void Decode();
194 void PostDecode();
194 195
195 NodeBlock DecodeRange(u32 begin, u32 end); 196 NodeBlock DecodeRange(u32 begin, u32 end);
196 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); 197 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);