Diffstat (limited to 'src')
-rw-r--r--  src/audio_core/cubeb_sink.cpp | 20
-rw-r--r--  src/common/CMakeLists.txt | 8
-rw-r--r--  src/common/math_util.h | 16
-rw-r--r--  src/common/page_table.cpp | 12
-rw-r--r--  src/common/page_table.h | 15
-rw-r--r--  src/core/CMakeLists.txt | 8
-rw-r--r--  src/core/arm/arm_interface.h | 32
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_32.cpp | 208
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_32.h | 77
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_64.cpp (renamed from src/core/arm/dynarmic/arm_dynarmic.cpp) | 83
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_64.h (renamed from src/core/arm/dynarmic/arm_dynarmic.h) | 34
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_cp15.cpp | 80
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_cp15.h | 152
-rw-r--r--  src/core/arm/exclusive_monitor.cpp | 2
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.cpp | 8
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.h | 7
-rw-r--r--  src/core/core.cpp | 1
-rw-r--r--  src/core/core_manager.cpp | 3
-rw-r--r--  src/core/frontend/framebuffer_layout.cpp | 4
-rw-r--r--  src/core/frontend/framebuffer_layout.h | 16
-rw-r--r--  src/core/gdbstub/gdbstub.cpp | 14
-rw-r--r--  src/core/hle/kernel/kernel.cpp | 4
-rw-r--r--  src/core/hle/kernel/physical_core.cpp | 19
-rw-r--r--  src/core/hle/kernel/physical_core.h | 6
-rw-r--r--  src/core/hle/kernel/process.cpp | 3
-rw-r--r--  src/core/hle/kernel/scheduler.cpp | 21
-rw-r--r--  src/core/hle/kernel/scheduler.h | 3
-rw-r--r--  src/core/hle/kernel/svc.cpp | 329
-rw-r--r--  src/core/hle/kernel/svc_wrap.h | 158
-rw-r--r--  src/core/hle/kernel/thread.cpp | 31
-rw-r--r--  src/core/hle/kernel/thread.h | 22
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp | 4
-rw-r--r--  src/core/hle/service/set/set.cpp | 10
-rw-r--r--  src/core/hle/service/set/set.h | 1
-rw-r--r--  src/core/hle/service/time/time_zone_content_manager.cpp | 2
-rw-r--r--  src/core/loader/deconstructed_rom_directory.cpp | 6
-rw-r--r--  src/core/reporter.cpp | 2
-rw-r--r--  src/core/settings.cpp | 1
-rw-r--r--  src/core/settings.h | 3
-rw-r--r--  src/input_common/udp/client.cpp | 17
-rw-r--r--  src/input_common/udp/protocol.cpp | 1
-rw-r--r--  src/input_common/udp/udp.cpp | 3
-rw-r--r--  src/video_core/CMakeLists.txt | 16
-rw-r--r--  src/video_core/dirty_flags.cpp | 38
-rw-r--r--  src/video_core/dirty_flags.h | 49
-rw-r--r--  src/video_core/dma_pusher.cpp | 2
-rw-r--r--  src/video_core/engines/const_buffer_engine_interface.h | 67
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 12
-rw-r--r--  src/video_core/engines/kepler_memory.cpp | 2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 189
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 217
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 2
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 11
-rw-r--r--  src/video_core/gpu.h | 1
-rw-r--r--  src/video_core/guest_driver.cpp | 7
-rw-r--r--  src/video_core/guest_driver.h | 21
-rw-r--r--  src/video_core/memory_manager.h | 2
-rw-r--r--  src/video_core/morton.cpp | 2
-rw-r--r--  src/video_core/rasterizer_interface.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_framebuffer_cache.h | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 1040
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 71
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 28
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h | 25
-rw-r--r--  src/video_core/renderer_opengl/gl_sampler_cache.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 513
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 99
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 431
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 24
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 404
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 153
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp | 109
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.h | 34
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 43
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 39
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp | 569
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h | 251
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 247
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 215
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 48
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 10
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 51
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 485
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 14
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp | 13
-rw-r--r--  src/video_core/renderer_opengl/utils.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 39
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 48
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 45
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 131
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 231
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 21
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 42
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 170
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 5
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 99
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 79
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 21
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7
-rw-r--r--  src/video_core/shader/const_buffer_locker.cpp | 126
-rw-r--r--  src/video_core/shader/const_buffer_locker.h | 103
-rw-r--r--  src/video_core/shader/control_flow.cpp | 13
-rw-r--r--  src/video_core/shader/control_flow.h | 3
-rw-r--r--  src/video_core/shader/decode.cpp | 22
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 69
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 5
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 63
-rw-r--r--  src/video_core/shader/node.h | 2
-rw-r--r--  src/video_core/shader/node_helper.cpp | 2
-rw-r--r--  src/video_core/shader/registry.cpp | 161
-rw-r--r--  src/video_core/shader/registry.h | 137
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 5
-rw-r--r--  src/video_core/shader/shader_ir.h | 6
-rw-r--r--  src/video_core/shader/track.cpp | 38
-rw-r--r--  src/video_core/shader/transform_feedback.cpp | 115
-rw-r--r--  src/video_core/shader/transform_feedback.h | 23
-rw-r--r--  src/video_core/surface.cpp | 2
-rw-r--r--  src/video_core/surface.h | 142
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp | 3
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 6
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 59
-rw-r--r--  src/video_core/textures/astc.cpp | 1074
-rw-r--r--  src/video_core/textures/texture.h | 26
-rw-r--r--  src/yuzu/CMakeLists.txt | 3
-rw-r--r--  src/yuzu/configuration/config.cpp | 11
-rw-r--r--  src/yuzu/configuration/configure.ui | 11
-rw-r--r--  src/yuzu/configuration/configure_dialog.cpp | 4
-rw-r--r--  src/yuzu/configuration/configure_graphics.cpp | 8
-rw-r--r--  src/yuzu/configuration/configure_graphics.ui | 24
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.cpp | 48
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.h | 30
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.ui | 111
-rw-r--r--  src/yuzu/configuration/configure_system.cpp | 4
-rw-r--r--  src/yuzu/configuration/configure_system.h | 1
-rw-r--r--  src/yuzu/configuration/configure_system.ui | 62
-rw-r--r--  src/yuzu/debugger/wait_tree.cpp | 4
-rw-r--r--  src/yuzu/loading_screen.cpp | 17
-rw-r--r--  src/yuzu_cmd/config.cpp | 4
-rw-r--r--  src/yuzu_cmd/default_ini.h | 6
-rw-r--r--  src/yuzu_tester/config.cpp | 2
-rw-r--r--  src/yuzu_tester/default_ini.h | 4
153 files changed, 5917 insertions, 4697 deletions
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 7047ed9cf..c4e0e30fe 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/cubeb_sink.h" 8#include "audio_core/cubeb_sink.h"
9#include "audio_core/stream.h" 9#include "audio_core/stream.h"
10#include "audio_core/time_stretch.h" 10#include "audio_core/time_stretch.h"
11#include "common/assert.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
12#include "common/ring_buffer.h" 13#include "common/ring_buffer.h"
13#include "core/settings.h" 14#include "core/settings.h"
@@ -65,12 +66,25 @@ public:
65 void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override { 66 void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
66 if (source_num_channels > num_channels) { 67 if (source_num_channels > num_channels) {
67 // Downsample 6 channels to 2 68 // Downsample 6 channels to 2
69 ASSERT_MSG(source_num_channels == 6, "Channel count must be 6");
70
68 std::vector<s16> buf; 71 std::vector<s16> buf;
69 buf.reserve(samples.size() * num_channels / source_num_channels); 72 buf.reserve(samples.size() * num_channels / source_num_channels);
70 for (std::size_t i = 0; i < samples.size(); i += source_num_channels) { 73 for (std::size_t i = 0; i < samples.size(); i += source_num_channels) {
71 for (std::size_t ch = 0; ch < num_channels; ch++) { 74 // Downmixing implementation taken from the ATSC standard
72 buf.push_back(samples[i + ch]); 75 const s16 left{samples[i + 0]};
73 } 76 const s16 right{samples[i + 1]};
77 const s16 center{samples[i + 2]};
78 const s16 surround_left{samples[i + 4]};
79 const s16 surround_right{samples[i + 5]};
80 // Not used in the ATSC reference implementation
81 [[maybe_unused]] const s16 low_frequency_effects { samples[i + 3] };
82
83 constexpr s32 clev{707}; // center mixing level coefficient
84 constexpr s32 slev{707}; // surround mixing level coefficient
85
86 buf.push_back(left + (clev * center / 1000) + (slev * surround_left / 1000));
87 buf.push_back(right + (clev * center / 1000) + (slev * surround_right / 1000));
74 } 88 }
75 queue.Push(buf); 89 queue.Push(buf);
76 return; 90 return;
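
For reference, the replacement loop implements a standard 5.1-to-stereo downmix with fixed-point 0.707 coefficients. A minimal standalone sketch of the same arithmetic (hypothetical helper name, not part of the patch):

#include <cstdint>
#include <utility>

// Downmix one 5.1 frame (L, R, C, LFE, SL, SR) to stereo with the ATSC
// coefficients clev = slev = 0.707, written as 707/1000 fixed-point, exactly
// as the loop above does. frame[3] is the unused LFE channel.
std::pair<std::int16_t, std::int16_t> DownmixFrame(const std::int16_t (&frame)[6]) {
    constexpr std::int32_t clev = 707; // center mixing level coefficient
    constexpr std::int32_t slev = 707; // surround mixing level coefficient
    const std::int32_t left = frame[0] + (clev * frame[2] / 1000) + (slev * frame[4] / 1000);
    const std::int32_t right = frame[1] + (clev * frame[2] / 1000) + (slev * frame[5] / 1000);
    // Like the patch, this does not clamp: a loud mix can exceed the s16 range.
    return {static_cast<std::int16_t>(left), static_cast<std::int16_t>(right)};
}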
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9afc6105d..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" 40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
41 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
42 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
43 "${VIDEO_CORE}/shader/decode/arithmetic.cpp" 41 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
44 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" 42 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
45 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" 43 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
72 "${VIDEO_CORE}/shader/ast.h" 70 "${VIDEO_CORE}/shader/ast.h"
73 "${VIDEO_CORE}/shader/compiler_settings.cpp" 71 "${VIDEO_CORE}/shader/compiler_settings.cpp"
74 "${VIDEO_CORE}/shader/compiler_settings.h" 72 "${VIDEO_CORE}/shader/compiler_settings.h"
75 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
76 "${VIDEO_CORE}/shader/const_buffer_locker.h"
77 "${VIDEO_CORE}/shader/control_flow.cpp" 73 "${VIDEO_CORE}/shader/control_flow.cpp"
78 "${VIDEO_CORE}/shader/control_flow.h" 74 "${VIDEO_CORE}/shader/control_flow.h"
79 "${VIDEO_CORE}/shader/decode.cpp" 75 "${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
82 "${VIDEO_CORE}/shader/node.h" 78 "${VIDEO_CORE}/shader/node.h"
83 "${VIDEO_CORE}/shader/node_helper.cpp" 79 "${VIDEO_CORE}/shader/node_helper.cpp"
84 "${VIDEO_CORE}/shader/node_helper.h" 80 "${VIDEO_CORE}/shader/node_helper.h"
81 "${VIDEO_CORE}/shader/registry.cpp"
82 "${VIDEO_CORE}/shader/registry.h"
85 "${VIDEO_CORE}/shader/shader_ir.cpp" 83 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h" 84 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp" 85 "${VIDEO_CORE}/shader/track.cpp"
86 "${VIDEO_CORE}/shader/transform_feedback.cpp"
87 "${VIDEO_CORE}/shader/transform_feedback.h"
88 # and also check that the scm_rev files haven't changed 88 # and also check that the scm_rev files haven't changed
89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" 89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" 90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/common/math_util.h b/src/common/math_util.h
index d6c35ee89..83ef0201f 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -24,17 +24,29 @@ struct Rectangle {
         : left(left), top(top), right(right), bottom(bottom) {}
 
     T GetWidth() const {
-        return std::abs(static_cast<std::make_signed_t<T>>(right - left));
+        if constexpr (std::is_floating_point_v<T>) {
+            return std::abs(right - left);
+        } else {
+            return std::abs(static_cast<std::make_signed_t<T>>(right - left));
+        }
     }
+
     T GetHeight() const {
-        return std::abs(static_cast<std::make_signed_t<T>>(bottom - top));
+        if constexpr (std::is_floating_point_v<T>) {
+            return std::abs(bottom - top);
+        } else {
+            return std::abs(static_cast<std::make_signed_t<T>>(bottom - top));
+        }
     }
+
     Rectangle<T> TranslateX(const T x) const {
         return Rectangle{left + x, top, right + x, bottom};
     }
+
     Rectangle<T> TranslateY(const T y) const {
         return Rectangle{left, top + y, right, bottom + y};
     }
+
     Rectangle<T> Scale(const float s) const {
         return Rectangle{left, top, static_cast<T>(left + GetWidth() * s),
                          static_cast<T>(top + GetHeight() * s)};
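
The if constexpr split above is needed because std::make_signed_t is only defined for integral types, so a Rectangle<float> instantiation previously would not compile. A self-contained sketch of the same logic:

#include <cmath>
#include <cstdlib>
#include <type_traits>

// Same branching as GetWidth()/GetHeight() above: std::make_signed_t<T> is
// ill-formed for floating-point T, so the float path must avoid the cast.
template <typename T>
T WidthOf(T left, T right) {
    if constexpr (std::is_floating_point_v<T>) {
        return std::abs(right - left);
    } else {
        return std::abs(static_cast<std::make_signed_t<T>>(right - left));
    }
}

// WidthOf(10u, 4u) == 6u   (the signed cast avoids a huge unsigned wrap-around)
// WidthOf(0.25f, 1.0f) == 0.75f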
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 69b7abc54..566b57b62 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.resize(num_page_table_entries);
     attributes.resize(num_page_table_entries);
-    backing_addr.resize(num_page_table_entries);
 
     // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
     // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.shrink_to_fit();
     attributes.shrink_to_fit();
+}
+
+BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
+
+BackingPageTable::~BackingPageTable() = default;
+
+void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
+    PageTable::Resize(address_space_width_in_bits);
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+    backing_addr.resize(num_page_table_entries);
     backing_addr.shrink_to_fit();
 }
 
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8b8ff0bb8..dbc272ab7 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -76,9 +76,20 @@ struct PageTable {
      */
     std::vector<PageType> attributes;
 
-    std::vector<u64> backing_addr;
-
     const std::size_t page_size_in_bits{};
 };
 
+/**
+ * A more advanced Page Table with the ability to save a backing address when using it
+ * depends on another MMU.
+ */
+struct BackingPageTable : PageTable {
+    explicit BackingPageTable(std::size_t page_size_in_bits);
+    ~BackingPageTable();
+
+    void Resize(std::size_t address_space_width_in_bits);
+
+    std::vector<u64> backing_addr;
+};
+
 } // namespace Common
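
Both Resize() implementations size their vectors from the same entry count, 1ULL << (address_space_width_in_bits - page_size_in_bits), so only MMU-backed tables pay for the extra backing_addr vector. A hypothetical usage sketch against this header:

#include "common/page_table.h"

// Hypothetical usage: 4 KiB pages (page_size_in_bits = 12) covering a 36-bit
// address space. PageTable::Resize sizes pointers/attributes; the
// BackingPageTable override additionally sizes backing_addr.
void ResizeExample() {
    Common::BackingPageTable table{12}; // 4 KiB pages
    table.Resize(36);                   // 1ULL << (36 - 12) = 16'777'216 entries per vector
}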
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 54be7dc0c..b31a0328c 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -595,8 +595,12 @@ endif()
 
 if (ARCHITECTURE_x86_64)
     target_sources(core PRIVATE
-        arm/dynarmic/arm_dynarmic.cpp
-        arm/dynarmic/arm_dynarmic.h
+        arm/dynarmic/arm_dynarmic_32.cpp
+        arm/dynarmic/arm_dynarmic_32.h
+        arm/dynarmic/arm_dynarmic_64.cpp
+        arm/dynarmic/arm_dynarmic_64.h
+        arm/dynarmic/arm_dynarmic_cp15.cpp
+        arm/dynarmic/arm_dynarmic_cp15.h
     )
     target_link_libraries(core PRIVATE dynarmic)
 endif()
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 47b964eb7..57eae839e 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -25,7 +25,20 @@ public:
     explicit ARM_Interface(System& system_) : system{system_} {}
     virtual ~ARM_Interface() = default;
 
-    struct ThreadContext {
+    struct ThreadContext32 {
+        std::array<u32, 16> cpu_registers;
+        u32 cpsr;
+        std::array<u8, 4> padding;
+        std::array<u64, 32> fprs;
+        u32 fpscr;
+        u32 fpexc;
+        u32 tpidr;
+    };
+    // Internally within the kernel, it expects the AArch32 version of the
+    // thread context to be 344 bytes in size.
+    static_assert(sizeof(ThreadContext32) == 0x158);
+
+    struct ThreadContext64 {
         std::array<u64, 31> cpu_registers;
         u64 sp;
         u64 pc;
@@ -38,7 +51,7 @@
     };
     // Internally within the kernel, it expects the AArch64 version of the
     // thread context to be 800 bytes in size.
-    static_assert(sizeof(ThreadContext) == 0x320);
+    static_assert(sizeof(ThreadContext64) == 0x320);
 
     /// Runs the CPU until an event happens
     virtual void Run() = 0;
@@ -130,17 +143,10 @@
      */
     virtual void SetTPIDR_EL0(u64 value) = 0;
 
-    /**
-     * Saves the current CPU context
-     * @param ctx Thread context to save
-     */
-    virtual void SaveContext(ThreadContext& ctx) = 0;
-
-    /**
-     * Loads a CPU context
-     * @param ctx Thread context to load
-     */
-    virtual void LoadContext(const ThreadContext& ctx) = 0;
+    virtual void SaveContext(ThreadContext32& ctx) = 0;
+    virtual void SaveContext(ThreadContext64& ctx) = 0;
+    virtual void LoadContext(const ThreadContext32& ctx) = 0;
+    virtual void LoadContext(const ThreadContext64& ctx) = 0;
 
     /// Clears the exclusive monitor's state.
     virtual void ClearExclusiveState() = 0;
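
The 0x158 figure in the new static_assert follows from the ThreadContext32 layout: 64 + 4 + 4 + 256 + 4 + 4 + 4 = 340 bytes of members, padded up to 344 (0x158) by the 8-byte alignment the fprs array imposes. A standalone check of that arithmetic, assuming a typical 64-bit ABI:

#include <array>
#include <cstdint>

struct ThreadContext32 {                         // mirrors the struct above
    std::array<std::uint32_t, 16> cpu_registers; // 64 bytes
    std::uint32_t cpsr;                          // 4
    std::array<std::uint8_t, 4> padding;         // 4
    std::array<std::uint64_t, 32> fprs;          // 256 (imposes 8-byte alignment)
    std::uint32_t fpscr;                         // 4
    std::uint32_t fpexc;                         // 4
    std::uint32_t tpidr;                         // 4
};
// 340 bytes of members, rounded up to a multiple of the 8-byte alignment:
static_assert(alignof(ThreadContext32) == 8);
static_assert(sizeof(ThreadContext32) == 0x158); // 344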
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
new file mode 100644
index 000000000..187a972ac
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -0,0 +1,208 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cinttypes>
6#include <memory>
7#include <dynarmic/A32/a32.h>
8#include <dynarmic/A32/config.h>
9#include <dynarmic/A32/context.h>
10#include "common/microprofile.h"
11#include "core/arm/dynarmic/arm_dynarmic_32.h"
12#include "core/arm/dynarmic/arm_dynarmic_64.h"
13#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
14#include "core/core.h"
15#include "core/core_manager.h"
16#include "core/core_timing.h"
17#include "core/hle/kernel/svc.h"
18#include "core/memory.h"
19
20namespace Core {
21
22class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
23public:
24 explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent) : parent(parent) {}
25
26 u8 MemoryRead8(u32 vaddr) override {
27 return parent.system.Memory().Read8(vaddr);
28 }
29 u16 MemoryRead16(u32 vaddr) override {
30 return parent.system.Memory().Read16(vaddr);
31 }
32 u32 MemoryRead32(u32 vaddr) override {
33 return parent.system.Memory().Read32(vaddr);
34 }
35 u64 MemoryRead64(u32 vaddr) override {
36 return parent.system.Memory().Read64(vaddr);
37 }
38
39 void MemoryWrite8(u32 vaddr, u8 value) override {
40 parent.system.Memory().Write8(vaddr, value);
41 }
42 void MemoryWrite16(u32 vaddr, u16 value) override {
43 parent.system.Memory().Write16(vaddr, value);
44 }
45 void MemoryWrite32(u32 vaddr, u32 value) override {
46 parent.system.Memory().Write32(vaddr, value);
47 }
48 void MemoryWrite64(u32 vaddr, u64 value) override {
49 parent.system.Memory().Write64(vaddr, value);
50 }
51
52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
53 UNIMPLEMENTED();
54 }
55
56 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
57 switch (exception) {
58 case Dynarmic::A32::Exception::UndefinedInstruction:
59 case Dynarmic::A32::Exception::UnpredictableInstruction:
60 break;
61 case Dynarmic::A32::Exception::Breakpoint:
62 break;
63 }
64 LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
65 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
66 UNIMPLEMENTED();
67 }
68
69 void CallSVC(u32 swi) override {
70 Kernel::CallSVC(parent.system, swi);
71 }
72
73 void AddTicks(u64 ticks) override {
74 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
75 // rough approximation of the amount of executed ticks in the system, it may be thrown off
76 // if not all cores are doing a similar amount of work. Instead of doing this, we should
77 // device a way so that timing is consistent across all cores without increasing the ticks 4
78 // times.
79 u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
80 // Always execute at least one tick.
81 amortized_ticks = std::max<u64>(amortized_ticks, 1);
82
83 parent.system.CoreTiming().AddTicks(amortized_ticks);
84 num_interpreted_instructions = 0;
85 }
86 u64 GetTicksRemaining() override {
87 return std::max(parent.system.CoreTiming().GetDowncount(), {});
88 }
89
90 ARM_Dynarmic_32& parent;
91 std::size_t num_interpreted_instructions{};
92 u64 tpidrro_el0{};
93 u64 tpidr_el0{};
94};
95
96std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
97 std::size_t address_space_bits) const {
98 Dynarmic::A32::UserConfig config;
99 config.callbacks = cb.get();
100 // TODO(bunnei): Implement page table for 32-bit
101 // config.page_table = &page_table.pointers;
102 config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
103 config.define_unpredictable_behaviour = true;
104 return std::make_unique<Dynarmic::A32::Jit>(config);
105}
106
107MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
108
109void ARM_Dynarmic_32::Run() {
110 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);
111 jit->Run();
112}
113
114void ARM_Dynarmic_32::Step() {
115 cb->InterpreterFallback(jit->Regs()[15], 1);
116}
117
118ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
119 std::size_t core_index)
120 : ARM_Interface{system},
121 cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
122 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
123
124ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
125
126void ARM_Dynarmic_32::SetPC(u64 pc) {
127 jit->Regs()[15] = static_cast<u32>(pc);
128}
129
130u64 ARM_Dynarmic_32::GetPC() const {
131 return jit->Regs()[15];
132}
133
134u64 ARM_Dynarmic_32::GetReg(int index) const {
135 return jit->Regs()[index];
136}
137
138void ARM_Dynarmic_32::SetReg(int index, u64 value) {
139 jit->Regs()[index] = static_cast<u32>(value);
140}
141
142u128 ARM_Dynarmic_32::GetVectorReg(int index) const {
143 return {};
144}
145
146void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {}
147
148u32 ARM_Dynarmic_32::GetPSTATE() const {
149 return jit->Cpsr();
150}
151
152void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
153 jit->SetCpsr(cpsr);
154}
155
156u64 ARM_Dynarmic_32::GetTlsAddress() const {
157 return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
158}
159
160void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
161 CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
162}
163
164u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
165 return cb->tpidr_el0;
166}
167
168void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
169 cb->tpidr_el0 = value;
170}
171
172void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
173 Dynarmic::A32::Context context;
174 jit->SaveContext(context);
175 ctx.cpu_registers = context.Regs();
176 ctx.cpsr = context.Cpsr();
177}
178
179void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
180 Dynarmic::A32::Context context;
181 context.Regs() = ctx.cpu_registers;
182 context.SetCpsr(ctx.cpsr);
183 jit->LoadContext(context);
184}
185
186void ARM_Dynarmic_32::PrepareReschedule() {
187 jit->HaltExecution();
188}
189
190void ARM_Dynarmic_32::ClearInstructionCache() {
191 jit->ClearCache();
192}
193
194void ARM_Dynarmic_32::ClearExclusiveState() {}
195
196void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,
197 std::size_t new_address_space_size_in_bits) {
198 auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
199 auto iter = jit_cache.find(key);
200 if (iter != jit_cache.end()) {
201 jit = iter->second;
202 return;
203 }
204 jit = MakeJit(page_table, new_address_space_size_in_bits);
205 jit_cache.emplace(key, jit);
206}
207
208} // namespace Core
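
PageTableChanged above keeps one compiled JIT per (page table, address-space width) pair, so switching between processes reuses previously translated code instead of recompiling it. A reduced sketch of that cache, with stand-in types where the real code uses Common::PageTable, Dynarmic::A32::Jit and Common::PairHash:

#include <cstddef>
#include <memory>
#include <unordered_map>
#include <utility>

struct PageTable; // stand-in for Common::PageTable
struct Jit {};    // stand-in for Dynarmic::A32::Jit

// Stand-in for Common::PairHash; the real hash combiner may differ.
struct PairHash {
    template <typename A, typename B>
    std::size_t operator()(const std::pair<A, B>& p) const {
        return std::hash<A>{}(p.first) ^ (std::hash<B>{}(p.second) << 1);
    }
};

using JitCacheKey = std::pair<PageTable*, std::size_t>;
using JitCache = std::unordered_map<JitCacheKey, std::shared_ptr<Jit>, PairHash>;

// Equivalent of PageTableChanged: reuse an existing JIT for this page table
// and address-space width, or build and remember a new one.
std::shared_ptr<Jit> GetOrMakeJit(JitCache& cache, PageTable& page_table,
                                  std::size_t address_space_bits) {
    const JitCacheKey key{&page_table, address_space_bits};
    if (const auto it = cache.find(key); it != cache.end()) {
        return it->second;
    }
    auto jit = std::make_shared<Jit>(); // the real code calls MakeJit(page_table, bits)
    cache.emplace(key, jit);
    return jit;
}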
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
new file mode 100644
index 000000000..143e46e4d
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -0,0 +1,77 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <dynarmic/A32/a32.h>
11#include <dynarmic/A64/a64.h>
12#include <dynarmic/A64/exclusive_monitor.h>
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "core/arm/arm_interface.h"
16#include "core/arm/exclusive_monitor.h"
17
18namespace Memory {
19class Memory;
20}
21
22namespace Core {
23
24class DynarmicCallbacks32;
25class DynarmicExclusiveMonitor;
26class System;
27
28class ARM_Dynarmic_32 final : public ARM_Interface {
29public:
30 ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
31 ~ARM_Dynarmic_32() override;
32
33 void SetPC(u64 pc) override;
34 u64 GetPC() const override;
35 u64 GetReg(int index) const override;
36 void SetReg(int index, u64 value) override;
37 u128 GetVectorReg(int index) const override;
38 void SetVectorReg(int index, u128 value) override;
39 u32 GetPSTATE() const override;
40 void SetPSTATE(u32 pstate) override;
41 void Run() override;
42 void Step() override;
43 VAddr GetTlsAddress() const override;
44 void SetTlsAddress(VAddr address) override;
45 void SetTPIDR_EL0(u64 value) override;
46 u64 GetTPIDR_EL0() const override;
47
48 void SaveContext(ThreadContext32& ctx) override;
49 void SaveContext(ThreadContext64& ctx) override {}
50 void LoadContext(const ThreadContext32& ctx) override;
51 void LoadContext(const ThreadContext64& ctx) override {}
52
53 void PrepareReschedule() override;
54 void ClearExclusiveState() override;
55
56 void ClearInstructionCache() override;
57 void PageTableChanged(Common::PageTable& new_page_table,
58 std::size_t new_address_space_size_in_bits) override;
59
60private:
61 std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable& page_table,
62 std::size_t address_space_bits) const;
63
64 using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
65 using JitCacheType =
66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
67
68 friend class DynarmicCallbacks32;
69 std::unique_ptr<DynarmicCallbacks32> cb;
70 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A32::Jit> jit;
72 std::size_t core_index;
73 DynarmicExclusiveMonitor& exclusive_monitor;
74 std::array<u32, 84> CP15_regs{};
75};
76
77} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 29eaf74e5..a53a58ba0 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -8,7 +8,7 @@
 #include <dynarmic/A64/config.h>
 #include "common/logging/log.h"
 #include "common/microprofile.h"
-#include "core/arm/dynarmic/arm_dynarmic.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/core.h"
 #include "core/core_manager.h"
 #include "core/core_timing.h"
@@ -25,9 +25,9 @@ namespace Core {
 
 using Vector = Dynarmic::A64::Vector;
 
-class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks {
+class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
-    explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}
+    explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent) : parent(parent) {}
 
     u8 MemoryRead8(u64 vaddr) override {
         return parent.system.Memory().Read8(vaddr);
@@ -68,7 +68,7 @@ public:
         LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
                  num_instructions, MemoryReadCode(pc));
 
-        ARM_Interface::ThreadContext ctx;
+        ARM_Interface::ThreadContext64 ctx;
         parent.SaveContext(ctx);
         parent.inner_unicorn.LoadContext(ctx);
         parent.inner_unicorn.ExecuteInstructions(num_instructions);
@@ -90,7 +90,7 @@ public:
         parent.jit->HaltExecution();
         parent.SetPC(pc);
         Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread();
-        parent.SaveContext(thread->GetContext());
+        parent.SaveContext(thread->GetContext64());
         GDBStub::Break();
         GDBStub::SendTrap(thread, 5);
         return;
@@ -126,14 +126,14 @@ public:
         return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
     }
 
-    ARM_Dynarmic& parent;
+    ARM_Dynarmic_64& parent;
     std::size_t num_interpreted_instructions = 0;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
 };
 
-std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table,
+std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
                                                           std::size_t address_space_bits) const {
     Dynarmic::A64::UserConfig config;
 
     // Callbacks
@@ -159,79 +159,79 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
 
-    return std::make_unique<Dynarmic::A64::Jit>(config);
+    return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
 
-void ARM_Dynarmic::Run() {
-    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
+void ARM_Dynarmic_64::Run() {
+    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64);
 
     jit->Run();
 }
 
-void ARM_Dynarmic::Step() {
+void ARM_Dynarmic_64::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
+ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
                                  std::size_t core_index)
     : ARM_Interface{system},
-      cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
+      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system},
       core_index{core_index}, exclusive_monitor{
                                   dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
-ARM_Dynarmic::~ARM_Dynarmic() = default;
+ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
 
-void ARM_Dynarmic::SetPC(u64 pc) {
+void ARM_Dynarmic_64::SetPC(u64 pc) {
     jit->SetPC(pc);
 }
 
-u64 ARM_Dynarmic::GetPC() const {
+u64 ARM_Dynarmic_64::GetPC() const {
     return jit->GetPC();
 }
 
-u64 ARM_Dynarmic::GetReg(int index) const {
+u64 ARM_Dynarmic_64::GetReg(int index) const {
    return jit->GetRegister(index);
 }
 
-void ARM_Dynarmic::SetReg(int index, u64 value) {
+void ARM_Dynarmic_64::SetReg(int index, u64 value) {
     jit->SetRegister(index, value);
 }
 
-u128 ARM_Dynarmic::GetVectorReg(int index) const {
+u128 ARM_Dynarmic_64::GetVectorReg(int index) const {
     return jit->GetVector(index);
 }
 
-void ARM_Dynarmic::SetVectorReg(int index, u128 value) {
+void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) {
     jit->SetVector(index, value);
 }
 
-u32 ARM_Dynarmic::GetPSTATE() const {
+u32 ARM_Dynarmic_64::GetPSTATE() const {
     return jit->GetPstate();
 }
 
-void ARM_Dynarmic::SetPSTATE(u32 pstate) {
+void ARM_Dynarmic_64::SetPSTATE(u32 pstate) {
    jit->SetPstate(pstate);
 }
 
-u64 ARM_Dynarmic::GetTlsAddress() const {
+u64 ARM_Dynarmic_64::GetTlsAddress() const {
     return cb->tpidrro_el0;
 }
 
-void ARM_Dynarmic::SetTlsAddress(VAddr address) {
+void ARM_Dynarmic_64::SetTlsAddress(VAddr address) {
     cb->tpidrro_el0 = address;
 }
 
-u64 ARM_Dynarmic::GetTPIDR_EL0() const {
+u64 ARM_Dynarmic_64::GetTPIDR_EL0() const {
     return cb->tpidr_el0;
 }
 
-void ARM_Dynarmic::SetTPIDR_EL0(u64 value) {
+void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
     cb->tpidr_el0 = value;
 }
 
-void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
+void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
     ctx.cpu_registers = jit->GetRegisters();
     ctx.sp = jit->GetSP();
     ctx.pc = jit->GetPC();
@@ -242,7 +242,7 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
     ctx.tpidr = cb->tpidr_el0;
 }
 
-void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
+void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
     jit->SetRegisters(ctx.cpu_registers);
     jit->SetSP(ctx.sp);
     jit->SetPC(ctx.pc);
@@ -253,25 +253,32 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
     SetTPIDR_EL0(ctx.tpidr);
 }
 
-void ARM_Dynarmic::PrepareReschedule() {
+void ARM_Dynarmic_64::PrepareReschedule() {
     jit->HaltExecution();
 }
 
-void ARM_Dynarmic::ClearInstructionCache() {
+void ARM_Dynarmic_64::ClearInstructionCache() {
     jit->ClearCache();
 }
 
-void ARM_Dynarmic::ClearExclusiveState() {
+void ARM_Dynarmic_64::ClearExclusiveState() {
     jit->ClearExclusiveState();
 }
 
-void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table,
+void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,
                                     std::size_t new_address_space_size_in_bits) {
+    auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
+    auto iter = jit_cache.find(key);
+    if (iter != jit_cache.end()) {
+        jit = iter->second;
+        return;
+    }
     jit = MakeJit(page_table, new_address_space_size_in_bits);
+    jit_cache.emplace(key, jit);
 }
 
-DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count)
-    : monitor(core_count), memory{memory_} {}
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
+    : monitor(core_count), memory{memory} {}
 
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
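
Both dynarmic backends amortize the JIT-reported ticks across cores in their AddTicks callbacks (see the one in arm_dynarmic_32.cpp above): interpreted instructions are subtracted, the remainder is divided by the core count, and at least one tick is always charged. A one-function sketch of that arithmetic, assuming NUM_CPU_CORES == 4 as on the emulated Switch:

#include <algorithm>
#include <cstdint>

// Sketch of the tick amortization in the AddTicks callbacks. Interpreted
// instructions are subtracted so Unicorn fallbacks are not double-counted.
constexpr std::uint64_t NUM_CPU_CORES = 4;

std::uint64_t AmortizedTicks(std::uint64_t ticks, std::uint64_t num_interpreted_instructions) {
    const std::uint64_t amortized = (ticks - num_interpreted_instructions) / NUM_CPU_CORES;
    return std::max<std::uint64_t>(amortized, 1); // always advance time by at least one tick
}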
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 9cd475cfb..e71240a96 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -5,9 +5,12 @@
 #pragma once
 
 #include <memory>
+#include <unordered_map>
+
 #include <dynarmic/A64/a64.h>
 #include <dynarmic/A64/exclusive_monitor.h>
 #include "common/common_types.h"
+#include "common/hash.h"
 #include "core/arm/arm_interface.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
@@ -18,14 +21,14 @@ class Memory;
 
 namespace Core {
 
-class ARM_Dynarmic_Callbacks;
+class DynarmicCallbacks64;
 class DynarmicExclusiveMonitor;
 class System;
 
-class ARM_Dynarmic final : public ARM_Interface {
+class ARM_Dynarmic_64 final : public ARM_Interface {
 public:
-    ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
-    ~ARM_Dynarmic() override;
+    ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ~ARM_Dynarmic_64() override;
 
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
@@ -42,8 +45,10 @@ public:
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
 
-    void SaveContext(ThreadContext& ctx) override;
-    void LoadContext(const ThreadContext& ctx) override;
+    void SaveContext(ThreadContext32& ctx) override {}
+    void SaveContext(ThreadContext64& ctx) override;
+    void LoadContext(const ThreadContext32& ctx) override {}
+    void LoadContext(const ThreadContext64& ctx) override;
 
     void PrepareReschedule() override;
     void ClearExclusiveState() override;
@@ -53,12 +58,17 @@ public:
                           std::size_t new_address_space_size_in_bits) override;
 
 private:
-    std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table,
+    std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table,
                                                 std::size_t address_space_bits) const;
 
-    friend class ARM_Dynarmic_Callbacks;
-    std::unique_ptr<ARM_Dynarmic_Callbacks> cb;
-    std::unique_ptr<Dynarmic::A64::Jit> jit;
+    using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
+    using JitCacheType =
+        std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>;
+
+    friend class DynarmicCallbacks64;
+    std::unique_ptr<DynarmicCallbacks64> cb;
+    JitCacheType jit_cache;
+    std::shared_ptr<Dynarmic::A64::Jit> jit;
     ARM_Unicorn inner_unicorn;
 
     std::size_t core_index;
@@ -67,7 +77,7 @@ private:
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
-    explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count);
+    explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
     ~DynarmicExclusiveMonitor() override;
 
     void SetExclusive(std::size_t core_index, VAddr addr) override;
@@ -80,7 +90,7 @@ public:
     bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
 
 private:
-    friend class ARM_Dynarmic;
+    friend class ARM_Dynarmic_64;
     Dynarmic::A64::ExclusiveMonitor monitor;
     Memory::Memory& memory;
 };
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
new file mode 100644
index 000000000..3fdcdebde
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -0,0 +1,80 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
6
7using Callback = Dynarmic::A32::Coprocessor::Callback;
8using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
9using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
10
11std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
12 CoprocReg CRd, CoprocReg CRn,
13 CoprocReg CRm, unsigned opc2) {
14 return {};
15}
16
17CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
18 CoprocReg CRm, unsigned opc2) {
19 // TODO(merry): Privileged CP15 registers
20
21 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
22 // This is a dummy write, we ignore the value written here.
23 return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
24 }
25
26 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
27 switch (opc2) {
28 case 4:
29 // This is a dummy write, we ignore the value written here.
30 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
31 case 5:
32 // This is a dummy write, we ignore the value written here.
33 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
34 default:
35 return {};
36 }
37 }
38
39 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
40 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
41 }
42
43 return {};
44}
45
46CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
47 return {};
48}
49
50CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
51 CoprocReg CRm, unsigned opc2) {
52 // TODO(merry): Privileged CP15 registers
53
54 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
55 switch (opc2) {
56 case 2:
57 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
58 case 3:
59 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
60 default:
61 return {};
62 }
63 }
64
65 return {};
66}
67
68CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
69 return {};
70}
71
72std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
73 std::optional<u8> option) {
74 return {};
75}
76
77std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
78 std::optional<u8> option) {
79 return {};
80}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
new file mode 100644
index 000000000..07bcde5f9
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -0,0 +1,152 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9
10#include <dynarmic/A32/coprocessor.h>
11#include "common/common_types.h"
12
13enum class CP15Register {
14 // c0 - Information registers
15 CP15_MAIN_ID,
16 CP15_CACHE_TYPE,
17 CP15_TCM_STATUS,
18 CP15_TLB_TYPE,
19 CP15_CPU_ID,
20 CP15_PROCESSOR_FEATURE_0,
21 CP15_PROCESSOR_FEATURE_1,
22 CP15_DEBUG_FEATURE_0,
23 CP15_AUXILIARY_FEATURE_0,
24 CP15_MEMORY_MODEL_FEATURE_0,
25 CP15_MEMORY_MODEL_FEATURE_1,
26 CP15_MEMORY_MODEL_FEATURE_2,
27 CP15_MEMORY_MODEL_FEATURE_3,
28 CP15_ISA_FEATURE_0,
29 CP15_ISA_FEATURE_1,
30 CP15_ISA_FEATURE_2,
31 CP15_ISA_FEATURE_3,
32 CP15_ISA_FEATURE_4,
33
34 // c1 - Control registers
35 CP15_CONTROL,
36 CP15_AUXILIARY_CONTROL,
37 CP15_COPROCESSOR_ACCESS_CONTROL,
38
39 // c2 - Translation table registers
40 CP15_TRANSLATION_BASE_TABLE_0,
41 CP15_TRANSLATION_BASE_TABLE_1,
42 CP15_TRANSLATION_BASE_CONTROL,
43 CP15_DOMAIN_ACCESS_CONTROL,
44 CP15_RESERVED,
45
46 // c5 - Fault status registers
47 CP15_FAULT_STATUS,
48 CP15_INSTR_FAULT_STATUS,
49 CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
50 CP15_INST_FSR,
51
52 // c6 - Fault Address registers
53 CP15_FAULT_ADDRESS,
54 CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
55 CP15_WFAR,
56 CP15_IFAR,
57
58 // c7 - Cache operation registers
59 CP15_WAIT_FOR_INTERRUPT,
60 CP15_PHYS_ADDRESS,
61 CP15_INVALIDATE_INSTR_CACHE,
62 CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
63 CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
64 CP15_FLUSH_PREFETCH_BUFFER,
65 CP15_FLUSH_BRANCH_TARGET_CACHE,
66 CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
67 CP15_INVALIDATE_DATA_CACHE,
68 CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
69 CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
70 CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
71 CP15_CLEAN_DATA_CACHE,
72 CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
73 CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
74 CP15_DATA_SYNC_BARRIER,
75 CP15_DATA_MEMORY_BARRIER,
76 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
77 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
78 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
79
80 // c8 - TLB operations
81 CP15_INVALIDATE_ITLB,
82 CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
83 CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
84 CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
85 CP15_INVALIDATE_DTLB,
86 CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
87 CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
88 CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
89 CP15_INVALIDATE_UTLB,
90 CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
91 CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
92 CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
93
94 // c9 - Data cache lockdown register
95 CP15_DATA_CACHE_LOCKDOWN,
96
97 // c10 - TLB/Memory map registers
98 CP15_TLB_LOCKDOWN,
99 CP15_PRIMARY_REGION_REMAP,
100 CP15_NORMAL_REGION_REMAP,
101
102 // c13 - Thread related registers
103 CP15_PID,
104 CP15_CONTEXT_ID,
105 CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
106 CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
107 CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
108
109 // c15 - Performance and TLB lockdown registers
110 CP15_PERFORMANCE_MONITOR_CONTROL,
111 CP15_CYCLE_COUNTER,
112 CP15_COUNT_0,
113 CP15_COUNT_1,
114 CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
115 CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
116 CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
117 CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
118 CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
119 CP15_TLB_DEBUG_CONTROL,
120
121 // Skyeye defined
122 CP15_TLB_FAULT_ADDR,
123 CP15_TLB_FAULT_STATUS,
124
125 // Not an actual register.
126 // All registers should be defined above this.
127 CP15_REGISTER_COUNT,
128};
129
130class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
131public:
132 using CoprocReg = Dynarmic::A32::CoprocReg;
133
134 explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
135
136 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
137 CoprocReg CRn, CoprocReg CRm,
138 unsigned opc2) override;
139 CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
140 CoprocReg CRm, unsigned opc2) override;
141 CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
142 CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm,
143 unsigned opc2) override;
144 CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
145 std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
146 std::optional<u8> option) override;
147 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
148 std::optional<u8> option) override;
149
150private:
151 u32* CP15{};
152};
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp
index 94570e520..b32401e0b 100644
--- a/src/core/arm/exclusive_monitor.cpp
+++ b/src/core/arm/exclusive_monitor.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/memory.h"
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index f99ad5802..8a9800a96 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -53,7 +53,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
                                void* user_data) {
     auto* const system = static_cast<System*>(user_data);
 
-    ARM_Interface::ThreadContext ctx{};
+    ARM_Interface::ThreadContext64 ctx{};
     system->CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
@@ -179,7 +179,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
     }
 
     Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread();
-    SaveContext(thread->GetContext());
+    SaveContext(thread->GetContext64());
     if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
         last_bkpt_hit = false;
         GDBStub::Break();
@@ -188,7 +188,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
     }
 }
 
-void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
+void ARM_Unicorn::SaveContext(ThreadContext64& ctx) {
     int uregs[32];
     void* tregs[32];
 
@@ -215,7 +215,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
     CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
 }
 
-void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
+void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) {
     int uregs[32];
     void* tregs[32];
 
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 3c5b155f9..f30d13cb6 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -30,8 +30,6 @@ public:
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
-    void SaveContext(ThreadContext& ctx) override;
-    void LoadContext(const ThreadContext& ctx) override;
     void PrepareReschedule() override;
     void ClearExclusiveState() override;
     void ExecuteInstructions(std::size_t num_instructions);
@@ -41,6 +39,11 @@ public:
     void PageTableChanged(Common::PageTable&, std::size_t) override {}
     void RecordBreak(GDBStub::BreakpointAddress bkpt);
 
+    void SaveContext(ThreadContext32& ctx) override {}
+    void SaveContext(ThreadContext64& ctx) override;
+    void LoadContext(const ThreadContext32& ctx) override {}
+    void LoadContext(const ThreadContext64& ctx) override;
+
 private:
     static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index a82faf127..218508126 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -174,6 +174,7 @@ struct System::Impl {
        }
        interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
        gpu_core = VideoCore::CreateGPU(system);
+        renderer->Rasterizer().SetupDirtyFlags();
 
        is_powered_on = true;
        exit_lock = false;
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
index 8eacf92dd..b6b797c80 100644
--- a/src/core/core_manager.cpp
+++ b/src/core/core_manager.cpp
@@ -6,9 +6,6 @@
6#include <mutex> 6#include <mutex>
7 7
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#ifdef ARCHITECTURE_x86_64
10#include "core/arm/dynarmic/arm_dynarmic.h"
11#endif
12#include "core/arm/exclusive_monitor.h" 9#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 10#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h" 11#include "core/core.h"
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index 2dc795d56..68a0e0906 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -48,8 +48,8 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
48 u32 width, height; 48 u32 width, height;
49 49
50 if (Settings::values.use_docked_mode) { 50 if (Settings::values.use_docked_mode) {
51 width = ScreenDocked::WidthDocked * res_scale; 51 width = ScreenDocked::Width * res_scale;
52 height = ScreenDocked::HeightDocked * res_scale; 52 height = ScreenDocked::Height * res_scale;
53 } else { 53 } else {
54 width = ScreenUndocked::Width * res_scale; 54 width = ScreenUndocked::Width * res_scale;
55 height = ScreenUndocked::Height * res_scale; 55 height = ScreenUndocked::Height * res_scale;
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e9d0a40d3..15ecfb13d 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -8,15 +8,15 @@
8 8
9namespace Layout { 9namespace Layout {
10 10
11enum ScreenUndocked : u32 { 11namespace ScreenUndocked {
12 Width = 1280, 12constexpr u32 Width = 1280;
13 Height = 720, 13constexpr u32 Height = 720;
14}; 14} // namespace ScreenUndocked
15 15
16enum ScreenDocked : u32 { 16namespace ScreenDocked {
17 WidthDocked = 1920, 17constexpr u32 Width = 1920;
18 HeightDocked = 1080, 18constexpr u32 Height = 1080;
19}; 19} // namespace ScreenDocked
20 20
21enum class AspectRatio { 21enum class AspectRatio {
22 Default, 22 Default,
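
Replacing the two enums with namespaced constexpr constants lets docked and undocked resolutions share the plain names Width and Height without enumerator clashes or implicit enum-to-integer conversions. A small standalone sketch of the resulting usage, mirroring FrameLayoutFromResolutionScale under assumed variable names:

    #include <cstdint>
    #include <iostream>

    namespace ScreenUndocked {
    constexpr std::uint32_t Width = 1280;
    constexpr std::uint32_t Height = 720;
    } // namespace ScreenUndocked

    namespace ScreenDocked {
    constexpr std::uint32_t Width = 1920;
    constexpr std::uint32_t Height = 1080;
    } // namespace ScreenDocked

    int main() {
        const std::uint32_t res_scale = 2;
        const bool use_docked_mode = true;

        const std::uint32_t width =
            (use_docked_mode ? ScreenDocked::Width : ScreenUndocked::Width) * res_scale;
        const std::uint32_t height =
            (use_docked_mode ? ScreenDocked::Height : ScreenUndocked::Height) * res_scale;

        std::cout << width << "x" << height << '\n'; // 3840x2160
    }
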
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 67e95999d..e8d8871a7 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -217,7 +217,7 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
217 return 0; 217 return 0;
218 } 218 }
219 219
220 const auto& thread_context = thread->GetContext(); 220 const auto& thread_context = thread->GetContext64();
221 221
222 if (id < SP_REGISTER) { 222 if (id < SP_REGISTER) {
223 return thread_context.cpu_registers[id]; 223 return thread_context.cpu_registers[id];
@@ -239,7 +239,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
239 return; 239 return;
240 } 240 }
241 241
242 auto& thread_context = thread->GetContext(); 242 auto& thread_context = thread->GetContext64();
243 243
244 if (id < SP_REGISTER) { 244 if (id < SP_REGISTER) {
245 thread_context.cpu_registers[id] = val; 245 thread_context.cpu_registers[id] = val;
@@ -259,7 +259,7 @@ static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) {
259 return u128{0}; 259 return u128{0};
260 } 260 }
261 261
262 auto& thread_context = thread->GetContext(); 262 auto& thread_context = thread->GetContext64();
263 263
264 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 264 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
265 return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; 265 return thread_context.vector_registers[id - UC_ARM64_REG_Q0];
@@ -275,7 +275,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr)
275 return; 275 return;
276 } 276 }
277 277
278 auto& thread_context = thread->GetContext(); 278 auto& thread_context = thread->GetContext64();
279 279
280 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 280 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
281 thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; 281 thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val;
@@ -916,7 +916,7 @@ static void WriteRegister() {
916 // Update ARM context, skipping scheduler - no running threads at this point 916 // Update ARM context, skipping scheduler - no running threads at this point
917 Core::System::GetInstance() 917 Core::System::GetInstance()
918 .ArmInterface(current_core) 918 .ArmInterface(current_core)
919 .LoadContext(current_thread->GetContext()); 919 .LoadContext(current_thread->GetContext64());
920 920
921 SendReply("OK"); 921 SendReply("OK");
922} 922}
@@ -947,7 +947,7 @@ static void WriteRegisters() {
947 // Update ARM context, skipping scheduler - no running threads at this point 947 // Update ARM context, skipping scheduler - no running threads at this point
948 Core::System::GetInstance() 948 Core::System::GetInstance()
949 .ArmInterface(current_core) 949 .ArmInterface(current_core)
950 .LoadContext(current_thread->GetContext()); 950 .LoadContext(current_thread->GetContext64());
951 951
952 SendReply("OK"); 952 SendReply("OK");
953} 953}
@@ -1019,7 +1019,7 @@ static void Step() {
1019 // Update ARM context, skipping scheduler - no running threads at this point 1019 // Update ARM context, skipping scheduler - no running threads at this point
1020 Core::System::GetInstance() 1020 Core::System::GetInstance()
1021 .ArmInterface(current_core) 1021 .ArmInterface(current_core)
1022 .LoadContext(current_thread->GetContext()); 1022 .LoadContext(current_thread->GetContext64());
1023 } 1023 }
1024 step_loop = true; 1024 step_loop = true;
1025 halt_loop = true; 1025 halt_loop = true;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 9232f4d7e..e47f1deed 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -186,6 +186,10 @@ struct KernelCore::Impl {
186 return; 186 return;
187 } 187 }
188 188
189 for (auto& core : cores) {
190 core.SetIs64Bit(process->Is64BitProcess());
191 }
192
189 system.Memory().SetCurrentPageTable(*process); 193 system.Memory().SetCurrentPageTable(*process);
190 } 194 }
191 195
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 9303dd273..aa2787467 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -5,7 +5,8 @@
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/arm/arm_interface.h" 6#include "core/arm/arm_interface.h"
7#ifdef ARCHITECTURE_x86_64 7#ifdef ARCHITECTURE_x86_64
8#include "core/arm/dynarmic/arm_dynarmic.h" 8#include "core/arm/dynarmic/arm_dynarmic_32.h"
9#include "core/arm/dynarmic/arm_dynarmic_64.h"
9#endif 10#endif
10#include "core/arm/exclusive_monitor.h" 11#include "core/arm/exclusive_monitor.h"
11#include "core/arm/unicorn/arm_unicorn.h" 12#include "core/arm/unicorn/arm_unicorn.h"
@@ -20,13 +21,17 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
20 Core::ExclusiveMonitor& exclusive_monitor) 21 Core::ExclusiveMonitor& exclusive_monitor)
21 : core_index{id} { 22 : core_index{id} {
22#ifdef ARCHITECTURE_x86_64 23#ifdef ARCHITECTURE_x86_64
23 arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); 24 arm_interface_32 =
25 std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index);
26 arm_interface_64 =
27 std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);
28
24#else 29#else
25 arm_interface = std::make_shared<Core::ARM_Unicorn>(system); 30 arm_interface = std::make_shared<Core::ARM_Unicorn>(system);
26 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 31 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
27#endif 32#endif
28 33
29 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); 34 scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
30} 35}
31 36
32PhysicalCore::~PhysicalCore() = default; 37PhysicalCore::~PhysicalCore() = default;
@@ -48,4 +53,12 @@ void PhysicalCore::Shutdown() {
48 scheduler->Shutdown(); 53 scheduler->Shutdown();
49} 54}
50 55
56void PhysicalCore::SetIs64Bit(bool is_64_bit) {
57 if (is_64_bit) {
58 arm_interface = arm_interface_64.get();
59 } else {
60 arm_interface = arm_interface_32.get();
61 }
62}
63
51} // namespace Kernel 64} // namespace Kernel
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 4c32c0f1b..3269166be 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -68,10 +68,14 @@ public:
68 return *scheduler; 68 return *scheduler;
69 } 69 }
70 70
71 void SetIs64Bit(bool is_64_bit);
72
71private: 73private:
72 std::size_t core_index; 74 std::size_t core_index;
73 std::unique_ptr<Core::ARM_Interface> arm_interface; 75 std::unique_ptr<Core::ARM_Interface> arm_interface_32;
76 std::unique_ptr<Core::ARM_Interface> arm_interface_64;
74 std::unique_ptr<Kernel::Scheduler> scheduler; 77 std::unique_ptr<Kernel::Scheduler> scheduler;
78 Core::ARM_Interface* arm_interface{};
75}; 79};
76 80
77} // namespace Kernel 81} // namespace Kernel
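
PhysicalCore now owns both a 32-bit and a 64-bit JIT for its whole lifetime, and SetIs64Bit() merely repoints a non-owning pointer at whichever one the current process needs, which is what KernelCore does per core when a process becomes current. A standalone sketch of that ownership pattern, with Backend32/Backend64 standing in for the two Dynarmic instances:

    #include <iostream>
    #include <memory>

    class ArmBackend {
    public:
        virtual ~ArmBackend() = default;
        virtual const char* Name() const = 0;
    };

    class Backend32 final : public ArmBackend {
    public:
        const char* Name() const override { return "AArch32 JIT"; }
    };

    class Backend64 final : public ArmBackend {
    public:
        const char* Name() const override { return "AArch64 JIT"; }
    };

    class PhysicalCore {
    public:
        PhysicalCore()
            : backend_32{std::make_unique<Backend32>()}, backend_64{std::make_unique<Backend64>()} {}

        void SetIs64Bit(bool is_64_bit) {
            active = is_64_bit ? backend_64.get() : backend_32.get();
        }

        ArmBackend& ArmInterface() { return *active; }

    private:
        std::unique_ptr<ArmBackend> backend_32;
        std::unique_ptr<ArmBackend> backend_64;
        ArmBackend* active = nullptr; // non-owning view of one of the two above
    };

    int main() {
        PhysicalCore core;
        core.SetIs64Bit(true);  // what MakeCurrentProcess triggers per core
        std::cout << core.ArmInterface().Name() << '\n'; // AArch64 JIT
        core.SetIs64Bit(false);
        std::cout << core.ArmInterface().Name() << '\n'; // AArch32 JIT
    }

Both backends are constructed up front, so the per-process switch is an allocation-free pointer swap.
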
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2fcb7326c..edc414d69 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -42,7 +42,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
42 42
43 // Register 1 must be a handle to the main thread 43 // Register 1 must be a handle to the main thread
44 const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); 44 const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
45 thread->GetContext().cpu_registers[1] = thread_handle; 45 thread->GetContext32().cpu_registers[1] = thread_handle;
46 thread->GetContext64().cpu_registers[1] = thread_handle;
46 47
47 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires 48 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
48 thread->ResumeFromWait(); 49 thread->ResumeFromWait();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index c65f82fb7..1140c72a3 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -383,8 +383,8 @@ void GlobalScheduler::Unlock() {
383 // TODO(Blinkhawk): Setup the interrupts and change context on current core. 383 // TODO(Blinkhawk): Setup the interrupts and change context on current core.
384} 384}
385 385
386Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) 386Scheduler::Scheduler(Core::System& system, std::size_t core_id)
387 : system(system), cpu_core(cpu_core), core_id(core_id) {} 387 : system{system}, core_id{core_id} {}
388 388
389Scheduler::~Scheduler() = default; 389Scheduler::~Scheduler() = default;
390 390
@@ -422,9 +422,10 @@ void Scheduler::UnloadThread() {
422 422
423 // Save context for previous thread 423 // Save context for previous thread
424 if (previous_thread) { 424 if (previous_thread) {
425 cpu_core.SaveContext(previous_thread->GetContext()); 425 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
426 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
426 // Save the TPIDR_EL0 system register in case it was modified. 427 // Save the TPIDR_EL0 system register in case it was modified.
427 previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); 428 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
428 429
429 if (previous_thread->GetStatus() == ThreadStatus::Running) { 430 if (previous_thread->GetStatus() == ThreadStatus::Running) {
430 // This is only the case when a reschedule is triggered without the current thread 431 // This is only the case when a reschedule is triggered without the current thread
@@ -451,9 +452,10 @@ void Scheduler::SwitchContext() {
451 452
452 // Save context for previous thread 453 // Save context for previous thread
453 if (previous_thread) { 454 if (previous_thread) {
454 cpu_core.SaveContext(previous_thread->GetContext()); 455 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
456 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
455 // Save the TPIDR_EL0 system register in case it was modified. 457 // Save the TPIDR_EL0 system register in case it was modified.
456 previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); 458 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
457 459
458 if (previous_thread->GetStatus() == ThreadStatus::Running) { 460 if (previous_thread->GetStatus() == ThreadStatus::Running) {
459 // This is only the case when a reschedule is triggered without the current thread 461 // This is only the case when a reschedule is triggered without the current thread
@@ -481,9 +483,10 @@ void Scheduler::SwitchContext() {
481 system.Kernel().MakeCurrentProcess(thread_owner_process); 483 system.Kernel().MakeCurrentProcess(thread_owner_process);
482 } 484 }
483 485
484 cpu_core.LoadContext(new_thread->GetContext()); 486 system.ArmInterface(core_id).LoadContext(new_thread->GetContext32());
485 cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); 487 system.ArmInterface(core_id).LoadContext(new_thread->GetContext64());
486 cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); 488 system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress());
489 system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
487 } else { 490 } else {
488 current_thread = nullptr; 491 current_thread = nullptr;
489 // Note: We do not reset the current process and current page table when idling because 492 // Note: We do not reset the current process and current page table when idling because
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1c93a838c..07df33f9c 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -181,7 +181,7 @@ private:
181 181
182class Scheduler final { 182class Scheduler final {
183public: 183public:
184 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id); 184 explicit Scheduler(Core::System& system, std::size_t core_id);
185 ~Scheduler(); 185 ~Scheduler();
186 186
187 /// Returns whether there are any threads that are ready to run. 187 /// Returns whether there are any threads that are ready to run.
@@ -235,7 +235,6 @@ private:
235 std::shared_ptr<Thread> selected_thread = nullptr; 235 std::shared_ptr<Thread> selected_thread = nullptr;
236 236
237 Core::System& system; 237 Core::System& system;
238 Core::ARM_Interface& cpu_core;
239 u64 last_context_switch_time = 0; 238 u64 last_context_switch_time = 0;
240 u64 idle_selection_count = 0; 239 u64 idle_selection_count = 0;
241 const std::size_t core_id; 240 const std::size_t core_id;
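
With the cached cpu_core reference gone, the scheduler asks the system for the core's current ARM interface on every save and restore, so a pointer swap performed by SetIs64Bit() between processes is always observed. A standalone sketch of that lookup-per-switch design, with System and Backend as illustrative stand-ins:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <string>

    struct Backend {
        std::string name;
        std::uint64_t tpidr_el0 = 0;
    };

    class System {
    public:
        Backend& ArmInterface(std::size_t core_id) { return *active[core_id]; }
        void SetActive(std::size_t core_id, Backend* backend) { active[core_id] = backend; }

    private:
        Backend* active[4]{};
    };

    class Scheduler {
    public:
        Scheduler(System& system, std::size_t core_id) : system{system}, core_id{core_id} {}

        void SwitchContext(std::uint64_t thread_tpidr) {
            // Looked up per switch rather than cached, in case the active backend changed.
            Backend& cpu = system.ArmInterface(core_id);
            cpu.tpidr_el0 = thread_tpidr;
            std::cout << "switched on " << cpu.name << '\n';
        }

    private:
        System& system;
        const std::size_t core_id;
    };

    int main() {
        Backend jit64{"AArch64 JIT"};
        Backend jit32{"AArch32 JIT"};
        System system;
        Scheduler scheduler{system, 0};

        system.SetActive(0, &jit64);
        scheduler.SwitchContext(0x1000); // switched on AArch64 JIT
        system.SetActive(0, &jit32);     // a 32-bit process became current
        scheduler.SwitchContext(0x2000); // switched on AArch32 JIT
    }
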
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index fd91779a3..4ffc113c2 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -187,6 +187,13 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
187 return RESULT_SUCCESS; 187 return RESULT_SUCCESS;
188} 188}
189 189
190static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_size) {
191 VAddr temp_heap_addr{};
192 const ResultCode result{SetHeapSize(system, &temp_heap_addr, heap_size)};
193 *heap_addr = static_cast<u32>(temp_heap_addr);
194 return result;
195}
196
190static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { 197static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) {
191 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); 198 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot);
192 199
@@ -371,6 +378,12 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
371 return RESULT_SUCCESS; 378 return RESULT_SUCCESS;
372} 379}
373 380
381static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
382 u32 port_name_address) {
383
384 return ConnectToNamedPort(system, out_handle, port_name_address);
385}
386
374/// Makes a blocking IPC call to an OS service. 387/// Makes a blocking IPC call to an OS service.
375static ResultCode SendSyncRequest(Core::System& system, Handle handle) { 388static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
376 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 389 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -390,6 +403,10 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
390 return session->SendSyncRequest(SharedFrom(thread), system.Memory()); 403 return session->SendSyncRequest(SharedFrom(thread), system.Memory());
391} 404}
392 405
406static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
407 return SendSyncRequest(system, handle);
408}
409
393/// Get the ID for the specified thread. 410/// Get the ID for the specified thread.
394static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { 411static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) {
395 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); 412 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
@@ -405,6 +422,17 @@ static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle threa
405 return RESULT_SUCCESS; 422 return RESULT_SUCCESS;
406} 423}
407 424
425static ResultCode GetThreadId32(Core::System& system, u32* thread_id_low, u32* thread_id_high,
426 Handle thread_handle) {
427 u64 thread_id{};
428 const ResultCode result{GetThreadId(system, &thread_id, thread_handle)};
429
430 *thread_id_low = static_cast<u32>(thread_id >> 32);
431 *thread_id_high = static_cast<u32>(thread_id & std::numeric_limits<u32>::max());
432
433 return result;
434}
435
408/// Gets the ID of the specified process or a specified thread's owning process. 436/// Gets the ID of the specified process or a specified thread's owning process.
409static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { 437static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) {
410 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); 438 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle);
@@ -479,6 +507,12 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
479 return result; 507 return result;
480} 508}
481 509
510static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address,
511 s32 handle_count, u32 timeout_high, Handle* index) {
512 const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)};
513 return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds);
514}
515
482/// Resumes a thread waiting on WaitSynchronization 516/// Resumes a thread waiting on WaitSynchronization
483static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { 517static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) {
484 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); 518 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
@@ -917,6 +951,18 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
917 } 951 }
918} 952}
919 953
954static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low,
955 u32 info_id, u32 handle, u32 sub_id_high) {
956 const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))};
957 u64 res_value{};
958
959 const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)};
960 *result_high = static_cast<u32>(res_value >> 32);
961 *result_low = static_cast<u32>(res_value & std::numeric_limits<u32>::max());
962
963 return result;
964}
965
920/// Maps memory at a desired address 966/// Maps memory at a desired address
921static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { 967static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
922 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); 968 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
@@ -1058,7 +1104,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
1058 return ERR_BUSY; 1104 return ERR_BUSY;
1059 } 1105 }
1060 1106
1061 Core::ARM_Interface::ThreadContext ctx = thread->GetContext(); 1107 Core::ARM_Interface::ThreadContext64 ctx = thread->GetContext64();
1062 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 1108 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
1063 ctx.pstate &= 0xFF0FFE20; 1109 ctx.pstate &= 0xFF0FFE20;
1064 1110
@@ -1088,6 +1134,10 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle
1088 return RESULT_SUCCESS; 1134 return RESULT_SUCCESS;
1089} 1135}
1090 1136
1137static ResultCode GetThreadPriority32(Core::System& system, u32* priority, Handle handle) {
1138 return GetThreadPriority(system, priority, handle);
1139}
1140
1091/// Sets the priority for the specified thread 1141/// Sets the priority for the specified thread
1092static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { 1142static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) {
1093 LOG_TRACE(Kernel_SVC, "called"); 1143 LOG_TRACE(Kernel_SVC, "called");
@@ -1259,6 +1309,11 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address,
1259 query_address); 1309 query_address);
1260} 1310}
1261 1311
1312static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address,
1313 u32 page_info_address, u32 query_address) {
1314 return QueryMemory(system, memory_info_address, page_info_address, query_address);
1315}
1316
1262static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, 1317static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
1263 u64 src_address, u64 size) { 1318 u64 src_address, u64 size) {
1264 LOG_DEBUG(Kernel_SVC, 1319 LOG_DEBUG(Kernel_SVC,
@@ -1675,6 +1730,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
1675 } 1730 }
1676} 1731}
1677 1732
1733static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) {
1734 SignalProcessWideKey(system, condition_variable_addr, target);
1735}
1736
1678// Wait for an address (via Address Arbiter) 1737// Wait for an address (via Address Arbiter)
1679static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, 1738static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
1680 s64 timeout) { 1739 s64 timeout) {
@@ -1760,6 +1819,10 @@ static ResultCode CloseHandle(Core::System& system, Handle handle) {
1760 return handle_table.Close(handle); 1819 return handle_table.Close(handle);
1761} 1820}
1762 1821
1822static ResultCode CloseHandle32(Core::System& system, Handle handle) {
1823 return CloseHandle(system, handle);
1824}
1825
1763/// Clears the signaled state of an event or process. 1826/// Clears the signaled state of an event or process.
1764static ResultCode ResetSignal(Core::System& system, Handle handle) { 1827static ResultCode ResetSignal(Core::System& system, Handle handle) {
1765 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); 1828 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
@@ -2317,69 +2380,196 @@ struct FunctionDef {
2317}; 2380};
2318} // namespace 2381} // namespace
2319 2382
2320static const FunctionDef SVC_Table[] = { 2383static const FunctionDef SVC_Table_32[] = {
2321 {0x00, nullptr, "Unknown"}, 2384 {0x00, nullptr, "Unknown"},
2322 {0x01, SvcWrap<SetHeapSize>, "SetHeapSize"}, 2385 {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"},
2323 {0x02, SvcWrap<SetMemoryPermission>, "SetMemoryPermission"}, 2386 {0x02, nullptr, "Unknown"},
2324 {0x03, SvcWrap<SetMemoryAttribute>, "SetMemoryAttribute"}, 2387 {0x03, nullptr, "SetMemoryAttribute32"},
2325 {0x04, SvcWrap<MapMemory>, "MapMemory"}, 2388 {0x04, nullptr, "MapMemory32"},
2326 {0x05, SvcWrap<UnmapMemory>, "UnmapMemory"}, 2389 {0x05, nullptr, "UnmapMemory32"},
2327 {0x06, SvcWrap<QueryMemory>, "QueryMemory"}, 2390 {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"},
2328 {0x07, SvcWrap<ExitProcess>, "ExitProcess"}, 2391 {0x07, nullptr, "ExitProcess32"},
2329 {0x08, SvcWrap<CreateThread>, "CreateThread"}, 2392 {0x08, nullptr, "CreateThread32"},
2330 {0x09, SvcWrap<StartThread>, "StartThread"}, 2393 {0x09, nullptr, "StartThread32"},
2331 {0x0A, SvcWrap<ExitThread>, "ExitThread"}, 2394 {0x0a, nullptr, "ExitThread32"},
2332 {0x0B, SvcWrap<SleepThread>, "SleepThread"}, 2395 {0x0b, nullptr, "SleepThread32"},
2333 {0x0C, SvcWrap<GetThreadPriority>, "GetThreadPriority"}, 2396 {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"},
2334 {0x0D, SvcWrap<SetThreadPriority>, "SetThreadPriority"}, 2397 {0x0d, nullptr, "SetThreadPriority32"},
2335 {0x0E, SvcWrap<GetThreadCoreMask>, "GetThreadCoreMask"}, 2398 {0x0e, nullptr, "GetThreadCoreMask32"},
2336 {0x0F, SvcWrap<SetThreadCoreMask>, "SetThreadCoreMask"}, 2399 {0x0f, nullptr, "SetThreadCoreMask32"},
2337 {0x10, SvcWrap<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, 2400 {0x10, nullptr, "GetCurrentProcessorNumber32"},
2338 {0x11, SvcWrap<SignalEvent>, "SignalEvent"}, 2401 {0x11, nullptr, "SignalEvent32"},
2339 {0x12, SvcWrap<ClearEvent>, "ClearEvent"}, 2402 {0x12, nullptr, "ClearEvent32"},
2340 {0x13, SvcWrap<MapSharedMemory>, "MapSharedMemory"}, 2403 {0x13, nullptr, "MapSharedMemory32"},
2341 {0x14, SvcWrap<UnmapSharedMemory>, "UnmapSharedMemory"}, 2404 {0x14, nullptr, "UnmapSharedMemory32"},
2342 {0x15, SvcWrap<CreateTransferMemory>, "CreateTransferMemory"}, 2405 {0x15, nullptr, "CreateTransferMemory32"},
2343 {0x16, SvcWrap<CloseHandle>, "CloseHandle"}, 2406 {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"},
2344 {0x17, SvcWrap<ResetSignal>, "ResetSignal"}, 2407 {0x17, nullptr, "ResetSignal32"},
2345 {0x18, SvcWrap<WaitSynchronization>, "WaitSynchronization"}, 2408 {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"},
2346 {0x19, SvcWrap<CancelSynchronization>, "CancelSynchronization"}, 2409 {0x19, nullptr, "CancelSynchronization32"},
2347 {0x1A, SvcWrap<ArbitrateLock>, "ArbitrateLock"}, 2410 {0x1a, nullptr, "ArbitrateLock32"},
2348 {0x1B, SvcWrap<ArbitrateUnlock>, "ArbitrateUnlock"}, 2411 {0x1b, nullptr, "ArbitrateUnlock32"},
2349 {0x1C, SvcWrap<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, 2412 {0x1c, nullptr, "WaitProcessWideKeyAtomic32"},
2350 {0x1D, SvcWrap<SignalProcessWideKey>, "SignalProcessWideKey"}, 2413 {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"},
2351 {0x1E, SvcWrap<GetSystemTick>, "GetSystemTick"}, 2414 {0x1e, nullptr, "GetSystemTick32"},
2352 {0x1F, SvcWrap<ConnectToNamedPort>, "ConnectToNamedPort"}, 2415 {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"},
2416 {0x20, nullptr, "Unknown"},
2417 {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"},
2418 {0x22, nullptr, "SendSyncRequestWithUserBuffer32"},
2419 {0x23, nullptr, "Unknown"},
2420 {0x24, nullptr, "GetProcessId32"},
2421 {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"},
2422 {0x26, nullptr, "Break32"},
2423 {0x27, nullptr, "OutputDebugString32"},
2424 {0x28, nullptr, "Unknown"},
2425 {0x29, SvcWrap32<GetInfo32>, "GetInfo32"},
2426 {0x2a, nullptr, "Unknown"},
2427 {0x2b, nullptr, "Unknown"},
2428 {0x2c, nullptr, "MapPhysicalMemory32"},
2429 {0x2d, nullptr, "UnmapPhysicalMemory32"},
2430 {0x2e, nullptr, "Unknown"},
2431 {0x2f, nullptr, "Unknown"},
2432 {0x30, nullptr, "Unknown"},
2433 {0x31, nullptr, "Unknown"},
2434 {0x32, nullptr, "SetThreadActivity32"},
2435 {0x33, nullptr, "GetThreadContext32"},
2436 {0x34, nullptr, "WaitForAddress32"},
2437 {0x35, nullptr, "SignalToAddress32"},
2438 {0x36, nullptr, "Unknown"},
2439 {0x37, nullptr, "Unknown"},
2440 {0x38, nullptr, "Unknown"},
2441 {0x39, nullptr, "Unknown"},
2442 {0x3a, nullptr, "Unknown"},
2443 {0x3b, nullptr, "Unknown"},
2444 {0x3c, nullptr, "Unknown"},
2445 {0x3d, nullptr, "Unknown"},
2446 {0x3e, nullptr, "Unknown"},
2447 {0x3f, nullptr, "Unknown"},
2448 {0x40, nullptr, "CreateSession32"},
2449 {0x41, nullptr, "AcceptSession32"},
2450 {0x42, nullptr, "Unknown"},
2451 {0x43, nullptr, "ReplyAndReceive32"},
2452 {0x44, nullptr, "Unknown"},
2453 {0x45, nullptr, "CreateEvent32"},
2454 {0x46, nullptr, "Unknown"},
2455 {0x47, nullptr, "Unknown"},
2456 {0x48, nullptr, "Unknown"},
2457 {0x49, nullptr, "Unknown"},
2458 {0x4a, nullptr, "Unknown"},
2459 {0x4b, nullptr, "Unknown"},
2460 {0x4c, nullptr, "Unknown"},
2461 {0x4d, nullptr, "Unknown"},
2462 {0x4e, nullptr, "Unknown"},
2463 {0x4f, nullptr, "Unknown"},
2464 {0x50, nullptr, "Unknown"},
2465 {0x51, nullptr, "Unknown"},
2466 {0x52, nullptr, "Unknown"},
2467 {0x53, nullptr, "Unknown"},
2468 {0x54, nullptr, "Unknown"},
2469 {0x55, nullptr, "Unknown"},
2470 {0x56, nullptr, "Unknown"},
2471 {0x57, nullptr, "Unknown"},
2472 {0x58, nullptr, "Unknown"},
2473 {0x59, nullptr, "Unknown"},
2474 {0x5a, nullptr, "Unknown"},
2475 {0x5b, nullptr, "Unknown"},
2476 {0x5c, nullptr, "Unknown"},
2477 {0x5d, nullptr, "Unknown"},
2478 {0x5e, nullptr, "Unknown"},
2479 {0x5F, nullptr, "FlushProcessDataCache32"},
2480 {0x60, nullptr, "Unknown"},
2481 {0x61, nullptr, "Unknown"},
2482 {0x62, nullptr, "Unknown"},
2483 {0x63, nullptr, "Unknown"},
2484 {0x64, nullptr, "Unknown"},
2485 {0x65, nullptr, "GetProcessList32"},
2486 {0x66, nullptr, "Unknown"},
2487 {0x67, nullptr, "Unknown"},
2488 {0x68, nullptr, "Unknown"},
2489 {0x69, nullptr, "Unknown"},
2490 {0x6A, nullptr, "Unknown"},
2491 {0x6B, nullptr, "Unknown"},
2492 {0x6C, nullptr, "Unknown"},
2493 {0x6D, nullptr, "Unknown"},
2494 {0x6E, nullptr, "Unknown"},
2495 {0x6f, nullptr, "GetSystemInfo32"},
2496 {0x70, nullptr, "CreatePort32"},
2497 {0x71, nullptr, "ManageNamedPort32"},
2498 {0x72, nullptr, "ConnectToPort32"},
2499 {0x73, nullptr, "SetProcessMemoryPermission32"},
2500 {0x74, nullptr, "Unknown"},
2501 {0x75, nullptr, "Unknown"},
2502 {0x76, nullptr, "Unknown"},
2503 {0x77, nullptr, "MapProcessCodeMemory32"},
2504 {0x78, nullptr, "UnmapProcessCodeMemory32"},
2505 {0x79, nullptr, "Unknown"},
2506 {0x7A, nullptr, "Unknown"},
2507 {0x7B, nullptr, "TerminateProcess32"},
2508};
2509
2510static const FunctionDef SVC_Table_64[] = {
2511 {0x00, nullptr, "Unknown"},
2512 {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"},
2513 {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"},
2514 {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"},
2515 {0x04, SvcWrap64<MapMemory>, "MapMemory"},
2516 {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"},
2517 {0x06, SvcWrap64<QueryMemory>, "QueryMemory"},
2518 {0x07, SvcWrap64<ExitProcess>, "ExitProcess"},
2519 {0x08, SvcWrap64<CreateThread>, "CreateThread"},
2520 {0x09, SvcWrap64<StartThread>, "StartThread"},
2521 {0x0A, SvcWrap64<ExitThread>, "ExitThread"},
2522 {0x0B, SvcWrap64<SleepThread>, "SleepThread"},
2523 {0x0C, SvcWrap64<GetThreadPriority>, "GetThreadPriority"},
2524 {0x0D, SvcWrap64<SetThreadPriority>, "SetThreadPriority"},
2525 {0x0E, SvcWrap64<GetThreadCoreMask>, "GetThreadCoreMask"},
2526 {0x0F, SvcWrap64<SetThreadCoreMask>, "SetThreadCoreMask"},
2527 {0x10, SvcWrap64<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"},
2528 {0x11, SvcWrap64<SignalEvent>, "SignalEvent"},
2529 {0x12, SvcWrap64<ClearEvent>, "ClearEvent"},
2530 {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"},
2531 {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"},
2532 {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"},
2533 {0x16, SvcWrap64<CloseHandle>, "CloseHandle"},
2534 {0x17, SvcWrap64<ResetSignal>, "ResetSignal"},
2535 {0x18, SvcWrap64<WaitSynchronization>, "WaitSynchronization"},
2536 {0x19, SvcWrap64<CancelSynchronization>, "CancelSynchronization"},
2537 {0x1A, SvcWrap64<ArbitrateLock>, "ArbitrateLock"},
2538 {0x1B, SvcWrap64<ArbitrateUnlock>, "ArbitrateUnlock"},
2539 {0x1C, SvcWrap64<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"},
2540 {0x1D, SvcWrap64<SignalProcessWideKey>, "SignalProcessWideKey"},
2541 {0x1E, SvcWrap64<GetSystemTick>, "GetSystemTick"},
2542 {0x1F, SvcWrap64<ConnectToNamedPort>, "ConnectToNamedPort"},
2353 {0x20, nullptr, "SendSyncRequestLight"}, 2543 {0x20, nullptr, "SendSyncRequestLight"},
2354 {0x21, SvcWrap<SendSyncRequest>, "SendSyncRequest"}, 2544 {0x21, SvcWrap64<SendSyncRequest>, "SendSyncRequest"},
2355 {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, 2545 {0x22, nullptr, "SendSyncRequestWithUserBuffer"},
2356 {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, 2546 {0x23, nullptr, "SendAsyncRequestWithUserBuffer"},
2357 {0x24, SvcWrap<GetProcessId>, "GetProcessId"}, 2547 {0x24, SvcWrap64<GetProcessId>, "GetProcessId"},
2358 {0x25, SvcWrap<GetThreadId>, "GetThreadId"}, 2548 {0x25, SvcWrap64<GetThreadId>, "GetThreadId"},
2359 {0x26, SvcWrap<Break>, "Break"}, 2549 {0x26, SvcWrap64<Break>, "Break"},
2360 {0x27, SvcWrap<OutputDebugString>, "OutputDebugString"}, 2550 {0x27, SvcWrap64<OutputDebugString>, "OutputDebugString"},
2361 {0x28, nullptr, "ReturnFromException"}, 2551 {0x28, nullptr, "ReturnFromException"},
2362 {0x29, SvcWrap<GetInfo>, "GetInfo"}, 2552 {0x29, SvcWrap64<GetInfo>, "GetInfo"},
2363 {0x2A, nullptr, "FlushEntireDataCache"}, 2553 {0x2A, nullptr, "FlushEntireDataCache"},
2364 {0x2B, nullptr, "FlushDataCache"}, 2554 {0x2B, nullptr, "FlushDataCache"},
2365 {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, 2555 {0x2C, SvcWrap64<MapPhysicalMemory>, "MapPhysicalMemory"},
2366 {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, 2556 {0x2D, SvcWrap64<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
2367 {0x2E, nullptr, "GetFutureThreadInfo"}, 2557 {0x2E, nullptr, "GetFutureThreadInfo"},
2368 {0x2F, nullptr, "GetLastThreadInfo"}, 2558 {0x2F, nullptr, "GetLastThreadInfo"},
2369 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, 2559 {0x30, SvcWrap64<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
2370 {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, 2560 {0x31, SvcWrap64<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"},
2371 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, 2561 {0x32, SvcWrap64<SetThreadActivity>, "SetThreadActivity"},
2372 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, 2562 {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"},
2373 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, 2563 {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"},
2374 {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, 2564 {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"},
2375 {0x36, nullptr, "SynchronizePreemptionState"}, 2565 {0x36, nullptr, "SynchronizePreemptionState"},
2376 {0x37, nullptr, "Unknown"}, 2566 {0x37, nullptr, "Unknown"},
2377 {0x38, nullptr, "Unknown"}, 2567 {0x38, nullptr, "Unknown"},
2378 {0x39, nullptr, "Unknown"}, 2568 {0x39, nullptr, "Unknown"},
2379 {0x3A, nullptr, "Unknown"}, 2569 {0x3A, nullptr, "Unknown"},
2380 {0x3B, nullptr, "Unknown"}, 2570 {0x3B, nullptr, "Unknown"},
2381 {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, 2571 {0x3C, SvcWrap64<KernelDebug>, "KernelDebug"},
2382 {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, 2572 {0x3D, SvcWrap64<ChangeKernelTraceState>, "ChangeKernelTraceState"},
2383 {0x3E, nullptr, "Unknown"}, 2573 {0x3E, nullptr, "Unknown"},
2384 {0x3F, nullptr, "Unknown"}, 2574 {0x3F, nullptr, "Unknown"},
2385 {0x40, nullptr, "CreateSession"}, 2575 {0x40, nullptr, "CreateSession"},
@@ -2387,7 +2577,7 @@ static const FunctionDef SVC_Table[] = {
2387 {0x42, nullptr, "ReplyAndReceiveLight"}, 2577 {0x42, nullptr, "ReplyAndReceiveLight"},
2388 {0x43, nullptr, "ReplyAndReceive"}, 2578 {0x43, nullptr, "ReplyAndReceive"},
2389 {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, 2579 {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"},
2390 {0x45, SvcWrap<CreateEvent>, "CreateEvent"}, 2580 {0x45, SvcWrap64<CreateEvent>, "CreateEvent"},
2391 {0x46, nullptr, "Unknown"}, 2581 {0x46, nullptr, "Unknown"},
2392 {0x47, nullptr, "Unknown"}, 2582 {0x47, nullptr, "Unknown"},
2393 {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, 2583 {0x48, nullptr, "MapPhysicalMemoryUnsafe"},
@@ -2398,9 +2588,9 @@ static const FunctionDef SVC_Table[] = {
2398 {0x4D, nullptr, "SleepSystem"}, 2588 {0x4D, nullptr, "SleepSystem"},
2399 {0x4E, nullptr, "ReadWriteRegister"}, 2589 {0x4E, nullptr, "ReadWriteRegister"},
2400 {0x4F, nullptr, "SetProcessActivity"}, 2590 {0x4F, nullptr, "SetProcessActivity"},
2401 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, 2591 {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"},
2402 {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"}, 2592 {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"},
2403 {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"}, 2593 {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"},
2404 {0x53, nullptr, "CreateInterruptEvent"}, 2594 {0x53, nullptr, "CreateInterruptEvent"},
2405 {0x54, nullptr, "QueryPhysicalAddress"}, 2595 {0x54, nullptr, "QueryPhysicalAddress"},
2406 {0x55, nullptr, "QueryIoMapping"}, 2596 {0x55, nullptr, "QueryIoMapping"},
@@ -2419,8 +2609,8 @@ static const FunctionDef SVC_Table[] = {
2419 {0x62, nullptr, "TerminateDebugProcess"}, 2609 {0x62, nullptr, "TerminateDebugProcess"},
2420 {0x63, nullptr, "GetDebugEvent"}, 2610 {0x63, nullptr, "GetDebugEvent"},
2421 {0x64, nullptr, "ContinueDebugEvent"}, 2611 {0x64, nullptr, "ContinueDebugEvent"},
2422 {0x65, SvcWrap<GetProcessList>, "GetProcessList"}, 2612 {0x65, SvcWrap64<GetProcessList>, "GetProcessList"},
2423 {0x66, SvcWrap<GetThreadList>, "GetThreadList"}, 2613 {0x66, SvcWrap64<GetThreadList>, "GetThreadList"},
2424 {0x67, nullptr, "GetDebugThreadContext"}, 2614 {0x67, nullptr, "GetDebugThreadContext"},
2425 {0x68, nullptr, "SetDebugThreadContext"}, 2615 {0x68, nullptr, "SetDebugThreadContext"},
2426 {0x69, nullptr, "QueryDebugProcessMemory"}, 2616 {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2436,24 +2626,32 @@ static const FunctionDef SVC_Table[] = {
2436 {0x73, nullptr, "SetProcessMemoryPermission"}, 2626 {0x73, nullptr, "SetProcessMemoryPermission"},
2437 {0x74, nullptr, "MapProcessMemory"}, 2627 {0x74, nullptr, "MapProcessMemory"},
2438 {0x75, nullptr, "UnmapProcessMemory"}, 2628 {0x75, nullptr, "UnmapProcessMemory"},
2439 {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, 2629 {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"},
2440 {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, 2630 {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"},
2441 {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, 2631 {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"},
2442 {0x79, nullptr, "CreateProcess"}, 2632 {0x79, nullptr, "CreateProcess"},
2443 {0x7A, nullptr, "StartProcess"}, 2633 {0x7A, nullptr, "StartProcess"},
2444 {0x7B, nullptr, "TerminateProcess"}, 2634 {0x7B, nullptr, "TerminateProcess"},
2445 {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, 2635 {0x7C, SvcWrap64<GetProcessInfo>, "GetProcessInfo"},
2446 {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, 2636 {0x7D, SvcWrap64<CreateResourceLimit>, "CreateResourceLimit"},
2447 {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, 2637 {0x7E, SvcWrap64<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"},
2448 {0x7F, nullptr, "CallSecureMonitor"}, 2638 {0x7F, nullptr, "CallSecureMonitor"},
2449}; 2639};
2450 2640
2451static const FunctionDef* GetSVCInfo(u32 func_num) { 2641static const FunctionDef* GetSVCInfo32(u32 func_num) {
2452 if (func_num >= std::size(SVC_Table)) { 2642 if (func_num >= std::size(SVC_Table_32)) {
2643 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num);
2644 return nullptr;
2645 }
2646 return &SVC_Table_32[func_num];
2647}
2648
2649static const FunctionDef* GetSVCInfo64(u32 func_num) {
2650 if (func_num >= std::size(SVC_Table_64)) {
2453 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); 2651 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num);
2454 return nullptr; 2652 return nullptr;
2455 } 2653 }
2456 return &SVC_Table[func_num]; 2654 return &SVC_Table_64[func_num];
2457} 2655}
2458 2656
2459MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 2657MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
@@ -2464,7 +2662,8 @@ void CallSVC(Core::System& system, u32 immediate) {
2464 // Lock the global kernel mutex when we enter the kernel HLE. 2662 // Lock the global kernel mutex when we enter the kernel HLE.
2465 std::lock_guard lock{HLE::g_hle_lock}; 2663 std::lock_guard lock{HLE::g_hle_lock};
2466 2664
2467 const FunctionDef* info = GetSVCInfo(immediate); 2665 const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
2666 : GetSVCInfo32(immediate);
2468 if (info) { 2667 if (info) {
2469 if (info->func) { 2668 if (info->func) {
2470 info->func(system); 2669 info->func(system);
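
The 32-bit entry points added above mostly do two things: widen a pair of u32 register values into one 64-bit argument (WaitSynchronization32's timeout) and split a 64-bit result back into two u32 halves for the guest's registers (GetInfo32, GetThreadId32), while CallSVC now selects SVC_Table_32 or SVC_Table_64 from Is64BitProcess(). A standalone sketch of the packing and unpacking; which half lands in which guest register is dictated by the 32-bit SVC ABI, not by these assumed helper names:

    #include <cstdint>
    #include <iostream>
    #include <limits>

    std::int64_t CombineTimeout(std::uint32_t low, std::uint32_t high) {
        return static_cast<std::int64_t>((static_cast<std::uint64_t>(high) << 32) | low);
    }

    void SplitResult(std::uint64_t value, std::uint32_t& low, std::uint32_t& high) {
        low = static_cast<std::uint32_t>(value & std::numeric_limits<std::uint32_t>::max());
        high = static_cast<std::uint32_t>(value >> 32);
    }

    int main() {
        const std::int64_t timeout = CombineTimeout(0x0000'0001u, 0x0000'0002u);
        std::cout << std::hex << timeout << '\n'; // 200000001

        std::uint32_t low = 0;
        std::uint32_t high = 0;
        SplitResult(0x1234'5678'9abc'def0ull, low, high);
        std::cout << high << ' ' << low << '\n'; // 12345678 9abcdef0
    }

Each table is still indexed directly by the SVC immediate, so GetSVCInfo32/GetSVCInfo64 stay a bounds check plus &table[func_num].
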
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 29a2cfa9d..7d735e3fa 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -15,6 +15,10 @@ static inline u64 Param(const Core::System& system, int n) {
15 return system.CurrentArmInterface().GetReg(n); 15 return system.CurrentArmInterface().GetReg(n);
16} 16}
17 17
18static inline u32 Param32(const Core::System& system, int n) {
19 return static_cast<u32>(system.CurrentArmInterface().GetReg(n));
20}
21
18/** 22/**
19 * HLE a function return from the current ARM userland process 23 * HLE a function return from the current ARM userland process
20 * @param system System context 24 * @param system System context
@@ -24,40 +28,44 @@ static inline void FuncReturn(Core::System& system, u64 result) {
24 system.CurrentArmInterface().SetReg(0, result); 28 system.CurrentArmInterface().SetReg(0, result);
25} 29}
26 30
31static inline void FuncReturn32(Core::System& system, u32 result) {
32 system.CurrentArmInterface().SetReg(0, (u64)result);
33}
34
27//////////////////////////////////////////////////////////////////////////////////////////////////// 35////////////////////////////////////////////////////////////////////////////////////////////////////
28// Function wrappers that return type ResultCode 36// Function wrappers that return type ResultCode
29 37
30template <ResultCode func(Core::System&, u64)> 38template <ResultCode func(Core::System&, u64)>
31void SvcWrap(Core::System& system) { 39void SvcWrap64(Core::System& system) {
32 FuncReturn(system, func(system, Param(system, 0)).raw); 40 FuncReturn(system, func(system, Param(system, 0)).raw);
33} 41}
34 42
35template <ResultCode func(Core::System&, u64, u64)> 43template <ResultCode func(Core::System&, u64, u64)>
36void SvcWrap(Core::System& system) { 44void SvcWrap64(Core::System& system) {
37 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); 45 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
38} 46}
39 47
40template <ResultCode func(Core::System&, u32)> 48template <ResultCode func(Core::System&, u32)>
41void SvcWrap(Core::System& system) { 49void SvcWrap64(Core::System& system) {
42 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); 50 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
43} 51}
44 52
45template <ResultCode func(Core::System&, u32, u32)> 53template <ResultCode func(Core::System&, u32, u32)>
46void SvcWrap(Core::System& system) { 54void SvcWrap64(Core::System& system) {
47 FuncReturn( 55 FuncReturn(
48 system, 56 system,
49 func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); 57 func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
50} 58}
51 59
52template <ResultCode func(Core::System&, u32, u64, u64, u64)> 60template <ResultCode func(Core::System&, u32, u64, u64, u64)>
53void SvcWrap(Core::System& system) { 61void SvcWrap64(Core::System& system) {
54 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 62 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
55 Param(system, 2), Param(system, 3)) 63 Param(system, 2), Param(system, 3))
56 .raw); 64 .raw);
57} 65}
58 66
59template <ResultCode func(Core::System&, u32*)> 67template <ResultCode func(Core::System&, u32*)>
60void SvcWrap(Core::System& system) { 68void SvcWrap64(Core::System& system) {
61 u32 param = 0; 69 u32 param = 0;
62 const u32 retval = func(system, &param).raw; 70 const u32 retval = func(system, &param).raw;
63 system.CurrentArmInterface().SetReg(1, param); 71 system.CurrentArmInterface().SetReg(1, param);
@@ -65,7 +73,7 @@ void SvcWrap(Core::System& system) {
65} 73}
66 74
67template <ResultCode func(Core::System&, u32*, u32)> 75template <ResultCode func(Core::System&, u32*, u32)>
68void SvcWrap(Core::System& system) { 76void SvcWrap64(Core::System& system) {
69 u32 param_1 = 0; 77 u32 param_1 = 0;
70 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; 78 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
71 system.CurrentArmInterface().SetReg(1, param_1); 79 system.CurrentArmInterface().SetReg(1, param_1);
@@ -73,7 +81,7 @@ void SvcWrap(Core::System& system) {
73} 81}
74 82
75template <ResultCode func(Core::System&, u32*, u32*)> 83template <ResultCode func(Core::System&, u32*, u32*)>
76void SvcWrap(Core::System& system) { 84void SvcWrap64(Core::System& system) {
77 u32 param_1 = 0; 85 u32 param_1 = 0;
78 u32 param_2 = 0; 86 u32 param_2 = 0;
79 const u32 retval = func(system, &param_1, &param_2).raw; 87 const u32 retval = func(system, &param_1, &param_2).raw;
@@ -86,7 +94,7 @@ void SvcWrap(Core::System& system) {
86} 94}
87 95
88template <ResultCode func(Core::System&, u32*, u64)> 96template <ResultCode func(Core::System&, u32*, u64)>
89void SvcWrap(Core::System& system) { 97void SvcWrap64(Core::System& system) {
90 u32 param_1 = 0; 98 u32 param_1 = 0;
91 const u32 retval = func(system, &param_1, Param(system, 1)).raw; 99 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
92 system.CurrentArmInterface().SetReg(1, param_1); 100 system.CurrentArmInterface().SetReg(1, param_1);
@@ -94,7 +102,7 @@ void SvcWrap(Core::System& system) {
94} 102}
95 103
96template <ResultCode func(Core::System&, u32*, u64, u32)> 104template <ResultCode func(Core::System&, u32*, u64, u32)>
97void SvcWrap(Core::System& system) { 105void SvcWrap64(Core::System& system) {
98 u32 param_1 = 0; 106 u32 param_1 = 0;
99 const u32 retval = 107 const u32 retval =
100 func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; 108 func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
@@ -104,7 +112,7 @@ void SvcWrap(Core::System& system) {
104} 112}
105 113
106template <ResultCode func(Core::System&, u64*, u32)> 114template <ResultCode func(Core::System&, u64*, u32)>
107void SvcWrap(Core::System& system) { 115void SvcWrap64(Core::System& system) {
108 u64 param_1 = 0; 116 u64 param_1 = 0;
109 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; 117 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
110 118
@@ -113,12 +121,12 @@ void SvcWrap(Core::System& system) {
113} 121}
114 122
115template <ResultCode func(Core::System&, u64, u32)> 123template <ResultCode func(Core::System&, u64, u32)>
116void SvcWrap(Core::System& system) { 124void SvcWrap64(Core::System& system) {
117 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); 125 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
118} 126}
119 127
120template <ResultCode func(Core::System&, u64*, u64)> 128template <ResultCode func(Core::System&, u64*, u64)>
121void SvcWrap(Core::System& system) { 129void SvcWrap64(Core::System& system) {
122 u64 param_1 = 0; 130 u64 param_1 = 0;
123 const u32 retval = func(system, &param_1, Param(system, 1)).raw; 131 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
124 132
@@ -127,7 +135,7 @@ void SvcWrap(Core::System& system) {
127} 135}
128 136
129template <ResultCode func(Core::System&, u64*, u32, u32)> 137template <ResultCode func(Core::System&, u64*, u32, u32)>
130void SvcWrap(Core::System& system) { 138void SvcWrap64(Core::System& system) {
131 u64 param_1 = 0; 139 u64 param_1 = 0;
132 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)), 140 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
133 static_cast<u32>(Param(system, 2))) 141 static_cast<u32>(Param(system, 2)))
@@ -138,19 +146,19 @@ void SvcWrap(Core::System& system) {
138} 146}
139 147
140template <ResultCode func(Core::System&, u32, u64)> 148template <ResultCode func(Core::System&, u32, u64)>
141void SvcWrap(Core::System& system) { 149void SvcWrap64(Core::System& system) {
142 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); 150 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
143} 151}
144 152
145template <ResultCode func(Core::System&, u32, u32, u64)> 153template <ResultCode func(Core::System&, u32, u32, u64)>
146void SvcWrap(Core::System& system) { 154void SvcWrap64(Core::System& system) {
147 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), 155 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
148 static_cast<u32>(Param(system, 1)), Param(system, 2)) 156 static_cast<u32>(Param(system, 1)), Param(system, 2))
149 .raw); 157 .raw);
150} 158}
151 159
152template <ResultCode func(Core::System&, u32, u32*, u64*)> 160template <ResultCode func(Core::System&, u32, u32*, u64*)>
153void SvcWrap(Core::System& system) { 161void SvcWrap64(Core::System& system) {
154 u32 param_1 = 0; 162 u32 param_1 = 0;
155 u64 param_2 = 0; 163 u64 param_2 = 0;
156 const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2); 164 const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
@@ -161,54 +169,54 @@ void SvcWrap(Core::System& system) {
161} 169}
162 170
163template <ResultCode func(Core::System&, u64, u64, u32, u32)> 171template <ResultCode func(Core::System&, u64, u64, u32, u32)>
164void SvcWrap(Core::System& system) { 172void SvcWrap64(Core::System& system) {
165 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 173 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
166 static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) 174 static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
167 .raw); 175 .raw);
168} 176}
169 177
170template <ResultCode func(Core::System&, u64, u64, u32, u64)> 178template <ResultCode func(Core::System&, u64, u64, u32, u64)>
171void SvcWrap(Core::System& system) { 179void SvcWrap64(Core::System& system) {
172 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 180 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
173 static_cast<u32>(Param(system, 2)), Param(system, 3)) 181 static_cast<u32>(Param(system, 2)), Param(system, 3))
174 .raw); 182 .raw);
175} 183}
176 184
177template <ResultCode func(Core::System&, u32, u64, u32)> 185template <ResultCode func(Core::System&, u32, u64, u32)>
178void SvcWrap(Core::System& system) { 186void SvcWrap64(Core::System& system) {
179 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 187 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
180 static_cast<u32>(Param(system, 2))) 188 static_cast<u32>(Param(system, 2)))
181 .raw); 189 .raw);
182} 190}
183 191
184template <ResultCode func(Core::System&, u64, u64, u64)> 192template <ResultCode func(Core::System&, u64, u64, u64)>
185void SvcWrap(Core::System& system) { 193void SvcWrap64(Core::System& system) {
186 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); 194 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
187} 195}
188 196
189template <ResultCode func(Core::System&, u64, u64, u32)> 197template <ResultCode func(Core::System&, u64, u64, u32)>
190void SvcWrap(Core::System& system) { 198void SvcWrap64(Core::System& system) {
191 FuncReturn( 199 FuncReturn(
192 system, 200 system,
193 func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); 201 func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
194} 202}
195 203
196template <ResultCode func(Core::System&, u32, u64, u64, u32)> 204template <ResultCode func(Core::System&, u32, u64, u64, u32)>
197void SvcWrap(Core::System& system) { 205void SvcWrap64(Core::System& system) {
198 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 206 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
199 Param(system, 2), static_cast<u32>(Param(system, 3))) 207 Param(system, 2), static_cast<u32>(Param(system, 3)))
200 .raw); 208 .raw);
201} 209}
202 210
203template <ResultCode func(Core::System&, u32, u64, u64)> 211template <ResultCode func(Core::System&, u32, u64, u64)>
204void SvcWrap(Core::System& system) { 212void SvcWrap64(Core::System& system) {
205 FuncReturn( 213 FuncReturn(
206 system, 214 system,
207 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); 215 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
208} 216}
209 217
210template <ResultCode func(Core::System&, u32*, u64, u64, s64)> 218template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
211void SvcWrap(Core::System& system) { 219void SvcWrap64(Core::System& system) {
212 u32 param_1 = 0; 220 u32 param_1 = 0;
213 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), 221 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
214 static_cast<s64>(Param(system, 3))) 222 static_cast<s64>(Param(system, 3)))
@@ -219,14 +227,14 @@ void SvcWrap(Core::System& system) {
219} 227}
220 228
221template <ResultCode func(Core::System&, u64, u64, u32, s64)> 229template <ResultCode func(Core::System&, u64, u64, u32, s64)>
222void SvcWrap(Core::System& system) { 230void SvcWrap64(Core::System& system) {
223 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 231 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
224 static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) 232 static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
225 .raw); 233 .raw);
226} 234}
227 235
228template <ResultCode func(Core::System&, u64*, u64, u64, u64)> 236template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
229void SvcWrap(Core::System& system) { 237void SvcWrap64(Core::System& system) {
230 u64 param_1 = 0; 238 u64 param_1 = 0;
231 const u32 retval = 239 const u32 retval =
232 func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; 240 func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
@@ -236,7 +244,7 @@ void SvcWrap(Core::System& system) {
236} 244}
237 245
238template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> 246template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
239void SvcWrap(Core::System& system) { 247void SvcWrap64(Core::System& system) {
240 u32 param_1 = 0; 248 u32 param_1 = 0;
241 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3), 249 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
242 static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) 250 static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
@@ -247,7 +255,7 @@ void SvcWrap(Core::System& system) {
247} 255}
248 256
249template <ResultCode func(Core::System&, u32*, u64, u64, u32)> 257template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
250void SvcWrap(Core::System& system) { 258void SvcWrap64(Core::System& system) {
251 u32 param_1 = 0; 259 u32 param_1 = 0;
252 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), 260 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
253 static_cast<u32>(Param(system, 3))) 261 static_cast<u32>(Param(system, 3)))
@@ -258,7 +266,7 @@ void SvcWrap(Core::System& system) {
258} 266}
259 267
260template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> 268template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
261void SvcWrap(Core::System& system) { 269void SvcWrap64(Core::System& system) {
262 u32 param_1 = 0; 270 u32 param_1 = 0;
263 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), 271 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
264 static_cast<u32>(Param(system, 3))) 272 static_cast<u32>(Param(system, 3)))
@@ -269,14 +277,14 @@ void SvcWrap(Core::System& system) {
269} 277}
270 278
271template <ResultCode func(Core::System&, u64, u32, s32, s64)> 279template <ResultCode func(Core::System&, u64, u32, s32, s64)>
272void SvcWrap(Core::System& system) { 280void SvcWrap64(Core::System& system) {
273 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), 281 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
274 static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) 282 static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
275 .raw); 283 .raw);
276} 284}
277 285
278template <ResultCode func(Core::System&, u64, u32, s32, s32)> 286template <ResultCode func(Core::System&, u64, u32, s32, s32)>
279void SvcWrap(Core::System& system) { 287void SvcWrap64(Core::System& system) {
280 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), 288 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
281 static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) 289 static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
282 .raw); 290 .raw);
@@ -286,7 +294,7 @@ void SvcWrap(Core::System& system) {
286// Function wrappers that return type u32 294// Function wrappers that return type u32
287 295
288template <u32 func(Core::System&)> 296template <u32 func(Core::System&)>
289void SvcWrap(Core::System& system) { 297void SvcWrap64(Core::System& system) {
290 FuncReturn(system, func(system)); 298 FuncReturn(system, func(system));
291} 299}
292 300
@@ -294,7 +302,7 @@ void SvcWrap(Core::System& system) {
294// Function wrappers that return type u64 302// Function wrappers that return type u64
295 303
296template <u64 func(Core::System&)> 304template <u64 func(Core::System&)>
297void SvcWrap(Core::System& system) { 305void SvcWrap64(Core::System& system) {
298 FuncReturn(system, func(system)); 306 FuncReturn(system, func(system));
299} 307}
300 308
@@ -302,44 +310,110 @@ void SvcWrap(Core::System& system) {
302/// Function wrappers that return type void 310/// Function wrappers that return type void
303 311
304template <void func(Core::System&)> 312template <void func(Core::System&)>
305void SvcWrap(Core::System& system) { 313void SvcWrap64(Core::System& system) {
306 func(system); 314 func(system);
307} 315}
308 316
309template <void func(Core::System&, u32)> 317template <void func(Core::System&, u32)>
310void SvcWrap(Core::System& system) { 318void SvcWrap64(Core::System& system) {
311 func(system, static_cast<u32>(Param(system, 0))); 319 func(system, static_cast<u32>(Param(system, 0)));
312} 320}
313 321
314template <void func(Core::System&, u32, u64, u64, u64)> 322template <void func(Core::System&, u32, u64, u64, u64)>
315void SvcWrap(Core::System& system) { 323void SvcWrap64(Core::System& system) {
316 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), 324 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
317 Param(system, 3)); 325 Param(system, 3));
318} 326}
319 327
320template <void func(Core::System&, s64)> 328template <void func(Core::System&, s64)>
321void SvcWrap(Core::System& system) { 329void SvcWrap64(Core::System& system) {
322 func(system, static_cast<s64>(Param(system, 0))); 330 func(system, static_cast<s64>(Param(system, 0)));
323} 331}
324 332
325template <void func(Core::System&, u64, s32)> 333template <void func(Core::System&, u64, s32)>
326void SvcWrap(Core::System& system) { 334void SvcWrap64(Core::System& system) {
327 func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); 335 func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
328} 336}
329 337
330template <void func(Core::System&, u64, u64)> 338template <void func(Core::System&, u64, u64)>
331void SvcWrap(Core::System& system) { 339void SvcWrap64(Core::System& system) {
332 func(system, Param(system, 0), Param(system, 1)); 340 func(system, Param(system, 0), Param(system, 1));
333} 341}
334 342
335template <void func(Core::System&, u64, u64, u64)> 343template <void func(Core::System&, u64, u64, u64)>
336void SvcWrap(Core::System& system) { 344void SvcWrap64(Core::System& system) {
337 func(system, Param(system, 0), Param(system, 1), Param(system, 2)); 345 func(system, Param(system, 0), Param(system, 1), Param(system, 2));
338} 346}
339 347
340template <void func(Core::System&, u32, u64, u64)> 348template <void func(Core::System&, u32, u64, u64)>
341void SvcWrap(Core::System& system) { 349void SvcWrap64(Core::System& system) {
342 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); 350 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
343} 351}
344 352
353// Used by QueryMemory32
354template <ResultCode func(Core::System&, u32, u32, u32)>
355void SvcWrap32(Core::System& system) {
356 FuncReturn32(system,
357 func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw);
358}
359
360// Used by GetInfo32
361template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)>
362void SvcWrap32(Core::System& system) {
363 u32 param_1 = 0;
364 u32 param_2 = 0;
365
366 const u32 retval = func(system, &param_1, &param_2, Param32(system, 0), Param32(system, 1),
367 Param32(system, 2), Param32(system, 3))
368 .raw;
369
370 system.CurrentArmInterface().SetReg(1, param_1);
371 system.CurrentArmInterface().SetReg(2, param_2);
372 FuncReturn(system, retval);
373}
374
375// Used by GetThreadPriority32, ConnectToNamedPort32
376template <ResultCode func(Core::System&, u32*, u32)>
377void SvcWrap32(Core::System& system) {
378 u32 param_1 = 0;
379 const u32 retval = func(system, &param_1, Param32(system, 1)).raw;
380 system.CurrentArmInterface().SetReg(1, param_1);
381 FuncReturn(system, retval);
382}
383
384// Used by GetThreadId32
385template <ResultCode func(Core::System&, u32*, u32*, u32)>
386void SvcWrap32(Core::System& system) {
387 u32 param_1 = 0;
388 u32 param_2 = 0;
389
390 const u32 retval = func(system, &param_1, &param_2, Param32(system, 1)).raw;
391 system.CurrentArmInterface().SetReg(1, param_1);
392 system.CurrentArmInterface().SetReg(2, param_2);
393 FuncReturn(system, retval);
394}
395
396// Used by SignalProcessWideKey32
397template <void func(Core::System&, u32, s32)>
398void SvcWrap32(Core::System& system) {
399 func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1)));
400}
401
402// Used by SendSyncRequest32
403template <ResultCode func(Core::System&, u32)>
404void SvcWrap32(Core::System& system) {
405 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
406}
407
408// Used by WaitSynchronization32
409template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
410void SvcWrap32(Core::System& system) {
411 u32 param_1 = 0;
412 const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2),
413 Param32(system, 3), &param_1)
414 .raw;
415 system.CurrentArmInterface().SetReg(1, param_1);
416 FuncReturn(system, retval);
417}
418
345} // namespace Kernel 419} // namespace Kernel
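The SvcWrap64 and SvcWrap32 specializations above all follow one pattern: read the guest registers through Param/Param32, materialize any pointer outputs as locals, call the typed SVC handler, copy the outputs back with SetReg, and hand the ResultCode's raw value to FuncReturn. A minimal sketch of how a handler would pair with the SvcWrap32<ResultCode (Core::System&, u32*, u32)> overload shown above; the handler name and body are invented purely for illustration and are not part of this change.

    // Sketch only: assumes the declarations from svc_wrap.h plus ResultCode/RESULT_SUCCESS.
    static ResultCode GetExampleValue32(Core::System& system, u32* out_value, u32 handle) {
        *out_value = handle; // placeholder body standing in for a real SVC implementation
        return RESULT_SUCCESS;
    }

    // SvcWrap32<GetExampleValue32> reads guest register 1 via Param32, calls the handler,
    // writes *out_value back to register 1 with SetReg, and stores the ResultCode's raw
    // value in register 0 through FuncReturn, exactly as the wrapper body above spells out.
    constexpr void (*example_entry)(Core::System&) = SvcWrap32<GetExampleValue32>;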
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index bf850e0b2..83e956036 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -133,15 +133,16 @@ void Thread::CancelWait() {
133 ResumeFromWait(); 133 ResumeFromWait();
134} 134}
135 135
136/** 136static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
137 * Resets a thread context, making it ready to be scheduled and run by the CPU 137 u32 entry_point, u32 arg) {
138 * @param context Thread context to reset 138 context = {};
139 * @param stack_top Address of the top of the stack 139 context.cpu_registers[0] = arg;
140 * @param entry_point Address of entry point for execution 140 context.cpu_registers[15] = entry_point;
141 * @param arg User argument for thread 141 context.cpu_registers[13] = stack_top;
142 */ 142}
143static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, 143
144 VAddr entry_point, u64 arg) { 144static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top,
145 VAddr entry_point, u64 arg) {
145 context = {}; 146 context = {};
146 context.cpu_registers[0] = arg; 147 context.cpu_registers[0] = arg;
147 context.pc = entry_point; 148 context.pc = entry_point;
@@ -198,9 +199,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
198 199
199 thread->owner_process->RegisterThread(thread.get()); 200 thread->owner_process->RegisterThread(thread.get());
200 201
201 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 202 ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
202 // to initialize the context 203 static_cast<u32>(entry_point), static_cast<u32>(arg));
203 ResetThreadContext(thread->context, stack_top, entry_point, arg); 204 ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
204 205
205 return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); 206 return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
206} 207}
@@ -213,11 +214,13 @@ void Thread::SetPriority(u32 priority) {
213} 214}
214 215
215void Thread::SetWaitSynchronizationResult(ResultCode result) { 216void Thread::SetWaitSynchronizationResult(ResultCode result) {
216 context.cpu_registers[0] = result.raw; 217 context_32.cpu_registers[0] = result.raw;
218 context_64.cpu_registers[0] = result.raw;
217} 219}
218 220
219void Thread::SetWaitSynchronizationOutput(s32 output) { 221void Thread::SetWaitSynchronizationOutput(s32 output) {
220 context.cpu_registers[1] = output; 222 context_32.cpu_registers[1] = output;
223 context_64.cpu_registers[1] = output;
221} 224}
222 225
223s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { 226s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
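A note on the raw indices in ResetThreadContext32 above: on AArch32 the stack pointer is R13 and the program counter is R15, so writing cpu_registers[13] and cpu_registers[15] is the 32-bit counterpart of setting sp and pc in the 64-bit context. The same idea with named constants, purely as an illustrative sketch (the constants do not exist in the change itself):

    constexpr std::size_t kArm32StackPointer = 13;   // SP is R13 on AArch32
    constexpr std::size_t kArm32ProgramCounter = 15; // PC is R15 on AArch32

    static void ResetThreadContext32Sketch(Core::ARM_Interface::ThreadContext32& context,
                                           u32 stack_top, u32 entry_point, u32 arg) {
        context = {};
        context.cpu_registers[0] = arg;                          // first argument goes in R0
        context.cpu_registers[kArm32StackPointer] = stack_top;
        context.cpu_registers[kArm32ProgramCounter] = entry_point;
    }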
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 129e7858a..23fdef8a4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -102,7 +102,8 @@ public:
102 102
103 using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; 103 using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>;
104 104
105 using ThreadContext = Core::ARM_Interface::ThreadContext; 105 using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
106 using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
106 107
107 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; 108 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
108 109
@@ -273,12 +274,20 @@ public:
273 return status == ThreadStatus::WaitSynch; 274 return status == ThreadStatus::WaitSynch;
274 } 275 }
275 276
276 ThreadContext& GetContext() { 277 ThreadContext32& GetContext32() {
277 return context; 278 return context_32;
278 } 279 }
279 280
280 const ThreadContext& GetContext() const { 281 const ThreadContext32& GetContext32() const {
281 return context; 282 return context_32;
283 }
284
285 ThreadContext64& GetContext64() {
286 return context_64;
287 }
288
289 const ThreadContext64& GetContext64() const {
290 return context_64;
282 } 291 }
283 292
284 ThreadStatus GetStatus() const { 293 ThreadStatus GetStatus() const {
@@ -466,7 +475,8 @@ private:
466 void AdjustSchedulingOnPriority(u32 old_priority); 475 void AdjustSchedulingOnPriority(u32 old_priority);
467 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); 476 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
468 477
469 Core::ARM_Interface::ThreadContext context{}; 478 ThreadContext32 context_32{};
479 ThreadContext64 context_64{};
470 480
471 u64 thread_id = 0; 481 u64 thread_id = 0;
472 482
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 134152210..437bc5dee 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -191,8 +191,6 @@ void NVFlinger::Compose() {
191 // Search for a queued buffer and acquire it 191 // Search for a queued buffer and acquire it
192 auto buffer = buffer_queue.AcquireBuffer(); 192 auto buffer = buffer_queue.AcquireBuffer();
193 193
194 MicroProfileFlip();
195
196 if (!buffer) { 194 if (!buffer) {
197 continue; 195 continue;
198 } 196 }
@@ -206,6 +204,8 @@ void NVFlinger::Compose() {
206 gpu.WaitFence(fence.id, fence.value); 204 gpu.WaitFence(fence.id, fence.value);
207 } 205 }
208 206
207 MicroProfileFlip();
208
209 // Now send the buffer to the GPU for drawing. 209 // Now send the buffer to the GPU for drawing.
210 // TODO(Subv): Support more than just disp0. The display device selection is probably based 210 // TODO(Subv): Support more than just disp0. The display device selection is probably based
211 // on which display we're drawing (Default, Internal, External, etc) 211 // on which display we're drawing (Default, Internal, External, etc)
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 5bcc0b588..9e12c76fc 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -111,6 +111,14 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
111 rb.PushEnum(available_language_codes[Settings::values.language_index]); 111 rb.PushEnum(available_language_codes[Settings::values.language_index]);
112} 112}
113 113
114void SET::GetRegionCode(Kernel::HLERequestContext& ctx) {
115 LOG_DEBUG(Service_SET, "called");
116
117 IPC::ResponseBuilder rb{ctx, 3};
118 rb.Push(RESULT_SUCCESS);
119 rb.Push(Settings::values.region_index);
120}
121
114SET::SET() : ServiceFramework("set") { 122SET::SET() : ServiceFramework("set") {
115 // clang-format off 123 // clang-format off
116 static const FunctionInfo functions[] = { 124 static const FunctionInfo functions[] = {
@@ -118,7 +126,7 @@ SET::SET() : ServiceFramework("set") {
118 {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, 126 {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"},
119 {2, &SET::MakeLanguageCode, "MakeLanguageCode"}, 127 {2, &SET::MakeLanguageCode, "MakeLanguageCode"},
120 {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"}, 128 {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"},
121 {4, nullptr, "GetRegionCode"}, 129 {4, &SET::GetRegionCode, "GetRegionCode"},
122 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, 130 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"},
123 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, 131 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"},
124 {7, nullptr, "GetKeyCodeMap"}, 132 {7, nullptr, "GetKeyCodeMap"},
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h
index b154e08aa..6084b345d 100644
--- a/src/core/hle/service/set/set.h
+++ b/src/core/hle/service/set/set.h
@@ -43,6 +43,7 @@ private:
43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); 43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx);
44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); 44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx);
45 void GetQuestFlag(Kernel::HLERequestContext& ctx); 45 void GetQuestFlag(Kernel::HLERequestContext& ctx);
46 void GetRegionCode(Kernel::HLERequestContext& ctx);
46}; 47};
47 48
48} // namespace Service::Set 49} // namespace Service::Set
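GetRegionCode above follows the usual shape of a scalar getter in this service: log the call, push RESULT_SUCCESS, then push the setting value through a three-word ResponseBuilder. A hypothetical sibling handler for the sound_index setting (which this diff also introduces in settings.h) would look like this; the name GetExampleIndex is made up and no such function is added by the change.

    // Hypothetical handler, mirroring SET::GetRegionCode above one-for-one.
    void SET::GetExampleIndex(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_SET, "called");

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
        rb.Push(Settings::values.sound_index);
    }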
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 57b1a2bca..78d4acd95 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
53 return {}; 53 return {};
54 } 54 }
55 55
56 std::vector<char> raw_data(binary_list->GetSize()); 56 std::vector<char> raw_data(binary_list->GetSize() + 1);
57 binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); 57 binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize());
58 58
59 std::stringstream data_stream{raw_data.data()}; 59 std::stringstream data_stream{raw_data.data()};
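The extra byte reserved above is what makes the std::stringstream construction two lines later safe: raw_data.data() is consumed as a C string, and the value-initialized trailing element guarantees a NUL terminator right after the file contents. The same idea in isolation, with stand-in data rather than the real time zone list (needs <cstring>, <sstream> and <vector>):

    std::vector<char> raw_data(3 + 1);               // payload plus room for a NUL terminator
    std::memcpy(raw_data.data(), "abc", 3);          // simulates binary_list->ReadBytes
    std::stringstream data_stream{raw_data.data()};  // stops at the zero byte, not past the buffer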
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index d19c3623c..53559e8b1 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -129,12 +129,6 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
129 } 129 }
130 metadata.Print(); 130 metadata.Print();
131 131
132 const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
133 if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit ||
134 arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) {
135 return {ResultStatus::Error32BitISA, {}};
136 }
137
138 if (process.LoadFromMetadata(metadata).IsError()) { 132 if (process.LoadFromMetadata(metadata).IsError()) {
139 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; 133 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
140 } 134 }
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index f95eee3b1..85ac81ef7 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -111,7 +111,7 @@ json GetProcessorStateDataAuto(Core::System& system) {
111 const auto& vm_manager{process->VMManager()}; 111 const auto& vm_manager{process->VMManager()};
112 auto& arm{system.CurrentArmInterface()}; 112 auto& arm{system.CurrentArmInterface()};
113 113
114 Core::ARM_Interface::ThreadContext context{}; 114 Core::ARM_Interface::ThreadContext64 context{};
115 arm.SaveContext(context); 115 arm.SaveContext(context);
116 116
117 return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", 117 return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32",
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 7c0303684..c1282cb80 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -86,6 +86,7 @@ void LogSettings() {
86 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); 86 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
87 LogSetting("System_CurrentUser", Settings::values.current_user); 87 LogSetting("System_CurrentUser", Settings::values.current_user);
88 LogSetting("System_LanguageIndex", Settings::values.language_index); 88 LogSetting("System_LanguageIndex", Settings::values.language_index);
89 LogSetting("System_RegionIndex", Settings::values.region_index);
89 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); 90 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
diff --git a/src/core/settings.h b/src/core/settings.h
index 15b691342..79ec01731 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -387,6 +387,8 @@ struct Values {
387 387
388 s32 current_user; 388 s32 current_user;
389 s32 language_index; 389 s32 language_index;
390 s32 region_index;
391 s32 sound_index;
390 392
391 // Controls 393 // Controls
392 std::array<PlayerInput, 10> players; 394 std::array<PlayerInput, 10> players;
@@ -430,6 +432,7 @@ struct Values {
430 432
431 float resolution_factor; 433 float resolution_factor;
432 int aspect_ratio; 434 int aspect_ratio;
435 int max_anisotropy;
433 bool use_frame_limit; 436 bool use_frame_limit;
434 u16 frame_limit; 437 u16 frame_limit;
435 bool use_disk_shader_cache; 438 bool use_disk_shader_cache;
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 2228571a6..da5227058 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -32,8 +32,16 @@ public:
32 SocketCallback callback) 32 SocketCallback callback)
33 : callback(std::move(callback)), timer(io_service), 33 : callback(std::move(callback)), timer(io_service),
34 socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), 34 socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id),
35 pad_index(pad_index), 35 pad_index(pad_index) {
36 send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} 36 boost::system::error_code ec{};
37 auto ipv4 = boost::asio::ip::make_address_v4(host, ec);
38 if (ec.value() != boost::system::errc::success) {
39 LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host);
40 ipv4 = boost::asio::ip::address_v4{};
41 }
42
43 send_endpoint = {udp::endpoint(ipv4, port)};
44 }
37 45
38 void Stop() { 46 void Stop() {
39 io_service.stop(); 47 io_service.stop();
@@ -85,17 +93,18 @@ private:
85 } 93 }
86 94
87 void HandleSend(const boost::system::error_code& error) { 95 void HandleSend(const boost::system::error_code& error) {
96 boost::system::error_code _ignored{};
88 // Send a request for getting port info for the pad 97 // Send a request for getting port info for the pad
89 Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; 98 Request::PortInfo port_info{1, {pad_index, 0, 0, 0}};
90 const auto port_message = Request::Create(port_info, client_id); 99 const auto port_message = Request::Create(port_info, client_id);
91 std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); 100 std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE);
92 socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); 101 socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint, {}, _ignored);
93 102
94 // Send a request for getting pad data for the pad 103 // Send a request for getting pad data for the pad
95 Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; 104 Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS};
96 const auto pad_message = Request::Create(pad_data, client_id); 105 const auto pad_message = Request::Create(pad_data, client_id);
97 std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); 106 std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE);
98 socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); 107 socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint, {}, _ignored);
99 StartSend(timer.expiry()); 108 StartSend(timer.expiry());
100 } 109 }
101 110
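Both changes above switch to the non-throwing Boost.Asio overloads, so a malformed host string or a transient send failure is logged or ignored instead of unwinding through the UDP worker. The address-parsing half of that pattern in isolation, with a placeholder address:

    boost::system::error_code ec;
    auto address = boost::asio::ip::make_address_v4("192.0.2.1", ec); // placeholder host
    if (ec) {
        address = boost::asio::ip::address_v4{}; // fall back to 0.0.0.0 rather than throw
    }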
diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp
index a982ac49d..5e50bd612 100644
--- a/src/input_common/udp/protocol.cpp
+++ b/src/input_common/udp/protocol.cpp
@@ -31,7 +31,6 @@ namespace Response {
31 */ 31 */
32std::optional<Type> Validate(u8* data, std::size_t size) { 32std::optional<Type> Validate(u8* data, std::size_t size) {
33 if (size < sizeof(Header)) { 33 if (size < sizeof(Header)) {
34 LOG_DEBUG(Input, "Invalid UDP packet received");
35 return std::nullopt; 34 return std::nullopt;
36 } 35 }
37 Header header{}; 36 Header header{};
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index ca99cc22f..8c6ef1394 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <mutex>
6#include <optional>
6#include <tuple> 7#include <tuple>
7 8
8#include "common/param_package.h" 9#include "common/param_package.h"
@@ -44,7 +45,7 @@ public:
44 std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { 45 std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override {
45 { 46 {
46 std::lock_guard guard(status->update_mutex); 47 std::lock_guard guard(status->update_mutex);
47 status->touch_calibration.emplace(); 48 status->touch_calibration = DeviceStatus::CalibrationData{};
48 // These default values work well for DS4 but probably not other touch inputs 49 // These default values work well for DS4 but probably not other touch inputs
49 status->touch_calibration->min_x = params.Get("min_x", 100); 50 status->touch_calibration->min_x = params.Get("min_x", 100);
50 status->touch_calibration->min_y = params.Get("min_y", 50); 51 status->touch_calibration->min_y = params.Get("min_y", 50);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4b0c6346f..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,6 +2,8 @@ add_library(video_core STATIC
2 buffer_cache/buffer_block.h 2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h 3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.h 4 buffer_cache/map_interval.h
5 dirty_flags.cpp
6 dirty_flags.h
5 dma_pusher.cpp 7 dma_pusher.cpp
6 dma_pusher.h 8 dma_pusher.h
7 engines/const_buffer_engine_interface.h 9 engines/const_buffer_engine_interface.h
@@ -63,14 +65,12 @@ add_library(video_core STATIC
63 renderer_opengl/gl_shader_decompiler.h 65 renderer_opengl/gl_shader_decompiler.h
64 renderer_opengl/gl_shader_disk_cache.cpp 66 renderer_opengl/gl_shader_disk_cache.cpp
65 renderer_opengl/gl_shader_disk_cache.h 67 renderer_opengl/gl_shader_disk_cache.h
66 renderer_opengl/gl_shader_gen.cpp
67 renderer_opengl/gl_shader_gen.h
68 renderer_opengl/gl_shader_manager.cpp 68 renderer_opengl/gl_shader_manager.cpp
69 renderer_opengl/gl_shader_manager.h 69 renderer_opengl/gl_shader_manager.h
70 renderer_opengl/gl_shader_util.cpp 70 renderer_opengl/gl_shader_util.cpp
71 renderer_opengl/gl_shader_util.h 71 renderer_opengl/gl_shader_util.h
72 renderer_opengl/gl_state.cpp 72 renderer_opengl/gl_state_tracker.cpp
73 renderer_opengl/gl_state.h 73 renderer_opengl/gl_state_tracker.h
74 renderer_opengl/gl_stream_buffer.cpp 74 renderer_opengl/gl_stream_buffer.cpp
75 renderer_opengl/gl_stream_buffer.h 75 renderer_opengl/gl_stream_buffer.h
76 renderer_opengl/gl_texture_cache.cpp 76 renderer_opengl/gl_texture_cache.cpp
@@ -116,8 +116,6 @@ add_library(video_core STATIC
116 shader/ast.h 116 shader/ast.h
117 shader/compiler_settings.cpp 117 shader/compiler_settings.cpp
118 shader/compiler_settings.h 118 shader/compiler_settings.h
119 shader/const_buffer_locker.cpp
120 shader/const_buffer_locker.h
121 shader/control_flow.cpp 119 shader/control_flow.cpp
122 shader/control_flow.h 120 shader/control_flow.h
123 shader/decode.cpp 121 shader/decode.cpp
@@ -126,9 +124,13 @@ add_library(video_core STATIC
126 shader/node_helper.cpp 124 shader/node_helper.cpp
127 shader/node_helper.h 125 shader/node_helper.h
128 shader/node.h 126 shader/node.h
127 shader/registry.cpp
128 shader/registry.h
129 shader/shader_ir.cpp 129 shader/shader_ir.cpp
130 shader/shader_ir.h 130 shader/shader_ir.h
131 shader/track.cpp 131 shader/track.cpp
132 shader/transform_feedback.cpp
133 shader/transform_feedback.h
132 surface.cpp 134 surface.cpp
133 surface.h 135 surface.h
134 texture_cache/format_lookup_table.cpp 136 texture_cache/format_lookup_table.cpp
@@ -198,6 +200,8 @@ if (ENABLE_VULKAN)
198 renderer_vulkan/vk_shader_util.h 200 renderer_vulkan/vk_shader_util.h
199 renderer_vulkan/vk_staging_buffer_pool.cpp 201 renderer_vulkan/vk_staging_buffer_pool.cpp
200 renderer_vulkan/vk_staging_buffer_pool.h 202 renderer_vulkan/vk_staging_buffer_pool.h
203 renderer_vulkan/vk_state_tracker.cpp
204 renderer_vulkan/vk_state_tracker.h
201 renderer_vulkan/vk_stream_buffer.cpp 205 renderer_vulkan/vk_stream_buffer.cpp
202 renderer_vulkan/vk_stream_buffer.h 206 renderer_vulkan/vk_stream_buffer.h
203 renderer_vulkan/vk_swapchain.cpp 207 renderer_vulkan/vk_swapchain.cpp
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
new file mode 100644
index 000000000..e16075993
--- /dev/null
+++ b/src/video_core/dirty_flags.cpp
@@ -0,0 +1,38 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstddef>
7
8#include "common/common_types.h"
9#include "video_core/dirty_flags.h"
10
11#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
13
14namespace VideoCommon::Dirty {
15
16using Tegra::Engines::Maxwell3D;
17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
19 static constexpr std::size_t num_per_rt = NUM(rt[0]);
20 static constexpr std::size_t begin = OFF(rt);
21 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
22 for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) {
23 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
24 }
25 FillBlock(tables[1], begin, num, RenderTargets);
26
27 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
28 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
29 const u8 flag = zeta_flags[i];
30 auto& table = tables[i];
31 table[OFF(zeta_enable)] = flag;
32 table[OFF(zeta_width)] = flag;
33 table[OFF(zeta_height)] = flag;
34 FillBlock(table, OFF(zeta), NUM(zeta), flag);
35 }
36}
37
38} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
new file mode 100644
index 000000000..3f6c1d83a
--- /dev/null
+++ b/src/video_core/dirty_flags.h
@@ -0,0 +1,49 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <cstddef>
9#include <iterator>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13
14namespace VideoCommon::Dirty {
15
16enum : u8 {
17 NullEntry = 0,
18
19 RenderTargets,
20 ColorBuffer0,
21 ColorBuffer1,
22 ColorBuffer2,
23 ColorBuffer3,
24 ColorBuffer4,
25 ColorBuffer5,
26 ColorBuffer6,
27 ColorBuffer7,
28 ZetaBuffer,
29
30 LastCommonEntry,
31};
32
33template <typename Integer>
34void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin,
35 std::size_t num, Integer dirty_index) {
36 const auto it = std::begin(table) + begin;
37 std::fill(it, it + num, static_cast<u8>(dirty_index));
38}
39
40template <typename Integer1, typename Integer2>
41void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin,
42 std::size_t num, Integer1 index_a, Integer2 index_b) {
43 FillBlock(tables[0], begin, num, index_a);
44 FillBlock(tables[1], begin, num, index_b);
45}
46
47void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
48
49} // namespace VideoCommon::Dirty
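For context, the tables filled by FillBlock and SetupDirtyRenderTargets are consumed on every register write: the engine looks up the written method index in each table and raises the flag it maps to, which is exactly what the Maxwell3D::CallMethod hunk later in this diff does. A condensed sketch of that consumer side, using the DirtyState member names that appear there:

    // Mirrors the lookup added to Maxwell3D::CallMethod further down in this diff.
    void MarkDirtyOnWrite(Tegra::Engines::Maxwell3D::DirtyState& dirty, std::size_t method) {
        for (const auto& table : dirty.tables) {
            dirty.flags[table[method]] = true; // NullEntry (0) acts as a harmless catch-all slot
        }
    }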
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 0094fd715..713c14182 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 // On entering GPU code, assume all memory may be touched by the ARM core. 24 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().dirty.OnMemoryWrite(); 25 gpu.Maxwell3D().OnMemoryWrite();
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index d56a47710..724ee0fd6 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -16,11 +16,12 @@ namespace Tegra::Engines {
16 16
17struct SamplerDescriptor { 17struct SamplerDescriptor {
18 union { 18 union {
19 BitField<0, 20, Tegra::Shader::TextureType> texture_type; 19 u32 raw = 0;
20 BitField<20, 1, u32> is_array; 20 BitField<0, 2, Tegra::Shader::TextureType> texture_type;
21 BitField<21, 1, u32> is_buffer; 21 BitField<2, 3, Tegra::Texture::ComponentType> component_type;
22 BitField<22, 1, u32> is_shadow; 22 BitField<5, 1, u32> is_array;
23 u32 raw{}; 23 BitField<6, 1, u32> is_buffer;
24 BitField<7, 1, u32> is_shadow;
24 }; 25 };
25 26
26 bool operator==(const SamplerDescriptor& rhs) const noexcept { 27 bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
31 return !operator==(rhs); 32 return !operator==(rhs);
32 } 33 }
33 34
34 static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { 35 static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
36 using Tegra::Shader::TextureType;
35 SamplerDescriptor result; 37 SamplerDescriptor result;
36 switch (tic_texture_type) { 38
39 // This is going to be used to determine the shading language type.
40 // Because of that we don't care about all component types on color textures.
41 result.component_type.Assign(tic.r_type.Value());
42
43 switch (tic.texture_type.Value()) {
37 case Tegra::Texture::TextureType::Texture1D: 44 case Tegra::Texture::TextureType::Texture1D:
38 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 45 result.texture_type.Assign(TextureType::Texture1D);
39 result.is_array.Assign(0);
40 result.is_buffer.Assign(0);
41 result.is_shadow.Assign(0);
42 return result; 46 return result;
43 case Tegra::Texture::TextureType::Texture2D: 47 case Tegra::Texture::TextureType::Texture2D:
44 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 48 result.texture_type.Assign(TextureType::Texture2D);
45 result.is_array.Assign(0);
46 result.is_buffer.Assign(0);
47 result.is_shadow.Assign(0);
48 return result; 49 return result;
49 case Tegra::Texture::TextureType::Texture3D: 50 case Tegra::Texture::TextureType::Texture3D:
50 result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); 51 result.texture_type.Assign(TextureType::Texture3D);
51 result.is_array.Assign(0);
52 result.is_buffer.Assign(0);
53 result.is_shadow.Assign(0);
54 return result; 52 return result;
55 case Tegra::Texture::TextureType::TextureCubemap: 53 case Tegra::Texture::TextureType::TextureCubemap:
56 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 54 result.texture_type.Assign(TextureType::TextureCube);
57 result.is_array.Assign(0);
58 result.is_buffer.Assign(0);
59 result.is_shadow.Assign(0);
60 return result; 55 return result;
61 case Tegra::Texture::TextureType::Texture1DArray: 56 case Tegra::Texture::TextureType::Texture1DArray:
62 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 57 result.texture_type.Assign(TextureType::Texture1D);
63 result.is_array.Assign(1); 58 result.is_array.Assign(1);
64 result.is_buffer.Assign(0);
65 result.is_shadow.Assign(0);
66 return result; 59 return result;
67 case Tegra::Texture::TextureType::Texture2DArray: 60 case Tegra::Texture::TextureType::Texture2DArray:
68 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 61 result.texture_type.Assign(TextureType::Texture2D);
69 result.is_array.Assign(1); 62 result.is_array.Assign(1);
70 result.is_buffer.Assign(0);
71 result.is_shadow.Assign(0);
72 return result; 63 return result;
73 case Tegra::Texture::TextureType::Texture1DBuffer: 64 case Tegra::Texture::TextureType::Texture1DBuffer:
74 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 65 result.texture_type.Assign(TextureType::Texture1D);
75 result.is_array.Assign(0);
76 result.is_buffer.Assign(1); 66 result.is_buffer.Assign(1);
77 result.is_shadow.Assign(0);
78 return result; 67 return result;
79 case Tegra::Texture::TextureType::Texture2DNoMipmap: 68 case Tegra::Texture::TextureType::Texture2DNoMipmap:
80 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 69 result.texture_type.Assign(TextureType::Texture2D);
81 result.is_array.Assign(0);
82 result.is_buffer.Assign(0);
83 result.is_shadow.Assign(0);
84 return result; 70 return result;
85 case Tegra::Texture::TextureType::TextureCubeArray: 71 case Tegra::Texture::TextureType::TextureCubeArray:
86 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 72 result.texture_type.Assign(TextureType::TextureCube);
87 result.is_array.Assign(1); 73 result.is_array.Assign(1);
88 result.is_buffer.Assign(0);
89 result.is_shadow.Assign(0);
90 return result; 74 return result;
91 default: 75 default:
92 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 76 result.texture_type.Assign(TextureType::Texture2D);
93 result.is_array.Assign(0);
94 result.is_buffer.Assign(0);
95 result.is_shadow.Assign(0);
96 return result; 77 return result;
97 } 78 }
98 } 79 }
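With the repacking above, a single u32 now carries the texture type (bits 0-1), the component type taken from the TIC's red channel (bits 2-4), and the array/buffer/shadow bits (5-7), so a descriptor can be built directly from a TIC entry. A small usage sketch; the TIC entry here is a default-constructed stand-in, not real guest data:

    Tegra::Texture::TICEntry tic{}; // in the engines this is read from guest memory first
    const auto descriptor = Tegra::Engines::SamplerDescriptor::FromTIC(tic);
    if (descriptor.is_buffer != 0) {
        // e.g. route texture buffers down their own path
    }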
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4b824aa4e..368c75a66 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
39 const bool is_last_call = method_call.IsLastCall(); 39 const bool is_last_call = method_call.IsLastCall();
40 upload_state.ProcessData(method_call.argument, is_last_call); 40 upload_state.ProcessData(method_call.argument, is_last_call);
41 if (is_last_call) { 41 if (is_last_call) {
42 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 42 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 43 }
44 break; 44 break;
45 } 45 }
@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
89 89
90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
92 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 92 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
94 return result; 94 return result;
95} 95}
@@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
119 Texture::TICEntry tic_entry; 119 Texture::TICEntry tic_entry;
120 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 120 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
121 121
122 const auto r_type{tic_entry.r_type.Value()};
123 const auto g_type{tic_entry.g_type.Value()};
124 const auto b_type{tic_entry.b_type.Value()};
125 const auto a_type{tic_entry.a_type.Value()};
126
127 // TODO(Subv): Different data types for separate components are not supported
128 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
129
130 return tic_entry; 122 return tic_entry;
131} 123}
132 124
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index fa4a7c5c1..597872e43 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 37 system.GPU().Maxwell3D().OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b28de1092..ce536e29b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
26 MemoryManager& memory_manager) 26 MemoryManager& memory_manager)
27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
28 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 28 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
29 InitDirtySettings(); 29 dirty.flags.flip();
30
30 InitializeRegisterDefaults(); 31 InitializeRegisterDefaults();
31} 32}
32 33
@@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
75 regs.stencil_back_mask = 0xFFFFFFFF; 76 regs.stencil_back_mask = 0xFFFFFFFF;
76 77
77 regs.depth_test_func = Regs::ComparisonOp::Always; 78 regs.depth_test_func = Regs::ComparisonOp::Always;
78 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; 79 regs.front_face = Regs::FrontFace::CounterClockWise;
79 regs.cull.cull_face = Regs::Cull::CullFace::Back; 80 regs.cull_face = Regs::CullFace::Back;
80 81
81 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 82 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
82 // register carrying a default value. Assume it's OpenGL's default (1). 83 // register carrying a default value. Assume it's OpenGL's default (1).
@@ -95,7 +96,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
95 regs.rasterize_enable = 1; 96 regs.rasterize_enable = 1;
96 regs.rt_separate_frag_data = 1; 97 regs.rt_separate_frag_data = 1;
97 regs.framebuffer_srgb = 1; 98 regs.framebuffer_srgb = 1;
98 regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; 99 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
99 100
100 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; 101 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
101 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; 102 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
@@ -103,164 +104,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
103 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; 104 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
104} 105}
105 106
106#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
107
108void Maxwell3D::InitDirtySettings() {
109 const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
110 const auto start_itr = dirty_pointers.begin() + start;
111 const auto end_itr = start_itr + range;
112 std::fill(start_itr, end_itr, position);
113 };
114 dirty.regs.fill(true);
115
116 // Init Render Targets
117 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
118 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
119 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
120 u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
121 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
122 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
123 ++rt_dirty_reg;
124 }
125 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
126 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
127 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
128 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
129 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
130 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
131 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
132
133 // Init Vertex Arrays
134 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
135 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
136 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
137 u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
138 u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
139 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
140 vertex_reg += vertex_array_size) {
141 set_block(vertex_reg, 3, va_dirty_reg);
142 // The divisor concerns vertex array instances
143 dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
144 ++va_dirty_reg;
145 ++vi_dirty_reg;
146 }
147 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
148 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
149 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
150 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
151 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
152 vertex_reg += vertex_limit_size) {
153 set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
154 va_dirty_reg++;
155 }
156 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
157 constexpr u32 vertex_instance_size =
158 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
159 constexpr u32 vertex_instance_end =
160 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
161 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
162 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
163 vertex_reg += vertex_instance_size) {
164 set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
165 vi_dirty_reg++;
166 }
167 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
168 DIRTY_REGS_POS(vertex_attrib_format));
169
170 // Init Shaders
171 constexpr u32 shader_registers_count =
172 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
173 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
174 DIRTY_REGS_POS(shaders));
175
176 // State
177
178 // Viewport
179 constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
180 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
181 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
182 set_block(viewport_start, viewport_size, viewport_dirty_reg);
183 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
184 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
185 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
186
187 // Viewport transformation
188 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
189 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
190 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
191
192 // Cullmode
193 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
194 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
195 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
196
197 // Screen y control
198 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
199
200 // Primitive Restart
201 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
202 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
203 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
204
205 // Depth Test
206 constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
207 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
210
211 // Stencil Test
212 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
217 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
218 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
219 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
220 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
221 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
222 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
223 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
224 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
225 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
226 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
227 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
228 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
229
230 // Color Mask
231 constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
232 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
233 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
234 color_mask_dirty_reg);
235 // Blend State
236 constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
237 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
238 blend_state_dirty_reg);
239 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
240 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
241 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
242 blend_state_dirty_reg);
243
244 // Scissor State
245 constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
246 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
247 scissor_test_dirty_reg);
248
249 // Polygon Offset
250 constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
251 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
252 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
253 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
254 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
255 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
256 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
257
258 // Depth bounds
259 constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
260 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
261 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
262}
263
264void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { 107void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
265 // Reset the current macro. 108 // Reset the current macro.
266 executing_macro = 0; 109 executing_macro = 0;
@@ -319,19 +162,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
319 162
320 if (regs.reg_array[method] != method_call.argument) { 163 if (regs.reg_array[method] != method_call.argument) {
321 regs.reg_array[method] = method_call.argument; 164 regs.reg_array[method] = method_call.argument;
322 const std::size_t dirty_reg = dirty_pointers[method]; 165
323 if (dirty_reg) { 166 for (const auto& table : dirty.tables) {
324 dirty.regs[dirty_reg] = true; 167 dirty.flags[table[method]] = true;
325 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
326 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
327 dirty.vertex_array_buffers = true;
328 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
329 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
330 dirty.vertex_instances = true;
331 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
332 dirty_reg < DIRTY_REGS_POS(render_settings)) {
333 dirty.render_settings = true;
334 }
335 } 168 }
336 } 169 }
337 170
@@ -419,7 +252,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
419 const bool is_last_call = method_call.IsLastCall(); 252 const bool is_last_call = method_call.IsLastCall();
420 upload_state.ProcessData(method_call.argument, is_last_call); 253 upload_state.ProcessData(method_call.argument, is_last_call);
421 if (is_last_call) { 254 if (is_last_call) {
422 dirty.OnMemoryWrite(); 255 OnMemoryWrite();
423 } 256 }
424 break; 257 break;
425 } 258 }
@@ -727,7 +560,7 @@ void Maxwell3D::FinishCBData() {
727 560
728 const u32 id = cb_data_state.id; 561 const u32 id = cb_data_state.id;
729 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); 562 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
730 dirty.OnMemoryWrite(); 563 OnMemoryWrite();
731 564
732 cb_data_state.id = null_cb_data; 565 cb_data_state.id = null_cb_data;
733 cb_data_state.current = null_cb_data; 566 cb_data_state.current = null_cb_data;
@@ -805,7 +638,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
805 638
806 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 639 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
807 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 640 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
808 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 641 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
809 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 642 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
810 return result; 643 return result;
811} 644}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 6ea7cc6a5..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <limits>
9#include <optional> 10#include <optional>
10#include <type_traits> 11#include <type_traits>
11#include <unordered_map> 12#include <unordered_map>
@@ -66,6 +67,7 @@ public:
66 static constexpr std::size_t NumVaryings = 31; 67 static constexpr std::size_t NumVaryings = 31;
67 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number 68 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
68 static constexpr std::size_t NumClipDistances = 8; 69 static constexpr std::size_t NumClipDistances = 8;
70 static constexpr std::size_t NumTransformFeedbackBuffers = 4;
69 static constexpr std::size_t MaxShaderProgram = 6; 71 static constexpr std::size_t MaxShaderProgram = 6;
70 static constexpr std::size_t MaxShaderStage = 5; 72 static constexpr std::size_t MaxShaderStage = 5;
71 // Maximum number of const buffers per shader stage. 73 // Maximum number of const buffers per shader stage.
@@ -431,21 +433,15 @@ public:
431 GeneratedPrimitives = 0x1F, 433 GeneratedPrimitives = 0x1F,
432 }; 434 };
433 435
434 struct Cull { 436 enum class FrontFace : u32 {
435 enum class FrontFace : u32 { 437 ClockWise = 0x0900,
436 ClockWise = 0x0900, 438 CounterClockWise = 0x0901,
437 CounterClockWise = 0x0901, 439 };
438 };
439
440 enum class CullFace : u32 {
441 Front = 0x0404,
442 Back = 0x0405,
443 FrontAndBack = 0x0408,
444 };
445 440
446 u32 enabled; 441 enum class CullFace : u32 {
447 FrontFace front_face; 442 Front = 0x0404,
448 CullFace cull_face; 443 Back = 0x0405,
444 FrontAndBack = 0x0408,
449 }; 445 };
450 446
451 struct Blend { 447 struct Blend {
@@ -529,6 +525,12 @@ public:
529 FractionalEven = 2, 525 FractionalEven = 2,
530 }; 526 };
531 527
528 enum class PolygonMode : u32 {
529 Point = 0x1b00,
530 Line = 0x1b01,
531 Fill = 0x1b02,
532 };
533
532 struct RenderTargetConfig { 534 struct RenderTargetConfig {
533 u32 address_high; 535 u32 address_high;
534 u32 address_low; 536 u32 address_low;
@@ -574,7 +576,7 @@ public:
574 f32 translate_z; 576 f32 translate_z;
575 INSERT_UNION_PADDING_WORDS(2); 577 INSERT_UNION_PADDING_WORDS(2);
576 578
577 Common::Rectangle<s32> GetRect() const { 579 Common::Rectangle<f32> GetRect() const {
578 return { 580 return {
579 GetX(), // left 581 GetX(), // left
580 GetY() + GetHeight(), // top 582 GetY() + GetHeight(), // top
@@ -583,20 +585,20 @@ public:
583 }; 585 };
584 }; 586 };
585 587
586 s32 GetX() const { 588 f32 GetX() const {
587 return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x))); 589 return std::max(0.0f, translate_x - std::fabs(scale_x));
588 } 590 }
589 591
590 s32 GetY() const { 592 f32 GetY() const {
591 return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y))); 593 return std::max(0.0f, translate_y - std::fabs(scale_y));
592 } 594 }
593 595
594 s32 GetWidth() const { 596 f32 GetWidth() const {
595 return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX(); 597 return translate_x + std::fabs(scale_x) - GetX();
596 } 598 }
597 599
598 s32 GetHeight() const { 600 f32 GetHeight() const {
599 return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY(); 601 return translate_y + std::fabs(scale_y) - GetY();
600 } 602 }
601 }; 603 };
602 604
@@ -626,6 +628,29 @@ public:
626 float depth_range_far; 628 float depth_range_far;
627 }; 629 };
628 630
631 struct TransformFeedbackBinding {
632 u32 buffer_enable;
633 u32 address_high;
634 u32 address_low;
635 s32 buffer_size;
636 s32 buffer_offset;
637 INSERT_UNION_PADDING_WORDS(3);
638
639 GPUVAddr Address() const {
640 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
641 address_low);
642 }
643 };
644 static_assert(sizeof(TransformFeedbackBinding) == 32);
645
646 struct TransformFeedbackLayout {
647 u32 stream;
648 u32 varying_count;
649 u32 stride;
650 INSERT_UNION_PADDING_WORDS(1);
651 };
652 static_assert(sizeof(TransformFeedbackLayout) == 16);
653
629 bool IsShaderConfigEnabled(std::size_t index) const { 654 bool IsShaderConfigEnabled(std::size_t index) const {
630 // The VertexB is always enabled. 655 // The VertexB is always enabled.
631 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { 656 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -634,6 +659,10 @@ public:
634 return shader_config[index].enable != 0; 659 return shader_config[index].enable != 0;
635 } 660 }
636 661
662 bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
663 return IsShaderConfigEnabled(static_cast<std::size_t>(type));
664 }
665
637 union { 666 union {
638 struct { 667 struct {
639 INSERT_UNION_PADDING_WORDS(0x45); 668 INSERT_UNION_PADDING_WORDS(0x45);
@@ -682,7 +711,13 @@ public:
682 711
683 u32 rasterize_enable; 712 u32 rasterize_enable;
684 713
685 INSERT_UNION_PADDING_WORDS(0xF1); 714 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
715
716 INSERT_UNION_PADDING_WORDS(0xC0);
717
718 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
719
720 INSERT_UNION_PADDING_WORDS(0x1);
686 721
687 u32 tfb_enabled; 722 u32 tfb_enabled;
688 723
@@ -710,7 +745,12 @@ public:
710 745
711 s32 clear_stencil; 746 s32 clear_stencil;
712 747
713 INSERT_UNION_PADDING_WORDS(0x7); 748 INSERT_UNION_PADDING_WORDS(0x2);
749
750 PolygonMode polygon_mode_front;
751 PolygonMode polygon_mode_back;
752
753 INSERT_UNION_PADDING_WORDS(0x3);
714 754
715 u32 polygon_offset_point_enable; 755 u32 polygon_offset_point_enable;
716 u32 polygon_offset_line_enable; 756 u32 polygon_offset_line_enable;
@@ -769,7 +809,11 @@ public:
769 BitField<12, 4, u32> viewport; 809 BitField<12, 4, u32> viewport;
770 } clear_flags; 810 } clear_flags;
771 811
772 INSERT_UNION_PADDING_WORDS(0x19); 812 INSERT_UNION_PADDING_WORDS(0x10);
813
814 u32 fill_rectangle;
815
816 INSERT_UNION_PADDING_WORDS(0x8);
773 817
774 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 818 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
775 819
@@ -872,16 +916,7 @@ public:
872 916
873 INSERT_UNION_PADDING_WORDS(0x35); 917 INSERT_UNION_PADDING_WORDS(0x35);
874 918
875 union { 919 u32 clip_distance_enabled;
876 BitField<0, 1, u32> c0;
877 BitField<1, 1, u32> c1;
878 BitField<2, 1, u32> c2;
879 BitField<3, 1, u32> c3;
880 BitField<4, 1, u32> c4;
881 BitField<5, 1, u32> c5;
882 BitField<6, 1, u32> c6;
883 BitField<7, 1, u32> c7;
884 } clip_distance_enabled;
885 920
886 u32 samplecnt_enable; 921 u32 samplecnt_enable;
887 922
@@ -1060,7 +1095,9 @@ public:
1060 1095
1061 INSERT_UNION_PADDING_WORDS(1); 1096 INSERT_UNION_PADDING_WORDS(1);
1062 1097
1063 Cull cull; 1098 u32 cull_test_enabled;
1099 FrontFace front_face;
1100 CullFace cull_face;
1064 1101
1065 u32 pixel_center_integer; 1102 u32 pixel_center_integer;
1066 1103
@@ -1199,7 +1236,11 @@ public:
1199 1236
1200 u32 tex_cb_index; 1237 u32 tex_cb_index;
1201 1238
1202 INSERT_UNION_PADDING_WORDS(0x395); 1239 INSERT_UNION_PADDING_WORDS(0x7D);
1240
1241 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
1242
1243 INSERT_UNION_PADDING_WORDS(0x298);
1203 1244
1204 struct { 1245 struct {
1205 /// Compressed address of a buffer that holds information about bound SSBOs. 1246 /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1238,79 +1279,6 @@ public:
1238 1279
1239 State state{}; 1280 State state{};
1240 1281
1241 struct DirtyRegs {
1242 static constexpr std::size_t NUM_REGS = 256;
1243 static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
1244
1245 union {
1246 struct {
1247 bool null_dirty;
1248
1249 // Vertex Attributes
1250 bool vertex_attrib_format;
1251
1252 // Vertex Arrays
1253 std::array<bool, 32> vertex_array;
1254
1255 bool vertex_array_buffers;
1256
1257 // Vertex Instances
1258 std::array<bool, 32> vertex_instance;
1259
1260 bool vertex_instances;
1261
1262 // Render Targets
1263 std::array<bool, 8> render_target;
1264 bool depth_buffer;
1265
1266 bool render_settings;
1267
1268 // Shaders
1269 bool shaders;
1270
1271 // Rasterizer State
1272 bool viewport;
1273 bool clip_coefficient;
1274 bool cull_mode;
1275 bool primitive_restart;
1276 bool depth_test;
1277 bool stencil_test;
1278 bool blend_state;
1279 bool scissor_test;
1280 bool transform_feedback;
1281 bool color_mask;
1282 bool polygon_offset;
1283 bool depth_bounds_values;
1284
1285 // Complementary
1286 bool viewport_transform;
1287 bool screen_y_control;
1288
1289 bool memory_general;
1290 };
1291 std::array<bool, NUM_REGS> regs;
1292 };
1293
1294 void ResetVertexArrays() {
1295 vertex_array.fill(true);
1296 vertex_array_buffers = true;
1297 }
1298
1299 void ResetRenderTargets() {
1300 depth_buffer = true;
1301 render_target.fill(true);
1302 render_settings = true;
1303 }
1304
1305 void OnMemoryWrite() {
1306 shaders = true;
1307 memory_general = true;
1308 ResetRenderTargets();
1309 ResetVertexArrays();
1310 }
1311
1312 } dirty{};
1313
1314 /// Reads a register value located at the input method address 1282 /// Reads a register value located at the input method address
1315 u32 GetRegisterValue(u32 method) const; 1283 u32 GetRegisterValue(u32 method) const;
1316 1284
@@ -1356,6 +1324,11 @@ public:
1356 return execute_on; 1324 return execute_on;
1357 } 1325 }
1358 1326
1327 /// Notify a memory write has happened.
1328 void OnMemoryWrite() {
1329 dirty.flags |= dirty.on_write_stores;
1330 }
1331
1359 enum class MMEDrawMode : u32 { 1332 enum class MMEDrawMode : u32 {
1360 Undefined, 1333 Undefined,
1361 Array, 1334 Array,
@@ -1371,6 +1344,16 @@ public:
1371 u32 gl_end_count{}; 1344 u32 gl_end_count{};
1372 } mme_draw; 1345 } mme_draw;
1373 1346
1347 struct DirtyState {
1348 using Flags = std::bitset<std::numeric_limits<u8>::max()>;
1349 using Table = std::array<u8, Regs::NUM_REGS>;
1350 using Tables = std::array<Table, 2>;
1351
1352 Flags flags;
1353 Flags on_write_stores;
1354 Tables tables{};
1355 } dirty;
1356
1374private: 1357private:
1375 void InitializeRegisterDefaults(); 1358 void InitializeRegisterDefaults();
1376 1359
@@ -1417,8 +1400,6 @@ private:
1417 /// Retrieves information about a specific TSC entry from the TSC buffer. 1400 /// Retrieves information about a specific TSC entry from the TSC buffer.
1418 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1401 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1419 1402
1420 void InitDirtySettings();
1421
1422 /** 1403 /**
1423 * Call a macro on this engine. 1404 * Call a macro on this engine.
1424 * @param method Method to call 1405 * @param method Method to call
@@ -1485,6 +1466,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
1485ASSERT_REG_POSITION(tess_level_outer, 0xC9); 1466ASSERT_REG_POSITION(tess_level_outer, 0xC9);
1486ASSERT_REG_POSITION(tess_level_inner, 0xCD); 1467ASSERT_REG_POSITION(tess_level_inner, 0xCD);
1487ASSERT_REG_POSITION(rasterize_enable, 0xDF); 1468ASSERT_REG_POSITION(rasterize_enable, 0xDF);
1469ASSERT_REG_POSITION(tfb_bindings, 0xE0);
1470ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
1488ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1471ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1489ASSERT_REG_POSITION(rt, 0x200); 1472ASSERT_REG_POSITION(rt, 0x200);
1490ASSERT_REG_POSITION(viewport_transform, 0x280); 1473ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1494,6 +1477,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
1494ASSERT_REG_POSITION(clear_color[0], 0x360); 1477ASSERT_REG_POSITION(clear_color[0], 0x360);
1495ASSERT_REG_POSITION(clear_depth, 0x364); 1478ASSERT_REG_POSITION(clear_depth, 0x364);
1496ASSERT_REG_POSITION(clear_stencil, 0x368); 1479ASSERT_REG_POSITION(clear_stencil, 0x368);
1480ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
1481ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
1497ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); 1482ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1498ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1483ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1499ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1484ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1507,6 +1492,7 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1507ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1492ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1508ASSERT_REG_POSITION(zeta, 0x3F8); 1493ASSERT_REG_POSITION(zeta, 0x3F8);
1509ASSERT_REG_POSITION(clear_flags, 0x43E); 1494ASSERT_REG_POSITION(clear_flags, 0x43E);
1495ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1510ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1496ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1511ASSERT_REG_POSITION(rt_control, 0x487); 1497ASSERT_REG_POSITION(rt_control, 0x487);
1512ASSERT_REG_POSITION(zeta_width, 0x48a); 1498ASSERT_REG_POSITION(zeta_width, 0x48a);
@@ -1561,7 +1547,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2);
1561ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); 1547ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1562ASSERT_REG_POSITION(instanced_arrays, 0x620); 1548ASSERT_REG_POSITION(instanced_arrays, 0x620);
1563ASSERT_REG_POSITION(vp_point_size, 0x644); 1549ASSERT_REG_POSITION(vp_point_size, 0x644);
1564ASSERT_REG_POSITION(cull, 0x646); 1550ASSERT_REG_POSITION(cull_test_enabled, 0x646);
1551ASSERT_REG_POSITION(front_face, 0x647);
1552ASSERT_REG_POSITION(cull_face, 0x648);
1565ASSERT_REG_POSITION(pixel_center_integer, 0x649); 1553ASSERT_REG_POSITION(pixel_center_integer, 0x649);
1566ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); 1554ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
1567ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); 1555ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
@@ -1578,6 +1566,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
1578ASSERT_REG_POSITION(const_buffer, 0x8E0); 1566ASSERT_REG_POSITION(const_buffer, 0x8E0);
1579ASSERT_REG_POSITION(cb_bind[0], 0x904); 1567ASSERT_REG_POSITION(cb_bind[0], 0x904);
1580ASSERT_REG_POSITION(tex_cb_index, 0x982); 1568ASSERT_REG_POSITION(tex_cb_index, 0x982);
1569ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
1581ASSERT_REG_POSITION(ssbo_info, 0xD18); 1570ASSERT_REG_POSITION(ssbo_info, 0xD18);
1582ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); 1571ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
1583ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); 1572ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
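
The header replaces the hand-written DirtyRegs booleans with a generic DirtyState: a flat bitset of flags plus two per-register tables. The code that consumes the tables is not part of this file, so the dispatch below is an assumption about the write path; only the Flags/Table shapes mirror the struct added above, and NUM_REGS is a placeholder.

    #include <array>
    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <limits>

    constexpr std::size_t NUM_REGS = 0x100;  // placeholder for the real register count

    struct DirtyState {
        using Flags = std::bitset<std::numeric_limits<std::uint8_t>::max()>;
        using Table = std::array<std::uint8_t, NUM_REGS>;
        using Tables = std::array<Table, 2>;

        Flags flags;
        Flags on_write_stores;
        Tables tables{};
    };

    // Assumed dispatch: each table maps a register index to a flag id, with id 0 acting
    // as a harmless sink for untracked registers, so the hot path is two array loads and
    // two bit sets regardless of how many state groups a register belongs to.
    void NotifyRegisterWrite(DirtyState& dirty, std::size_t reg) {
        for (const auto& table : dirty.tables) {
            dirty.flags[table[reg]] = true;
        }
    }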
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ad8453c5f..c2610f992 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
57 } 57 }
58 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid. 59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 60 system.GPU().Maxwell3D().OnMemoryWrite();
61 61
62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
911 } fadd32i; 911 } fadd32i;
912 912
913 union { 913 union {
914 BitField<20, 8, u64> shift_position; 914 BitField<40, 1, u64> brev;
915 BitField<28, 8, u64> shift_length; 915 BitField<47, 1, u64> rd_cc;
916 BitField<48, 1, u64> negate_b; 916 BitField<48, 1, u64> is_signed;
917 BitField<49, 1, u64> negate_a;
918
919 u64 GetLeftShiftValue() const {
920 return 32 - (shift_position + shift_length);
921 }
922 } bfe; 917 } bfe;
923 918
924 union { 919 union {
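
The BFE encoding drops the immediate shift helpers and instead exposes brev, rd_cc and is_signed bits. As a reminder of what two of those bits mean for the operation itself, here is a plain software bitfield extract; this is an illustration only, not the decompiler change made elsewhere in this commit.

    #include <cstdint>

    // Extract `len` bits starting at `pos`, optionally bit-reversing the source first
    // (brev) and sign-extending the result (is_signed).
    std::uint32_t BitfieldExtract(std::uint32_t value, std::uint32_t pos, std::uint32_t len,
                                  bool is_signed, bool brev) {
        if (brev) {
            std::uint32_t reversed = 0;
            for (int bit = 0; bit < 32; ++bit) {
                reversed |= ((value >> bit) & 1u) << (31 - bit);
            }
            value = reversed;
        }
        if (len == 0 || pos >= 32) {
            return 0;
        }
        const std::uint32_t mask = (len >= 32) ? 0xFFFFFFFFu : ((1u << len) - 1u);
        std::uint32_t result = (value >> pos) & mask;
        if (is_signed && len < 32 && (result & (1u << (len - 1))) != 0) {
            result |= ~mask;  // replicate the sign bit into the upper bits
        }
        return result;
    }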
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
39 RGBA32_FLOAT = 0xC0, 39 RGBA32_FLOAT = 0xC0,
40 RGBA32_UINT = 0xC2, 40 RGBA32_UINT = 0xC2,
41 RGBA16_UNORM = 0xC6, 41 RGBA16_UNORM = 0xC6,
42 RGBA16_SNORM = 0xC7,
42 RGBA16_UINT = 0xC9, 43 RGBA16_UINT = 0xC9,
43 RGBA16_FLOAT = 0xCA, 44 RGBA16_FLOAT = 0xCA,
44 RG32_FLOAT = 0xCB, 45 RG32_FLOAT = 0xCB,
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 6adef459e..f058f2744 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -4,13 +4,15 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <limits> 6#include <limits>
7#include <vector>
7 8
9#include "common/common_types.h"
8#include "video_core/guest_driver.h" 10#include "video_core/guest_driver.h"
9 11
10namespace VideoCore { 12namespace VideoCore {
11 13
12void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { 14void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
13 if (texture_handler_size_deduced) { 15 if (texture_handler_size) {
14 return; 16 return;
15 } 17 }
16 const std::size_t size = bound_offsets.size(); 18 const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
29 if (min_val > 2) { 31 if (min_val > 2) {
30 return; 32 return;
31 } 33 }
32 texture_handler_size_deduced = true;
33 texture_handler_size = min_texture_handler_size * min_val; 34 texture_handler_size = min_texture_handler_size * min_val;
34} 35}
35 36
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index fc1917347..99450777e 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
7#include <vector> 8#include <vector>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
17 */ 18 */
18class GuestDriverProfile { 19class GuestDriverProfile {
19public: 20public:
20 void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); 21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
23 : texture_handler_size{texture_handler_size} {}
24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
21 26
22 u32 GetTextureHandlerSize() const { 27 u32 GetTextureHandlerSize() const {
23 return texture_handler_size; 28 return texture_handler_size.value_or(default_texture_handler_size);
24 } 29 }
25 30
26 bool TextureHandlerSizeKnown() const { 31 bool IsTextureHandlerSizeKnown() const {
27 return texture_handler_size_deduced; 32 return texture_handler_size.has_value();
28 } 33 }
29 34
30private: 35private:
31 // Minimum size of texture handler any driver can use. 36 // Minimum size of texture handler any driver can use.
32 static constexpr u32 min_texture_handler_size = 4; 37 static constexpr u32 min_texture_handler_size = 4;
33 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily 38
34 // use 4 bytes instead. Thus, certain drivers may squish the size. 39 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
40 // Thus, certain drivers may squish the size.
35 static constexpr u32 default_texture_handler_size = 8; 41 static constexpr u32 default_texture_handler_size = 8;
36 42
37 u32 texture_handler_size = default_texture_handler_size; 43 std::optional<u32> texture_handler_size = default_texture_handler_size;
38 bool texture_handler_size_deduced = false;
39}; 44};
40 45
41} // namespace VideoCore 46} // namespace VideoCore
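
With texture_handler_size now an std::optional, "known" means "has a value" and the getter falls back to the default. A small usage sketch follows; seeding the profile from a previously recorded size is a hypothetical scenario and the variable names are made up, while the GuestDriverProfile calls come from the header above.

    #include <optional>
    #include <utility>
    #include <vector>

    #include "common/common_types.h"
    #include "video_core/guest_driver.h"

    void SeedOrDeduce(VideoCore::GuestDriverProfile& profile, std::optional<u32> recorded_size,
                      std::vector<u32> sampled_offsets) {
        if (recorded_size) {
            profile = VideoCore::GuestDriverProfile{recorded_size};
        }
        if (!profile.IsTextureHandlerSizeKnown()) {
            // Offsets observed while binding textures drive the deduction.
            profile.DeduceTextureHandlerSize(std::move(sampled_offsets));
        }
        const u32 size = profile.GetTextureHandlerSize();  // falls back to the default (8)
        static_cast<void>(size);
    }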
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
174 /// End of address space, based on address space in bits. 174 /// End of address space, based on address space in bits.
175 static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; 175 static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
176 176
177 Common::PageTable page_table{page_bits}; 177 Common::BackingPageTable page_table{page_bits};
178 VMAMap vma_map; 178 VMAMap vma_map;
179 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180 180
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
51 MortonCopy<true, PixelFormat::R8UI>, 51 MortonCopy<true, PixelFormat::R8UI>,
52 MortonCopy<true, PixelFormat::RGBA16F>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
53 MortonCopy<true, PixelFormat::RGBA16U>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
54 MortonCopy<true, PixelFormat::RGBA16S>,
54 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::RGBA16UI>,
55 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
56 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
131 MortonCopy<false, PixelFormat::R8U>, 132 MortonCopy<false, PixelFormat::R8U>,
132 MortonCopy<false, PixelFormat::R8UI>, 133 MortonCopy<false, PixelFormat::R8UI>,
133 MortonCopy<false, PixelFormat::RGBA16F>, 134 MortonCopy<false, PixelFormat::RGBA16F>,
135 MortonCopy<false, PixelFormat::RGBA16S>,
134 MortonCopy<false, PixelFormat::RGBA16U>, 136 MortonCopy<false, PixelFormat::RGBA16U>,
135 MortonCopy<false, PixelFormat::RGBA16UI>, 137 MortonCopy<false, PixelFormat::RGBA16UI>,
136 MortonCopy<false, PixelFormat::R11FG11FB10F>, 138 MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f18eaf4bc..1a68e3caa 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
25 25
26enum class LoadCallbackStage { 26enum class LoadCallbackStage {
27 Prepare, 27 Prepare,
28 Decompile,
29 Build, 28 Build,
30 Complete, 29 Complete,
31}; 30};
@@ -89,6 +88,9 @@ public:
89 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 88 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
90 const DiskResourceLoadCallback& callback = {}) {} 89 const DiskResourceLoadCallback& callback = {}) {}
91 90
91 /// Initializes renderer dirty flags
92 virtual void SetupDirtyFlags() {}
93
92 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. 94 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
93 GuestDriverProfile& AccessGuestDriverProfile() { 95 GuestDriverProfile& AccessGuestDriverProfile() {
94 return guest_driver_profile; 96 return guest_driver_profile;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index 874ed3c6e..b8a512cb6 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 13#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
14#include "video_core/renderer_opengl/gl_state.h"
15 14
16namespace OpenGL { 15namespace OpenGL {
17 16
@@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
36 framebuffer.Create(); 35 framebuffer.Create();
37 36
38 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. 37 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
39 local_state.draw.draw_framebuffer = framebuffer.handle; 38 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
40 local_state.ApplyFramebufferState();
41 39
42 if (key.zeta) { 40 if (key.zeta) {
43 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; 41 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 02ec80ae9..8f698fee0 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -13,7 +13,6 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h" 15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_state.h"
17#include "video_core/renderer_opengl/gl_texture_cache.h" 16#include "video_core/renderer_opengl/gl_texture_cache.h"
18 17
19namespace OpenGL { 18namespace OpenGL {
@@ -63,7 +62,6 @@ public:
63private: 62private:
64 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); 63 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
65 64
66 OpenGLState local_state;
67 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; 65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
68}; 66};
69 67
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e1965fb21..063f41327 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_rasterizer.h" 29#include "video_core/renderer_opengl/gl_rasterizer.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_gen.h"
32#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
33#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
34 33
@@ -36,6 +35,7 @@ namespace OpenGL {
36 35
37using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
38 37
38using Tegra::Engines::ShaderType;
39using VideoCore::Surface::PixelFormat; 39using VideoCore::Surface::PixelFormat;
40using VideoCore::Surface::SurfaceTarget; 40using VideoCore::Surface::SurfaceTarget;
41using VideoCore::Surface::SurfaceType; 41using VideoCore::Surface::SurfaceType;
@@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
54 54
55namespace { 55namespace {
56 56
57constexpr std::size_t NumSupportedVertexAttributes = 16;
58
57template <typename Engine, typename Entry> 59template <typename Engine, typename Entry>
58Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 60Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
59 Tegra::Engines::ShaderType shader_type, 61 ShaderType shader_type, std::size_t index = 0) {
60 std::size_t index = 0) {
61 if (entry.IsBindless()) { 62 if (entry.IsBindless()) {
62 const Tegra::Texture::TextureHandle tex_handle = 63 const Tegra::Texture::TextureHandle tex_handle =
63 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); 64 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
@@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
74} 75}
75 76
76std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 77std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
77 const GLShader::ConstBufferEntry& entry) { 78 const ConstBufferEntry& entry) {
78 if (!entry.IsIndirect()) { 79 if (!entry.IsIndirect()) {
79 return entry.GetSize(); 80 return entry.GetSize();
80 } 81 }
@@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
88 return buffer.size; 89 return buffer.size;
89} 90}
90 91
92void oglEnable(GLenum cap, bool state) {
93 (state ? glEnable : glDisable)(cap);
94}
95
91} // Anonymous namespace 96} // Anonymous namespace
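
The new anonymous-namespace helper oglEnable folds the enable/disable branch that state synchronization code otherwise repeats. Restated standalone below with an illustrative call site; the capability/flag pairing is not copied from any specific Sync function in this file.

    #include <glad/glad.h>

    void oglEnable(GLenum cap, bool state) {
        (state ? glEnable : glDisable)(cap);
    }

    void SyncDepthTestSketch(bool depth_test_enabled) {
        oglEnable(GL_DEPTH_TEST, depth_test_enabled);  // one line per GL capability
    }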
92 97
93RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 98RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
94 ScreenInfo& info) 99 ScreenInfo& info, GLShader::ProgramManager& program_manager,
95 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, 100 StateTracker& state_tracker)
101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
96 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, 102 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
97 screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { 103 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
98 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 104 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
99 state.draw.shader_program = 0;
100 state.Apply();
101
102 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
103 CheckExtensions(); 105 CheckExtensions();
104} 106}
105 107
@@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() {
113 } 115 }
114} 116}
115 117
116GLuint RasterizerOpenGL::SetupVertexFormat() { 118void RasterizerOpenGL::SetupVertexFormat() {
117 auto& gpu = system.GPU().Maxwell3D(); 119 auto& gpu = system.GPU().Maxwell3D();
118 const auto& regs = gpu.regs; 120 auto& flags = gpu.dirty.flags;
119 121 if (!flags[Dirty::VertexFormats]) {
120 if (!gpu.dirty.vertex_attrib_format) { 122 return;
121 return state.draw.vertex_array;
122 } 123 }
123 gpu.dirty.vertex_attrib_format = false; 124 flags[Dirty::VertexFormats] = false;
124 125
125 MICROPROFILE_SCOPE(OpenGL_VAO); 126 MICROPROFILE_SCOPE(OpenGL_VAO);
126 127
127 auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); 128 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
128 auto& vao_entry = iter->second; 129 // the first 16 vertex attributes always, as we don't know which ones are actually used until
129 130 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
130 if (is_cache_miss) { 131 // avoid OpenGL errors.
131 vao_entry.Create(); 132 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
132 const GLuint vao = vao_entry.handle; 133 // assume every shader uses them all.
133 134 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
134 // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob 135 if (!flags[Dirty::VertexFormat0 + index]) {
135 // that fails to properly create the vertex array if it's not bound even after creating it 136 continue;
136 // with glCreateVertexArrays
137 state.draw.vertex_array = vao;
138 state.ApplyVertexArrayState();
139
140 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
141 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
142 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
143 // for now to avoid OpenGL errors.
144 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
145 // assume every shader uses them all.
146 for (u32 index = 0; index < 16; ++index) {
147 const auto& attrib = regs.vertex_attrib_format[index];
148
149 // Ignore invalid attributes.
150 if (!attrib.IsValid())
151 continue;
152
153 const auto& buffer = regs.vertex_array[attrib.buffer];
154 LOG_TRACE(Render_OpenGL,
155 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
156 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
157 attrib.offset.Value(), attrib.IsNormalized());
158
159 ASSERT(buffer.IsEnabled());
160
161 glEnableVertexArrayAttrib(vao, index);
162 if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
163 attrib.type ==
164 Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
165 glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
166 MaxwellToGL::VertexType(attrib), attrib.offset);
167 } else {
168 glVertexArrayAttribFormat(
169 vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
170 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
171 }
172 glVertexArrayAttribBinding(vao, index, attrib.buffer);
173 } 137 }
174 } 138 flags[Dirty::VertexFormat0 + index] = false;
139
140 const auto attrib = gpu.regs.vertex_attrib_format[index];
141 const auto gl_index = static_cast<GLuint>(index);
175 142
176 // Rebinding the VAO invalidates the vertex buffer bindings. 143 // Ignore invalid attributes.
177 gpu.dirty.ResetVertexArrays(); 144 if (!attrib.IsValid()) {
145 glDisableVertexAttribArray(gl_index);
146 continue;
147 }
148 glEnableVertexAttribArray(gl_index);
178 149
179 state.draw.vertex_array = vao_entry.handle; 150 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
180 return vao_entry.handle; 151 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
152 glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
153 MaxwellToGL::VertexType(attrib), attrib.offset);
154 } else {
155 glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
156 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
157 }
158 glVertexAttribBinding(gl_index, attrib.buffer);
159 }
181} 160}
182 161
183void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 162void RasterizerOpenGL::SetupVertexBuffer() {
184 auto& gpu = system.GPU().Maxwell3D(); 163 auto& gpu = system.GPU().Maxwell3D();
185 if (!gpu.dirty.vertex_array_buffers) 164 auto& flags = gpu.dirty.flags;
165 if (!flags[Dirty::VertexBuffers]) {
186 return; 166 return;
187 gpu.dirty.vertex_array_buffers = false; 167 }
188 168 flags[Dirty::VertexBuffers] = false;
189 const auto& regs = gpu.regs;
190 169
191 MICROPROFILE_SCOPE(OpenGL_VB); 170 MICROPROFILE_SCOPE(OpenGL_VB);
192 171
193 // Upload all guest vertex arrays sequentially to our buffer 172 // Upload all guest vertex arrays sequentially to our buffer
194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 173 const auto& regs = gpu.regs;
195 if (!gpu.dirty.vertex_array[index]) 174 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
175 if (!flags[Dirty::VertexBuffer0 + index]) {
196 continue; 176 continue;
197 gpu.dirty.vertex_array[index] = false; 177 }
198 gpu.dirty.vertex_instance[index] = false; 178 flags[Dirty::VertexBuffer0 + index] = false;
199 179
200 const auto& vertex_array = regs.vertex_array[index]; 180 const auto& vertex_array = regs.vertex_array[index];
201 if (!vertex_array.IsEnabled()) 181 if (!vertex_array.IsEnabled()) {
202 continue; 182 continue;
183 }
203 184
204 const GPUVAddr start = vertex_array.StartAddress(); 185 const GPUVAddr start = vertex_array.StartAddress();
205 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 186 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
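
SetupVertexFormat and SetupVertexBuffer now share a two-level pattern: one coarse flag as the early-out, per-index flags to limit the work to what actually changed. The generic restatement below uses made-up flag indices; only the control flow mirrors the functions above.

    #include <bitset>
    #include <cstddef>

    constexpr std::size_t VertexFormats = 0;   // coarse flag, the cheap early-out
    constexpr std::size_t VertexFormat0 = 1;   // first of the per-attribute flags
    constexpr std::size_t NumSupportedVertexAttributes = 16;

    void SetupVertexFormatSketch(std::bitset<64>& flags) {
        if (!flags[VertexFormats]) {
            return;  // nothing changed since the last draw
        }
        flags[VertexFormats] = false;

        for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
            if (!flags[VertexFormat0 + index]) {
                continue;  // this attribute is still in sync
            }
            flags[VertexFormat0 + index] = false;
            // ... re-specify attribute `index` with glVertexAttrib{I}Format here ...
        }
    }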
@@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
209 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 190 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
210 191
211 // Bind the vertex array to the buffer at the current offset. 192 // Bind the vertex array to the buffer at the current offset.
212 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, 193 vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
213 vertex_array.stride); 194 vertex_buffer_offset, vertex_array.stride);
214
215 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
216 // Enable vertex buffer instancing with the specified divisor.
217 glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
218 } else {
219 // Disable the vertex buffer instancing.
220 glVertexArrayBindingDivisor(vao, index, 0);
221 }
222 } 195 }
223} 196}
224 197
225void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { 198void RasterizerOpenGL::SetupVertexInstances() {
226 auto& gpu = system.GPU().Maxwell3D(); 199 auto& gpu = system.GPU().Maxwell3D();
227 200 auto& flags = gpu.dirty.flags;
228 if (!gpu.dirty.vertex_instances) 201 if (!flags[Dirty::VertexInstances]) {
229 return; 202 return;
230 gpu.dirty.vertex_instances = false; 203 }
204 flags[Dirty::VertexInstances] = false;
231 205
232 const auto& regs = gpu.regs; 206 const auto& regs = gpu.regs;
233 // Upload all guest vertex arrays sequentially to our buffer 207 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
234 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 208 if (!flags[Dirty::VertexInstance0 + index]) {
235 if (!gpu.dirty.vertex_instance[index])
236 continue; 209 continue;
237
238 gpu.dirty.vertex_instance[index] = false;
239
240 if (regs.instanced_arrays.IsInstancingEnabled(index) &&
241 regs.vertex_array[index].divisor != 0) {
242 // Enable vertex buffer instancing with the specified divisor.
243 glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
244 } else {
245 // Disable the vertex buffer instancing.
246 glVertexArrayBindingDivisor(vao, index, 0);
247 } 210 }
211 flags[Dirty::VertexInstance0 + index] = false;
212
213 const auto gl_index = static_cast<GLuint>(index);
214 const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
215 const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
216 glVertexBindingDivisor(gl_index, divisor);
248 } 217 }
249} 218}
250 219
@@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
260void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 229void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
261 MICROPROFILE_SCOPE(OpenGL_Shader); 230 MICROPROFILE_SCOPE(OpenGL_Shader);
262 auto& gpu = system.GPU().Maxwell3D(); 231 auto& gpu = system.GPU().Maxwell3D();
263 232 u32 clip_distances = 0;
264 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
265 233
266 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 234 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
267 const auto& shader_config = gpu.regs.shader_config[index]; 235 const auto& shader_config = gpu.regs.shader_config[index];
@@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
271 if (!gpu.regs.IsShaderConfigEnabled(index)) { 239 if (!gpu.regs.IsShaderConfigEnabled(index)) {
272 switch (program) { 240 switch (program) {
273 case Maxwell::ShaderProgram::Geometry: 241 case Maxwell::ShaderProgram::Geometry:
274 shader_program_manager->UseTrivialGeometryShader(); 242 program_manager.UseGeometryShader(0);
275 break; 243 break;
276 case Maxwell::ShaderProgram::Fragment: 244 case Maxwell::ShaderProgram::Fragment:
277 shader_program_manager->UseTrivialFragmentShader(); 245 program_manager.UseFragmentShader(0);
278 break; 246 break;
279 default: 247 default:
280 break; 248 break;
@@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
299 SetupDrawTextures(stage, shader); 267 SetupDrawTextures(stage, shader);
300 SetupDrawImages(stage, shader); 268 SetupDrawImages(stage, shader);
301 269
302 const ProgramVariant variant(primitive_mode); 270 const GLuint program_handle = shader->GetHandle();
303 const auto program_handle = shader->GetHandle(variant);
304
305 switch (program) { 271 switch (program) {
306 case Maxwell::ShaderProgram::VertexA: 272 case Maxwell::ShaderProgram::VertexA:
307 case Maxwell::ShaderProgram::VertexB: 273 case Maxwell::ShaderProgram::VertexB:
308 shader_program_manager->UseProgrammableVertexShader(program_handle); 274 program_manager.UseVertexShader(program_handle);
309 break; 275 break;
310 case Maxwell::ShaderProgram::Geometry: 276 case Maxwell::ShaderProgram::Geometry:
311 shader_program_manager->UseProgrammableGeometryShader(program_handle); 277 program_manager.UseGeometryShader(program_handle);
312 break; 278 break;
313 case Maxwell::ShaderProgram::Fragment: 279 case Maxwell::ShaderProgram::Fragment:
314 shader_program_manager->UseProgrammableFragmentShader(program_handle); 280 program_manager.UseFragmentShader(program_handle);
315 break; 281 break;
316 default: 282 default:
317 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 283 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
@@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
322 // When a clip distance is enabled but not set in the shader it crops parts of the screen 288 // When a clip distance is enabled but not set in the shader it crops parts of the screen
323 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 289 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
324 // clip distances only when it's written by a shader stage. 290 // clip distances only when it's written by a shader stage.
325 for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { 291 clip_distances |= shader->GetEntries().clip_distances;
326 clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
327 }
328 292
329 // When VertexA is enabled, we have dual vertex shaders 293 // When VertexA is enabled, we have dual vertex shaders
330 if (program == Maxwell::ShaderProgram::VertexA) { 294 if (program == Maxwell::ShaderProgram::VertexA) {
@@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
334 } 298 }
335 299
336 SyncClipEnabled(clip_distances); 300 SyncClipEnabled(clip_distances);
337 301 gpu.dirty.flags[Dirty::Shaders] = false;
338 gpu.dirty.shaders = false;
339} 302}
340 303
341std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 304std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
368 shader_cache.LoadDiskCache(stop_loading, callback); 331 shader_cache.LoadDiskCache(stop_loading, callback);
369} 332}
370 333
334void RasterizerOpenGL::SetupDirtyFlags() {
335 state_tracker.Initialize();
336}
337
371void RasterizerOpenGL::ConfigureFramebuffers() { 338void RasterizerOpenGL::ConfigureFramebuffers() {
372 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 339 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
373 auto& gpu = system.GPU().Maxwell3D(); 340 auto& gpu = system.GPU().Maxwell3D();
374 if (!gpu.dirty.render_settings) { 341 if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
375 return; 342 return;
376 } 343 }
377 gpu.dirty.render_settings = false; 344 gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
378 345
379 texture_cache.GuardRenderTargets(true); 346 texture_cache.GuardRenderTargets(true);
380 347
381 View depth_surface = texture_cache.GetDepthBufferSurface(true); 348 View depth_surface = texture_cache.GetDepthBufferSurface(true);
382 349
383 const auto& regs = gpu.regs; 350 const auto& regs = gpu.regs;
384 state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
385 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); 351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
386 352
387 // Bind the framebuffer surfaces 353 // Bind the framebuffer surfaces
@@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
409 375
410 texture_cache.GuardRenderTargets(false); 376 texture_cache.GuardRenderTargets(false);
411 377
412 state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); 378 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
413 SyncViewport(state);
414} 379}
415 380
416void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, 381void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
417 bool using_depth_fb, bool using_stencil_fb) { 382 bool using_stencil_fb) {
418 using VideoCore::Surface::SurfaceType;
419
420 auto& gpu = system.GPU().Maxwell3D(); 383 auto& gpu = system.GPU().Maxwell3D();
421 const auto& regs = gpu.regs; 384 const auto& regs = gpu.regs;
422 385
@@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo
435 key.colors[0] = color_surface; 398 key.colors[0] = color_surface;
436 key.zeta = depth_surface; 399 key.zeta = depth_surface;
437 400
438 current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); 401 state_tracker.NotifyFramebuffer();
439 current_state.ApplyFramebufferState(); 402 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
440} 403}
441 404
442void RasterizerOpenGL::Clear() { 405void RasterizerOpenGL::Clear() {
443 const auto& maxwell3d = system.GPU().Maxwell3D(); 406 const auto& gpu = system.GPU().Maxwell3D();
444 407 if (!gpu.ShouldExecute()) {
445 if (!maxwell3d.ShouldExecute()) {
446 return; 408 return;
447 } 409 }
448 410
449 const auto& regs = maxwell3d.regs; 411 const auto& regs = gpu.regs;
450 bool use_color{}; 412 bool use_color{};
451 bool use_depth{}; 413 bool use_depth{};
452 bool use_stencil{}; 414 bool use_stencil{};
453 415
454 OpenGLState prev_state{OpenGLState::GetCurState()};
455 SCOPE_EXIT({
456 prev_state.AllDirty();
457 prev_state.Apply();
458 });
459
460 OpenGLState clear_state{OpenGLState::GetCurState()};
461 clear_state.SetDefaultViewports();
462 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 416 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
463 regs.clear_buffers.A) { 417 regs.clear_buffers.A) {
464 use_color = true; 418 use_color = true;
465 } 419 }
466 if (use_color) { 420 if (use_color) {
467 clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; 421 state_tracker.NotifyColorMask0();
468 clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; 422 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
469 clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; 423 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
470 clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; 424
425 // TODO(Rodrigo): Determine if clamping is used on clears
426 SyncFragmentColorClampState();
427 SyncFramebufferSRGB();
471 } 428 }
472 if (regs.clear_buffers.Z) { 429 if (regs.clear_buffers.Z) {
473 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); 430 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
474 use_depth = true; 431 use_depth = true;
475 432
476 // Always enable the depth write when clearing the depth buffer. The depth write mask is 433 state_tracker.NotifyDepthMask();
477 // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to 434 glDepthMask(GL_TRUE);
478 // true.
479 clear_state.depth.test_enabled = true;
480 clear_state.depth.test_func = GL_ALWAYS;
481 clear_state.depth.write_mask = GL_TRUE;
482 } 435 }
483 if (regs.clear_buffers.S) { 436 if (regs.clear_buffers.S) {
484 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 437 ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
485 use_stencil = true; 438 use_stencil = true;
486 clear_state.stencil.test_enabled = true;
487
488 if (regs.clear_flags.stencil) {
489 // Stencil affects the clear so fill it with the used masks
490 clear_state.stencil.front.test_func = GL_ALWAYS;
491 clear_state.stencil.front.test_mask = regs.stencil_front_func_mask;
492 clear_state.stencil.front.action_stencil_fail = GL_KEEP;
493 clear_state.stencil.front.action_depth_fail = GL_KEEP;
494 clear_state.stencil.front.action_depth_pass = GL_KEEP;
495 clear_state.stencil.front.write_mask = regs.stencil_front_mask;
496 if (regs.stencil_two_side_enable) {
497 clear_state.stencil.back.test_func = GL_ALWAYS;
498 clear_state.stencil.back.test_mask = regs.stencil_back_func_mask;
499 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
500 clear_state.stencil.back.action_depth_fail = GL_KEEP;
501 clear_state.stencil.back.action_depth_pass = GL_KEEP;
502 clear_state.stencil.back.write_mask = regs.stencil_back_mask;
503 } else {
504 clear_state.stencil.back.test_func = GL_ALWAYS;
505 clear_state.stencil.back.test_mask = 0xFFFFFFFF;
506 clear_state.stencil.back.write_mask = 0xFFFFFFFF;
507 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
508 clear_state.stencil.back.action_depth_fail = GL_KEEP;
509 clear_state.stencil.back.action_depth_pass = GL_KEEP;
510 }
511 }
512 } 439 }
513 440
514 if (!use_color && !use_depth && !use_stencil) { 441 if (!use_color && !use_depth && !use_stencil) {
@@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() {
516 return; 443 return;
517 } 444 }
518 445
519 ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); 446 SyncRasterizeEnable();
520 447
521 SyncViewport(clear_state);
522 SyncRasterizeEnable(clear_state);
523 if (regs.clear_flags.scissor) { 448 if (regs.clear_flags.scissor) {
524 SyncScissorTest(clear_state); 449 SyncScissorTest();
450 } else {
451 state_tracker.NotifyScissor0();
452 glDisablei(GL_SCISSOR_TEST, 0);
525 } 453 }
526 454
527 if (regs.clear_flags.viewport) { 455 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
528 clear_state.EmulateViewportWithScissor();
529 }
530 456
531 clear_state.AllDirty(); 457 ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
532 clear_state.Apply();
533 458
534 if (use_color) { 459 if (use_color) {
535 glClearBufferfv(GL_COLOR, 0, regs.clear_color); 460 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
@@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() {
549void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { 474void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
550 MICROPROFILE_SCOPE(OpenGL_Drawing); 475 MICROPROFILE_SCOPE(OpenGL_Drawing);
551 auto& gpu = system.GPU().Maxwell3D(); 476 auto& gpu = system.GPU().Maxwell3D();
552 const auto& regs = gpu.regs;
553 477
554 query_cache.UpdateCounters(); 478 query_cache.UpdateCounters();
555 479
556 SyncRasterizeEnable(state); 480 SyncViewport();
481 SyncRasterizeEnable();
482 SyncPolygonModes();
557 SyncColorMask(); 483 SyncColorMask();
558 SyncFragmentColorClampState(); 484 SyncFragmentColorClampState();
559 SyncMultiSampleState(); 485 SyncMultiSampleState();
560 SyncDepthTestState(); 486 SyncDepthTestState();
487 SyncDepthClamp();
561 SyncStencilTestState(); 488 SyncStencilTestState();
562 SyncBlendState(); 489 SyncBlendState();
563 SyncLogicOpState(); 490 SyncLogicOpState();
564 SyncCullMode(); 491 SyncCullMode();
565 SyncPrimitiveRestart(); 492 SyncPrimitiveRestart();
566 SyncScissorTest(state); 493 SyncScissorTest();
567 SyncTransformFeedback();
568 SyncPointState(); 494 SyncPointState();
569 SyncPolygonOffset(); 495 SyncPolygonOffset();
570 SyncAlphaTest(); 496 SyncAlphaTest();
497 SyncFramebufferSRGB();
571 498
572 buffer_cache.Acquire(); 499 buffer_cache.Acquire();
573 500
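
Without OpenGLState, both the rewritten Clear() above and the Sync* helpers follow a "notify, then touch" contract: any raw GL call that bypasses the tracked state first tells the StateTracker, so the tracker re-emits that state on a later draw. A sketch of the convention with the tracker type left generic; NotifyColorMask0 is one of the notifications used in Clear().

    #include <glad/glad.h>

    template <typename StateTracker>
    void ClearColorMaskSketch(StateTracker& state_tracker, bool r, bool g, bool b, bool a) {
        state_tracker.NotifyColorMask0();  // the cached mask is about to go stale
        glColorMaski(0, r ? GL_TRUE : GL_FALSE, g ? GL_TRUE : GL_FALSE,
                     b ? GL_TRUE : GL_FALSE, a ? GL_TRUE : GL_FALSE);
    }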
@@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
591 buffer_cache.Map(buffer_size); 518 buffer_cache.Map(buffer_size);
592 519
593 // Prepare vertex array format. 520 // Prepare vertex array format.
594 const GLuint vao = SetupVertexFormat(); 521 SetupVertexFormat();
595 vertex_array_pushbuffer.Setup(vao); 522 vertex_array_pushbuffer.Setup();
596 523
597 // Upload vertex and index data. 524 // Upload vertex and index data.
598 SetupVertexBuffer(vao); 525 SetupVertexBuffer();
599 SetupVertexInstances(vao); 526 SetupVertexInstances();
600 527 GLintptr index_buffer_offset = 0;
601 GLintptr index_buffer_offset;
602 if (is_indexed) { 528 if (is_indexed) {
603 index_buffer_offset = SetupIndexBuffer(); 529 index_buffer_offset = SetupIndexBuffer();
604 } 530 }
@@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
624 ConfigureFramebuffers(); 550 ConfigureFramebuffers();
625 551
626 // Signal the buffer cache that we are not going to upload more things. 552 // Signal the buffer cache that we are not going to upload more things.
627 const bool invalidate = buffer_cache.Unmap(); 553 buffer_cache.Unmap();
628 554
629 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. 555 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
630 vertex_array_pushbuffer.Bind(); 556 vertex_array_pushbuffer.Bind();
631 bind_ubo_pushbuffer.Bind(); 557 bind_ubo_pushbuffer.Bind();
632 bind_ssbo_pushbuffer.Bind(); 558 bind_ssbo_pushbuffer.Bind();
633 559
634 if (invalidate) { 560 program_manager.BindGraphicsPipeline();
635 // As all cached buffers are invalidated, we need to recheck their state.
636 gpu.dirty.ResetVertexArrays();
637 }
638 gpu.dirty.memory_general = false;
639
640 shader_program_manager->ApplyTo(state);
641 state.Apply();
642 561
643 if (texture_cache.TextureBarrier()) { 562 if (texture_cache.TextureBarrier()) {
644 glTextureBarrier(); 563 glTextureBarrier();
645 } 564 }
646 565
647 ++num_queued_commands; 566 BeginTransformFeedback(primitive_mode);
648 567
649 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); 568 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
650 const GLsizei num_instances = 569 const GLsizei num_instances =
@@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
683 num_instances, base_instance); 602 num_instances, base_instance);
684 } 603 }
685 } 604 }
605
606 EndTransformFeedback();
607
608 ++num_queued_commands;
686} 609}
687 610
688void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 611void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
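
Draw() now brackets the submission with BeginTransformFeedback and EndTransformFeedback; their bodies are not shown in this hunk. Purely as an assumption, the smallest GL-side bracket consistent with that call order would look like the following; the real helpers also have to bind the transform feedback buffers and translate the Maxwell topology.

    #include <glad/glad.h>

    void BeginTransformFeedbackSketch(bool tfb_enabled, GLenum topology) {
        if (!tfb_enabled) {
            return;
        }
        // glBeginTransformFeedback accepts only GL_POINTS, GL_LINES or GL_TRIANGLES.
        glBeginTransformFeedback(topology);
    }

    void EndTransformFeedbackSketch(bool tfb_enabled) {
        if (tfb_enabled) {
            glEndTransformFeedback();
        }
    }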
@@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
695 auto kernel = shader_cache.GetComputeKernel(code_addr); 618 auto kernel = shader_cache.GetComputeKernel(code_addr);
696 SetupComputeTextures(kernel); 619 SetupComputeTextures(kernel);
697 SetupComputeImages(kernel); 620 SetupComputeImages(kernel);
698 621 program_manager.BindComputeShader(kernel->GetHandle());
699 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
700 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
701 launch_desc.block_dim_z, launch_desc.shared_alloc,
702 launch_desc.local_pos_alloc);
703 state.draw.shader_program = kernel->GetHandle(variant);
704 state.draw.program_pipeline = 0;
705 622
706 const std::size_t buffer_size = 623 const std::size_t buffer_size =
707 Tegra::Engines::KeplerCompute::NumConstBuffers * 624 Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
719 bind_ubo_pushbuffer.Bind(); 636 bind_ubo_pushbuffer.Bind();
720 bind_ssbo_pushbuffer.Bind(); 637 bind_ssbo_pushbuffer.Bind();
721 638
722 state.ApplyTextures(); 639 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
723 state.ApplyImages();
724 state.ApplyShaderProgram();
725 state.ApplyProgramPipeline();
726
727 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 640 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
728 ++num_queued_commands; 641 ++num_queued_commands;
729} 642}
@@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
828 const auto& shader_stage = stages[stage_index]; 741 const auto& shader_stage = stages[stage_index];
829 742
830 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; 743 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
831 for (const auto& entry : shader->GetShaderEntries().const_buffers) { 744 for (const auto& entry : shader->GetEntries().const_buffers) {
832 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 745 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
833 SetupConstBuffer(binding++, buffer, entry); 746 SetupConstBuffer(binding++, buffer, entry);
834 } 747 }
@@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
839 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 752 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
840 753
841 u32 binding = 0; 754 u32 binding = 0;
842 for (const auto& entry : kernel->GetShaderEntries().const_buffers) { 755 for (const auto& entry : kernel->GetEntries().const_buffers) {
843 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 756 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
844 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 757 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
845 Tegra::Engines::ConstBufferInfo buffer; 758 Tegra::Engines::ConstBufferInfo buffer;
@@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
851} 764}
852 765
853void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 766void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
854 const GLShader::ConstBufferEntry& entry) { 767 const ConstBufferEntry& entry) {
855 if (!buffer.enabled) { 768 if (!buffer.enabled) {
856 // Set values to zero to unbind buffers 769 // Set values to zero to unbind buffers
857 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, 770 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
875 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 788 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
876 789
877 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; 790 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
878 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { 791 for (const auto& entry : shader->GetEntries().global_memory_entries) {
879 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; 792 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
880 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 793 const auto gpu_addr{memory_manager.Read<u64>(addr)};
881 const auto size{memory_manager.Read<u32>(addr + 8)}; 794 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
889 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 802 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
890 803
891 u32 binding = 0; 804 u32 binding = 0;
892 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { 805 for (const auto& entry : kernel->GetEntries().global_memory_entries) {
893 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; 806 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
894 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 807 const auto gpu_addr{memory_manager.Read<u64>(addr)};
895 const auto size{memory_manager.Read<u32>(addr + 8)}; 808 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
897 } 810 }
898} 811}
899 812
900void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, 813void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
901 GPUVAddr gpu_addr, std::size_t size) { 814 GPUVAddr gpu_addr, std::size_t size) {
902 const auto alignment{device.GetShaderStorageBufferAlignment()}; 815 const auto alignment{device.GetShaderStorageBufferAlignment()};
903 const auto [ssbo, buffer_offset] = 816 const auto [ssbo, buffer_offset] =
@@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
909 MICROPROFILE_SCOPE(OpenGL_Texture); 822 MICROPROFILE_SCOPE(OpenGL_Texture);
910 const auto& maxwell3d = system.GPU().Maxwell3D(); 823 const auto& maxwell3d = system.GPU().Maxwell3D();
911 u32 binding = device.GetBaseBindings(stage_index).sampler; 824 u32 binding = device.GetBaseBindings(stage_index).sampler;
912 for (const auto& entry : shader->GetShaderEntries().samplers) { 825 for (const auto& entry : shader->GetEntries().samplers) {
913 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 826 const auto shader_type = static_cast<ShaderType>(stage_index);
914 if (!entry.IsIndexed()) { 827 for (std::size_t i = 0; i < entry.Size(); ++i) {
915 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); 828 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
916 SetupTexture(binding++, texture, entry); 829 SetupTexture(binding++, texture, entry);
917 } else {
918 for (std::size_t i = 0; i < entry.Size(); ++i) {
919 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
920 SetupTexture(binding++, texture, entry);
921 }
922 } 830 }
923 } 831 }
924} 832}
@@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
927 MICROPROFILE_SCOPE(OpenGL_Texture); 835 MICROPROFILE_SCOPE(OpenGL_Texture);
928 const auto& compute = system.GPU().KeplerCompute(); 836 const auto& compute = system.GPU().KeplerCompute();
929 u32 binding = 0; 837 u32 binding = 0;
930 for (const auto& entry : kernel->GetShaderEntries().samplers) { 838 for (const auto& entry : kernel->GetEntries().samplers) {
931 if (!entry.IsIndexed()) { 839 for (std::size_t i = 0; i < entry.Size(); ++i) {
932 const auto texture = 840 const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
933 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
934 SetupTexture(binding++, texture, entry); 841 SetupTexture(binding++, texture, entry);
935 } else {
936 for (std::size_t i = 0; i < entry.Size(); ++i) {
937 const auto texture =
938 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
939 SetupTexture(binding++, texture, entry);
940 }
941 } 842 }
942 } 843 }
943} 844}
944 845
945void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 846void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
946 const GLShader::SamplerEntry& entry) { 847 const SamplerEntry& entry) {
947 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 848 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
948 if (!view) { 849 if (!view) {
949 // Can occur when texture addr is null or its memory is unmapped/invalid 850 // Can occur when texture addr is null or its memory is unmapped/invalid
950 state.samplers[binding] = 0; 851 glBindSampler(binding, 0);
951 state.textures[binding] = 0; 852 glBindTextureUnit(binding, 0);
952 return; 853 return;
953 } 854 }
954 state.textures[binding] = view->GetTexture(); 855 glBindTextureUnit(binding, view->GetTexture());
955 856
956 if (view->GetSurfaceParams().IsBuffer()) { 857 if (view->GetSurfaceParams().IsBuffer()) {
957 return; 858 return;
958 } 859 }
959 state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
960
961 // Apply swizzle to textures that are not buffers. 860 // Apply swizzle to textures that are not buffers.
962 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 861 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
963 texture.tic.w_source); 862 texture.tic.w_source);
863
864 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
964} 865}
965 866
966void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 867void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
967 const auto& maxwell3d = system.GPU().Maxwell3D(); 868 const auto& maxwell3d = system.GPU().Maxwell3D();
968 u32 binding = device.GetBaseBindings(stage_index).image; 869 u32 binding = device.GetBaseBindings(stage_index).image;
969 for (const auto& entry : shader->GetShaderEntries().images) { 870 for (const auto& entry : shader->GetEntries().images) {
970 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 871 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
971 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 872 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
972 SetupImage(binding++, tic, entry); 873 SetupImage(binding++, tic, entry);
@@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
976void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 877void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
977 const auto& compute = system.GPU().KeplerCompute(); 878 const auto& compute = system.GPU().KeplerCompute();
978 u32 binding = 0; 879 u32 binding = 0;
979 for (const auto& entry : shader->GetShaderEntries().images) { 880 for (const auto& entry : shader->GetEntries().images) {
980 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; 881 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
981 SetupImage(binding++, tic, entry); 882 SetupImage(binding++, tic, entry);
982 } 883 }
983} 884}
984 885
985void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 886void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
986 const GLShader::ImageEntry& entry) { 887 const ImageEntry& entry) {
987 const auto view = texture_cache.GetImageSurface(tic, entry); 888 const auto view = texture_cache.GetImageSurface(tic, entry);
988 if (!view) { 889 if (!view) {
989 state.images[binding] = 0; 890 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
990 return; 891 return;
991 } 892 }
992 if (!tic.IsBuffer()) { 893 if (!tic.IsBuffer()) {
@@ -995,55 +896,85 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
995 if (entry.IsWritten()) { 896 if (entry.IsWritten()) {
996 view->MarkAsModified(texture_cache.Tick()); 897 view->MarkAsModified(texture_cache.Tick());
997 } 898 }
998 state.images[binding] = view->GetTexture(); 899 glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
900 view->GetFormat());
999} 901}
1000 902
1001void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 903void RasterizerOpenGL::SyncViewport() {
1002 const auto& regs = system.GPU().Maxwell3D().regs; 904 auto& gpu = system.GPU().Maxwell3D();
1003 const bool geometry_shaders_enabled = 905 auto& flags = gpu.dirty.flags;
1004 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 906 const auto& regs = gpu.regs;
1005 const std::size_t viewport_count = 907
1006 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; 908 const bool dirty_viewport = flags[Dirty::Viewports];
1007 for (std::size_t i = 0; i < viewport_count; i++) { 909 if (dirty_viewport || flags[Dirty::ClipControl]) {
1008 auto& viewport = current_state.viewports[i]; 910 flags[Dirty::ClipControl] = false;
1009 const auto& src = regs.viewports[i]; 911
1010 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 912 bool flip_y = false;
1011 viewport.x = viewport_rect.left; 913 if (regs.viewport_transform[0].scale_y < 0.0) {
1012 viewport.y = viewport_rect.bottom; 914 flip_y = !flip_y;
1013 viewport.width = viewport_rect.GetWidth(); 915 }
1014 viewport.height = viewport_rect.GetHeight(); 916 if (regs.screen_y_control.y_negate != 0) {
1015 viewport.depth_range_far = src.depth_range_far; 917 flip_y = !flip_y;
1016 viewport.depth_range_near = src.depth_range_near; 918 }
1017 } 919 glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
1018 state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; 920 regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
1019 state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; 921 : GL_NEGATIVE_ONE_TO_ONE);
1020 922 }
1021 bool flip_y = false; 923
1022 if (regs.viewport_transform[0].scale_y < 0.0) { 924 if (dirty_viewport) {
1023 flip_y = !flip_y; 925 flags[Dirty::Viewports] = false;
1024 } 926
1025 if (regs.screen_y_control.y_negate != 0) { 927 const bool force = flags[Dirty::ViewportTransform];
1026 flip_y = !flip_y; 928 flags[Dirty::ViewportTransform] = false;
1027 } 929
1028 state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; 930 for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
1029 state.clip_control.depth_mode = 931 if (!force && !flags[Dirty::Viewport0 + i]) {
1030 regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne 932 continue;
1031 ? GL_ZERO_TO_ONE 933 }
1032 : GL_NEGATIVE_ONE_TO_ONE; 934 flags[Dirty::Viewport0 + i] = false;
935
936 const Common::Rectangle<f32> rect{regs.viewport_transform[i].GetRect()};
937 glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
938 rect.GetHeight());
939
940 const auto& src = regs.viewports[i];
941 glDepthRangeIndexed(static_cast<GLuint>(i), static_cast<GLdouble>(src.depth_range_near),
942 static_cast<GLdouble>(src.depth_range_far));
943 }
944 }
1033} 945}
1034 946
1035void RasterizerOpenGL::SyncClipEnabled( 947void RasterizerOpenGL::SyncDepthClamp() {
1036 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 948 auto& gpu = system.GPU().Maxwell3D();
949 auto& flags = gpu.dirty.flags;
950 if (!flags[Dirty::DepthClampEnabled]) {
951 return;
952 }
953 flags[Dirty::DepthClampEnabled] = false;
1037 954
1038 const auto& regs = system.GPU().Maxwell3D().regs; 955 const auto& state = gpu.regs.view_volume_clip_control;
1039 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 956 UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near,
1040 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 957 "Unimplemented depth clamp separation!");
1041 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 958
1042 regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, 959 oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
1043 regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; 960}
961
962void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
963 auto& gpu = system.GPU().Maxwell3D();
964 auto& flags = gpu.dirty.flags;
965 if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
966 return;
967 }
968 flags[Dirty::ClipDistances] = false;
969
970 clip_mask &= gpu.regs.clip_distance_enabled;
971 if (clip_mask == last_clip_distance_mask) {
972 return;
973 }
974 last_clip_distance_mask = clip_mask;
1044 975
1045 for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { 976 for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
1046 state.clip_distance[i] = reg_state[i] && clip_mask[i]; 977 oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
1047 } 978 }
1048} 979}
1049 980
@@ -1052,247 +983,442 @@ void RasterizerOpenGL::SyncClipCoef() {
1052} 983}
1053 984
1054void RasterizerOpenGL::SyncCullMode() { 985void RasterizerOpenGL::SyncCullMode() {
1055 const auto& regs = system.GPU().Maxwell3D().regs; 986 auto& gpu = system.GPU().Maxwell3D();
987 auto& flags = gpu.dirty.flags;
988 const auto& regs = gpu.regs;
1056 989
1057 state.cull.enabled = regs.cull.enabled != 0; 990 if (flags[Dirty::CullTest]) {
1058 if (state.cull.enabled) { 991 flags[Dirty::CullTest] = false;
1059 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 992
993 if (regs.cull_test_enabled) {
994 glEnable(GL_CULL_FACE);
995 glCullFace(MaxwellToGL::CullFace(regs.cull_face));
996 } else {
997 glDisable(GL_CULL_FACE);
998 }
1060 } 999 }
1061 1000
1062 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 1001 if (flags[Dirty::FrontFace]) {
1002 flags[Dirty::FrontFace] = false;
1003 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1004 }
1063} 1005}
1064 1006
1065void RasterizerOpenGL::SyncPrimitiveRestart() { 1007void RasterizerOpenGL::SyncPrimitiveRestart() {
1066 const auto& regs = system.GPU().Maxwell3D().regs; 1008 auto& gpu = system.GPU().Maxwell3D();
1009 auto& flags = gpu.dirty.flags;
1010 if (!flags[Dirty::PrimitiveRestart]) {
1011 return;
1012 }
1013 flags[Dirty::PrimitiveRestart] = false;
1067 1014
1068 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1015 if (gpu.regs.primitive_restart.enabled) {
1069 state.primitive_restart.index = regs.primitive_restart.index; 1016 glEnable(GL_PRIMITIVE_RESTART);
1017 glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
1018 } else {
1019 glDisable(GL_PRIMITIVE_RESTART);
1020 }
1070} 1021}
1071 1022
1072void RasterizerOpenGL::SyncDepthTestState() { 1023void RasterizerOpenGL::SyncDepthTestState() {
1073 const auto& regs = system.GPU().Maxwell3D().regs; 1024 auto& gpu = system.GPU().Maxwell3D();
1074 1025 auto& flags = gpu.dirty.flags;
1075 state.depth.test_enabled = regs.depth_test_enable != 0;
1076 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
1077 1026
1078 if (!state.depth.test_enabled) { 1027 const auto& regs = gpu.regs;
1079 return; 1028 if (flags[Dirty::DepthMask]) {
1029 flags[Dirty::DepthMask] = false;
1030 glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
1080 } 1031 }
1081 1032
1082 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); 1033 if (flags[Dirty::DepthTest]) {
1034 flags[Dirty::DepthTest] = false;
1035 if (regs.depth_test_enable) {
1036 glEnable(GL_DEPTH_TEST);
1037 glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
1038 } else {
1039 glDisable(GL_DEPTH_TEST);
1040 }
1041 }
1083} 1042}
1084 1043
1085void RasterizerOpenGL::SyncStencilTestState() { 1044void RasterizerOpenGL::SyncStencilTestState() {
1086 auto& maxwell3d = system.GPU().Maxwell3D(); 1045 auto& gpu = system.GPU().Maxwell3D();
1087 if (!maxwell3d.dirty.stencil_test) { 1046 auto& flags = gpu.dirty.flags;
1047 if (!flags[Dirty::StencilTest]) {
1088 return; 1048 return;
1089 } 1049 }
1090 maxwell3d.dirty.stencil_test = false; 1050 flags[Dirty::StencilTest] = false;
1091
1092 const auto& regs = maxwell3d.regs;
1093 state.stencil.test_enabled = regs.stencil_enable != 0;
1094 state.MarkDirtyStencilState();
1095 1051
1052 const auto& regs = gpu.regs;
1096 if (!regs.stencil_enable) { 1053 if (!regs.stencil_enable) {
1054 glDisable(GL_STENCIL_TEST);
1097 return; 1055 return;
1098 } 1056 }
1099 1057
1100 state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); 1058 glEnable(GL_STENCIL_TEST);
1101 state.stencil.front.test_ref = regs.stencil_front_func_ref; 1059 glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
1102 state.stencil.front.test_mask = regs.stencil_front_func_mask; 1060 regs.stencil_front_func_ref, regs.stencil_front_func_mask);
1103 state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); 1061 glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
1104 state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); 1062 MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
1105 state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); 1063 MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
1106 state.stencil.front.write_mask = regs.stencil_front_mask; 1064 glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
1065
1107 if (regs.stencil_two_side_enable) { 1066 if (regs.stencil_two_side_enable) {
1108 state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); 1067 glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
1109 state.stencil.back.test_ref = regs.stencil_back_func_ref; 1068 regs.stencil_back_func_ref, regs.stencil_back_func_mask);
1110 state.stencil.back.test_mask = regs.stencil_back_func_mask; 1069 glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
1111 state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); 1070 MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
1112 state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); 1071 MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
1113 state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); 1072 glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
1114 state.stencil.back.write_mask = regs.stencil_back_mask;
1115 } else { 1073 } else {
1116 state.stencil.back.test_func = GL_ALWAYS; 1074 glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
1117 state.stencil.back.test_ref = 0; 1075 glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
1118 state.stencil.back.test_mask = 0xFFFFFFFF; 1076 glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
1119 state.stencil.back.write_mask = 0xFFFFFFFF;
1120 state.stencil.back.action_stencil_fail = GL_KEEP;
1121 state.stencil.back.action_depth_fail = GL_KEEP;
1122 state.stencil.back.action_depth_pass = GL_KEEP;
1123 } 1077 }
1124} 1078}
1125 1079
1126void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) { 1080void RasterizerOpenGL::SyncRasterizeEnable() {
1127 const auto& regs = system.GPU().Maxwell3D().regs; 1081 auto& gpu = system.GPU().Maxwell3D();
1128 current_state.rasterizer_discard = regs.rasterize_enable == 0; 1082 auto& flags = gpu.dirty.flags;
1083 if (!flags[Dirty::RasterizeEnable]) {
1084 return;
1085 }
1086 flags[Dirty::RasterizeEnable] = false;
1087
1088 oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
1089}
1090
1091void RasterizerOpenGL::SyncPolygonModes() {
1092 auto& gpu = system.GPU().Maxwell3D();
1093 auto& flags = gpu.dirty.flags;
1094 if (!flags[Dirty::PolygonModes]) {
1095 return;
1096 }
1097 flags[Dirty::PolygonModes] = false;
1098
1099 if (gpu.regs.fill_rectangle) {
1100 if (!GLAD_GL_NV_fill_rectangle) {
1101 LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
1102 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
1103 return;
1104 }
1105
1106 flags[Dirty::PolygonModeFront] = true;
1107 flags[Dirty::PolygonModeBack] = true;
1108 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
1109 return;
1110 }
1111
1112 if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
1113 flags[Dirty::PolygonModeFront] = false;
1114 flags[Dirty::PolygonModeBack] = false;
1115 glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
1116 return;
1117 }
1118
1119 if (flags[Dirty::PolygonModeFront]) {
1120 flags[Dirty::PolygonModeFront] = false;
1121 glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
1122 }
1123
1124 if (flags[Dirty::PolygonModeBack]) {
1125 flags[Dirty::PolygonModeBack] = false;
1126 glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
1127 }
1129} 1128}
1130 1129
1131void RasterizerOpenGL::SyncColorMask() { 1130void RasterizerOpenGL::SyncColorMask() {
1132 auto& maxwell3d = system.GPU().Maxwell3D(); 1131 auto& gpu = system.GPU().Maxwell3D();
1133 if (!maxwell3d.dirty.color_mask) { 1132 auto& flags = gpu.dirty.flags;
1133 if (!flags[Dirty::ColorMasks]) {
1134 return; 1134 return;
1135 } 1135 }
1136 const auto& regs = maxwell3d.regs; 1136 flags[Dirty::ColorMasks] = false;
1137
1138 const bool force = flags[Dirty::ColorMaskCommon];
1139 flags[Dirty::ColorMaskCommon] = false;
1140
1141 const auto& regs = gpu.regs;
1142 if (regs.color_mask_common) {
1143 if (!force && !flags[Dirty::ColorMask0]) {
1144 return;
1145 }
1146 flags[Dirty::ColorMask0] = false;
1137 1147
1138 const std::size_t count = 1148 auto& mask = regs.color_mask[0];
1139 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1149 glColorMask(mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
1140 for (std::size_t i = 0; i < count; i++) { 1150 return;
1141 const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
1142 auto& dest = state.color_mask[i];
1143 dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
1144 dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
1145 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
1146 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
1147 } 1151 }
1148 1152
1149 state.MarkDirtyColorMask(); 1153 // Path without color_mask_common set
1150 maxwell3d.dirty.color_mask = false; 1154 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
1155 if (!force && !flags[Dirty::ColorMask0 + i]) {
1156 continue;
1157 }
1158 flags[Dirty::ColorMask0 + i] = false;
1159
1160 const auto& mask = regs.color_mask[i];
1161 glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
1162 }
1151} 1163}
1152 1164
1153void RasterizerOpenGL::SyncMultiSampleState() { 1165void RasterizerOpenGL::SyncMultiSampleState() {
1166 auto& gpu = system.GPU().Maxwell3D();
1167 auto& flags = gpu.dirty.flags;
1168 if (!flags[Dirty::MultisampleControl]) {
1169 return;
1170 }
1171 flags[Dirty::MultisampleControl] = false;
1172
1154 const auto& regs = system.GPU().Maxwell3D().regs; 1173 const auto& regs = system.GPU().Maxwell3D().regs;
1155 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1174 oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
1156 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1175 oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
1157} 1176}
1158 1177
1159void RasterizerOpenGL::SyncFragmentColorClampState() { 1178void RasterizerOpenGL::SyncFragmentColorClampState() {
1160 const auto& regs = system.GPU().Maxwell3D().regs; 1179 auto& gpu = system.GPU().Maxwell3D();
1161 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1180 auto& flags = gpu.dirty.flags;
1181 if (!flags[Dirty::FragmentClampColor]) {
1182 return;
1183 }
1184 flags[Dirty::FragmentClampColor] = false;
1185
1186 glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
1162} 1187}
1163 1188
1164void RasterizerOpenGL::SyncBlendState() { 1189void RasterizerOpenGL::SyncBlendState() {
1165 auto& maxwell3d = system.GPU().Maxwell3D(); 1190 auto& gpu = system.GPU().Maxwell3D();
1166 if (!maxwell3d.dirty.blend_state) { 1191 auto& flags = gpu.dirty.flags;
1192 const auto& regs = gpu.regs;
1193
1194 if (flags[Dirty::BlendColor]) {
1195 flags[Dirty::BlendColor] = false;
1196 glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
1197 regs.blend_color.a);
1198 }
1199
1200 // TODO(Rodrigo): Revisit blending, there are several registers we are not reading
1201
1202 if (!flags[Dirty::BlendStates]) {
1167 return; 1203 return;
1168 } 1204 }
1169 const auto& regs = maxwell3d.regs; 1205 flags[Dirty::BlendStates] = false;
1170 1206
1171 state.blend_color.red = regs.blend_color.r; 1207 if (!regs.independent_blend_enable) {
1172 state.blend_color.green = regs.blend_color.g; 1208 if (!regs.blend.enable[0]) {
1173 state.blend_color.blue = regs.blend_color.b; 1209 glDisable(GL_BLEND);
1174 state.blend_color.alpha = regs.blend_color.a; 1210 return;
1175
1176 state.independant_blend.enabled = regs.independent_blend_enable;
1177 if (!state.independant_blend.enabled) {
1178 auto& blend = state.blend[0];
1179 const auto& src = regs.blend;
1180 blend.enabled = src.enable[0] != 0;
1181 if (blend.enabled) {
1182 blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
1183 blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
1184 blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
1185 blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
1186 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1187 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1188 }
1189 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1190 state.blend[i].enabled = false;
1191 } 1211 }
1192 maxwell3d.dirty.blend_state = false; 1212 glEnable(GL_BLEND);
1193 state.MarkDirtyBlendState(); 1213 glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
1214 MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
1215 MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
1216 MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
1217 glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
1218 MaxwellToGL::BlendEquation(regs.blend.equation_a));
1194 return; 1219 return;
1195 } 1220 }
1196 1221
1197 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1222 const bool force = flags[Dirty::BlendIndependentEnabled];
1198 auto& blend = state.blend[i]; 1223 flags[Dirty::BlendIndependentEnabled] = false;
1199 const auto& src = regs.independent_blend[i]; 1224
1200 blend.enabled = regs.blend.enable[i] != 0; 1225 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
1201 if (!blend.enabled) 1226 if (!force && !flags[Dirty::BlendState0 + i]) {
1202 continue; 1227 continue;
1203 blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); 1228 }
1204 blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); 1229 flags[Dirty::BlendState0 + i] = false;
1205 blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); 1230
1206 blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); 1231 if (!regs.blend.enable[i]) {
1207 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1232 glDisablei(GL_BLEND, static_cast<GLuint>(i));
1208 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1233 continue;
1209 } 1234 }
1235 glEnablei(GL_BLEND, static_cast<GLuint>(i));
1210 1236
1211 state.MarkDirtyBlendState(); 1237 const auto& src = regs.independent_blend[i];
1212 maxwell3d.dirty.blend_state = false; 1238 glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
1239 MaxwellToGL::BlendFunc(src.factor_dest_rgb),
1240 MaxwellToGL::BlendFunc(src.factor_source_a),
1241 MaxwellToGL::BlendFunc(src.factor_dest_a));
1242 glBlendEquationSeparatei(static_cast<GLuint>(i),
1243 MaxwellToGL::BlendEquation(src.equation_rgb),
1244 MaxwellToGL::BlendEquation(src.equation_a));
1245 }
1213} 1246}
1214 1247
1215void RasterizerOpenGL::SyncLogicOpState() { 1248void RasterizerOpenGL::SyncLogicOpState() {
1216 const auto& regs = system.GPU().Maxwell3D().regs; 1249 auto& gpu = system.GPU().Maxwell3D();
1250 auto& flags = gpu.dirty.flags;
1251 if (!flags[Dirty::LogicOp]) {
1252 return;
1253 }
1254 flags[Dirty::LogicOp] = false;
1217 1255
1218 state.logic_op.enabled = regs.logic_op.enable != 0; 1256 const auto& regs = gpu.regs;
1257 if (regs.logic_op.enable) {
1258 glEnable(GL_COLOR_LOGIC_OP);
1259 glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
1260 } else {
1261 glDisable(GL_COLOR_LOGIC_OP);
1262 }
1263}
1219 1264
1220 if (!state.logic_op.enabled) 1265void RasterizerOpenGL::SyncScissorTest() {
1266 auto& gpu = system.GPU().Maxwell3D();
1267 auto& flags = gpu.dirty.flags;
1268 if (!flags[Dirty::Scissors]) {
1221 return; 1269 return;
1270 }
1271 flags[Dirty::Scissors] = false;
1222 1272
1223 ASSERT_MSG(regs.blend.enable[0] == 0, 1273 const auto& regs = gpu.regs;
1224 "Blending and logic op can't be enabled at the same time."); 1274 for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
1225 1275 if (!flags[Dirty::Scissor0 + index]) {
1226 state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); 1276 continue;
1227} 1277 }
1278 flags[Dirty::Scissor0 + index] = false;
1228 1279
1229void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1280 const auto& src = regs.scissor_test[index];
1230 const auto& regs = system.GPU().Maxwell3D().regs; 1281 if (src.enable) {
1231 const bool geometry_shaders_enabled = 1282 glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1232 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1283 glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
1233 const std::size_t viewport_count = 1284 src.max_x - src.min_x, src.max_y - src.min_y);
1234 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; 1285 } else {
1235 for (std::size_t i = 0; i < viewport_count; i++) { 1286 glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1236 const auto& src = regs.scissor_test[i];
1237 auto& dst = current_state.viewports[i].scissor;
1238 dst.enabled = (src.enable != 0);
1239 if (dst.enabled == 0) {
1240 return;
1241 } 1287 }
1242 const u32 width = src.max_x - src.min_x;
1243 const u32 height = src.max_y - src.min_y;
1244 dst.x = src.min_x;
1245 dst.y = src.min_y;
1246 dst.width = width;
1247 dst.height = height;
1248 } 1288 }
1249} 1289}
1250 1290
1251void RasterizerOpenGL::SyncTransformFeedback() {
1252 const auto& regs = system.GPU().Maxwell3D().regs;
1253 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1254}
1255
1256void RasterizerOpenGL::SyncPointState() { 1291void RasterizerOpenGL::SyncPointState() {
1257 const auto& regs = system.GPU().Maxwell3D().regs; 1292 auto& gpu = system.GPU().Maxwell3D();
1293 auto& flags = gpu.dirty.flags;
1294 if (!flags[Dirty::PointSize]) {
1295 return;
1296 }
1297 flags[Dirty::PointSize] = false;
1298
1299 oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
1300
1301 if (gpu.regs.vp_point_size.enable) {
1302 // glPointSize only takes effect while GL_PROGRAM_POINT_SIZE is disabled, so the fixed size does not need updating here.
1303 glEnable(GL_PROGRAM_POINT_SIZE);
1304 return;
1305 }
1306
1258 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid 1307 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1259 // in OpenGL). 1308 // in OpenGL).
1260 state.point.program_control = regs.vp_point_size.enable != 0; 1309 glPointSize(std::max(1.0f, gpu.regs.point_size));
1261 state.point.sprite = regs.point_sprite_enable != 0; 1310 glDisable(GL_PROGRAM_POINT_SIZE);
1262 state.point.size = std::max(1.0f, regs.point_size);
1263} 1311}
1264 1312
1265void RasterizerOpenGL::SyncPolygonOffset() { 1313void RasterizerOpenGL::SyncPolygonOffset() {
1266 auto& maxwell3d = system.GPU().Maxwell3D(); 1314 auto& gpu = system.GPU().Maxwell3D();
1267 if (!maxwell3d.dirty.polygon_offset) { 1315 auto& flags = gpu.dirty.flags;
1316 if (!flags[Dirty::PolygonOffset]) {
1268 return; 1317 return;
1269 } 1318 }
1270 const auto& regs = maxwell3d.regs; 1319 flags[Dirty::PolygonOffset] = false;
1271
1272 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1273 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1274 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1275 1320
1276 // Hardware divides polygon offset units by two 1321 const auto& regs = gpu.regs;
1277 state.polygon_offset.units = regs.polygon_offset_units / 2.0f; 1322 oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
1278 state.polygon_offset.factor = regs.polygon_offset_factor; 1323 oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
1279 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1324 oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
1280 1325
1281 state.MarkDirtyPolygonOffset(); 1326 if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
1282 maxwell3d.dirty.polygon_offset = false; 1327 regs.polygon_offset_point_enable) {
1328 // Hardware divides polygon offset units by two
1329 glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
1330 regs.polygon_offset_clamp);
1331 }
1283} 1332}
1284 1333
1285void RasterizerOpenGL::SyncAlphaTest() { 1334void RasterizerOpenGL::SyncAlphaTest() {
1335 auto& gpu = system.GPU().Maxwell3D();
1336 auto& flags = gpu.dirty.flags;
1337 if (!flags[Dirty::AlphaTest]) {
1338 return;
1339 }
1340 flags[Dirty::AlphaTest] = false;
1341
1342 const auto& regs = gpu.regs;
1343 if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
1344 LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
1345 }
1346
1347 if (regs.alpha_test_enabled) {
1348 glEnable(GL_ALPHA_TEST);
1349 glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
1350 } else {
1351 glDisable(GL_ALPHA_TEST);
1352 }
1353}
1354
1355void RasterizerOpenGL::SyncFramebufferSRGB() {
1356 auto& gpu = system.GPU().Maxwell3D();
1357 auto& flags = gpu.dirty.flags;
1358 if (!flags[Dirty::FramebufferSRGB]) {
1359 return;
1360 }
1361 flags[Dirty::FramebufferSRGB] = false;
1362
1363 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
1364}
1365
1366void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1286 const auto& regs = system.GPU().Maxwell3D().regs; 1367 const auto& regs = system.GPU().Maxwell3D().regs;
1287 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, 1368 if (regs.tfb_enabled == 0) {
1288 "Alpha Testing is enabled with more than one rendertarget"); 1369 return;
1370 }
1371
1372 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1373 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1374 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
1289 1375
1290 state.alpha_test.enabled = regs.alpha_test_enabled; 1376 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1291 if (!state.alpha_test.enabled) { 1377 const auto& binding = regs.tfb_bindings[index];
1378 if (!binding.buffer_enable) {
1379 if (enabled_transform_feedback_buffers[index]) {
1380 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
1381 0);
1382 }
1383 enabled_transform_feedback_buffers[index] = false;
1384 continue;
1385 }
1386 enabled_transform_feedback_buffers[index] = true;
1387
1388 auto& tfb_buffer = transform_feedback_buffers[index];
1389 tfb_buffer.Create();
1390
1391 const GLuint handle = tfb_buffer.handle;
1392 const std::size_t size = binding.buffer_size;
1393 glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
1394 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
1395 static_cast<GLsizeiptr>(size));
1396 }
1397
1398 glBeginTransformFeedback(GL_POINTS);
1399}
1400
1401void RasterizerOpenGL::EndTransformFeedback() {
1402 const auto& regs = system.GPU().Maxwell3D().regs;
1403 if (regs.tfb_enabled == 0) {
1292 return; 1404 return;
1293 } 1405 }
1294 state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); 1406
1295 state.alpha_test.ref = regs.alpha_test_ref; 1407 glEndTransformFeedback();
1408
1409 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1410 const auto& binding = regs.tfb_bindings[index];
1411 if (!binding.buffer_enable) {
1412 continue;
1413 }
1414 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1415
1416 const GLuint handle = transform_feedback_buffers[index].handle;
1417 const GPUVAddr gpu_addr = binding.Address();
1418 const std::size_t size = binding.buffer_size;
1419 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1420 glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
1421 }
1296} 1422}
1297 1423
1298} // namespace OpenGL 1424} // namespace OpenGL
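Note: the Sync* helpers above lean on an oglEnable wrapper whose definition sits earlier in gl_rasterizer.cpp, outside this diff. A minimal sketch of what that helper is assumed to look like:

    #include <glad/glad.h>

    // Forwards a guest enable bit to either glEnable or glDisable from a single call site.
    void oglEnable(GLenum cap, bool state) {
        (state ? glEnable : glDisable)(cap);
    }

With it, each capability toggle in the new Sync* functions stays a one-liner, e.g. oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near) in SyncDepthClamp.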
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 68abe9a21..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,7 @@
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 32#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state.h" 33#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 34#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/renderer_opengl/utils.h" 35#include "video_core/renderer_opengl/utils.h"
36#include "video_core/textures/texture.h" 36#include "video_core/textures/texture.h"
@@ -55,7 +55,8 @@ struct DrawParameters;
55class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 55class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
56public: 56public:
57 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 57 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
58 ScreenInfo& info); 58 ScreenInfo& info, GLShader::ProgramManager& program_manager,
59 StateTracker& state_tracker);
59 ~RasterizerOpenGL() override; 60 ~RasterizerOpenGL() override;
60 61
61 void Draw(bool is_indexed, bool is_instanced) override; 62 void Draw(bool is_indexed, bool is_instanced) override;
@@ -76,6 +77,7 @@ public:
76 u32 pixel_stride) override; 77 u32 pixel_stride) override;
77 void LoadDiskResources(const std::atomic_bool& stop_loading, 78 void LoadDiskResources(const std::atomic_bool& stop_loading,
78 const VideoCore::DiskResourceLoadCallback& callback) override; 79 const VideoCore::DiskResourceLoadCallback& callback) override;
80 void SetupDirtyFlags() override;
79 81
80 /// Returns true when there are commands queued to the OpenGL server. 82 /// Returns true when there are commands queued to the OpenGL server.
81 bool AnyCommandQueued() const { 83 bool AnyCommandQueued() const {
@@ -86,8 +88,7 @@ private:
86 /// Configures the color and depth framebuffer states. 88 /// Configures the color and depth framebuffer states.
87 void ConfigureFramebuffers(); 89 void ConfigureFramebuffers();
88 90
89 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, 91 void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
90 bool using_depth_fb, bool using_stencil_fb);
91 92
92 /// Configures the current constbuffers to use for the draw command. 93 /// Configures the current constbuffers to use for the draw command.
93 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); 94 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
@@ -97,7 +98,7 @@ private:
97 98
98 /// Configures a constant buffer. 99 /// Configures a constant buffer.
99 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 100 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
100 const GLShader::ConstBufferEntry& entry); 101 const ConstBufferEntry& entry);
101 102
102 /// Configures the current global memory entries to use for the draw command. 103 /// Configures the current global memory entries to use for the draw command.
103 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 104 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -106,7 +107,7 @@ private:
106 void SetupComputeGlobalMemory(const Shader& kernel); 107 void SetupComputeGlobalMemory(const Shader& kernel);
107 108
108 /// Configures a constant buffer. 109 /// Configures a constant buffer.
109 void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 110 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
110 std::size_t size); 111 std::size_t size);
111 112
112 /// Configures the current textures to use for the draw command. 113 /// Configures the current textures to use for the draw command.
@@ -117,7 +118,7 @@ private:
117 118
118 /// Configures a texture. 119 /// Configures a texture.
119 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 120 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
120 const GLShader::SamplerEntry& entry); 121 const SamplerEntry& entry);
121 122
122 /// Configures images in a graphics shader. 123 /// Configures images in a graphics shader.
123 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 124 void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -126,15 +127,16 @@ private:
126 void SetupComputeImages(const Shader& shader); 127 void SetupComputeImages(const Shader& shader);
127 128
128 /// Configures an image. 129 /// Configures an image.
129 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 130 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
130 const GLShader::ImageEntry& entry);
131 131
132 /// Syncs the viewport and depth range to match the guest state 132 /// Syncs the viewport and depth range to match the guest state
133 void SyncViewport(OpenGLState& current_state); 133 void SyncViewport();
134
135 /// Syncs the depth clamp state
136 void SyncDepthClamp();
134 137
135 /// Syncs the clip enabled status to match the guest state 138 /// Syncs the clip enabled status to match the guest state
136 void SyncClipEnabled( 139 void SyncClipEnabled(u32 clip_mask);
137 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
138 140
139 /// Syncs the clip coefficients to match the guest state 141 /// Syncs the clip coefficients to match the guest state
140 void SyncClipCoef(); 142 void SyncClipCoef();
@@ -164,16 +166,16 @@ private:
164 void SyncMultiSampleState(); 166 void SyncMultiSampleState();
165 167
166 /// Syncs the scissor test state to match the guest state 168 /// Syncs the scissor test state to match the guest state
167 void SyncScissorTest(OpenGLState& current_state); 169 void SyncScissorTest();
168
169 /// Syncs the transform feedback state to match the guest state
170 void SyncTransformFeedback();
171 170
172 /// Syncs the point state to match the guest state 171 /// Syncs the point state to match the guest state
173 void SyncPointState(); 172 void SyncPointState();
174 173
175 /// Syncs the rasterizer enable state to match the guest state 174 /// Syncs the rasterizer enable state to match the guest state
176 void SyncRasterizeEnable(OpenGLState& current_state); 175 void SyncRasterizeEnable();
176
177 /// Syncs polygon modes to match the guest state
178 void SyncPolygonModes();
177 179
178 /// Syncs Color Mask 180 /// Syncs Color Mask
179 void SyncColorMask(); 181 void SyncColorMask();
@@ -184,6 +186,15 @@ private:
184 /// Syncs the alpha test state to match the guest state 186 /// Syncs the alpha test state to match the guest state
185 void SyncAlphaTest(); 187 void SyncAlphaTest();
186 188
189 /// Syncs the framebuffer sRGB state to match the guest state
190 void SyncFramebufferSRGB();
191
192 /// Begin a transform feedback
193 void BeginTransformFeedback(GLenum primitive_mode);
194
195 /// End a transform feedback
196 void EndTransformFeedback();
197
187 /// Check for extension that are not strictly required but are needed for correct emulation 198 /// Check for extension that are not strictly required but are needed for correct emulation
188 void CheckExtensions(); 199 void CheckExtensions();
189 200
@@ -191,18 +202,17 @@ private:
191 202
192 std::size_t CalculateIndexBufferSize() const; 203 std::size_t CalculateIndexBufferSize() const;
193 204
194 /// Updates and returns a vertex array object representing current vertex format 205 /// Updates the current vertex format
195 GLuint SetupVertexFormat(); 206 void SetupVertexFormat();
196 207
197 void SetupVertexBuffer(GLuint vao); 208 void SetupVertexBuffer();
198 void SetupVertexInstances(GLuint vao); 209 void SetupVertexInstances();
199 210
200 GLintptr SetupIndexBuffer(); 211 GLintptr SetupIndexBuffer();
201 212
202 void SetupShaders(GLenum primitive_mode); 213 void SetupShaders(GLenum primitive_mode);
203 214
204 const Device device; 215 const Device device;
205 OpenGLState state;
206 216
207 TextureCacheOpenGL texture_cache; 217 TextureCacheOpenGL texture_cache;
208 ShaderCacheOpenGL shader_cache; 218 ShaderCacheOpenGL shader_cache;
@@ -212,22 +222,25 @@ private:
212 222
213 Core::System& system; 223 Core::System& system;
214 ScreenInfo& screen_info; 224 ScreenInfo& screen_info;
215 225 GLShader::ProgramManager& program_manager;
216 std::unique_ptr<GLShader::ProgramManager> shader_program_manager; 226 StateTracker& state_tracker;
217 std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
218 Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
219 OGLVertexArray>
220 vertex_array_cache;
221 227
222 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 228 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
223 OGLBufferCache buffer_cache; 229 OGLBufferCache buffer_cache;
224 230
225 VertexArrayPushBuffer vertex_array_pushbuffer; 231 VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
226 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 232 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
227 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 233 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
228 234
235 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
236 transform_feedback_buffers;
237 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
238 enabled_transform_feedback_buffers;
239
229 /// Number of commands queued to the OpenGL driver. Reset on flush. 240 /// Number of commands queued to the OpenGL driver. Reset on flush.
230 std::size_t num_queued_commands = 0; 241 std::size_t num_queued_commands = 0;
242
243 u32 last_clip_distance_mask = 0;
231}; 244};
232 245
233} // namespace OpenGL 246} // namespace OpenGL
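Note: StateTracker and the Dirty::* flag indices referenced above come from gl_state_tracker.h, which is not part of this excerpt. The pattern is that every Maxwell3D register write flips a per-state dirty bit and each Sync* call consumes it before touching GL. A simplified, illustrative sketch of that mechanism (names and sizes are approximations, not the exact yuzu API):

    #include <bitset>
    #include <cstddef>

    namespace Dirty {
    enum : std::size_t { Viewports, CullTest, FrontFace, StencilTest, BlendStates, Count };
    } // namespace Dirty

    struct DirtyFlags {
        std::bitset<Dirty::Count> flags;
    };

    // Register-write path: the tracker maps the written register offset to a flag index.
    inline void MarkDirty(DirtyFlags& dirty, std::size_t flag) {
        dirty.flags[flag] = true;
    }

    // Each Sync* function early-outs when nothing changed, then clears the bit it owns.
    inline void SyncCullModeSketch(DirtyFlags& dirty) {
        if (!dirty.flags[Dirty::CullTest]) {
            return;
        }
        dirty.flags[Dirty::CullTest] = false;
        // glEnable(GL_CULL_FACE)/glCullFace(...) would be emitted here.
    }

This guard-and-clear preamble is what nearly every Sync* body in gl_rasterizer.cpp above now starts with, replacing the old OpenGLState shadow-state comparisons.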
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index c0aee770f..97803d480 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -8,7 +8,6 @@
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_resource_manager.h" 9#include "video_core/renderer_opengl/gl_resource_manager.h"
10#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
11#include "video_core/renderer_opengl/gl_state.h"
12 11
13MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); 12MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
14MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); 13MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
@@ -20,7 +19,7 @@ void OGLRenderbuffer::Create() {
20 return; 19 return;
21 20
22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 21 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
23 glGenRenderbuffers(1, &handle); 22 glCreateRenderbuffers(1, &handle);
24} 23}
25 24
26void OGLRenderbuffer::Release() { 25void OGLRenderbuffer::Release() {
@@ -29,7 +28,6 @@ void OGLRenderbuffer::Release() {
29 28
30 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 29 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
31 glDeleteRenderbuffers(1, &handle); 30 glDeleteRenderbuffers(1, &handle);
32 OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
33 handle = 0; 31 handle = 0;
34} 32}
35 33
@@ -47,7 +45,6 @@ void OGLTexture::Release() {
47 45
48 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 46 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
49 glDeleteTextures(1, &handle); 47 glDeleteTextures(1, &handle);
50 OpenGLState::GetCurState().UnbindTexture(handle).Apply();
51 handle = 0; 48 handle = 0;
52} 49}
53 50
@@ -65,7 +62,6 @@ void OGLTextureView::Release() {
65 62
66 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 63 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
67 glDeleteTextures(1, &handle); 64 glDeleteTextures(1, &handle);
68 OpenGLState::GetCurState().UnbindTexture(handle).Apply();
69 handle = 0; 65 handle = 0;
70} 66}
71 67
@@ -83,7 +79,6 @@ void OGLSampler::Release() {
83 79
84 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 80 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
85 glDeleteSamplers(1, &handle); 81 glDeleteSamplers(1, &handle);
86 OpenGLState::GetCurState().ResetSampler(handle).Apply();
87 handle = 0; 82 handle = 0;
88} 83}
89 84
@@ -127,7 +122,6 @@ void OGLProgram::Release() {
127 122
128 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 123 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
129 glDeleteProgram(handle); 124 glDeleteProgram(handle);
130 OpenGLState::GetCurState().ResetProgram(handle).Apply();
131 handle = 0; 125 handle = 0;
132} 126}
133 127
@@ -145,7 +139,6 @@ void OGLPipeline::Release() {
145 139
146 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 140 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
147 glDeleteProgramPipelines(1, &handle); 141 glDeleteProgramPipelines(1, &handle);
148 OpenGLState::GetCurState().ResetPipeline(handle).Apply();
149 handle = 0; 142 handle = 0;
150} 143}
151 144
@@ -189,24 +182,6 @@ void OGLSync::Release() {
189 handle = 0; 182 handle = 0;
190} 183}
191 184
192void OGLVertexArray::Create() {
193 if (handle != 0)
194 return;
195
196 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
197 glCreateVertexArrays(1, &handle);
198}
199
200void OGLVertexArray::Release() {
201 if (handle == 0)
202 return;
203
204 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
205 glDeleteVertexArrays(1, &handle);
206 OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
207 handle = 0;
208}
209
210void OGLFramebuffer::Create() { 185void OGLFramebuffer::Create() {
211 if (handle != 0) 186 if (handle != 0)
212 return; 187 return;
@@ -221,7 +196,6 @@ void OGLFramebuffer::Release() {
221 196
222 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 197 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
223 glDeleteFramebuffers(1, &handle); 198 glDeleteFramebuffers(1, &handle);
224 OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
225 handle = 0; 199 handle = 0;
226} 200}
227 201
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 995a4e45e..de93f4212 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -241,31 +241,6 @@ public:
241 GLsync handle = 0; 241 GLsync handle = 0;
242}; 242};
243 243
244class OGLVertexArray : private NonCopyable {
245public:
246 OGLVertexArray() = default;
247
248 OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
249
250 ~OGLVertexArray() {
251 Release();
252 }
253
254 OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
255 Release();
256 handle = std::exchange(o.handle, 0);
257 return *this;
258 }
259
260 /// Creates a new internal OpenGL resource and stores the handle
261 void Create();
262
263 /// Deletes the internal OpenGL resource
264 void Release();
265
266 GLuint handle = 0;
267};
268
269class OGLFramebuffer : private NonCopyable { 244class OGLFramebuffer : private NonCopyable {
270public: 245public:
271 OGLFramebuffer() = default; 246 OGLFramebuffer() = default;
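Note: with OGLVertexArray removed, the rasterizer no longer builds and caches one VAO per guest vertex layout. The SetupVertexFormat/SetupVertexBuffer/SetupVertexInstances split declared in gl_rasterizer.h (bodies outside this excerpt) is assumed to configure a single state-tracked VAO through the separated attribute-format API instead. An illustrative sketch of that approach, not the exact implementation:

    #include <glad/glad.h>

    // The attribute layout is described once per attribute, independent of which buffer feeds it.
    void DescribeAttribute(GLuint attrib, GLuint binding, GLint components, GLuint relative_offset) {
        glEnableVertexAttribArray(attrib);
        glVertexAttribFormat(attrib, components, GL_FLOAT, GL_FALSE, relative_offset);
        glVertexAttribBinding(attrib, binding);
    }

    // Buffers and divisors attach to binding points separately, so a format change never
    // requires rebuilding or re-caching a vertex array object.
    void AttachVertexBuffer(GLuint binding, GLuint buffer, GLintptr offset, GLsizei stride,
                            GLuint divisor) {
        glBindVertexBuffer(binding, buffer, offset, stride);
        glVertexBindingDivisor(binding, divisor);
    }

Keeping format and buffer bindings separate is what lets the vertex_array_cache and its std::map key disappear from gl_rasterizer.h.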
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
index 3ded5ecea..5c174879a 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc
38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); 38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); 40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
41 } else if (tsc.GetMaxAnisotropy() != 1) { 41 } else {
42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); 42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
43 } 43 }
44 44
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 489eb143c..e3d31c3eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,12 +2,16 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <atomic>
+#include <functional>
 #include <mutex>
 #include <optional>
 #include <string>
 #include <thread>
 #include <unordered_set>
+
 #include <boost/functional/hash.hpp>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -22,14 +26,16 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace OpenGL {
 
 using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::ConstBufferLocker;
 using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
 
 namespace {
@@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
 }
 
 /// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+std::size_t CalculateProgramSize(const ProgramCode& program) {
     constexpr std::size_t start_offset = 10;
     // This is the encoded version of BRA that jumps to itself. All Nvidia
     // shaders end with one.
@@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
     }
 }
 
-/// Describes primitive behavior on geometry shaders
-constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
-    switch (primitive_mode) {
-    case GL_POINTS:
-        return {"points", 1};
-    case GL_LINES:
-    case GL_LINE_STRIP:
-        return {"lines", 2};
-    case GL_LINES_ADJACENCY:
-    case GL_LINE_STRIP_ADJACENCY:
-        return {"lines_adjacency", 4};
-    case GL_TRIANGLES:
-    case GL_TRIANGLE_STRIP:
-    case GL_TRIANGLE_FAN:
-        return {"triangles", 3};
-    case GL_TRIANGLES_ADJACENCY:
-    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return {"triangles_adjacency", 6};
-    default:
-        return {"points", 1};
-    }
-}
-
 /// Hashes one (or two) program streams
 u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b) {
+                        const ProgramCode& code_b = {}) {
     u64 unique_identifier = boost::hash_value(code);
     if (is_a) {
         // VertexA programs include two programs
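Note: GetUniqueIdentifier above hashes the raw program stream with boost::hash_value and, for VertexA pairs, folds the second stream into the same value. A small standalone sketch of that idea (MakeIdentifier is an illustrative name and the exact combination step is an assumption, shown here with boost::hash_combine):

    // Illustrative sketch, not the exact yuzu implementation.
    #include <cstdint>
    #include <vector>
    #include <boost/functional/hash.hpp>

    using ProgramCode = std::vector<std::uint64_t>;

    std::uint64_t MakeIdentifier(const ProgramCode& code, const ProgramCode& code_b = {}) {
        std::size_t seed = boost::hash_value(code); // hash the main stream
        if (!code_b.empty()) {
            // VertexA carries two programs; fold the second stream into the seed
            boost::hash_combine(seed, boost::hash_value(code_b));
        }
        return static_cast<std::uint64_t>(seed);
    }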
@@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
     return unique_identifier;
 }
 
-/// Creates an unspecialized program from code streams
-std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
-                         const std::optional<ShaderIR>& ir_b) {
-    switch (shader_type) {
-    case ShaderType::Vertex:
-        return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
-    case ShaderType::Geometry:
-        return GLShader::GenerateGeometryShader(device, ir);
-    case ShaderType::Fragment:
-        return GLShader::GenerateFragmentShader(device, ir);
-    case ShaderType::Compute:
-        return GLShader::GenerateComputeShader(device, ir);
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
-        return {};
-    }
-}
-
 constexpr const char* GetShaderTypeName(ShaderType shader_type) {
     switch (shader_type) {
     case ShaderType::Vertex:
@@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
195 return {}; 160 return {};
196} 161}
197 162
198std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { 163std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
199 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); 164 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
200} 165}
201 166
202Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, 167std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
203 ShaderType shader_type) { 168 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
204 if (shader_type == ShaderType::Compute) { 169 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
205 return system.GPU().KeplerCompute(); 170 entry.graphics_info, entry.compute_info};
206 } else { 171 const auto registry = std::make_shared<Registry>(entry.type, info);
207 return system.GPU().Maxwell3D(); 172 for (const auto& [address, value] : entry.keys) {
208 } 173 const auto [buffer, offset] = address;
209} 174 registry->InsertKey(buffer, offset, value);
210
211std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
212 return std::make_unique<ConstBufferLocker>(shader_type,
213 GetConstBufferEngineInterface(system, shader_type));
214}
215
216void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
217 locker.SetBoundBuffer(usage.bound_buffer);
218 for (const auto& key : usage.keys) {
219 const auto [buffer, offset] = key.first;
220 locker.InsertKey(buffer, offset, key.second);
221 } 175 }
222 for (const auto& [offset, sampler] : usage.bound_samplers) { 176 for (const auto& [offset, sampler] : entry.bound_samplers) {
223 locker.InsertBoundSampler(offset, sampler); 177 registry->InsertBoundSampler(offset, sampler);
224 } 178 }
225 for (const auto& [key, sampler] : usage.bindless_samplers) { 179 for (const auto& [key, sampler] : entry.bindless_samplers) {
226 const auto [buffer, offset] = key; 180 const auto [buffer, offset] = key;
227 locker.InsertBindlessSampler(buffer, offset, sampler); 181 registry->InsertBindlessSampler(buffer, offset, sampler);
228 } 182 }
183 return registry;
229} 184}
230 185
231CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, 186std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
232 const ProgramCode& code, const ProgramCode& code_b, 187 u64 unique_identifier, const ShaderIR& ir,
233 ConstBufferLocker& locker, const ProgramVariant& variant, 188 const Registry& registry, bool hint_retrievable = false) {
234 bool hint_retrievable = false) { 189 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
235 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); 190 LOG_INFO(Render_OpenGL, "{}", shader_id);
236
237 const bool is_compute = shader_type == ShaderType::Compute;
238 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
239 const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
240 std::optional<ShaderIR> ir_b;
241 if (!code_b.empty()) {
242 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
243 }
244
245 std::string source = fmt::format(R"(// {}
246#version 430 core
247#extension GL_ARB_separate_shader_objects : enable
248)",
249 GetShaderId(unique_identifier, shader_type));
250 if (device.HasShaderBallot()) {
251 source += "#extension GL_ARB_shader_ballot : require\n";
252 }
253 if (device.HasVertexViewportLayer()) {
254 source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
255 }
256 if (device.HasImageLoadFormatted()) {
257 source += "#extension GL_EXT_shader_image_load_formatted : require\n";
258 }
259 if (device.HasWarpIntrinsics()) {
260 source += "#extension GL_NV_gpu_shader5 : require\n"
261 "#extension GL_NV_shader_thread_group : require\n"
262 "#extension GL_NV_shader_thread_shuffle : require\n";
263 }
264 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
265 // on places where we don't want to.
266 // Thanks to Ryujinx for finding this workaround.
267 source += "#pragma optionNV(fastmath off)\n";
268
269 if (shader_type == ShaderType::Geometry) {
270 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
271 source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
272 source += fmt::format("layout ({}) in;\n", glsl_topology);
273 }
274 if (shader_type == ShaderType::Compute) {
275 if (variant.local_memory_size > 0) {
276 source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
277 Common::AlignUp(variant.local_memory_size, 4) / 4);
278 }
279 source +=
280 fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
281 variant.block_x, variant.block_y, variant.block_z);
282
283 if (variant.shared_memory_size > 0) {
284 // shared_memory_size is described in number of words
285 source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
286 }
287 }
288
289 source += '\n';
290 source += GenerateGLSL(device, shader_type, ir, ir_b);
291 191
192 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
292 OGLShader shader; 193 OGLShader shader;
293 shader.Create(source.c_str(), GetGLShaderType(shader_type)); 194 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
294 195
295 auto program = std::make_shared<OGLProgram>(); 196 auto program = std::make_shared<OGLProgram>();
296 program->Create(true, hint_retrievable, shader.handle); 197 program->Create(true, hint_retrievable, shader.handle);
@@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
 }
 
 std::unordered_set<GLenum> GetSupportedFormats() {
-    GLint num_formats{};
+    GLint num_formats;
     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
 
     std::vector<GLint> formats(num_formats);
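Note: GetSupportedFormats collects the program binary formats the driver advertises so precompiled cache entries with an unknown format can be rejected before calling glProgramBinary. A minimal sketch of that query (the helper name and set type are illustrative):

    // Illustrative: query the binary formats the driver can reload with glProgramBinary.
    #include <unordered_set>
    #include <vector>
    #include <glad/glad.h>

    std::unordered_set<GLenum> QueryProgramBinaryFormats() {
        GLint num_formats = 0;
        glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);

        std::vector<GLint> formats(num_formats);
        glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());

        return {formats.begin(), formats.end()};
    }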
@@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
313 214
314} // Anonymous namespace 215} // Anonymous namespace
315 216
316CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, 217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
317 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) 218 std::shared_ptr<VideoCommon::Shader::Registry> registry,
318 : RasterizerCacheObject{params.host_ptr}, system{params.system}, 219 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
319 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, 220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
320 unique_identifier{params.unique_identifier}, shader_type{shader_type}, 221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
321 entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { 222
322 if (!params.precompiled_variants) { 223CachedShader::~CachedShader() = default;
323 return; 224
324 } 225GLuint CachedShader::GetHandle() const {
325 for (const auto& pair : *params.precompiled_variants) { 226 DEBUG_ASSERT(registry->IsConsistent());
326 auto locker = MakeLocker(system, shader_type); 227 return program->handle;
327 const auto& usage = pair->first;
328 FillLocker(*locker, usage);
329
330 std::unique_ptr<LockerVariant>* locker_variant = nullptr;
331 const auto it =
332 std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
333 return variant->locker->HasEqualKeys(*locker);
334 });
335 if (it == locker_variants.end()) {
336 locker_variant = &locker_variants.emplace_back();
337 *locker_variant = std::make_unique<LockerVariant>();
338 locker_variant->get()->locker = std::move(locker);
339 } else {
340 locker_variant = &*it;
341 }
342 locker_variant->get()->programs.emplace(usage.variant, pair->second);
343 }
344} 228}
345 229
346Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 230Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
347 Maxwell::ShaderProgram program_type, ProgramCode code, 231 Maxwell::ShaderProgram program_type, ProgramCode code,
348 ProgramCode code_b) { 232 ProgramCode code_b) {
349 const auto shader_type = GetShaderType(program_type); 233 const auto shader_type = GetShaderType(program_type);
350 params.disk_cache.SaveRaw( 234 const std::size_t size_in_bytes = code.size() * sizeof(u64);
351 ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
352 235
353 ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); 236 auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
354 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); 237 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
355 // TODO(Rodrigo): Handle VertexA shaders 238 // TODO(Rodrigo): Handle VertexA shaders
356 // std::optional<ShaderIR> ir_b; 239 // std::optional<ShaderIR> ir_b;
357 // if (!code_b.empty()) { 240 // if (!code_b.empty()) {
358 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); 241 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
359 // } 242 // }
360 return std::shared_ptr<CachedShader>(new CachedShader( 243 auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
361 params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); 244
245 ShaderDiskCacheEntry entry;
246 entry.type = shader_type;
247 entry.code = std::move(code);
248 entry.code_b = std::move(code_b);
249 entry.unique_identifier = params.unique_identifier;
250 entry.bound_buffer = registry->GetBoundBuffer();
251 entry.graphics_info = registry->GetGraphicsInfo();
252 entry.keys = registry->GetKeys();
253 entry.bound_samplers = registry->GetBoundSamplers();
254 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry));
256
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
258 size_in_bytes, std::move(registry),
259 MakeEntries(ir), std::move(program)));
362} 260}
363 261
364Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
365 params.disk_cache.SaveRaw( 263 const std::size_t size_in_bytes = code.size() * sizeof(u64);
366 ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); 264
367 265 auto& engine = params.system.GPU().KeplerCompute();
368 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, 266 auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
369 params.system.GPU().KeplerCompute()); 267 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
370 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); 268 const u64 uid = params.unique_identifier;
371 return std::shared_ptr<CachedShader>(new CachedShader( 269 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
372 params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); 270
271 ShaderDiskCacheEntry entry;
272 entry.type = ShaderType::Compute;
273 entry.code = std::move(code);
274 entry.unique_identifier = uid;
275 entry.bound_buffer = registry->GetBoundBuffer();
276 entry.compute_info = registry->GetComputeInfo();
277 entry.keys = registry->GetKeys();
278 entry.bound_samplers = registry->GetBoundSamplers();
279 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry));
281
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
283 size_in_bytes, std::move(registry),
284 MakeEntries(ir), std::move(program)));
373} 285}
374 286
375Shader CachedShader::CreateFromCache(const ShaderParameters& params, 287Shader CachedShader::CreateFromCache(const ShaderParameters& params,
376 const UnspecializedShader& unspecialized) { 288 const PrecompiledShader& precompiled_shader,
377 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, 289 std::size_t size_in_bytes) {
378 unspecialized.entries, unspecialized.code, 290 return std::shared_ptr<CachedShader>(new CachedShader(
379 unspecialized.code_b)); 291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
380} 292 precompiled_shader.entries, precompiled_shader.program));
381
382GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
383 EnsureValidLockerVariant();
384
385 const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
386 auto& program = entry->second;
387 if (!is_cache_miss) {
388 return program->handle;
389 }
390
391 program = BuildShader(device, unique_identifier, shader_type, code, code_b,
392 *curr_locker_variant->locker, variant);
393 disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
394
395 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
396 return program->handle;
397}
398
399bool CachedShader::EnsureValidLockerVariant() {
400 const auto previous_variant = curr_locker_variant;
401 if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
402 curr_locker_variant = nullptr;
403 }
404 if (!curr_locker_variant) {
405 for (auto& variant : locker_variants) {
406 if (variant->locker->IsConsistent()) {
407 curr_locker_variant = variant.get();
408 }
409 }
410 }
411 if (!curr_locker_variant) {
412 auto& new_variant = locker_variants.emplace_back();
413 new_variant = std::make_unique<LockerVariant>();
414 new_variant->locker = MakeLocker(system, shader_type);
415 curr_locker_variant = new_variant.get();
416 }
417 return previous_variant == curr_locker_variant;
418}
419
420ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
421 const ConstBufferLocker& locker) const {
422 return ShaderDiskCacheUsage{unique_identifier, variant,
423 locker.GetBoundBuffer(), locker.GetKeys(),
424 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
425} 293}
426 294
427ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
431 299
432void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 300void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
433 const VideoCore::DiskResourceLoadCallback& callback) { 301 const VideoCore::DiskResourceLoadCallback& callback) {
434 const auto transferable = disk_cache.LoadTransferable(); 302 const std::optional transferable = disk_cache.LoadTransferable();
435 if (!transferable) { 303 if (!transferable) {
436 return; 304 return;
437 } 305 }
438 const auto [raws, shader_usages] = *transferable;
439 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
440 return;
441 }
442 306
443 const auto dumps = disk_cache.LoadPrecompiled(); 307 const std::vector gl_cache = disk_cache.LoadPrecompiled();
444 const auto supported_formats = GetSupportedFormats(); 308 const auto supported_formats = GetSupportedFormats();
445 309
446 // Track if precompiled cache was altered during loading to know if we have to 310 // Track if precompiled cache was altered during loading to know if we have to
@@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
449 313
450 // Inform the frontend about shader build initialization 314 // Inform the frontend about shader build initialization
451 if (callback) { 315 if (callback) {
452 callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); 316 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
453 } 317 }
454 318
455 std::mutex mutex; 319 std::mutex mutex;
456 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex 320 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
457 std::atomic_bool compilation_failed = false; 321 std::atomic_bool gl_cache_failed = false;
458 322
459 const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, 323 const auto find_precompiled = [&gl_cache](u64 id) {
460 std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, 324 return std::find_if(gl_cache.begin(), gl_cache.end(),
461 const ShaderDumpsMap& dumps) { 325 [id](const auto& entry) { return entry.unique_identifier == id; });
326 };
327
328 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
329 std::size_t end) {
462 context->MakeCurrent(); 330 context->MakeCurrent();
463 SCOPE_EXIT({ return context->DoneCurrent(); }); 331 SCOPE_EXIT({ return context->DoneCurrent(); });
464 332
465 for (std::size_t i = begin; i < end; ++i) { 333 for (std::size_t i = begin; i < end; ++i) {
466 if (stop_loading || compilation_failed) { 334 if (stop_loading) {
467 return; 335 return;
468 } 336 }
469 const auto& usage{shader_usages[i]}; 337 const auto& entry = (*transferable)[i];
470 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 338 const u64 uid = entry.unique_identifier;
471 const auto dump{dumps.find(usage)}; 339 const auto it = find_precompiled(uid);
472 340 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
473 CachedProgram shader; 341
474 if (dump != dumps.end()) { 342 const bool is_compute = entry.type == ShaderType::Compute;
475 // If the shader is dumped, attempt to load it with 343 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
476 shader = GeneratePrecompiledProgram(dump->second, supported_formats); 344 auto registry = MakeRegistry(entry);
477 if (!shader) { 345 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
478 compilation_failed = true; 346
479 return; 347 std::shared_ptr<OGLProgram> program;
348 if (precompiled_entry) {
349 // If the shader is precompiled, attempt to load it with
350 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
351 if (!program) {
352 gl_cache_failed = true;
480 } 353 }
481 } 354 }
482 if (!shader) { 355 if (!program) {
483 auto locker{MakeLocker(system, unspecialized.type)}; 356 // Otherwise compile it from GLSL
484 FillLocker(*locker, usage); 357 program = BuildShader(device, entry.type, uid, ir, *registry, true);
485
486 shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
487 unspecialized.code, unspecialized.code_b, *locker,
488 usage.variant, true);
489 } 358 }
490 359
360 PrecompiledShader shader;
361 shader.program = std::move(program);
362 shader.registry = std::move(registry);
363 shader.entries = MakeEntries(ir);
364
491 std::scoped_lock lock{mutex}; 365 std::scoped_lock lock{mutex};
492 if (callback) { 366 if (callback) {
493 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 367 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
494 shader_usages.size()); 368 transferable->size());
495 } 369 }
496 370 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
497 precompiled_programs.emplace(usage, std::move(shader));
498
499 // TODO(Rodrigo): Is there a better way to do this?
500 precompiled_variants[usage.unique_identifier].push_back(
501 precompiled_programs.find(usage));
502 } 371 }
503 }; 372 };
504 373
505 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; 374 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
506 const std::size_t bucket_size{shader_usages.size() / num_workers}; 375 const std::size_t bucket_size{transferable->size() / num_workers};
507 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 376 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
508 std::vector<std::thread> threads(num_workers); 377 std::vector<std::thread> threads(num_workers);
509 for (std::size_t i = 0; i < num_workers; ++i) { 378 for (std::size_t i = 0; i < num_workers; ++i) {
510 const bool is_last_worker = i + 1 == num_workers; 379 const bool is_last_worker = i + 1 == num_workers;
511 const std::size_t start{bucket_size * i}; 380 const std::size_t start{bucket_size * i};
512 const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; 381 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
513 382
514 // On some platforms the shared context has to be created from the GUI thread 383 // On some platforms the shared context has to be created from the GUI thread
515 contexts[i] = emu_window.CreateSharedContext(); 384 contexts[i] = emu_window.CreateSharedContext();
516 threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); 385 threads[i] = std::thread(worker, contexts[i].get(), start, end);
517 } 386 }
518 for (auto& thread : threads) { 387 for (auto& thread : threads) {
519 thread.join(); 388 thread.join();
520 } 389 }
521 390
522 if (compilation_failed) { 391 if (gl_cache_failed) {
523 // Invalidate the precompiled cache if a shader dumped shader was rejected 392 // Invalidate the precompiled cache if a shader dumped shader was rejected
524 disk_cache.InvalidatePrecompiled(); 393 disk_cache.InvalidatePrecompiled();
525 precompiled_cache_altered = true; 394 precompiled_cache_altered = true;
@@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
532 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 401 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
533 // before precompiling them 402 // before precompiling them
534 403
535 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 404 for (std::size_t i = 0; i < transferable->size(); ++i) {
536 const auto& usage{shader_usages[i]}; 405 const u64 id = (*transferable)[i].unique_identifier;
537 if (dumps.find(usage) == dumps.end()) { 406 const auto it = find_precompiled(id);
538 const auto& program{precompiled_programs.at(usage)}; 407 if (it == gl_cache.end()) {
539 disk_cache.SaveDump(usage, program->handle); 408 const GLuint program = runtime_cache.at(id).program->handle;
409 disk_cache.SavePrecompiled(id, program);
540 precompiled_cache_altered = true; 410 precompiled_cache_altered = true;
541 } 411 }
542 } 412 }
@@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
546 } 416 }
547} 417}
548 418
549const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { 419std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
550 const auto it = precompiled_variants.find(unique_identifier); 420 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
551 return it == precompiled_variants.end() ? nullptr : &it->second; 421 const std::unordered_set<GLenum>& supported_formats) {
552} 422 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
553 423 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
554CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
555 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
556 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
557 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
558 return {}; 424 return {};
559 } 425 }
560 426
561 CachedProgram shader = std::make_shared<OGLProgram>(); 427 auto program = std::make_shared<OGLProgram>();
562 shader->handle = glCreateProgram(); 428 program->handle = glCreateProgram();
563 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); 429 glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
564 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), 430 glProgramBinary(program->handle, precompiled_entry.binary_format,
565 static_cast<GLsizei>(dump.binary.size())); 431 precompiled_entry.binary.data(),
566 432 static_cast<GLsizei>(precompiled_entry.binary.size()));
567 GLint link_status{}; 433
568 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); 434 GLint link_status;
435 glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
569 if (link_status == GL_FALSE) { 436 if (link_status == GL_FALSE) {
570 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); 437 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
571 return {}; 438 return {};
572 } 439 }
573 440
574 return shader; 441 return program;
575}
576
577bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
578 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
579 const std::vector<ShaderDiskCacheRaw>& raws) {
580 if (callback) {
581 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
582 }
583
584 for (std::size_t i = 0; i < raws.size(); ++i) {
585 if (stop_loading) {
586 return false;
587 }
588 const auto& raw{raws[i]};
589 const u64 unique_identifier{raw.GetUniqueIdentifier()};
590 const u64 calculated_hash{
591 GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
592 if (unique_identifier != calculated_hash) {
593 LOG_ERROR(Render_OpenGL,
594 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
595 "removing shader cache",
596 raw.GetUniqueIdentifier(), calculated_hash);
597 disk_cache.InvalidateTransferable();
598 return false;
599 }
600
601 const u32 main_offset =
602 raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
603 ConstBufferLocker locker(raw.GetType());
604 const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
605 // TODO(Rodrigo): Handle VertexA shaders
606 // std::optional<ShaderIR> ir_b;
607 // if (raw.HasProgramA()) {
608 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
609 // }
610
611 UnspecializedShader unspecialized;
612 unspecialized.entries = GLShader::GetEntries(ir);
613 unspecialized.type = raw.GetType();
614 unspecialized.code = raw.GetCode();
615 unspecialized.code_b = raw.GetCodeB();
616 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
617
618 if (callback) {
619 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
620 }
621 }
622 return true;
623} 442}
624 443
625Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 444Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
626 if (!system.GPU().Maxwell3D().dirty.shaders) { 445 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
627 return last_shaders[static_cast<std::size_t>(program)]; 446 return last_shaders[static_cast<std::size_t>(program)];
628 } 447 }
629 448
@@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
647 466
648 const auto unique_identifier = GetUniqueIdentifier( 467 const auto unique_identifier = GetUniqueIdentifier(
649 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 468 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
650 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
651 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 469 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
652 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 470 const ShaderParameters params{system, disk_cache, device,
653 cpu_addr, host_ptr, unique_identifier}; 471 cpu_addr, host_ptr, unique_identifier};
654 472
655 const auto found = unspecialized_shaders.find(unique_identifier); 473 const auto found = runtime_cache.find(unique_identifier);
656 if (found == unspecialized_shaders.end()) { 474 if (found == runtime_cache.end()) {
657 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 475 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
658 std::move(code_b)); 476 std::move(code_b));
659 } else { 477 } else {
660 shader = CachedShader::CreateFromCache(params, found->second); 478 const std::size_t size_in_bytes = code.size() * sizeof(u64);
479 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
661 } 480 }
662 Register(shader); 481 Register(shader);
663 482
@@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
672 return kernel; 491 return kernel;
673 } 492 }
674 493
675 // No kernel found - create a new one 494 // No kernel found, create a new one
676 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 495 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
677 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; 496 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
678 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
679 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 497 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
680 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 498 const ShaderParameters params{system, disk_cache, device,
681 cpu_addr, host_ptr, unique_identifier}; 499 cpu_addr, host_ptr, unique_identifier};
682 500
683 const auto found = unspecialized_shaders.find(unique_identifier); 501 const auto found = runtime_cache.find(unique_identifier);
684 if (found == unspecialized_shaders.end()) { 502 if (found == runtime_cache.end()) {
685 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 503 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
686 } else { 504 } else {
687 kernel = CachedShader::CreateFromCache(params, found->second); 505 const std::size_t size_in_bytes = code.size() * sizeof(u64);
506 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
688 } 507 }
689 508
690 Register(kernel); 509 Register(kernel);
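Note: LoadDiskCache above splits the transferable shader list across hardware_concurrency() + 1 worker threads, each running on its own shared GL context, with the last worker absorbing the remainder bucket. A hedged sketch of just that partitioning (BuildInParallel is an illustrative name; context creation and the per-item work are omitted):

    // Illustrative partitioning only; the real worker compiles shaders on a shared context.
    #include <cstddef>
    #include <functional>
    #include <thread>
    #include <vector>

    void BuildInParallel(std::size_t num_items,
                         const std::function<void(std::size_t, std::size_t)>& worker) {
        const std::size_t num_workers = std::thread::hardware_concurrency() + 1;
        const std::size_t bucket_size = num_items / num_workers;

        std::vector<std::thread> threads;
        threads.reserve(num_workers);
        for (std::size_t i = 0; i < num_workers; ++i) {
            const bool is_last = i + 1 == num_workers;
            const std::size_t begin = bucket_size * i;
            // The last bucket takes the remainder when num_items is not divisible.
            const std::size_t end = is_last ? num_items : begin + bucket_size;
            threads.emplace_back(worker, begin, end);
        }
        for (auto& thread : threads) {
            thread.join();
        }
    }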
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7b1470db3..4935019fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,7 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
41struct UnspecializedShader; 41struct UnspecializedShader;
42 42
43using Shader = std::shared_ptr<CachedShader>; 43using Shader = std::shared_ptr<CachedShader>;
44using CachedProgram = std::shared_ptr<OGLProgram>;
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 45
47using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; 46struct PrecompiledShader {
48 47 std::shared_ptr<OGLProgram> program;
49struct UnspecializedShader { 48 std::shared_ptr<VideoCommon::Shader::Registry> registry;
50 GLShader::ShaderEntries entries; 49 ShaderEntries entries;
51 Tegra::Engines::ShaderType type;
52 ProgramCode code;
53 ProgramCode code_b;
54}; 50};
55 51
56struct ShaderParameters { 52struct ShaderParameters {
57 Core::System& system; 53 Core::System& system;
58 ShaderDiskCacheOpenGL& disk_cache; 54 ShaderDiskCacheOpenGL& disk_cache;
59 const PrecompiledVariants* precompiled_variants;
60 const Device& device; 55 const Device& device;
61 VAddr cpu_addr; 56 VAddr cpu_addr;
62 u8* host_ptr; 57 u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
65 60
66class CachedShader final : public RasterizerCacheObject { 61class CachedShader final : public RasterizerCacheObject {
67public: 62public:
68 static Shader CreateStageFromMemory(const ShaderParameters& params, 63 ~CachedShader();
69 Maxwell::ShaderProgram program_type,
70 ProgramCode program_code, ProgramCode program_code_b);
71 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
72 64
73 static Shader CreateFromCache(const ShaderParameters& params, 65 /// Gets the GL program handle for the shader
74 const UnspecializedShader& unspecialized); 66 GLuint GetHandle() const;
75 67
68 /// Returns the guest CPU address of the shader
76 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
77 return cpu_addr; 70 return cpu_addr;
78 } 71 }
79 72
73 /// Returns the size in bytes of the shader
80 std::size_t GetSizeInBytes() const override { 74 std::size_t GetSizeInBytes() const override {
81 return code.size() * sizeof(u64); 75 return size_in_bytes;
82 } 76 }
83 77
84 /// Gets the shader entries for the shader 78 /// Gets the shader entries for the shader
85 const GLShader::ShaderEntries& GetShaderEntries() const { 79 const ShaderEntries& GetEntries() const {
86 return entries; 80 return entries;
87 } 81 }
88 82
89 /// Gets the GL program handle for the shader 83 static Shader CreateStageFromMemory(const ShaderParameters& params,
90 GLuint GetHandle(const ProgramVariant& variant); 84 Maxwell::ShaderProgram program_type,
91 85 ProgramCode program_code, ProgramCode program_code_b);
92private: 86 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
93 struct LockerVariant {
94 std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
95 std::unordered_map<ProgramVariant, CachedProgram> programs;
96 };
97
98 explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
99 GLShader::ShaderEntries entries, ProgramCode program_code,
100 ProgramCode program_code_b);
101
102 bool EnsureValidLockerVariant();
103
104 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
105 const VideoCommon::Shader::ConstBufferLocker& locker) const;
106
107 Core::System& system;
108 ShaderDiskCacheOpenGL& disk_cache;
109 const Device& device;
110
111 VAddr cpu_addr{};
112
113 u64 unique_identifier{};
114 Tegra::Engines::ShaderType shader_type{};
115
116 GLShader::ShaderEntries entries;
117 87
118 ProgramCode code; 88 static Shader CreateFromCache(const ShaderParameters& params,
119 ProgramCode code_b; 89 const PrecompiledShader& precompiled_shader,
90 std::size_t size_in_bytes);
120 91
121 LockerVariant* curr_locker_variant = nullptr; 92private:
122 std::vector<std::unique_ptr<LockerVariant>> locker_variants; 93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96
97 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program;
123}; 102};
124 103
125class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 104class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
142 void FlushObjectInner(const Shader& object) override {} 121 void FlushObjectInner(const Shader& object) override {}
143 122
144private: 123private:
145 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, 124 std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
146 const VideoCore::DiskResourceLoadCallback& callback, 125 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
147 const std::vector<ShaderDiskCacheRaw>& raws); 126 const std::unordered_set<GLenum>& supported_formats);
148
149 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
150 const std::unordered_set<GLenum>& supported_formats);
151
152 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
153 127
154 Core::System& system; 128 Core::System& system;
155 Core::Frontend::EmuWindow& emu_window; 129 Core::Frontend::EmuWindow& emu_window;
156 const Device& device; 130 const Device& device;
157
158 ShaderDiskCacheOpenGL disk_cache; 131 ShaderDiskCacheOpenGL disk_cache;
159 132 std::unordered_map<u64, PrecompiledShader> runtime_cache;
160 PrecompiledPrograms precompiled_programs;
161 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
162
163 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
164 133
165 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 134 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
166}; 135};
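Note: with this header change the per-variant program maps are gone; ShaderCacheOpenGL keeps one std::unordered_map<u64, PrecompiledShader> (runtime_cache) keyed by the program-stream hash, and GetStageProgram/GetComputeKernel either reuse an entry or build a new CachedShader. A simplified sketch of that lookup-or-build flow (GetOrBuild and the placeholder structs are illustrative, not the real API):

    // Illustrative control flow; the real types carry program, registry and entries.
    #include <cstdint>
    #include <functional>
    #include <memory>
    #include <unordered_map>

    struct PrecompiledShader {};
    struct CachedShader {};
    using Shader = std::shared_ptr<CachedShader>;

    Shader GetOrBuild(std::unordered_map<std::uint64_t, PrecompiledShader>& runtime_cache,
                      std::uint64_t unique_identifier,
                      const std::function<Shader()>& build_from_memory,
                      const std::function<Shader(const PrecompiledShader&)>& wrap_cached) {
        const auto found = runtime_cache.find(unique_identifier);
        if (found == runtime_cache.end()) {
            return build_from_memory();    // decompile + compile a fresh GL program
        }
        return wrap_cached(found->second); // reuse the already-built program and entries
    }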
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4735000b5..2c38f57fd 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
 #include "video_core/shader/ast.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
 
-namespace OpenGL::GLShader {
+namespace OpenGL {
 
 namespace {
 
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
 using Tegra::Shader::IpaSampleMode;
 using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
+using VideoCommon::Shader::Registry;
 
 using namespace std::string_literals;
 using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
 
 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
 
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
 struct TextureOffset {};
 struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
56constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 64constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
57 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
58 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80
81layout (std140, binding = {}) uniform vs_config {{
82 float y_direction;
83}};
84)";
85
59class ShaderWriter final { 86class ShaderWriter final {
60public: 87public:
61 void AddExpression(std::string_view text) { 88 void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
269 } 296 }
270} 297}
271 298
299/// Describes primitive behavior on geometry shaders
300std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
301 switch (topology) {
302 case Maxwell::PrimitiveTopology::Points:
303 return {"points", 1};
304 case Maxwell::PrimitiveTopology::Lines:
305 case Maxwell::PrimitiveTopology::LineStrip:
306 return {"lines", 2};
307 case Maxwell::PrimitiveTopology::LinesAdjacency:
308 case Maxwell::PrimitiveTopology::LineStripAdjacency:
309 return {"lines_adjacency", 4};
310 case Maxwell::PrimitiveTopology::Triangles:
311 case Maxwell::PrimitiveTopology::TriangleStrip:
312 case Maxwell::PrimitiveTopology::TriangleFan:
313 return {"triangles", 3};
314 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
315 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
316 return {"triangles_adjacency", 6};
317 default:
318 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
319 return {"points", 1};
320 }
321}
322
272/// Generates code to use for a swizzle operation. 323/// Generates code to use for a swizzle operation.
273constexpr const char* GetSwizzle(u32 element) { 324constexpr const char* GetSwizzle(std::size_t element) {
274 constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; 325 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
275 return swizzle.at(element); 326 return swizzle.at(element);
276} 327}
277 328
329constexpr const char* GetColorSwizzle(std::size_t element) {
330 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
331 return swizzle.at(element);
332}
333
278/// Translate topology 334/// Translate topology
279std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { 335std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
280 switch (topology) { 336 switch (topology) {
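Note: the topology-to-layout mapping now lives in the decompiler; GetPrimitiveDescription turns the pipeline's Maxwell primitive topology into the GLSL input layout qualifier plus the number of vertices a geometry shader can index. A small sketch of how the declaration could be produced from that pair (MakeGeometryInputLayout is an illustrative helper, not part of the diff):

    // Illustrative: format the geometry-shader input layout from the topology description.
    #include <cstdint>
    #include <string>
    #include <utility>
    #include <fmt/format.h>

    std::string MakeGeometryInputLayout(std::pair<const char*, std::uint32_t> description) {
        const auto [glsl_topology, max_vertices] = description;
        // e.g. {"triangles", 3} emits "layout (triangles) in;" and max_vertices bounds
        // indexing into the gs_* input arrays ("idx % 3" guards in the generated code).
        return fmt::format("layout ({}) in; // up to {} input vertices", glsl_topology,
                           max_vertices);
    }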
@@ -337,15 +393,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
337 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 393 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
338} 394}
339 395
340[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { 396struct GenericVaryingDescription {
341 return stage == ShaderType::Vertex; 397 std::string name;
342} 398 u8 first_element = 0;
399 bool is_scalar = false;
400};
343 401
344class GLSLDecompiler final { 402class GLSLDecompiler final {
345public: 403public:
346 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, 404 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
347 std::string suffix) 405 ShaderType stage, std::string_view identifier, std::string_view suffix)
348 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 406 : device{device}, ir{ir}, registry{registry}, stage{stage},
407 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
408 if (stage != ShaderType::Compute) {
409 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
410 }
411 }
412
413 void Decompile() {
414 DeclareHeader();
415 DeclareVertex();
416 DeclareGeometry();
417 DeclareFragment();
418 DeclareCompute();
419 DeclareInputAttributes();
420 DeclareOutputAttributes();
421 DeclareImages();
422 DeclareSamplers();
423 DeclareGlobalMemory();
424 DeclareConstantBuffers();
425 DeclareLocalMemory();
426 DeclareRegisters();
427 DeclarePredicates();
428 DeclareInternalFlags();
429 DeclareCustomVariables();
430 DeclarePhysicalAttributeReader();
431
432 code.AddLine("void main() {{");
433 ++code.scope;
434
435 if (stage == ShaderType::Vertex) {
436 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
437 }
438
439 if (ir.IsDecompiled()) {
440 DecompileAST();
441 } else {
442 DecompileBranchMode();
443 }
444
445 --code.scope;
446 code.AddLine("}}");
447 }
448
449 std::string GetResult() {
450 return code.GetResult();
451 }
452
453private:
454 friend class ASTDecompiler;
455 friend class ExprDecompiler;
349 456
350 void DecompileBranchMode() { 457 void DecompileBranchMode() {
351 // VM's program counter 458 // VM's program counter
@@ -387,46 +494,40 @@ public:
387 494
388 void DecompileAST(); 495 void DecompileAST();
389 496
390 void Decompile() { 497 void DeclareHeader() {
391 DeclareVertex(); 498 if (!identifier.empty()) {
392 DeclareGeometry(); 499 code.AddLine("// {}", identifier);
393 DeclareRegisters(); 500 }
394 DeclareCustomVariables(); 501 code.AddLine("#version 440 core");
395 DeclarePredicates(); 502 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
396 DeclareLocalMemory(); 503 if (device.HasShaderBallot()) {
397 DeclareInternalFlags(); 504 code.AddLine("#extension GL_ARB_shader_ballot : require");
398 DeclareInputAttributes(); 505 }
399 DeclareOutputAttributes(); 506 if (device.HasVertexViewportLayer()) {
400 DeclareConstantBuffers(); 507 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
401 DeclareGlobalMemory();
402 DeclareSamplers();
403 DeclareImages();
404 DeclarePhysicalAttributeReader();
405
406 code.AddLine("void execute_{}() {{", suffix);
407 ++code.scope;
408
409 if (ir.IsDecompiled()) {
410 DecompileAST();
411 } else {
412 DecompileBranchMode();
413 } 508 }
509 if (device.HasImageLoadFormatted()) {
510 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
511 }
512 if (device.HasWarpIntrinsics()) {
513 code.AddLine("#extension GL_NV_gpu_shader5 : require");
514 code.AddLine("#extension GL_NV_shader_thread_group : require");
515 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
516 }
517 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
518 // operations) on places where we don't want to.
519 // Thanks to Ryujinx for finding this workaround.
520 code.AddLine("#pragma optionNV(fastmath off)");
414 521
415 --code.scope; 522 code.AddNewLine();
416 code.AddLine("}}");
417 }
418 523
419 std::string GetResult() { 524 code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
420 return code.GetResult();
421 } 525 }
422 526
423private:
424 friend class ASTDecompiler;
425 friend class ExprDecompiler;
426
427 void DeclareVertex() { 527 void DeclareVertex() {
428 if (!IsVertexShader(stage)) 528 if (stage != ShaderType::Vertex) {
429 return; 529 return;
530 }
430 531
431 DeclareVertexRedeclarations(); 532 DeclareVertexRedeclarations();
432 } 533 }
@@ -436,9 +537,15 @@ private:
436 return; 537 return;
437 } 538 }
438 539
540 const auto& info = registry.GetGraphicsInfo();
541 const auto input_topology = info.primitive_topology;
542 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
543 max_input_vertices = max_vertices;
544 code.AddLine("layout ({}) in;", glsl_topology);
545
439 const auto topology = GetTopologyName(header.common3.output_topology); 546 const auto topology = GetTopologyName(header.common3.output_topology);
440 const auto max_vertices = header.common4.max_output_vertices.Value(); 547 const auto max_output_vertices = header.common4.max_output_vertices.Value();
441 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); 548 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
442 code.AddNewLine(); 549 code.AddNewLine();
443 550
444 code.AddLine("in gl_PerVertex {{"); 551 code.AddLine("in gl_PerVertex {{");
@@ -450,11 +557,40 @@ private:
450 DeclareVertexRedeclarations(); 557 DeclareVertexRedeclarations();
451 } 558 }
452 559
560 void DeclareFragment() {
561 if (stage != ShaderType::Fragment) {
562 return;
563 }
564 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
565 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
566 }
567 }
568
569 void DeclareCompute() {
570 if (stage != ShaderType::Compute) {
571 return;
572 }
573 const auto& info = registry.GetComputeInfo();
574 if (const u32 size = info.shared_memory_size_in_words; size > 0) {
575 code.AddLine("shared uint smem[{}];", size);
576 code.AddNewLine();
577 }
578 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
579 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
580 code.AddNewLine();
581 }
582
453 void DeclareVertexRedeclarations() { 583 void DeclareVertexRedeclarations() {
454 code.AddLine("out gl_PerVertex {{"); 584 code.AddLine("out gl_PerVertex {{");
455 ++code.scope; 585 ++code.scope;
456 586
457 code.AddLine("vec4 gl_Position;"); 587 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
588 if (!pos_xfb.empty()) {
589 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
590 }
591 const char* pos_type =
592 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
593 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
458 594
459 for (const auto attribute : ir.GetOutputAttributes()) { 595 for (const auto attribute : ir.GetOutputAttributes()) {
460 if (attribute == Attribute::Index::ClipDistances0123 || 596 if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -463,14 +599,14 @@ private:
463 break; 599 break;
464 } 600 }
465 } 601 }
466 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { 602 if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
467 if (ir.UsesLayer()) { 603 if (ir.UsesLayer()) {
468 code.AddLine("int gl_Layer;"); 604 code.AddLine("int gl_Layer;");
469 } 605 }
470 if (ir.UsesViewportIndex()) { 606 if (ir.UsesViewportIndex()) {
471 code.AddLine("int gl_ViewportIndex;"); 607 code.AddLine("int gl_ViewportIndex;");
472 } 608 }
473 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && 609 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
474 !device.HasVertexViewportLayer()) { 610 !device.HasVertexViewportLayer()) {
475 LOG_ERROR( 611 LOG_ERROR(
476 Render_OpenGL, 612 Render_OpenGL,
@@ -525,18 +661,16 @@ private:
525 } 661 }
526 662
527 void DeclareLocalMemory() { 663 void DeclareLocalMemory() {
664 u64 local_memory_size = 0;
528 if (stage == ShaderType::Compute) { 665 if (stage == ShaderType::Compute) {
529 code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); 666 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
530 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); 667 } else {
531 code.AddLine("#endif"); 668 local_memory_size = header.GetLocalMemorySize();
532 return;
533 } 669 }
534
535 const u64 local_memory_size = header.GetLocalMemorySize();
536 if (local_memory_size == 0) { 670 if (local_memory_size == 0) {
537 return; 671 return;
538 } 672 }
539 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 673 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
540 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); 674 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
541 code.AddNewLine(); 675 code.AddNewLine();
542 } 676 }
@@ -589,7 +723,7 @@ private:
589 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { 723 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
590 const u32 location{GetGenericAttributeIndex(index)}; 724 const u32 location{GetGenericAttributeIndex(index)};
591 725
592 std::string name{GetInputAttribute(index)}; 726 std::string name{GetGenericInputAttribute(index)};
593 if (stage == ShaderType::Geometry) { 727 if (stage == ShaderType::Geometry) {
594 name = "gs_" + name + "[]"; 728 name = "gs_" + name + "[]";
595 } 729 }
@@ -626,9 +760,59 @@ private:
626 } 760 }
627 } 761 }
628 762
763 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
764 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
765 const auto it = transform_feedback.find(location);
766 if (it == transform_feedback.end()) {
767 return {};
768 }
769 return it->second.components;
770 }
771
772 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
773 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
774 const auto it = transform_feedback.find(location);
775 if (it == transform_feedback.end()) {
776 return {};
777 }
778
779 const VaryingTFB& tfb = it->second;
780 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
781 tfb.offset, tfb.stride);
782 }
783
629 void DeclareOutputAttribute(Attribute::Index index) { 784 void DeclareOutputAttribute(Attribute::Index index) {
630 const u32 location{GetGenericAttributeIndex(index)}; 785 static constexpr std::string_view swizzle = "xyzw";
631 code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); 786 u8 element = 0;
787 while (element < 4) {
788 auto xfb = GetTransformFeedbackDecoration(index, element);
789 if (!xfb.empty()) {
790 xfb = fmt::format(", {}", xfb);
791 }
792 const std::size_t remainder = 4 - element;
793 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
794 const char* const type = FLOAT_TYPES.at(num_components - 1);
795
796 const u32 location = GetGenericAttributeIndex(index);
797
798 GenericVaryingDescription description;
799 description.first_element = static_cast<u8>(element);
800 description.is_scalar = num_components == 1;
801 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
802 if (element != 0 || num_components != 4) {
803 const std::string_view name_swizzle = swizzle.substr(element, num_components);
804 description.name = fmt::format("{}_{}", description.name, name_swizzle);
805 }
806 for (std::size_t i = 0; i < num_components; ++i) {
807 const u8 offset = static_cast<u8>(location * 4 + element + i);
808 varying_description.insert({offset, description});
809 }
810
811 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
812 xfb, type, description.name);
813
814 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
815 }
632 } 816 }
633 817
634 void DeclareConstantBuffers() { 818 void DeclareConstantBuffers() {
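
DeclareOutputAttribute no longer declares one vec4 per generic output. It walks the four components of each attribute, groups consecutive components that share a transform feedback record (or the untouched remainder), and declares each group with an explicit location/component pair plus any xfb decoration, recording the group in varying_description so later writes can find the right name. The sketch below reproduces that packing in isolation; VaryingTFB, the out_attr naming, and the std::map keyed by location * 4 + element are simplified stand-ins for the structures in this diff:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <string>
    #include <string_view>

    #include <fmt/format.h>

    struct VaryingTFB {
        std::size_t buffer;
        std::size_t stride;
        std::size_t offset;
        std::size_t components;
    };

    // Keyed by attribute location * 4 + element, like the decompiler's transform_feedback map.
    using TfbMap = std::map<std::uint8_t, VaryingTFB>;

    std::string DeclareOutput(std::uint32_t location, const TfbMap& tfb) {
        static constexpr std::array<const char*, 4> FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
        static constexpr std::string_view swizzle = "xyzw";
        std::string result;
        std::uint8_t element = 0;
        while (element < 4) {
            const auto it = tfb.find(static_cast<std::uint8_t>(location * 4 + element));
            const std::size_t remainder = 4 - element;
            const std::size_t num = it != tfb.end() ? it->second.components : remainder;
            std::string xfb;
            if (it != tfb.end()) {
                xfb = fmt::format(", xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}",
                                  it->second.buffer, it->second.offset, it->second.stride);
            }
            std::string name = fmt::format("out_attr{}", location);
            if (element != 0 || num != 4) {
                name += fmt::format("_{}", swizzle.substr(element, num));
            }
            result += fmt::format("layout (location = {}, component = {}{}) out {} {};\n",
                                  location, element, xfb, FLOAT_TYPES.at(num - 1), name);
            element = static_cast<std::uint8_t>(element + num);
        }
        return result;
    }

    // With no transform feedback, DeclareOutput(0, {}) yields
    //   "layout (location = 0, component = 0) out vec4 out_attr0;\n"
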
@@ -925,7 +1109,8 @@ private:
925 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games 1109 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
926 // set a 0x80000000 index for those and the shader fails to build. Find out why 1110 // set a 0x80000000 index for those and the shader fails to build. Find out why
927 // this happens and what its intent is. 1111 // this happens and what its intent is.
928 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); 1112 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1113 max_input_vertices.value());
929 } 1114 }
930 return std::string(name); 1115 return std::string(name);
931 }; 1116 };
@@ -959,7 +1144,7 @@ private:
959 // TODO(Subv): Find out what the values are for the first two elements when inside a 1144 // TODO(Subv): Find out what the values are for the first two elements when inside a
960 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 1145 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
961 // shader. 1146 // shader.
962 ASSERT(IsVertexShader(stage)); 1147 ASSERT(stage == ShaderType::Vertex);
963 switch (element) { 1148 switch (element) {
964 case 2: 1149 case 2:
965 // Config pack's first value is instance_id. 1150 // Config pack's first value is instance_id.
@@ -980,7 +1165,7 @@ private:
980 return {"0", Type::Int}; 1165 return {"0", Type::Int};
981 default: 1166 default:
982 if (IsGenericAttribute(attribute)) { 1167 if (IsGenericAttribute(attribute)) {
983 return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), 1168 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
984 Type::Float}; 1169 Type::Float};
985 } 1170 }
986 break; 1171 break;
@@ -1030,12 +1215,12 @@ private:
1030 UNIMPLEMENTED(); 1215 UNIMPLEMENTED();
1031 return {}; 1216 return {};
1032 case 1: 1217 case 1:
1033 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { 1218 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1034 return {}; 1219 return {};
1035 } 1220 }
1036 return {{"gl_Layer", Type::Int}}; 1221 return {{"gl_Layer", Type::Int}};
1037 case 2: 1222 case 2:
1038 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { 1223 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1039 return {}; 1224 return {};
1040 } 1225 }
1041 return {{"gl_ViewportIndex", Type::Int}}; 1226 return {{"gl_ViewportIndex", Type::Int}};
@@ -1049,8 +1234,7 @@ private:
1049 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; 1234 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
1050 default: 1235 default:
1051 if (IsGenericAttribute(attribute)) { 1236 if (IsGenericAttribute(attribute)) {
1052 return { 1237 return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
1053 {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
1054 } 1238 }
1055 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); 1239 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
1056 return {}; 1240 return {};
@@ -1822,16 +2006,19 @@ private:
1822 expr += GetSampler(meta->sampler); 2006 expr += GetSampler(meta->sampler);
1823 expr += ", "; 2007 expr += ", ";
1824 2008
1825 expr += constructors.at(operation.GetOperandsCount() - 1); 2009 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
1826 expr += '('; 2010 expr += '(';
1827 for (std::size_t i = 0; i < count; ++i) { 2011 for (std::size_t i = 0; i < count; ++i) {
1828 expr += VisitOperand(operation, i).AsInt(); 2012 if (i > 0) {
1829 const std::size_t next = i + 1;
1830 if (next == count)
1831 expr += ')';
1832 else if (next < count)
1833 expr += ", "; 2013 expr += ", ";
2014 }
2015 expr += VisitOperand(operation, i).AsInt();
2016 }
2017 if (meta->array) {
2018 expr += ", ";
2019 expr += Visit(meta->array).AsInt();
1834 } 2020 }
2021 expr += ')';
1835 2022
1836 if (meta->lod && !meta->sampler.IsBuffer()) { 2023 if (meta->lod && !meta->sampler.IsBuffer()) {
1837 expr += ", "; 2024 expr += ", ";
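
The rewritten coordinate builder above sizes the integer constructor by the operand count plus an extra slot when the sampler is arrayed, joins the coordinates with commas, and only then appends the array index and the closing parenthesis, instead of splicing the parenthesis inside the loop. A self-contained sketch of that assembly (BuildCoords and its constructor table are illustrative, not the decompiler's exact tables):

    #include <array>
    #include <cstddef>
    #include <optional>
    #include <string>
    #include <vector>

    std::string BuildCoords(const std::vector<std::string>& coords,
                            const std::optional<std::string>& array_index) {
        static constexpr std::array<const char*, 4> constructors{"int", "ivec2", "ivec3", "ivec4"};
        const std::size_t arity = coords.size() + (array_index ? 1 : 0);
        std::string expr = constructors.at(arity - 1);
        expr += '(';
        for (std::size_t i = 0; i < coords.size(); ++i) {
            if (i > 0) {
                expr += ", ";
            }
            expr += coords[i];
        }
        if (array_index) {
            expr += ", ";
            expr += *array_index;
        }
        expr += ')';
        return expr;
    }

    // BuildCoords({"x", "y"}, std::nullopt) -> "ivec2(x, y)"
    // BuildCoords({"x", "y"}, "layer")      -> "ivec3(x, y, layer)"
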
@@ -1945,7 +2132,7 @@ private:
1945 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 2132 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1946 for (u32 component = 0; component < 4; ++component) { 2133 for (u32 component = 0; component < 4; ++component) {
1947 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { 2134 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1948 code.AddLine("FragColor{}[{}] = {};", render_target, component, 2135 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
1949 SafeGetRegister(current_reg).AsFloat()); 2136 SafeGetRegister(current_reg).AsFloat());
1950 ++current_reg; 2137 ++current_reg;
1951 } 2138 }
@@ -2261,27 +2448,34 @@ private:
2261 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2448 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2262 2449
2263 std::string GetRegister(u32 index) const { 2450 std::string GetRegister(u32 index) const {
2264 return GetDeclarationWithSuffix(index, "gpr"); 2451 return AppendSuffix(index, "gpr");
2265 } 2452 }
2266 2453
2267 std::string GetCustomVariable(u32 index) const { 2454 std::string GetCustomVariable(u32 index) const {
2268 return GetDeclarationWithSuffix(index, "custom_var"); 2455 return AppendSuffix(index, "custom_var");
2269 } 2456 }
2270 2457
2271 std::string GetPredicate(Tegra::Shader::Pred pred) const { 2458 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2272 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); 2459 return AppendSuffix(static_cast<u32>(pred), "pred");
2273 } 2460 }
2274 2461
2275 std::string GetInputAttribute(Attribute::Index attribute) const { 2462 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2276 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); 2463 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2277 } 2464 }
2278 2465
2279 std::string GetOutputAttribute(Attribute::Index attribute) const { 2466 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2280 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); 2467
2468 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2469 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2470 const auto& description = varying_description.at(offset);
2471 if (description.is_scalar) {
2472 return description.name;
2473 }
2474 return fmt::format("{}[{}]", description.name, element - description.first_element);
2281 } 2475 }
2282 2476
2283 std::string GetConstBuffer(u32 index) const { 2477 std::string GetConstBuffer(u32 index) const {
2284 return GetDeclarationWithSuffix(index, "cbuf"); 2478 return AppendSuffix(index, "cbuf");
2285 } 2479 }
2286 2480
2287 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { 2481 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2488,15 @@ private:
2294 } 2488 }
2295 2489
2296 std::string GetConstBufferBlock(u32 index) const { 2490 std::string GetConstBufferBlock(u32 index) const {
2297 return GetDeclarationWithSuffix(index, "cbuf_block"); 2491 return AppendSuffix(index, "cbuf_block");
2298 } 2492 }
2299 2493
2300 std::string GetLocalMemory() const { 2494 std::string GetLocalMemory() const {
2301 return "lmem_" + suffix; 2495 if (suffix.empty()) {
2496 return "lmem";
2497 } else {
2498 return "lmem_" + std::string{suffix};
2499 }
2302 } 2500 }
2303 2501
2304 std::string GetInternalFlag(InternalFlag flag) const { 2502 std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,23 +2505,31 @@ private:
2307 const auto index = static_cast<u32>(flag); 2505 const auto index = static_cast<u32>(flag);
2308 ASSERT(index < static_cast<u32>(InternalFlag::Amount)); 2506 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2309 2507
2310 return fmt::format("{}_{}", InternalFlagNames[index], suffix); 2508 if (suffix.empty()) {
2509 return InternalFlagNames[index];
2510 } else {
2511 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2512 }
2311 } 2513 }
2312 2514
2313 std::string GetSampler(const Sampler& sampler) const { 2515 std::string GetSampler(const Sampler& sampler) const {
2314 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); 2516 return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
2315 } 2517 }
2316 2518
2317 std::string GetImage(const Image& image) const { 2519 std::string GetImage(const Image& image) const {
2318 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); 2520 return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
2319 } 2521 }
2320 2522
2321 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { 2523 std::string AppendSuffix(u32 index, std::string_view name) const {
2322 return fmt::format("{}_{}_{}", name, index, suffix); 2524 if (suffix.empty()) {
2525 return fmt::format("{}{}", name, index);
2526 } else {
2527 return fmt::format("{}{}_{}", name, index, suffix);
2528 }
2323 } 2529 }
2324 2530
2325 u32 GetNumPhysicalInputAttributes() const { 2531 u32 GetNumPhysicalInputAttributes() const {
2326 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 2532 return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
2327 } 2533 }
2328 2534
2329 u32 GetNumPhysicalAttributes() const { 2535 u32 GetNumPhysicalAttributes() const {
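
AppendSuffix, which replaces GetDeclarationWithSuffix, now tolerates an empty suffix: the common single-program case yields bare names such as gpr0 or sampler3, while a non-empty suffix such as vertex_b is still appended when the vertex A and B programs are decompiled into one source. A tiny sketch of that naming under those assumptions:

    #include <cstdint>
    #include <string>
    #include <string_view>

    #include <fmt/format.h>

    std::string AppendSuffix(std::uint32_t index, std::string_view name, std::string_view suffix) {
        if (suffix.empty()) {
            return fmt::format("{}{}", name, index);
        }
        return fmt::format("{}{}_{}", name, index, suffix);
    }

    // AppendSuffix(0, "gpr", "")             -> "gpr0"
    // AppendSuffix(3, "sampler", "vertex_b") -> "sampler3_vertex_b"
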
@@ -2334,17 +2540,31 @@ private:
2334 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); 2540 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2335 } 2541 }
2336 2542
2543 bool IsRenderTargetEnabled(u32 render_target) const {
2544 for (u32 component = 0; component < 4; ++component) {
2545 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2546 return true;
2547 }
2548 }
2549 return false;
2550 }
2551
2337 const Device& device; 2552 const Device& device;
2338 const ShaderIR& ir; 2553 const ShaderIR& ir;
2554 const Registry& registry;
2339 const ShaderType stage; 2555 const ShaderType stage;
2340 const std::string suffix; 2556 const std::string_view identifier;
2557 const std::string_view suffix;
2341 const Header header; 2558 const Header header;
2559 std::unordered_map<u8, VaryingTFB> transform_feedback;
2342 2560
2343 ShaderWriter code; 2561 ShaderWriter code;
2562
2563 std::optional<u32> max_input_vertices;
2344}; 2564};
2345 2565
2346std::string GetFlowVariable(u32 i) { 2566std::string GetFlowVariable(u32 index) {
2347 return fmt::format("flow_var_{}", i); 2567 return fmt::format("flow_var{}", index);
2348} 2568}
2349 2569
2350class ExprDecompiler { 2570class ExprDecompiler {
@@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() {
2531 2751
2532} // Anonymous namespace 2752} // Anonymous namespace
2533 2753
2534ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { 2754ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2535 ShaderEntries entries; 2755 ShaderEntries entries;
2536 for (const auto& cbuf : ir.GetConstantBuffers()) { 2756 for (const auto& cbuf : ir.GetConstantBuffers()) {
2537 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2757 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2547,33 +2767,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2547 for (const auto& image : ir.GetImages()) { 2767 for (const auto& image : ir.GetImages()) {
2548 entries.images.emplace_back(image); 2768 entries.images.emplace_back(image);
2549 } 2769 }
2550 entries.clip_distances = ir.GetClipDistances(); 2770 const auto clip_distances = ir.GetClipDistances();
2771 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2772 entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
2773 }
2551 entries.shader_length = ir.GetLength(); 2774 entries.shader_length = ir.GetLength();
2552 return entries; 2775 return entries;
2553} 2776}
2554 2777
2555std::string GetCommonDeclarations() { 2778std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2556 return R"(#define ftoi floatBitsToInt 2779 ShaderType stage, std::string_view identifier,
2557#define ftou floatBitsToUint 2780 std::string_view suffix) {
2558#define itof intBitsToFloat 2781 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2559#define utof uintBitsToFloat
2560
2561bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2562 bvec2 is_nan1 = isnan(pair1);
2563 bvec2 is_nan2 = isnan(pair2);
2564 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2565}
2566
2567const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
2568const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2569)";
2570}
2571
2572std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
2573 const std::string& suffix) {
2574 GLSLDecompiler decompiler(device, ir, stage, suffix);
2575 decompiler.Decompile(); 2782 decompiler.Decompile();
2576 return decompiler.GetResult(); 2783 return decompiler.GetResult();
2577} 2784}
2578 2785
2579} // namespace OpenGL::GLShader 2786} // namespace OpenGL
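
MakeEntries (renamed from GetEntries) folds the per-index clip-distance flags into the u32 bitmask that ShaderEntries now carries, setting bit i when clip distance i is written; the accumulation has to OR each bit so earlier indices are not lost. A standalone sketch of the packing, assuming the eight clip distances Maxwell exposes:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr std::uint32_t PackClipDistances(const std::array<bool, 8>& clip_distances) {
        std::uint32_t mask = 0;
        for (std::size_t i = 0; i < clip_distances.size(); ++i) {
            mask |= (clip_distances[i] ? 1U : 0U) << i;
        }
        return mask;
    }

    static_assert(PackClipDistances({true, false, true}) == 0b101);
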
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 7876f48d6..e7dbd810c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,22 +6,18 @@
6 6
7#include <array> 7#include <array>
8#include <string> 8#include <string>
9#include <string_view>
9#include <utility> 10#include <utility>
10#include <vector> 11#include <vector>
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h" 14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
15 17
16namespace VideoCommon::Shader {
17class ShaderIR;
18}
19
20namespace OpenGL { 18namespace OpenGL {
21class Device;
22}
23 19
24namespace OpenGL::GLShader { 20class Device;
25 21
26using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
27using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -74,15 +70,15 @@ struct ShaderEntries {
74 std::vector<GlobalMemoryEntry> global_memory_entries; 70 std::vector<GlobalMemoryEntry> global_memory_entries;
75 std::vector<SamplerEntry> samplers; 71 std::vector<SamplerEntry> samplers;
76 std::vector<ImageEntry> images; 72 std::vector<ImageEntry> images;
77 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 73 u32 clip_distances{};
78 std::size_t shader_length{}; 74 std::size_t shader_length{};
79}; 75};
80 76
81ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); 77ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
82
83std::string GetCommonDeclarations();
84 78
85std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 79std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
86 Tegra::Engines::ShaderType stage, const std::string& suffix); 80 const VideoCommon::Shader::Registry& registry,
81 Tegra::Engines::ShaderType stage, std::string_view identifier,
82 std::string_view suffix = {});
87 83
88} // namespace OpenGL::GLShader 84} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 1fc204f6f..9e95a122b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -31,32 +31,24 @@ namespace {
31 31
32using ShaderCacheVersionHash = std::array<u8, 64>; 32using ShaderCacheVersionHash = std::array<u8, 64>;
33 33
34enum class TransferableEntryKind : u32 {
35 Raw,
36 Usage,
37};
38
39struct ConstBufferKey { 34struct ConstBufferKey {
40 u32 cbuf{}; 35 u32 cbuf = 0;
41 u32 offset{}; 36 u32 offset = 0;
42 u32 value{}; 37 u32 value = 0;
43}; 38};
44 39
45struct BoundSamplerKey { 40struct BoundSamplerKey {
46 u32 offset{}; 41 u32 offset = 0;
47 Tegra::Engines::SamplerDescriptor sampler{}; 42 Tegra::Engines::SamplerDescriptor sampler;
48}; 43};
49 44
50struct BindlessSamplerKey { 45struct BindlessSamplerKey {
51 u32 cbuf{}; 46 u32 cbuf = 0;
52 u32 offset{}; 47 u32 offset = 0;
53 Tegra::Engines::SamplerDescriptor sampler{}; 48 Tegra::Engines::SamplerDescriptor sampler;
54}; 49};
55 50
56constexpr u32 NativeVersion = 12; 51constexpr u32 NativeVersion = 20;
57
58// Making sure sizes doesn't change by accident
59static_assert(sizeof(ProgramVariant) == 20);
60 52
61ShaderCacheVersionHash GetShaderCacheVersionHash() { 53ShaderCacheVersionHash GetShaderCacheVersionHash() {
62 ShaderCacheVersionHash hash{}; 54 ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
67 59
68} // Anonymous namespace 60} // Anonymous namespace
69 61
70ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, 62ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
71 ProgramCode code_b)
72 : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
73 code_b)} {}
74 63
75ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 64ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
76 65
77ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; 66bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
78 67 if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
79bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
80 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
81 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
82 return false; 68 return false;
83 } 69 }
84 u32 code_size{}; 70 u32 code_size;
85 u32 code_size_b{}; 71 u32 code_size_b;
86 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || 72 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
87 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { 73 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
88 return false; 74 return false;
89 } 75 }
90
91 code.resize(code_size); 76 code.resize(code_size);
92 code_b.resize(code_size_b); 77 code_b.resize(code_size_b);
93 78
94 if (file.ReadArray(code.data(), code_size) != code_size) 79 if (file.ReadArray(code.data(), code_size) != code_size) {
95 return false; 80 return false;
96 81 }
97 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { 82 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
98 return false; 83 return false;
99 } 84 }
85
86 u8 is_texture_handler_size_known;
87 u32 texture_handler_size_value;
88 u32 num_keys;
89 u32 num_bound_samplers;
90 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false;
98 }
99 if (is_texture_handler_size_known) {
100 texture_handler_size = texture_handler_size_value;
101 }
102
103 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) {
111 return false;
112 }
113 for (const auto& key : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value});
115 }
116 for (const auto& key : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler);
118 }
119 for (const auto& key : flat_bindless_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
121 }
122
100 return true; 123 return true;
101} 124}
102 125
103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 126bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
104 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || 127 if (file.WriteObject(static_cast<u32>(type)) != 1 ||
105 file.WriteObject(static_cast<u32>(code.size())) != 1 || 128 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
106 file.WriteObject(static_cast<u32>(code_b.size())) != 1) { 129 file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
107 return false; 130 return false;
108 } 131 }
109 132 if (file.WriteArray(code.data(), code.size()) != code.size()) {
110 if (file.WriteArray(code.data(), code.size()) != code.size())
111 return false; 133 return false;
112 134 }
113 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { 135 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
114 return false; 136 return false;
115 } 137 }
116 return true; 138
139 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
140 file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
141 file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false;
147 }
148
149 std::vector<ConstBufferKey> flat_keys;
150 flat_keys.reserve(keys.size());
151 for (const auto& [address, value] : keys) {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 }
154
155 std::vector<BoundSamplerKey> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
159 }
160
161 std::vector<BindlessSamplerKey> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler});
166 }
167
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size();
117} 173}
118 174
119ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} 175ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
120 176
121ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; 177ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
122 178
123std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 179std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
124ShaderDiskCacheOpenGL::LoadTransferable() {
125 // Skip games without title id 180 // Skip games without title id
126 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; 181 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
127 if (!Settings::values.use_disk_shader_cache || !has_title_id) { 182 if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
130 185
131 FileUtil::IOFile file(GetTransferablePath(), "rb"); 186 FileUtil::IOFile file(GetTransferablePath(), "rb");
132 if (!file.IsOpen()) { 187 if (!file.IsOpen()) {
133 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", 188 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
134 GetTitleID());
135 is_usable = true; 189 is_usable = true;
136 return {}; 190 return {};
137 } 191 }
138 192
139 u32 version{}; 193 u32 version{};
140 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { 194 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
141 LOG_ERROR(Render_OpenGL, 195 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
142 "Failed to get transferable cache version for title id={}, skipping",
143 GetTitleID());
144 return {}; 196 return {};
145 } 197 }
146 198
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
158 } 210 }
159 211
160 // Version is valid, load the shaders 212 // Version is valid, load the shaders
161 constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; 213 std::vector<ShaderDiskCacheEntry> entries;
162 std::vector<ShaderDiskCacheRaw> raws;
163 std::vector<ShaderDiskCacheUsage> usages;
164 while (file.Tell() < file.GetSize()) { 214 while (file.Tell() < file.GetSize()) {
165 TransferableEntryKind kind{}; 215 ShaderDiskCacheEntry& entry = entries.emplace_back();
166 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 216 if (!entry.Load(file)) {
167 LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); 217 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
168 return {};
169 }
170
171 switch (kind) {
172 case TransferableEntryKind::Raw: {
173 ShaderDiskCacheRaw entry;
174 if (!entry.Load(file)) {
175 LOG_ERROR(Render_OpenGL, error_loading);
176 return {};
177 }
178 transferable.insert({entry.GetUniqueIdentifier(), {}});
179 raws.push_back(std::move(entry));
180 break;
181 }
182 case TransferableEntryKind::Usage: {
183 ShaderDiskCacheUsage usage;
184
185 u32 num_keys{};
186 u32 num_bound_samplers{};
187 u32 num_bindless_samplers{};
188 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
189 file.ReadArray(&usage.variant, 1) != 1 ||
190 file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
191 file.ReadArray(&num_bound_samplers, 1) != 1 ||
192 file.ReadArray(&num_bindless_samplers, 1) != 1) {
193 LOG_ERROR(Render_OpenGL, error_loading);
194 return {};
195 }
196
197 std::vector<ConstBufferKey> keys(num_keys);
198 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
199 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
200 if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
201 file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
202 bound_samplers.size() ||
203 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
204 bindless_samplers.size()) {
205 LOG_ERROR(Render_OpenGL, error_loading);
206 return {};
207 }
208 for (const auto& key : keys) {
209 usage.keys.insert({{key.cbuf, key.offset}, key.value});
210 }
211 for (const auto& key : bound_samplers) {
212 usage.bound_samplers.emplace(key.offset, key.sampler);
213 }
214 for (const auto& key : bindless_samplers) {
215 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
216 }
217
218 usages.push_back(std::move(usage));
219 break;
220 }
221 default:
222 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
223 static_cast<u32>(kind));
224 return {}; 218 return {};
225 } 219 }
226 } 220 }
227 221
228 is_usable = true; 222 is_usable = true;
229 return {{std::move(raws), std::move(usages)}}; 223 return {std::move(entries)};
230} 224}
231 225
232std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> 226std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
233ShaderDiskCacheOpenGL::LoadPrecompiled() {
234 if (!is_usable) { 227 if (!is_usable) {
235 return {}; 228 return {};
236 } 229 }
237 230
238 std::string path = GetPrecompiledPath(); 231 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
239 FileUtil::IOFile file(path, "rb");
240 if (!file.IsOpen()) { 232 if (!file.IsOpen()) {
241 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 233 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
242 GetTitleID());
243 return {}; 234 return {};
244 } 235 }
245 236
246 const auto result = LoadPrecompiledFile(file); 237 if (const auto result = LoadPrecompiledFile(file)) {
247 if (!result) { 238 return *result;
248 LOG_INFO(Render_OpenGL,
249 "Failed to load precompiled cache for game with title id={}, removing",
250 GetTitleID());
251 file.Close();
252 InvalidatePrecompiled();
253 return {};
254 } 239 }
255 return *result; 240
241 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
242 file.Close();
243 InvalidatePrecompiled();
244 return {};
256} 245}
257 246
258std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 247std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
259ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 248 FileUtil::IOFile& file) {
260 // Read compressed file from disk and decompress to virtual precompiled cache file 249 // Read compressed file from disk and decompress to virtual precompiled cache file
261 std::vector<u8> compressed(file.GetSize()); 250 std::vector<u8> compressed(file.GetSize());
262 file.ReadBytes(compressed.data(), compressed.size()); 251 file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
275 return {}; 264 return {};
276 } 265 }
277 266
278 ShaderDumpsMap dumps; 267 std::vector<ShaderDiskCachePrecompiled> entries;
279 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 268 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
280 u32 num_keys{}; 269 u32 binary_size;
281 u32 num_bound_samplers{}; 270 auto& entry = entries.emplace_back();
282 u32 num_bindless_samplers{}; 271 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
283 ShaderDiskCacheUsage usage; 272 !LoadObjectFromPrecompiled(entry.binary_format) ||
284 if (!LoadObjectFromPrecompiled(usage.unique_identifier) || 273 !LoadObjectFromPrecompiled(binary_size)) {
285 !LoadObjectFromPrecompiled(usage.variant) ||
286 !LoadObjectFromPrecompiled(usage.bound_buffer) ||
287 !LoadObjectFromPrecompiled(num_keys) ||
288 !LoadObjectFromPrecompiled(num_bound_samplers) ||
289 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
290 return {};
291 }
292 std::vector<ConstBufferKey> keys(num_keys);
293 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
294 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
295 if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
296 !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
297 bound_samplers.size() ||
298 !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
299 bindless_samplers.size()) {
300 return {};
301 }
302 for (const auto& key : keys) {
303 usage.keys.insert({{key.cbuf, key.offset}, key.value});
304 }
305 for (const auto& key : bound_samplers) {
306 usage.bound_samplers.emplace(key.offset, key.sampler);
307 }
308 for (const auto& key : bindless_samplers) {
309 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
310 }
311
312 ShaderDiskCacheDump dump;
313 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
314 return {};
315 }
316
317 u32 binary_length{};
318 if (!LoadObjectFromPrecompiled(binary_length)) {
319 return {}; 274 return {};
320 } 275 }
321 276
322 dump.binary.resize(binary_length); 277 entry.binary.resize(binary_size);
323 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { 278 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
324 return {}; 279 return {};
325 } 280 }
326
327 dumps.emplace(std::move(usage), dump);
328 } 281 }
329 return dumps; 282 return entries;
330} 283}
331 284
332void ShaderDiskCacheOpenGL::InvalidateTransferable() { 285void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
346 } 299 }
347} 300}
348 301
349void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { 302void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
350 if (!is_usable) { 303 if (!is_usable) {
351 return; 304 return;
352 } 305 }
353 306
354 const u64 id = entry.GetUniqueIdentifier(); 307 const u64 id = entry.unique_identifier;
355 if (transferable.find(id) != transferable.end()) { 308 if (stored_transferable.find(id) != stored_transferable.end()) {
356 // The shader already exists 309 // The shader already exists
357 return; 310 return;
358 } 311 }
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
361 if (!file.IsOpen()) { 314 if (!file.IsOpen()) {
362 return; 315 return;
363 } 316 }
364 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { 317 if (!entry.Save(file)) {
365 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); 318 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
366 file.Close(); 319 file.Close();
367 InvalidateTransferable(); 320 InvalidateTransferable();
368 return; 321 return;
369 } 322 }
370 transferable.insert({id, {}});
371}
372 323
373void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { 324 stored_transferable.insert(id);
374 if (!is_usable) {
375 return;
376 }
377
378 const auto it = transferable.find(usage.unique_identifier);
379 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
380
381 auto& usages{it->second};
382 if (usages.find(usage) != usages.end()) {
383 // Skip this variant since the shader is already stored.
384 return;
385 }
386 usages.insert(usage);
387
388 FileUtil::IOFile file = AppendTransferableFile();
389 if (!file.IsOpen())
390 return;
391 const auto Close = [&] {
392 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
393 file.Close();
394 InvalidateTransferable();
395 };
396
397 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
398 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
399 file.WriteObject(usage.bound_buffer) != 1 ||
400 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
401 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
402 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
403 Close();
404 return;
405 }
406 for (const auto& [pair, value] : usage.keys) {
407 const auto [cbuf, offset] = pair;
408 if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
409 Close();
410 return;
411 }
412 }
413 for (const auto& [offset, sampler] : usage.bound_samplers) {
414 if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
415 Close();
416 return;
417 }
418 }
419 for (const auto& [pair, sampler] : usage.bindless_samplers) {
420 const auto [cbuf, offset] = pair;
421 if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
422 Close();
423 return;
424 }
425 }
426} 325}
427 326
428void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { 327void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
429 if (!is_usable) { 328 if (!is_usable) {
430 return; 329 return;
431 } 330 }
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
437 SavePrecompiledHeaderToVirtualPrecompiledCache(); 336 SavePrecompiledHeaderToVirtualPrecompiledCache();
438 } 337 }
439 338
440 GLint binary_length{}; 339 GLint binary_length;
441 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 340 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
442 341
443 GLenum binary_format{}; 342 GLenum binary_format;
444 std::vector<u8> binary(binary_length); 343 std::vector<u8> binary(binary_length);
445 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 344 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
446 345
447 const auto Close = [&] { 346 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
347 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
348 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
448 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", 349 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
449 usage.unique_identifier); 350 unique_identifier);
450 InvalidatePrecompiled(); 351 InvalidatePrecompiled();
451 };
452
453 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
454 !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
455 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
456 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
457 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
458 Close();
459 return;
460 }
461 for (const auto& [pair, value] : usage.keys) {
462 const auto [cbuf, offset] = pair;
463 if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
464 Close();
465 return;
466 }
467 }
468 for (const auto& [offset, sampler] : usage.bound_samplers) {
469 if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
470 Close();
471 return;
472 }
473 }
474 for (const auto& [pair, sampler] : usage.bindless_samplers) {
475 const auto [cbuf, offset] = pair;
476 if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
477 Close();
478 return;
479 }
480 }
481 if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
482 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
483 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
484 Close();
485 } 352 }
486} 353}
487 354
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
534 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { 401 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
535 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", 402 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
536 precompiled_path); 403 precompiled_path);
537 return;
538 } 404 }
539} 405}
540 406
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ef2371f6d..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -19,8 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/shader/registry.h"
23#include "video_core/shader/const_buffer_locker.h"
24 23
25namespace Core { 24namespace Core {
26class System; 25class System;
@@ -32,139 +31,39 @@ class IOFile;
32 31
33namespace OpenGL { 32namespace OpenGL {
34 33
35struct ShaderDiskCacheUsage;
36struct ShaderDiskCacheDump;
37
38using ProgramCode = std::vector<u64>; 34using ProgramCode = std::vector<u64>;
39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
40
41/// Describes the different variants a program can be compiled with.
42struct ProgramVariant final {
43 ProgramVariant() = default;
44
45 /// Graphics constructor.
46 explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
47 : primitive_mode{primitive_mode} {}
48
49 /// Compute constructor.
50 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
51 u32 local_memory_size) noexcept
52 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
53 shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
54
55 // Graphics specific parameters.
56 GLenum primitive_mode{};
57
58 // Compute specific parameters.
59 u32 block_x{};
60 u16 block_y{};
61 u16 block_z{};
62 u32 shared_memory_size{};
63 u32 local_memory_size{};
64
65 bool operator==(const ProgramVariant& rhs) const noexcept {
66 return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
67 local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
68 rhs.block_z, rhs.shared_memory_size,
69 rhs.local_memory_size);
70 }
71
72 bool operator!=(const ProgramVariant& rhs) const noexcept {
73 return !operator==(rhs);
74 }
75};
76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77
78/// Describes how a shader is used.
79struct ShaderDiskCacheUsage {
80 u64 unique_identifier{};
81 ProgramVariant variant;
82 u32 bound_buffer{};
83 VideoCommon::Shader::KeyMap keys;
84 VideoCommon::Shader::BoundSamplerMap bound_samplers;
85 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
86
87 bool operator==(const ShaderDiskCacheUsage& rhs) const {
88 return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
89 std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
90 rhs.bindless_samplers);
91 }
92
93 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
94 return !operator==(rhs);
95 }
96};
97
98} // namespace OpenGL
99
100namespace std {
101
102template <>
103struct hash<OpenGL::ProgramVariant> {
104 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
105 return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
106 static_cast<std::size_t>(variant.block_x) ^
107 (static_cast<std::size_t>(variant.block_y) << 32) ^
108 (static_cast<std::size_t>(variant.block_z) << 48) ^
109 (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
110 (static_cast<std::size_t>(variant.local_memory_size) << 36);
111 }
112};
113
114template <>
115struct hash<OpenGL::ShaderDiskCacheUsage> {
116 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
117 return static_cast<std::size_t>(usage.unique_identifier) ^
118 std::hash<OpenGL::ProgramVariant>{}(usage.variant);
119 }
120};
121
122} // namespace std
123
124namespace OpenGL {
125 35
126/// Describes a shader how it's used by the guest GPU 36/// Describes a shader and how it's used by the guest GPU
127class ShaderDiskCacheRaw { 37struct ShaderDiskCacheEntry {
128public: 38 ShaderDiskCacheEntry();
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, 39 ~ShaderDiskCacheEntry();
130 ProgramCode code, ProgramCode code_b = {});
131 ShaderDiskCacheRaw();
132 ~ShaderDiskCacheRaw();
133 40
134 bool Load(FileUtil::IOFile& file); 41 bool Load(FileUtil::IOFile& file);
135 42
136 bool Save(FileUtil::IOFile& file) const; 43 bool Save(FileUtil::IOFile& file) const;
137 44
138 u64 GetUniqueIdentifier() const {
139 return unique_identifier;
140 }
141
142 bool HasProgramA() const { 45 bool HasProgramA() const {
143 return !code.empty() && !code_b.empty(); 46 return !code.empty() && !code_b.empty();
144 } 47 }
145 48
146 Tegra::Engines::ShaderType GetType() const {
147 return type;
148 }
149
150 const ProgramCode& GetCode() const {
151 return code;
152 }
153
154 const ProgramCode& GetCodeB() const {
155 return code_b;
156 }
157
158private:
159 u64 unique_identifier{};
160 Tegra::Engines::ShaderType type{}; 49 Tegra::Engines::ShaderType type{};
161 ProgramCode code; 50 ProgramCode code;
162 ProgramCode code_b; 51 ProgramCode code_b;
52
53 u64 unique_identifier = 0;
54 std::optional<u32> texture_handler_size;
55 u32 bound_buffer = 0;
56 VideoCommon::Shader::GraphicsInfo graphics_info;
57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
163}; 61};
164 62
165/// Contains an OpenGL dumped binary program 63/// Contains an OpenGL dumped binary program
166struct ShaderDiskCacheDump { 64struct ShaderDiskCachePrecompiled {
167 GLenum binary_format{}; 65 u64 unique_identifier = 0;
66 GLenum binary_format = 0;
168 std::vector<u8> binary; 67 std::vector<u8> binary;
169}; 68};
170 69
@@ -174,11 +73,10 @@ public:
174 ~ShaderDiskCacheOpenGL(); 73 ~ShaderDiskCacheOpenGL();
175 74
176 /// Loads the transferable cache. If the file has an old version or fails to load, it deletes the file. 75 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
177 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 76 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
178 LoadTransferable();
179 77
180 /// Loads current game's precompiled cache. Invalidates on failure. 78 /// Loads current game's precompiled cache. Invalidates on failure.
181 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); 79 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
182 80
183 /// Removes the transferable (and precompiled) cache file. 81 /// Removes the transferable (and precompiled) cache file.
184 void InvalidateTransferable(); 82 void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
187 void InvalidatePrecompiled(); 85 void InvalidatePrecompiled();
188 86
189 /// Saves a raw dump to the transferable file. Checks for collisions. 87 /// Saves a raw dump to the transferable file. Checks for collisions.
190 void SaveRaw(const ShaderDiskCacheRaw& entry); 88 void SaveEntry(const ShaderDiskCacheEntry& entry);
191
192 /// Saves shader usage to the transferable file. Does not check for collisions.
193 void SaveUsage(const ShaderDiskCacheUsage& usage);
194 89
195 /// Saves a dump entry to the precompiled file. Does not check for collisions. 90 /// Saves a dump entry to the precompiled file. Does not check for collisions.
196 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 91 void SavePrecompiled(u64 unique_identifier, GLuint program);
197 92
198 /// Serializes virtual precompiled shader cache file to real file 93 /// Serializes virtual precompiled shader cache file to real file
199 void SaveVirtualPrecompiledFile(); 94 void SaveVirtualPrecompiledFile();
200 95
201private: 96private:
202 /// Loads the precompiled cache file. Returns empty on failure. 97 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
203 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 98 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
204 LoadPrecompiledFile(FileUtil::IOFile& file); 99 FileUtil::IOFile& file);
205 100
206 /// Opens the current game's transferable file and writes its header if it doesn't exist 101 FileUtil::IOFile AppendTransferableFile() const;
207 FileUtil::IOFile AppendTransferableFile() const; 102 FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
270 std::size_t precompiled_cache_virtual_file_offset = 0; 165 std::size_t precompiled_cache_virtual_file_offset = 0;
271 166
272 // Stored transferable shaders 167 // Stored transferable shaders
273 std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; 168 std::unordered_set<u64> stored_transferable;
274 169
275 // The cache has been loaded at boot 170 // The cache has been loaded at boot
276 bool is_usable{}; 171 bool is_usable{};
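
SaveEntry's collision check is now a plain set of identifiers: an entry is appended to the transferable file only the first time its unique_identifier is seen, and the id is recorded after the write succeeds, which is what the flat stored_transferable above replaces the old id-to-usages map for. A minimal sketch of that bookkeeping (the class name is illustrative):

    #include <cstdint>
    #include <unordered_set>

    class TransferableDeduplicator {
    public:
        // Returns true when this identifier has not been written yet.
        bool ShouldWrite(std::uint64_t unique_identifier) const {
            return stored_transferable.find(unique_identifier) == stored_transferable.end();
        }

        // Record the identifier once the file write succeeded.
        void MarkSaved(std::uint64_t unique_identifier) {
            stored_transferable.insert(unique_identifier);
        }

    private:
        std::unordered_set<std::uint64_t> stored_transferable;
    };
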
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
deleted file mode 100644
index 34946fb47..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/gl_shader_gen.h"
14#include "video_core/shader/shader_ir.h"
15
16namespace OpenGL::GLShader {
17
18using Tegra::Engines::Maxwell3D;
19using Tegra::Engines::ShaderType;
20using VideoCommon::Shader::CompileDepth;
21using VideoCommon::Shader::CompilerSettings;
22using VideoCommon::Shader::ProgramCode;
23using VideoCommon::Shader::ShaderIR;
24
25std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
26 std::string out = GetCommonDeclarations();
27 out += fmt::format(R"(
28layout (std140, binding = {}) uniform vs_config {{
29 float y_direction;
30}};
31
32)",
33 EmulationUniformBlockBinding);
34 out += Decompile(device, ir, ShaderType::Vertex, "vertex");
35 if (ir_b) {
36 out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
37 }
38
39 out += R"(
40void main() {
41 gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
42 execute_vertex();
43)";
44 if (ir_b) {
45 out += " execute_vertex_b();";
46 }
47 out += "}\n";
48 return out;
49}
50
51std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
52 std::string out = GetCommonDeclarations();
53 out += fmt::format(R"(
54layout (std140, binding = {}) uniform gs_config {{
55 float y_direction;
56}};
57
58)",
59 EmulationUniformBlockBinding);
60 out += Decompile(device, ir, ShaderType::Geometry, "geometry");
61
62 out += R"(
63void main() {
64 execute_geometry();
65}
66)";
67 return out;
68}
69
70std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
71 std::string out = GetCommonDeclarations();
72 out += fmt::format(R"(
73layout (location = 0) out vec4 FragColor0;
74layout (location = 1) out vec4 FragColor1;
75layout (location = 2) out vec4 FragColor2;
76layout (location = 3) out vec4 FragColor3;
77layout (location = 4) out vec4 FragColor4;
78layout (location = 5) out vec4 FragColor5;
79layout (location = 6) out vec4 FragColor6;
80layout (location = 7) out vec4 FragColor7;
81
82layout (std140, binding = {}) uniform fs_config {{
83 float y_direction;
84}};
85
86)",
87 EmulationUniformBlockBinding);
88 out += Decompile(device, ir, ShaderType::Fragment, "fragment");
89
90 out += R"(
91void main() {
92 execute_fragment();
93}
94)";
95 return out;
96}
97
98std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
99 std::string out = GetCommonDeclarations();
100 out += Decompile(device, ir, ShaderType::Compute, "compute");
101 out += R"(
102void main() {
103 execute_compute();
104}
105)";
106 return out;
107}
108
109} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
deleted file mode 100644
index cba2be9f9..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ /dev/null
@@ -1,34 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_shader_decompiler.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace OpenGL {
14class Device;
15}
16
17namespace OpenGL::GLShader {
18
19using VideoCommon::Shader::ProgramCode;
20using VideoCommon::Shader::ShaderIR;
21
22/// Generates the GLSL vertex shader program source code for the given VS program
23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
24
25/// Generates the GLSL geometry shader program source code for the given GS program
26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
27
28/// Generates the GLSL fragment shader program source code for the given FS program
29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
30
31/// Generates the GLSL compute shader program source code for the given CS program
32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
33
34} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 75d3fac04..9c7b0adbd 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,45 +2,52 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <glad/glad.h>
6
5#include "common/common_types.h" 7#include "common/common_types.h"
6#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/renderer_opengl/gl_shader_manager.h" 9#include "video_core/renderer_opengl/gl_shader_manager.h"
8 10
9namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
10 12
11using Tegra::Engines::Maxwell3D; 13ProgramManager::ProgramManager() = default;
12
13ProgramManager::ProgramManager() {
14 pipeline.Create();
15}
16 14
17ProgramManager::~ProgramManager() = default; 15ProgramManager::~ProgramManager() = default;
18 16
19void ProgramManager::ApplyTo(OpenGLState& state) { 17void ProgramManager::Create() {
20 UpdatePipeline(); 18 graphics_pipeline.Create();
21 state.draw.shader_program = 0; 19 glBindProgramPipeline(graphics_pipeline.handle);
22 state.draw.program_pipeline = pipeline.handle;
23} 20}
24 21
25void ProgramManager::UpdatePipeline() { 22void ProgramManager::BindGraphicsPipeline() {
23 if (!is_graphics_bound) {
24 is_graphics_bound = true;
25 glUseProgram(0);
26 }
27
26 // Avoid updating the pipeline when values have not changed 28 // Avoid updating the pipeline when values have not changed
27 if (old_state == current_state) { 29 if (old_state == current_state) {
28 return; 30 return;
29 } 31 }
30 32
31 // Workaround for AMD bug 33 // Workaround for AMD bug
32 constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | 34 static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
33 GL_FRAGMENT_SHADER_BIT}; 35 GL_FRAGMENT_SHADER_BIT};
34 glUseProgramStages(pipeline.handle, all_used_stages, 0); 36 const GLuint handle = graphics_pipeline.handle;
35 37 glUseProgramStages(handle, all_used_stages, 0);
36 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); 38 glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
37 glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); 39 glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
38 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); 40 glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
39 41
40 old_state = current_state; 42 old_state = current_state;
41} 43}
42 44
43void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { 45void ProgramManager::BindComputeShader(GLuint program) {
46 is_graphics_bound = false;
47 glUseProgram(program);
48}
49
50void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
44 const auto& regs = maxwell.regs; 51 const auto& regs = maxwell.regs;
45 52
46 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. 53 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 478c165ce..d2e47f2a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -9,7 +9,6 @@
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "video_core/renderer_opengl/gl_resource_manager.h" 11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/gl_state.h"
13#include "video_core/renderer_opengl/maxwell_to_gl.h" 12#include "video_core/renderer_opengl/maxwell_to_gl.h"
14 13
15namespace OpenGL::GLShader { 14namespace OpenGL::GLShader {
@@ -32,49 +31,47 @@ public:
32 explicit ProgramManager(); 31 explicit ProgramManager();
33 ~ProgramManager(); 32 ~ProgramManager();
34 33
35 void ApplyTo(OpenGLState& state); 34 void Create();
36 35
37 void UseProgrammableVertexShader(GLuint program) { 36 /// Updates the graphics pipeline and binds it.
37 void BindGraphicsPipeline();
38
39 /// Binds a compute shader.
40 void BindComputeShader(GLuint program);
41
42 void UseVertexShader(GLuint program) {
38 current_state.vertex_shader = program; 43 current_state.vertex_shader = program;
39 } 44 }
40 45
41 void UseProgrammableGeometryShader(GLuint program) { 46 void UseGeometryShader(GLuint program) {
42 current_state.geometry_shader = program; 47 current_state.geometry_shader = program;
43 } 48 }
44 49
45 void UseProgrammableFragmentShader(GLuint program) { 50 void UseFragmentShader(GLuint program) {
46 current_state.fragment_shader = program; 51 current_state.fragment_shader = program;
47 } 52 }
48 53
49 void UseTrivialGeometryShader() {
50 current_state.geometry_shader = 0;
51 }
52
53 void UseTrivialFragmentShader() {
54 current_state.fragment_shader = 0;
55 }
56
57private: 54private:
58 struct PipelineState { 55 struct PipelineState {
59 bool operator==(const PipelineState& rhs) const { 56 bool operator==(const PipelineState& rhs) const noexcept {
60 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && 57 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
61 geometry_shader == rhs.geometry_shader; 58 geometry_shader == rhs.geometry_shader;
62 } 59 }
63 60
64 bool operator!=(const PipelineState& rhs) const { 61 bool operator!=(const PipelineState& rhs) const noexcept {
65 return !operator==(rhs); 62 return !operator==(rhs);
66 } 63 }
67 64
68 GLuint vertex_shader{}; 65 GLuint vertex_shader = 0;
69 GLuint fragment_shader{}; 66 GLuint fragment_shader = 0;
70 GLuint geometry_shader{}; 67 GLuint geometry_shader = 0;
71 }; 68 };
72 69
73 void UpdatePipeline(); 70 OGLPipeline graphics_pipeline;
74 71 OGLPipeline compute_pipeline;
75 OGLPipeline pipeline;
76 PipelineState current_state; 72 PipelineState current_state;
77 PipelineState old_state; 73 PipelineState old_state;
74 bool is_graphics_bound = true;
78}; 75};
79 76
80} // namespace OpenGL::GLShader 77} // namespace OpenGL::GLShader
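For reference, a rough usage sketch of the reworked interface (an assumption, not taken from the patch; vertex_program, fragment_program and compute_program stand for separable program handles created elsewhere, and ProgramManager::Create() is assumed to have been called once after context creation):

void SubmitWork(OpenGL::GLShader::ProgramManager& program_manager, GLuint vertex_program,
                GLuint fragment_program, GLuint compute_program) {
    // Graphics path: stage handles are cached in PipelineState and
    // BindGraphicsPipeline() only issues glUseProgramStages when they changed.
    program_manager.UseVertexShader(vertex_program);
    program_manager.UseGeometryShader(0); // 0 leaves the stage unused
    program_manager.UseFragmentShader(fragment_program);
    program_manager.BindGraphicsPipeline();
    // ... draw calls ...

    // Compute path: a monolithic program bound with glUseProgram; the next
    // BindGraphicsPipeline() call lazily switches back to glUseProgram(0).
    program_manager.BindComputeShader(compute_program);
    // ... glDispatchCompute(...) ...
}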
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
deleted file mode 100644
index 7d3bc1a1f..000000000
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ /dev/null
@@ -1,569 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <iterator>
7#include <glad/glad.h>
8#include "common/assert.h"
9#include "common/logging/log.h"
10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_state.h"
12
13MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
14
15namespace OpenGL {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18
19OpenGLState OpenGLState::cur_state;
20
21namespace {
22
23template <typename T>
24bool UpdateValue(T& current_value, const T new_value) {
25 const bool changed = current_value != new_value;
26 current_value = new_value;
27 return changed;
28}
29
30template <typename T1, typename T2>
31bool UpdateTie(T1 current_value, const T2 new_value) {
32 const bool changed = current_value != new_value;
33 current_value = new_value;
34 return changed;
35}
36
37template <typename T>
38std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
39 std::optional<std::size_t> first;
40 std::size_t last;
41 for (std::size_t i = 0; i < std::size(current_values); ++i) {
42 if (!UpdateValue(current_values[i], new_values[i])) {
43 continue;
44 }
45 if (!first) {
46 first = i;
47 }
48 last = i;
49 }
50 if (!first) {
51 return std::nullopt;
52 }
53 return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
54}
55
56void Enable(GLenum cap, bool enable) {
57 if (enable) {
58 glEnable(cap);
59 } else {
60 glDisable(cap);
61 }
62}
63
64void Enable(GLenum cap, GLuint index, bool enable) {
65 if (enable) {
66 glEnablei(cap, index);
67 } else {
68 glDisablei(cap, index);
69 }
70}
71
72void Enable(GLenum cap, bool& current_value, bool new_value) {
73 if (UpdateValue(current_value, new_value)) {
74 Enable(cap, new_value);
75 }
76}
77
78void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
79 if (UpdateValue(current_value, new_value)) {
80 Enable(cap, index, new_value);
81 }
82}
83
84} // Anonymous namespace
85
86OpenGLState::OpenGLState() = default;
87
88void OpenGLState::SetDefaultViewports() {
89 viewports.fill(Viewport{});
90
91 depth_clamp.far_plane = false;
92 depth_clamp.near_plane = false;
93}
94
95void OpenGLState::ApplyFramebufferState() {
96 if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
97 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
98 }
99 if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
100 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
101 }
102}
103
104void OpenGLState::ApplyVertexArrayState() {
105 if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
106 glBindVertexArray(draw.vertex_array);
107 }
108}
109
110void OpenGLState::ApplyShaderProgram() {
111 if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
112 glUseProgram(draw.shader_program);
113 }
114}
115
116void OpenGLState::ApplyProgramPipeline() {
117 if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
118 glBindProgramPipeline(draw.program_pipeline);
119 }
120}
121
122void OpenGLState::ApplyClipDistances() {
123 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
124 Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
125 clip_distance[i]);
126 }
127}
128
129void OpenGLState::ApplyPointSize() {
130 Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
131 Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
132 if (UpdateValue(cur_state.point.size, point.size)) {
133 glPointSize(point.size);
134 }
135}
136
137void OpenGLState::ApplyFragmentColorClamp() {
138 if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
139 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
140 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
141 }
142}
143
144void OpenGLState::ApplyMultisample() {
145 Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
146 multisample_control.alpha_to_coverage);
147 Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
148 multisample_control.alpha_to_one);
149}
150
151void OpenGLState::ApplyDepthClamp() {
152 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
153 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
154 return;
155 }
156 cur_state.depth_clamp = depth_clamp;
157
158 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
159 "Unimplemented Depth Clamp Separation!");
160
161 Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
162}
163
164void OpenGLState::ApplySRgb() {
165 if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
166 return;
167 cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
168 if (framebuffer_srgb.enabled) {
169 glEnable(GL_FRAMEBUFFER_SRGB);
170 } else {
171 glDisable(GL_FRAMEBUFFER_SRGB);
172 }
173}
174
175void OpenGLState::ApplyCulling() {
176 Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
177
178 if (UpdateValue(cur_state.cull.mode, cull.mode)) {
179 glCullFace(cull.mode);
180 }
181
182 if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
183 glFrontFace(cull.front_face);
184 }
185}
186
187void OpenGLState::ApplyRasterizerDiscard() {
188 Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
189}
190
191void OpenGLState::ApplyColorMask() {
192 if (!dirty.color_mask) {
193 return;
194 }
195 dirty.color_mask = false;
196
197 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
198 const auto& updated = color_mask[i];
199 auto& current = cur_state.color_mask[i];
200 if (updated.red_enabled != current.red_enabled ||
201 updated.green_enabled != current.green_enabled ||
202 updated.blue_enabled != current.blue_enabled ||
203 updated.alpha_enabled != current.alpha_enabled) {
204 current = updated;
205 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
206 updated.blue_enabled, updated.alpha_enabled);
207 }
208 }
209}
210
211void OpenGLState::ApplyDepth() {
212 Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
213
214 if (cur_state.depth.test_func != depth.test_func) {
215 cur_state.depth.test_func = depth.test_func;
216 glDepthFunc(depth.test_func);
217 }
218
219 if (cur_state.depth.write_mask != depth.write_mask) {
220 cur_state.depth.write_mask = depth.write_mask;
221 glDepthMask(depth.write_mask);
222 }
223}
224
225void OpenGLState::ApplyPrimitiveRestart() {
226 Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
227
228 if (cur_state.primitive_restart.index != primitive_restart.index) {
229 cur_state.primitive_restart.index = primitive_restart.index;
230 glPrimitiveRestartIndex(primitive_restart.index);
231 }
232}
233
234void OpenGLState::ApplyStencilTest() {
235 if (!dirty.stencil_state) {
236 return;
237 }
238 dirty.stencil_state = false;
239
240 Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
241
242 const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
243 if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
244 current.test_mask != config.test_mask) {
245 current.test_func = config.test_func;
246 current.test_ref = config.test_ref;
247 current.test_mask = config.test_mask;
248 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
249 }
250 if (current.action_depth_fail != config.action_depth_fail ||
251 current.action_depth_pass != config.action_depth_pass ||
252 current.action_stencil_fail != config.action_stencil_fail) {
253 current.action_depth_fail = config.action_depth_fail;
254 current.action_depth_pass = config.action_depth_pass;
255 current.action_stencil_fail = config.action_stencil_fail;
256 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
257 config.action_depth_pass);
258 }
259 if (current.write_mask != config.write_mask) {
260 current.write_mask = config.write_mask;
261 glStencilMaskSeparate(face, config.write_mask);
262 }
263 };
264 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
265 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
266}
267
268void OpenGLState::ApplyViewport() {
269 for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
270 const auto& updated = viewports[i];
271 auto& current = cur_state.viewports[i];
272
273 if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
274 current.height != updated.height) {
275 current.x = updated.x;
276 current.y = updated.y;
277 current.width = updated.width;
278 current.height = updated.height;
279 glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
280 static_cast<GLfloat>(updated.width),
281 static_cast<GLfloat>(updated.height));
282 }
283 if (current.depth_range_near != updated.depth_range_near ||
284 current.depth_range_far != updated.depth_range_far) {
285 current.depth_range_near = updated.depth_range_near;
286 current.depth_range_far = updated.depth_range_far;
287 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
288 }
289
290 Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
291
292 if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
293 current.scissor.width != updated.scissor.width ||
294 current.scissor.height != updated.scissor.height) {
295 current.scissor.x = updated.scissor.x;
296 current.scissor.y = updated.scissor.y;
297 current.scissor.width = updated.scissor.width;
298 current.scissor.height = updated.scissor.height;
299 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
300 updated.scissor.height);
301 }
302 }
303}
304
305void OpenGLState::ApplyGlobalBlending() {
306 const Blend& updated = blend[0];
307 Blend& current = cur_state.blend[0];
308
309 Enable(GL_BLEND, current.enabled, updated.enabled);
310
311 if (current.src_rgb_func != updated.src_rgb_func ||
312 current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
313 current.dst_a_func != updated.dst_a_func) {
314 current.src_rgb_func = updated.src_rgb_func;
315 current.dst_rgb_func = updated.dst_rgb_func;
316 current.src_a_func = updated.src_a_func;
317 current.dst_a_func = updated.dst_a_func;
318 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
319 updated.dst_a_func);
320 }
321
322 if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
323 current.rgb_equation = updated.rgb_equation;
324 current.a_equation = updated.a_equation;
325 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
326 }
327}
328
329void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
330 const Blend& updated = blend[target];
331 Blend& current = cur_state.blend[target];
332
333 if (current.enabled != updated.enabled || force) {
334 current.enabled = updated.enabled;
335 Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
336 }
337
338 if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
339 current.dst_a_func),
340 std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
341 updated.dst_a_func))) {
342 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
343 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
344 }
345
346 if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
347 std::tie(updated.rgb_equation, updated.a_equation))) {
348 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
349 updated.a_equation);
350 }
351}
352
353void OpenGLState::ApplyBlending() {
354 if (!dirty.blend_state) {
355 return;
356 }
357 dirty.blend_state = false;
358
359 if (independant_blend.enabled) {
360 const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
361 for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
362 ApplyTargetBlending(target, force);
363 }
364 } else {
365 ApplyGlobalBlending();
366 }
367 cur_state.independant_blend.enabled = independant_blend.enabled;
368
369 if (UpdateTie(
370 std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
371 cur_state.blend_color.blue, cur_state.blend_color.alpha),
372 std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
373 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
374 }
375}
376
377void OpenGLState::ApplyLogicOp() {
378 Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
379
380 if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
381 glLogicOp(logic_op.operation);
382 }
383}
384
385void OpenGLState::ApplyPolygonOffset() {
386 if (!dirty.polygon_offset) {
387 return;
388 }
389 dirty.polygon_offset = false;
390
391 Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
392 polygon_offset.fill_enable);
393 Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
394 polygon_offset.line_enable);
395 Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
396 polygon_offset.point_enable);
397
398 if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
399 cur_state.polygon_offset.clamp),
400 std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
401 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
402 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
403 } else {
404 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
405 "Unimplemented Depth polygon offset clamp.");
406 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
407 }
408 }
409}
410
411void OpenGLState::ApplyAlphaTest() {
412 Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
413 if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
414 std::tie(alpha_test.func, alpha_test.ref))) {
415 glAlphaFunc(alpha_test.func, alpha_test.ref);
416 }
417}
418
419void OpenGLState::ApplyClipControl() {
420 if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
421 std::tie(clip_control.origin, clip_control.depth_mode))) {
422 glClipControl(clip_control.origin, clip_control.depth_mode);
423 }
424}
425
426void OpenGLState::ApplyRenderBuffer() {
427 if (cur_state.renderbuffer != renderbuffer) {
428 cur_state.renderbuffer = renderbuffer;
429 glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
430 }
431}
432
433void OpenGLState::ApplyTextures() {
434 const std::size_t size = std::size(textures);
435 for (std::size_t i = 0; i < size; ++i) {
436 if (UpdateValue(cur_state.textures[i], textures[i])) {
437 // BindTextureUnit doesn't support binding null textures, skip those binds.
438 // TODO(Rodrigo): Stop using null textures
439 if (textures[i] != 0) {
440 glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
441 }
442 }
443 }
444}
445
446void OpenGLState::ApplySamplers() {
447 const std::size_t size = std::size(samplers);
448 for (std::size_t i = 0; i < size; ++i) {
449 if (UpdateValue(cur_state.samplers[i], samplers[i])) {
450 glBindSampler(static_cast<GLuint>(i), samplers[i]);
451 }
452 }
453}
454
455void OpenGLState::ApplyImages() {
456 if (const auto update = UpdateArray(cur_state.images, images)) {
457 glBindImageTextures(update->first, update->second, images.data() + update->first);
458 }
459}
460
461void OpenGLState::Apply() {
462 MICROPROFILE_SCOPE(OpenGL_State);
463 ApplyFramebufferState();
464 ApplyVertexArrayState();
465 ApplyShaderProgram();
466 ApplyProgramPipeline();
467 ApplyClipDistances();
468 ApplyPointSize();
469 ApplyFragmentColorClamp();
470 ApplyMultisample();
471 ApplyRasterizerDiscard();
472 ApplyColorMask();
473 ApplyDepthClamp();
474 ApplyViewport();
475 ApplyStencilTest();
476 ApplySRgb();
477 ApplyCulling();
478 ApplyDepth();
479 ApplyPrimitiveRestart();
480 ApplyBlending();
481 ApplyLogicOp();
482 ApplyTextures();
483 ApplySamplers();
484 ApplyImages();
485 ApplyPolygonOffset();
486 ApplyAlphaTest();
487 ApplyClipControl();
488 ApplyRenderBuffer();
489}
490
491void OpenGLState::EmulateViewportWithScissor() {
492 auto& current = viewports[0];
493 if (current.scissor.enabled) {
494 const GLint left = std::max(current.x, current.scissor.x);
495 const GLint right =
496 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
497 const GLint bottom = std::max(current.y, current.scissor.y);
498 const GLint top =
499 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
500 current.scissor.x = std::max(left, 0);
501 current.scissor.y = std::max(bottom, 0);
502 current.scissor.width = std::max(right - left, 0);
503 current.scissor.height = std::max(top - bottom, 0);
504 } else {
505 current.scissor.enabled = true;
506 current.scissor.x = current.x;
507 current.scissor.y = current.y;
508 current.scissor.width = current.width;
509 current.scissor.height = current.height;
510 }
511}
512
513OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
514 for (auto& texture : textures) {
515 if (texture == handle) {
516 texture = 0;
517 }
518 }
519 return *this;
520}
521
522OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
523 for (auto& sampler : samplers) {
524 if (sampler == handle) {
525 sampler = 0;
526 }
527 }
528 return *this;
529}
530
531OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
532 if (draw.shader_program == handle) {
533 draw.shader_program = 0;
534 }
535 return *this;
536}
537
538OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
539 if (draw.program_pipeline == handle) {
540 draw.program_pipeline = 0;
541 }
542 return *this;
543}
544
545OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
546 if (draw.vertex_array == handle) {
547 draw.vertex_array = 0;
548 }
549 return *this;
550}
551
552OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
553 if (draw.read_framebuffer == handle) {
554 draw.read_framebuffer = 0;
555 }
556 if (draw.draw_framebuffer == handle) {
557 draw.draw_framebuffer = 0;
558 }
559 return *this;
560}
561
562OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) {
563 if (renderbuffer == handle) {
564 renderbuffer = 0;
565 }
566 return *this;
567}
568
569} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
deleted file mode 100644
index bce662f2c..000000000
--- a/src/video_core/renderer_opengl/gl_state.h
+++ /dev/null
@@ -1,251 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <glad/glad.h>
10#include "video_core/engines/maxwell_3d.h"
11
12namespace OpenGL {
13
14class OpenGLState {
15public:
16 struct {
17 bool enabled = false; // GL_FRAMEBUFFER_SRGB
18 } framebuffer_srgb;
19
20 struct {
21 bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
22 bool alpha_to_one = false; // GL_ALPHA_TO_ONE
23 } multisample_control;
24
25 struct {
26 bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
27 } fragment_color_clamp;
28
29 struct {
30 bool far_plane = false;
31 bool near_plane = false;
32 } depth_clamp; // GL_DEPTH_CLAMP
33
34 struct {
35 bool enabled = false; // GL_CULL_FACE
36 GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
37 GLenum front_face = GL_CCW; // GL_FRONT_FACE
38 } cull;
39
40 struct {
41 bool test_enabled = false; // GL_DEPTH_TEST
42 GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
43 GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
44 } depth;
45
46 struct {
47 bool enabled = false;
48 GLuint index = 0;
49 } primitive_restart; // GL_PRIMITIVE_RESTART
50
51 bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
52
53 struct ColorMask {
54 GLboolean red_enabled = GL_TRUE;
55 GLboolean green_enabled = GL_TRUE;
56 GLboolean blue_enabled = GL_TRUE;
57 GLboolean alpha_enabled = GL_TRUE;
58 };
59 std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
60 color_mask; // GL_COLOR_WRITEMASK
61
62 struct {
63 bool test_enabled = false; // GL_STENCIL_TEST
64 struct {
65 GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
66 GLint test_ref = 0; // GL_STENCIL_REF
67 GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
68 GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
69 GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
70 GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
71 GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
72 } front, back;
73 } stencil;
74
75 struct Blend {
76 bool enabled = false; // GL_BLEND
77 GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
78 GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
79 GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
80 GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
81 GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
82 GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
83 };
84 std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
85
86 struct {
87 bool enabled = false;
88 } independant_blend;
89
90 struct {
91 GLclampf red = 0.0f;
92 GLclampf green = 0.0f;
93 GLclampf blue = 0.0f;
94 GLclampf alpha = 0.0f;
95 } blend_color; // GL_BLEND_COLOR
96
97 struct {
98 bool enabled = false; // GL_LOGIC_OP_MODE
99 GLenum operation = GL_COPY;
100 } logic_op;
101
102 static constexpr std::size_t NumSamplers = 32 * 5;
103 static constexpr std::size_t NumImages = 8 * 5;
104 std::array<GLuint, NumSamplers> textures = {};
105 std::array<GLuint, NumSamplers> samplers = {};
106 std::array<GLuint, NumImages> images = {};
107
108 struct {
109 GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
110 GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
111 GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
112 GLuint shader_program = 0; // GL_CURRENT_PROGRAM
113 GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
114 } draw;
115
116 struct Viewport {
117 GLint x = 0;
118 GLint y = 0;
119 GLint width = 0;
120 GLint height = 0;
121 GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
122 GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
123 struct {
124 bool enabled = false; // GL_SCISSOR_TEST
125 GLint x = 0;
126 GLint y = 0;
127 GLsizei width = 0;
128 GLsizei height = 0;
129 } scissor;
130 };
131 std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
132
133 struct {
134 bool program_control = false; // GL_PROGRAM_POINT_SIZE
135 bool sprite = false; // GL_POINT_SPRITE
136 GLfloat size = 1.0f; // GL_POINT_SIZE
137 } point;
138
139 struct {
140 bool point_enable = false;
141 bool line_enable = false;
142 bool fill_enable = false;
143 GLfloat units = 0.0f;
144 GLfloat factor = 0.0f;
145 GLfloat clamp = 0.0f;
146 } polygon_offset;
147
148 struct {
149 bool enabled = false; // GL_ALPHA_TEST
150 GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
151 GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
152 } alpha_test;
153
154 std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
155
156 struct {
157 GLenum origin = GL_LOWER_LEFT;
158 GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
159 } clip_control;
160
161 GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING
162
163 OpenGLState();
164
165 /// Get the currently active OpenGL state
166 static OpenGLState GetCurState() {
167 return cur_state;
168 }
169
170 void SetDefaultViewports();
171 /// Apply this state as the current OpenGL state
172 void Apply();
173
174 void ApplyFramebufferState();
175 void ApplyVertexArrayState();
176 void ApplyShaderProgram();
177 void ApplyProgramPipeline();
178 void ApplyClipDistances();
179 void ApplyPointSize();
180 void ApplyFragmentColorClamp();
181 void ApplyMultisample();
182 void ApplySRgb();
183 void ApplyCulling();
184 void ApplyRasterizerDiscard();
185 void ApplyColorMask();
186 void ApplyDepth();
187 void ApplyPrimitiveRestart();
188 void ApplyStencilTest();
189 void ApplyViewport();
190 void ApplyTargetBlending(std::size_t target, bool force);
191 void ApplyGlobalBlending();
192 void ApplyBlending();
193 void ApplyLogicOp();
194 void ApplyTextures();
195 void ApplySamplers();
196 void ApplyImages();
197 void ApplyDepthClamp();
198 void ApplyPolygonOffset();
199 void ApplyAlphaTest();
200 void ApplyClipControl();
201 void ApplyRenderBuffer();
202
203 /// Resets any references to the given resource
204 OpenGLState& UnbindTexture(GLuint handle);
205 OpenGLState& ResetSampler(GLuint handle);
206 OpenGLState& ResetProgram(GLuint handle);
207 OpenGLState& ResetPipeline(GLuint handle);
208 OpenGLState& ResetVertexArray(GLuint handle);
209 OpenGLState& ResetFramebuffer(GLuint handle);
210 OpenGLState& ResetRenderbuffer(GLuint handle);
211
212 /// Viewport does not affect glClearBuffer, so emulate the viewport using the scissor test
213 void EmulateViewportWithScissor();
214
215 void MarkDirtyBlendState() {
216 dirty.blend_state = true;
217 }
218
219 void MarkDirtyStencilState() {
220 dirty.stencil_state = true;
221 }
222
223 void MarkDirtyPolygonOffset() {
224 dirty.polygon_offset = true;
225 }
226
227 void MarkDirtyColorMask() {
228 dirty.color_mask = true;
229 }
230
231 void AllDirty() {
232 dirty.blend_state = true;
233 dirty.stencil_state = true;
234 dirty.polygon_offset = true;
235 dirty.color_mask = true;
236 }
237
238private:
239 static OpenGLState cur_state;
240
241 struct {
242 bool blend_state;
243 bool stencil_state;
244 bool viewport_state;
245 bool polygon_offset;
246 bool color_mask;
247 } dirty{};
248};
249static_assert(std::is_trivially_copyable_v<OpenGLState>);
250
251} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
new file mode 100644
index 000000000..255ac3147
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -0,0 +1,247 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8
9#include "common/common_types.h"
10#include "core/core.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/gpu.h"
13#include "video_core/renderer_opengl/gl_state_tracker.h"
14
15#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
16#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
17
18namespace OpenGL {
19
20namespace {
21
22using namespace Dirty;
23using namespace VideoCommon::Dirty;
24using Tegra::Engines::Maxwell3D;
25using Regs = Maxwell3D::Regs;
26using Tables = Maxwell3D::DirtyState::Tables;
27using Table = Maxwell3D::DirtyState::Table;
28
29void SetupDirtyColorMasks(Tables& tables) {
30 tables[0][OFF(color_mask_common)] = ColorMaskCommon;
31 for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
32 const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
33 FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
34 }
35
36 FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
37}
38
39void SetupDirtyVertexArrays(Tables& tables) {
40 static constexpr std::size_t num_array = 3;
41 static constexpr std::size_t instance_base_offset = 3;
42 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
43 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
44 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
45
46 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
47 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
48
49 const std::size_t instance_array_offset = array_offset + instance_base_offset;
50 tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
51 tables[1][instance_array_offset] = VertexInstances;
52
53 const std::size_t instance_offset = OFF(instanced_arrays) + i;
54 tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
55 tables[1][instance_offset] = VertexInstances;
56 }
57}
58
59void SetupDirtyVertexFormat(Tables& tables) {
60 for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
61 const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
62 FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
63 }
64
65 FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
66}
67
68void SetupDirtyViewports(Tables& tables) {
69 for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
70 const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
71 const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
72
73 FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
74 FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
75 }
76
77 FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
78 FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
79
80 tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
81 tables[1][OFF(viewport_transform_enabled)] = Viewports;
82}
83
84void SetupDirtyScissors(Tables& tables) {
85 for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
86 const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
87 FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
88 }
89 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
90}
91
92void SetupDirtyShaders(Tables& tables) {
93 FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
94 Shaders);
95}
96
97void SetupDirtyPolygonModes(Tables& tables) {
98 tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
99 tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
100
101 tables[1][OFF(polygon_mode_front)] = PolygonModes;
102 tables[1][OFF(polygon_mode_back)] = PolygonModes;
103 tables[0][OFF(fill_rectangle)] = PolygonModes;
104}
105
106void SetupDirtyDepthTest(Tables& tables) {
107 auto& table = tables[0];
108 table[OFF(depth_test_enable)] = DepthTest;
109 table[OFF(depth_write_enabled)] = DepthMask;
110 table[OFF(depth_test_func)] = DepthTest;
111}
112
113void SetupDirtyStencilTest(Tables& tables) {
114 static constexpr std::array offsets = {
115 OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
116 OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
117 OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
118 OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
119 OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
120 OFF(stencil_back_mask)};
121 for (const auto offset : offsets) {
122 tables[0][offset] = StencilTest;
123 }
124}
125
126void SetupDirtyAlphaTest(Tables& tables) {
127 auto& table = tables[0];
128 table[OFF(alpha_test_ref)] = AlphaTest;
129 table[OFF(alpha_test_func)] = AlphaTest;
130 table[OFF(alpha_test_enabled)] = AlphaTest;
131}
132
133void SetupDirtyBlend(Tables& tables) {
134 FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
135
136 tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
137
138 for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
139 const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
140 FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
141
142 tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
143 }
144 FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
145 FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
146}
147
148void SetupDirtyPrimitiveRestart(Tables& tables) {
149 FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
150}
151
152void SetupDirtyPolygonOffset(Tables& tables) {
153 auto& table = tables[0];
154 table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
155 table[OFF(polygon_offset_line_enable)] = PolygonOffset;
156 table[OFF(polygon_offset_point_enable)] = PolygonOffset;
157 table[OFF(polygon_offset_factor)] = PolygonOffset;
158 table[OFF(polygon_offset_units)] = PolygonOffset;
159 table[OFF(polygon_offset_clamp)] = PolygonOffset;
160}
161
162void SetupDirtyMultisampleControl(Tables& tables) {
163 FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
164}
165
166void SetupDirtyRasterizeEnable(Tables& tables) {
167 tables[0][OFF(rasterize_enable)] = RasterizeEnable;
168}
169
170void SetupDirtyFramebufferSRGB(Tables& tables) {
171 tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
172}
173
174void SetupDirtyLogicOp(Tables& tables) {
175 FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
176}
177
178void SetupDirtyFragmentClampColor(Tables& tables) {
179 tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
180}
181
182void SetupDirtyPointSize(Tables& tables) {
183 tables[0][OFF(vp_point_size)] = PointSize;
184 tables[0][OFF(point_size)] = PointSize;
185 tables[0][OFF(point_sprite_enable)] = PointSize;
186}
187
188void SetupDirtyClipControl(Tables& tables) {
189 auto& table = tables[0];
190 table[OFF(screen_y_control)] = ClipControl;
191 table[OFF(depth_mode)] = ClipControl;
192}
193
194void SetupDirtyDepthClampEnabled(Tables& tables) {
195 tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
196}
197
198void SetupDirtyMisc(Tables& tables) {
199 auto& table = tables[0];
200
201 table[OFF(clip_distance_enabled)] = ClipDistances;
202
203 table[OFF(front_face)] = FrontFace;
204
205 table[OFF(cull_test_enabled)] = CullTest;
206 table[OFF(cull_face)] = CullTest;
207}
208
209} // Anonymous namespace
210
211StateTracker::StateTracker(Core::System& system) : system{system} {}
212
213void StateTracker::Initialize() {
214 auto& dirty = system.GPU().Maxwell3D().dirty;
215 auto& tables = dirty.tables;
216 SetupDirtyRenderTargets(tables);
217 SetupDirtyColorMasks(tables);
218 SetupDirtyViewports(tables);
219 SetupDirtyScissors(tables);
220 SetupDirtyVertexArrays(tables);
221 SetupDirtyVertexFormat(tables);
222 SetupDirtyShaders(tables);
223 SetupDirtyPolygonModes(tables);
224 SetupDirtyDepthTest(tables);
225 SetupDirtyStencilTest(tables);
226 SetupDirtyAlphaTest(tables);
227 SetupDirtyBlend(tables);
228 SetupDirtyPrimitiveRestart(tables);
229 SetupDirtyPolygonOffset(tables);
230 SetupDirtyMultisampleControl(tables);
231 SetupDirtyRasterizeEnable(tables);
232 SetupDirtyFramebufferSRGB(tables);
233 SetupDirtyLogicOp(tables);
234 SetupDirtyFragmentClampColor(tables);
235 SetupDirtyPointSize(tables);
236 SetupDirtyClipControl(tables);
237 SetupDirtyDepthClampEnabled(tables);
238 SetupDirtyMisc(tables);
239
240 auto& store = dirty.on_write_stores;
241 store[VertexBuffers] = true;
242 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
243 store[VertexBuffer0 + i] = true;
244 }
245}
246
247} // namespace OpenGL
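For orientation, a conceptual model of how these tables are expected to be consumed; this is an assumption, since Maxwell3D's register write path is not part of this hunk. Table 0 maps a register index to a fine-grained flag (Viewport0 + i, BlendState0 + i, ...), table 1 to the aggregate flag (Viewports, BlendStates, ...), and 0 is assumed to mean "untracked":

// Conceptual sketch only; the real lookup lives in Maxwell3D, not in this file.
void MarkDirtyOnWrite(Tegra::Engines::Maxwell3D::DirtyState& dirty, std::size_t method) {
    for (const auto& table : dirty.tables) {
        if (const u8 flag = table[method]; flag != 0) {
            dirty.flags[flag] = true;
        }
    }
}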
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
new file mode 100644
index 000000000..b882d75c3
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -0,0 +1,215 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <limits>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "core/core.h"
13#include "video_core/dirty_flags.h"
14#include "video_core/engines/maxwell_3d.h"
15
16namespace Core {
17class System;
18}
19
20namespace OpenGL {
21
22namespace Dirty {
23
24enum : u8 {
25 First = VideoCommon::Dirty::LastCommonEntry,
26
27 VertexFormats,
28 VertexFormat0,
29 VertexFormat31 = VertexFormat0 + 31,
30
31 VertexBuffers,
32 VertexBuffer0,
33 VertexBuffer31 = VertexBuffer0 + 31,
34
35 VertexInstances,
36 VertexInstance0,
37 VertexInstance31 = VertexInstance0 + 31,
38
39 ViewportTransform,
40 Viewports,
41 Viewport0,
42 Viewport15 = Viewport0 + 15,
43
44 Scissors,
45 Scissor0,
46 Scissor15 = Scissor0 + 15,
47
48 ColorMaskCommon,
49 ColorMasks,
50 ColorMask0,
51 ColorMask7 = ColorMask0 + 7,
52
53 BlendColor,
54 BlendIndependentEnabled,
55 BlendStates,
56 BlendState0,
57 BlendState7 = BlendState0 + 7,
58
59 Shaders,
60 ClipDistances,
61
62 PolygonModes,
63 PolygonModeFront,
64 PolygonModeBack,
65
66 ColorMask,
67 FrontFace,
68 CullTest,
69 DepthMask,
70 DepthTest,
71 StencilTest,
72 AlphaTest,
73 PrimitiveRestart,
74 PolygonOffset,
75 MultisampleControl,
76 RasterizeEnable,
77 FramebufferSRGB,
78 LogicOp,
79 FragmentClampColor,
80 PointSize,
81 ClipControl,
82 DepthClampEnabled,
83
84 Last
85};
86static_assert(Last <= std::numeric_limits<u8>::max());
87
88} // namespace Dirty
89
90class StateTracker {
91public:
92 explicit StateTracker(Core::System& system);
93
94 void Initialize();
95
96 void BindIndexBuffer(GLuint new_index_buffer) {
97 if (index_buffer == new_index_buffer) {
98 return;
99 }
100 index_buffer = new_index_buffer;
101 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
102 }
103
104 void NotifyScreenDrawVertexArray() {
105 auto& flags = system.GPU().Maxwell3D().dirty.flags;
106 flags[OpenGL::Dirty::VertexFormats] = true;
107 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
108 flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
109
110 flags[OpenGL::Dirty::VertexBuffers] = true;
111 flags[OpenGL::Dirty::VertexBuffer0] = true;
112
113 flags[OpenGL::Dirty::VertexInstances] = true;
114 flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
115 flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
116 }
117
118 void NotifyPolygonModes() {
119 auto& flags = system.GPU().Maxwell3D().dirty.flags;
120 flags[OpenGL::Dirty::PolygonModes] = true;
121 flags[OpenGL::Dirty::PolygonModeFront] = true;
122 flags[OpenGL::Dirty::PolygonModeBack] = true;
123 }
124
125 void NotifyViewport0() {
126 auto& flags = system.GPU().Maxwell3D().dirty.flags;
127 flags[OpenGL::Dirty::Viewports] = true;
128 flags[OpenGL::Dirty::Viewport0] = true;
129 }
130
131 void NotifyScissor0() {
132 auto& flags = system.GPU().Maxwell3D().dirty.flags;
133 flags[OpenGL::Dirty::Scissors] = true;
134 flags[OpenGL::Dirty::Scissor0] = true;
135 }
136
137 void NotifyColorMask0() {
138 auto& flags = system.GPU().Maxwell3D().dirty.flags;
139 flags[OpenGL::Dirty::ColorMasks] = true;
140 flags[OpenGL::Dirty::ColorMask0] = true;
141 }
142
143 void NotifyBlend0() {
144 auto& flags = system.GPU().Maxwell3D().dirty.flags;
145 flags[OpenGL::Dirty::BlendStates] = true;
146 flags[OpenGL::Dirty::BlendState0] = true;
147 }
148
149 void NotifyFramebuffer() {
150 auto& flags = system.GPU().Maxwell3D().dirty.flags;
151 flags[VideoCommon::Dirty::RenderTargets] = true;
152 }
153
154 void NotifyFrontFace() {
155 auto& flags = system.GPU().Maxwell3D().dirty.flags;
156 flags[OpenGL::Dirty::FrontFace] = true;
157 }
158
159 void NotifyCullTest() {
160 auto& flags = system.GPU().Maxwell3D().dirty.flags;
161 flags[OpenGL::Dirty::CullTest] = true;
162 }
163
164 void NotifyDepthMask() {
165 auto& flags = system.GPU().Maxwell3D().dirty.flags;
166 flags[OpenGL::Dirty::DepthMask] = true;
167 }
168
169 void NotifyDepthTest() {
170 auto& flags = system.GPU().Maxwell3D().dirty.flags;
171 flags[OpenGL::Dirty::DepthTest] = true;
172 }
173
174 void NotifyStencilTest() {
175 auto& flags = system.GPU().Maxwell3D().dirty.flags;
176 flags[OpenGL::Dirty::StencilTest] = true;
177 }
178
179 void NotifyPolygonOffset() {
180 auto& flags = system.GPU().Maxwell3D().dirty.flags;
181 flags[OpenGL::Dirty::PolygonOffset] = true;
182 }
183
184 void NotifyRasterizeEnable() {
185 auto& flags = system.GPU().Maxwell3D().dirty.flags;
186 flags[OpenGL::Dirty::RasterizeEnable] = true;
187 }
188
189 void NotifyFramebufferSRGB() {
190 auto& flags = system.GPU().Maxwell3D().dirty.flags;
191 flags[OpenGL::Dirty::FramebufferSRGB] = true;
192 }
193
194 void NotifyLogicOp() {
195 auto& flags = system.GPU().Maxwell3D().dirty.flags;
196 flags[OpenGL::Dirty::LogicOp] = true;
197 }
198
199 void NotifyClipControl() {
200 auto& flags = system.GPU().Maxwell3D().dirty.flags;
201 flags[OpenGL::Dirty::ClipControl] = true;
202 }
203
204 void NotifyAlphaTest() {
205 auto& flags = system.GPU().Maxwell3D().dirty.flags;
206 flags[OpenGL::Dirty::AlphaTest] = true;
207 }
208
209private:
210 Core::System& system;
211
212 GLuint index_buffer = 0;
213};
214
215} // namespace OpenGL
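A short usage sketch, mirroring the pattern TextureCacheOpenGL::ImageBlit adopts later in this patch: code that bypasses the tracked path and touches GL state directly first notifies the tracker, so the next draw re-synchronizes that state from the guest registers. The helper below is hypothetical:

void BindBlitFramebuffers(OpenGL::StateTracker& state_tracker, GLuint read_fb, GLuint draw_fb) {
    // Mark the state we are about to clobber as dirty so the rasterizer
    // restores it from Maxwell3D registers before the next draw.
    state_tracker.NotifyFramebuffer();
    state_tracker.NotifyScissor0();
    state_tracker.NotifyRasterizeEnable();

    glDisablei(GL_SCISSOR_TEST, 0);
    glDisable(GL_RASTERIZER_DISCARD);
    glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fb);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_fb);
    // ... glBlitFramebuffer(...) ...
}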
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 35ba334e4..6ec328c53 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -7,7 +7,6 @@
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/microprofile.h" 9#include "common/microprofile.h"
10#include "video_core/renderer_opengl/gl_state.h"
11#include "video_core/renderer_opengl/gl_stream_buffer.h" 10#include "video_core/renderer_opengl/gl_stream_buffer.h"
12 11
13MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 12MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index cf934b0d8..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -10,7 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "video_core/morton.h" 11#include "video_core/morton.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_state_tracker.h"
14#include "video_core/renderer_opengl/gl_texture_cache.h" 14#include "video_core/renderer_opengl/gl_texture_cache.h"
15#include "video_core/renderer_opengl/utils.h" 15#include "video_core/renderer_opengl/utils.h"
16#include "video_core/texture_cache/surface_base.h" 16#include "video_core/texture_cache/surface_base.h"
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI 53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U 55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
56 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
56 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI 57 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
57 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F 58 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
58 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI 59 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
@@ -397,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
397 const bool is_proxy) 398 const bool is_proxy)
398 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { 399 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
399 target = GetTextureTarget(params.target); 400 target = GetTextureTarget(params.target);
401 format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
400 if (!is_proxy) { 402 if (!is_proxy) {
401 texture_view = CreateTextureView(); 403 texture_view = CreateTextureView();
402 } 404 }
@@ -467,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
467} 469}
468 470
469OGLTextureView CachedSurfaceView::CreateTextureView() const { 471OGLTextureView CachedSurfaceView::CreateTextureView() const {
470 const auto& owner_params = surface.GetSurfaceParams();
471 OGLTextureView texture_view; 472 OGLTextureView texture_view;
472 texture_view.Create(); 473 texture_view.Create();
473 474
474 const GLuint handle{texture_view.handle}; 475 glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
475 const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
476
477 glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
478 params.num_levels, params.base_layer, params.num_layers); 476 params.num_levels, params.base_layer, params.num_layers);
479 477 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
480 ApplyTextureDefaults(owner_params, handle);
481 478
482 return texture_view; 479 return texture_view;
483} 480}
484 481
485TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, 482TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
486 VideoCore::RasterizerInterface& rasterizer, 483 VideoCore::RasterizerInterface& rasterizer,
487 const Device& device) 484 const Device& device, StateTracker& state_tracker)
488 : TextureCacheBase{system, rasterizer} { 485 : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
489 src_framebuffer.Create(); 486 src_framebuffer.Create();
490 dst_framebuffer.Create(); 487 dst_framebuffer.Create();
491} 488}
@@ -519,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
519 const Tegra::Engines::Fermi2D::Config& copy_config) { 516 const Tegra::Engines::Fermi2D::Config& copy_config) {
520 const auto& src_params{src_view->GetSurfaceParams()}; 517 const auto& src_params{src_view->GetSurfaceParams()};
521 const auto& dst_params{dst_view->GetSurfaceParams()}; 518 const auto& dst_params{dst_view->GetSurfaceParams()};
519 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
520 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
522 521
523 OpenGLState prev_state{OpenGLState::GetCurState()}; 522 state_tracker.NotifyScissor0();
524 SCOPE_EXIT({ 523 state_tracker.NotifyFramebuffer();
525 prev_state.AllDirty(); 524 state_tracker.NotifyRasterizeEnable();
526 prev_state.Apply(); 525 state_tracker.NotifyFramebufferSRGB();
527 });
528
529 OpenGLState state;
530 state.draw.read_framebuffer = src_framebuffer.handle;
531 state.draw.draw_framebuffer = dst_framebuffer.handle;
532 state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
533 state.AllDirty();
534 state.Apply();
535 526
536 u32 buffers{}; 527 if (dst_params.srgb_conversion) {
528 glEnable(GL_FRAMEBUFFER_SRGB);
529 } else {
530 glDisable(GL_FRAMEBUFFER_SRGB);
531 }
532 glDisable(GL_RASTERIZER_DISCARD);
533 glDisablei(GL_SCISSOR_TEST, 0);
537 534
538 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); 535 glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
539 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); 536 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
540 537
538 GLenum buffers = 0;
541 if (src_params.type == SurfaceType::ColorTexture) { 539 if (src_params.type == SurfaceType::ColorTexture) {
542 src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); 540 src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
543 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 541 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8e13ab38b..6658c6ffd 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -27,6 +27,7 @@ using VideoCommon::ViewParams;
27class CachedSurfaceView; 27class CachedSurfaceView;
28class CachedSurface; 28class CachedSurface;
29class TextureCacheOpenGL; 29class TextureCacheOpenGL;
30class StateTracker;
30 31
31using Surface = std::shared_ptr<CachedSurface>; 32using Surface = std::shared_ptr<CachedSurface>;
32using View = std::shared_ptr<CachedSurfaceView>; 33using View = std::shared_ptr<CachedSurfaceView>;
@@ -96,6 +97,10 @@ public:
96 return texture_view.handle; 97 return texture_view.handle;
97 } 98 }
98 99
100 GLenum GetFormat() const {
101 return format;
102 }
103
99 const SurfaceParams& GetSurfaceParams() const { 104 const SurfaceParams& GetSurfaceParams() const {
100 return surface.GetSurfaceParams(); 105 return surface.GetSurfaceParams();
101 } 106 }
@@ -113,6 +118,7 @@ private:
113 118
114 CachedSurface& surface; 119 CachedSurface& surface;
115 GLenum target{}; 120 GLenum target{};
121 GLenum format{};
116 122
117 OGLTextureView texture_view; 123 OGLTextureView texture_view;
118 u32 swizzle{}; 124 u32 swizzle{};
@@ -122,7 +128,7 @@ private:
122class TextureCacheOpenGL final : public TextureCacheBase { 128class TextureCacheOpenGL final : public TextureCacheBase {
123public: 129public:
124 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 130 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
125 const Device& device); 131 const Device& device, StateTracker& state_tracker);
126 ~TextureCacheOpenGL(); 132 ~TextureCacheOpenGL();
127 133
128protected: 134protected:
@@ -139,6 +145,8 @@ protected:
139private: 145private:
140 GLuint FetchPBO(std::size_t buffer_size); 146 GLuint FetchPBO(std::size_t buffer_size);
141 147
148 StateTracker& state_tracker;
149
142 OGLFramebuffer src_framebuffer; 150 OGLFramebuffer src_framebuffer;
143 OGLFramebuffer dst_framebuffer; 151 OGLFramebuffer dst_framebuffer;
144 std::unordered_map<u32, OGLBuffer> copy_pbo_cache; 152 std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7ed505628..89f0e04ef 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
92 } 92 }
93 case Maxwell::VertexAttribute::Type::UnsignedScaled: 93 case Maxwell::VertexAttribute::Type::UnsignedScaled:
94 switch (attrib.size) { 94 switch (attrib.size) {
95 case Maxwell::VertexAttribute::Size::Size_8:
95 case Maxwell::VertexAttribute::Size::Size_8_8: 96 case Maxwell::VertexAttribute::Size::Size_8_8:
97 case Maxwell::VertexAttribute::Size::Size_8_8_8:
98 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
96 return GL_UNSIGNED_BYTE; 99 return GL_UNSIGNED_BYTE;
100 case Maxwell::VertexAttribute::Size::Size_16:
101 case Maxwell::VertexAttribute::Size::Size_16_16:
102 case Maxwell::VertexAttribute::Size::Size_16_16_16:
103 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
104 return GL_UNSIGNED_SHORT;
105 default:
106 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
107 return {};
108 }
109 case Maxwell::VertexAttribute::Type::SignedScaled:
110 switch (attrib.size) {
111 case Maxwell::VertexAttribute::Size::Size_8:
112 case Maxwell::VertexAttribute::Size::Size_8_8:
113 case Maxwell::VertexAttribute::Size::Size_8_8_8:
114 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
115 return GL_BYTE;
116 case Maxwell::VertexAttribute::Size::Size_16:
117 case Maxwell::VertexAttribute::Size::Size_16_16:
118 case Maxwell::VertexAttribute::Size::Size_16_16_16:
119 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
120 return GL_SHORT;
97 default: 121 default:
98 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); 122 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
99 return {}; 123 return {};
@@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
401 return GL_KEEP; 425 return GL_KEEP;
402} 426}
403 427
404inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { 428inline GLenum FrontFace(Maxwell::FrontFace front_face) {
405 switch (front_face) { 429 switch (front_face) {
406 case Maxwell::Cull::FrontFace::ClockWise: 430 case Maxwell::FrontFace::ClockWise:
407 return GL_CW; 431 return GL_CW;
408 case Maxwell::Cull::FrontFace::CounterClockWise: 432 case Maxwell::FrontFace::CounterClockWise:
409 return GL_CCW; 433 return GL_CCW;
410 } 434 }
411 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); 435 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
412 return GL_CCW; 436 return GL_CCW;
413} 437}
414 438
415inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { 439inline GLenum CullFace(Maxwell::CullFace cull_face) {
416 switch (cull_face) { 440 switch (cull_face) {
417 case Maxwell::Cull::CullFace::Front: 441 case Maxwell::CullFace::Front:
418 return GL_FRONT; 442 return GL_FRONT;
419 case Maxwell::Cull::CullFace::Back: 443 case Maxwell::CullFace::Back:
420 return GL_BACK; 444 return GL_BACK;
421 case Maxwell::Cull::CullFace::FrontAndBack: 445 case Maxwell::CullFace::FrontAndBack:
422 return GL_FRONT_AND_BACK; 446 return GL_FRONT_AND_BACK;
423 } 447 }
424 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); 448 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
@@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
464 return GL_COPY; 488 return GL_COPY;
465} 489}
466 490
491inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
492 switch (polygon_mode) {
493 case Maxwell::PolygonMode::Point:
494 return GL_POINT;
495 case Maxwell::PolygonMode::Line:
496 return GL_LINE;
497 case Maxwell::PolygonMode::Fill:
498 return GL_FILL;
499 }
500 UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
501 return GL_FILL;
502}
503
467} // namespace MaxwellToGL 504} // namespace MaxwellToGL
468} // namespace OpenGL 505} // namespace OpenGL
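The maxwell_to_gl.h changes fill in the 8/16-bit scaled vertex sizes, move FrontFace and CullFace out of the old Cull register struct, and add a PolygonMode helper. A hedged sketch of how a rasterizer sync pass might consume these helpers; regs.cull_test_enabled, regs.cull_face and regs.front_face appear elsewhere in this patch, while polygon_mode_front is assumed here for illustration:

    // Sketch only: syncing fixed-function cull/polygon state from Maxwell registers.
    void SyncCullAndPolygonMode(const Maxwell& regs) {
        if (regs.cull_test_enabled) {
            glEnable(GL_CULL_FACE);
            glCullFace(MaxwellToGL::CullFace(regs.cull_face));
        } else {
            glDisable(GL_CULL_FACE);
        }
        glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
        // polygon_mode_front is a placeholder name; the patch only shows the
        // MaxwellToGL::PolygonMode translation itself.
        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
    }
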
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a4340b502..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,8 +5,11 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <cstring>
8#include <memory> 9#include <memory>
10
9#include <glad/glad.h> 11#include <glad/glad.h>
12
10#include "common/assert.h" 13#include "common/assert.h"
11#include "common/logging/log.h" 14#include "common/logging/log.h"
12#include "common/microprofile.h" 15#include "common/microprofile.h"
@@ -20,10 +23,13 @@
20#include "core/telemetry_session.h" 23#include "core/telemetry_session.h"
21#include "video_core/morton.h" 24#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 25#include "video_core/renderer_opengl/gl_rasterizer.h"
26#include "video_core/renderer_opengl/gl_shader_manager.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 27#include "video_core/renderer_opengl/renderer_opengl.h"
24 28
25namespace OpenGL { 29namespace OpenGL {
26 30
31namespace {
32
27// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have 33// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
28// to wait on available presentation frames. 34// to wait on available presentation frames.
29constexpr std::size_t SWAP_CHAIN_SIZE = 3; 35constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -40,133 +46,13 @@ struct Frame {
40 bool is_srgb{}; /// Framebuffer is sRGB or RGB 46 bool is_srgb{}; /// Framebuffer is sRGB or RGB
41}; 47};
42 48
43/** 49constexpr char VERTEX_SHADER[] = R"(
44 * For smooth Vsync rendering, we want to always present the latest frame that the core generates, 50#version 430 core
45 * but also make sure that rendering happens at the pace that the frontend dictates. This is a
46 * helper class that the renderer uses to sync frames between the render thread and the presentation
47 * thread
48 */
49class FrameMailbox {
50public:
51 std::mutex swap_chain_lock;
52 std::condition_variable present_cv;
53 std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
54 std::queue<Frame*> free_queue;
55 std::deque<Frame*> present_queue;
56 Frame* previous_frame{};
57
58 FrameMailbox() {
59 for (auto& frame : swap_chain) {
60 free_queue.push(&frame);
61 }
62 }
63
64 ~FrameMailbox() {
65 // lock the mutex and clear out the present and free_queues and notify any people who are
66 // blocked to prevent deadlock on shutdown
67 std::scoped_lock lock{swap_chain_lock};
68 std::queue<Frame*>().swap(free_queue);
69 present_queue.clear();
70 present_cv.notify_all();
71 }
72
73 void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
74 frame->present.Release();
75 frame->present.Create();
76 GLint previous_draw_fbo{};
77 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
78 glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
79 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
80 frame->color.handle);
81 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
82 LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
83 }
84 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
85 frame->color_reloaded = false;
86 }
87
88 void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
89 OpenGLState prev_state = OpenGLState::GetCurState();
90 OpenGLState state = OpenGLState::GetCurState();
91
92 // Recreate the color texture attachment
93 frame->color.Release();
94 frame->color.Create();
95 state.renderbuffer = frame->color.handle;
96 state.Apply();
97 glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height);
98
99 // Recreate the FBO for the render target
100 frame->render.Release();
101 frame->render.Create();
102 state.draw.read_framebuffer = frame->render.handle;
103 state.draw.draw_framebuffer = frame->render.handle;
104 state.Apply();
105 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
106 frame->color.handle);
107 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
108 LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
109 }
110 prev_state.Apply();
111 frame->width = width;
112 frame->height = height;
113 frame->color_reloaded = true;
114 }
115
116 Frame* GetRenderFrame() {
117 std::unique_lock lock{swap_chain_lock};
118
119 // If theres no free frames, we will reuse the oldest render frame
120 if (free_queue.empty()) {
121 auto frame = present_queue.back();
122 present_queue.pop_back();
123 return frame;
124 }
125
126 Frame* frame = free_queue.front();
127 free_queue.pop();
128 return frame;
129 }
130
131 void ReleaseRenderFrame(Frame* frame) {
132 std::unique_lock lock{swap_chain_lock};
133 present_queue.push_front(frame);
134 present_cv.notify_one();
135 }
136
137 Frame* TryGetPresentFrame(int timeout_ms) {
138 std::unique_lock lock{swap_chain_lock};
139 // wait for new entries in the present_queue
140 present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
141 [&] { return !present_queue.empty(); });
142 if (present_queue.empty()) {
143 // timed out waiting for a frame to draw so return the previous frame
144 return previous_frame;
145 }
146
147 // free the previous frame and add it back to the free queue
148 if (previous_frame) {
149 free_queue.push(previous_frame);
150 }
151 51
152 // the newest entries are pushed to the front of the queue 52out gl_PerVertex {
153 Frame* frame = present_queue.front(); 53 vec4 gl_Position;
154 present_queue.pop_front();
155 // remove all old entries from the present queue and move them back to the free_queue
156 for (auto f : present_queue) {
157 free_queue.push(f);
158 }
159 present_queue.clear();
160 previous_frame = frame;
161 return frame;
162 }
163}; 54};
164 55
165namespace {
166
167constexpr char vertex_shader[] = R"(
168#version 430 core
169
170layout (location = 0) in vec2 vert_position; 56layout (location = 0) in vec2 vert_position;
171layout (location = 1) in vec2 vert_tex_coord; 57layout (location = 1) in vec2 vert_tex_coord;
172layout (location = 0) out vec2 frag_tex_coord; 58layout (location = 0) out vec2 frag_tex_coord;
@@ -187,7 +73,7 @@ void main() {
187} 73}
188)"; 74)";
189 75
190constexpr char fragment_shader[] = R"( 76constexpr char FRAGMENT_SHADER[] = R"(
191#version 430 core 77#version 430 core
192 78
193layout (location = 0) in vec2 frag_tex_coord; 79layout (location = 0) in vec2 frag_tex_coord;
@@ -196,7 +82,7 @@ layout (location = 0) out vec4 color;
196layout (binding = 0) uniform sampler2D color_texture; 82layout (binding = 0) uniform sampler2D color_texture;
197 83
198void main() { 84void main() {
199 color = texture(color_texture, frag_tex_coord); 85 color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
200} 86}
201)"; 87)";
202 88
@@ -205,13 +91,31 @@ constexpr GLint TexCoordLocation = 1;
205constexpr GLint ModelViewMatrixLocation = 0; 91constexpr GLint ModelViewMatrixLocation = 0;
206 92
207struct ScreenRectVertex { 93struct ScreenRectVertex {
208 constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) 94 constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
209 : position{{x, y}}, tex_coord{{u, v}} {} 95 : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
210 96
211 std::array<GLfloat, 2> position; 97 std::array<GLfloat, 2> position;
212 std::array<GLfloat, 2> tex_coord; 98 std::array<GLfloat, 2> tex_coord;
213}; 99};
214 100
101/// Returns true if any debug tool is attached
102bool HasDebugTool() {
103 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
104 if (nsight) {
105 return true;
106 }
107
108 GLint num_extensions;
109 glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
110 for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
111 const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
112 if (!std::strcmp(name, "GL_EXT_debug_tool")) {
113 return true;
114 }
115 }
116 return false;
117}
118
215/** 119/**
216 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left 120 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
217 * corner and (width, height) on the lower-bottom. 121 * corner and (width, height) on the lower-bottom.
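The comment in the trailing context describes the projection used to present the screen quad. As a worked sketch (not necessarily the patch's exact implementation), a 1:1 pixel orthographic mapping with (0,0) at the top-left reduces to x' = 2x/w - 1 and y' = 1 - 2y/h, packed as the column-major 3x2 matrix that glProgramUniformMatrix3x2fv consumes with transpose = GL_FALSE:

    // Sketch of such a matrix; column-major, 3 columns x 2 rows.
    std::array<GLfloat, 6> MakeOrthographicMatrixSketch(float width, float height) {
        return {2.0f / width, 0.0f,            // column 0: scales x into [-1, 1]
                0.0f,         -2.0f / height,  // column 1: scales and flips y
                -1.0f,        1.0f};           // column 2: translation to top-left origin
    }
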
@@ -295,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
295 199
296} // Anonymous namespace 200} // Anonymous namespace
297 201
202/**
203 * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
204 * but also make sure that rendering happens at the pace that the frontend dictates. This is a
205 * helper class that the renderer uses to sync frames between the render thread and the presentation
206 * thread
207 */
208class FrameMailbox {
209public:
210 std::mutex swap_chain_lock;
211 std::condition_variable present_cv;
212 std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
213 std::queue<Frame*> free_queue;
214 std::deque<Frame*> present_queue;
215 Frame* previous_frame{};
216
217 FrameMailbox() : has_debug_tool{HasDebugTool()} {
218 for (auto& frame : swap_chain) {
219 free_queue.push(&frame);
220 }
221 }
222
223 ~FrameMailbox() {
224 // lock the mutex and clear out the present and free_queues and notify any people who are
225 // blocked to prevent deadlock on shutdown
226 std::scoped_lock lock{swap_chain_lock};
227 std::queue<Frame*>().swap(free_queue);
228 present_queue.clear();
229 present_cv.notify_all();
230 }
231
232 void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
233 frame->present.Release();
234 frame->present.Create();
235 GLint previous_draw_fbo{};
236 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
237 glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
238 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
239 frame->color.handle);
240 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
241 LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
242 }
243 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
244 frame->color_reloaded = false;
245 }
246
247 void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
248 // Recreate the color texture attachment
249 frame->color.Release();
250 frame->color.Create();
251 const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
252 glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
253
254 // Recreate the FBO for the render target
255 frame->render.Release();
256 frame->render.Create();
257 glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
258 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
259 frame->color.handle);
260 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
261 LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
262 }
263
264 frame->width = width;
265 frame->height = height;
266 frame->color_reloaded = true;
267 }
268
269 Frame* GetRenderFrame() {
270 std::unique_lock lock{swap_chain_lock};
271
272 // If theres no free frames, we will reuse the oldest render frame
273 if (free_queue.empty()) {
274 auto frame = present_queue.back();
275 present_queue.pop_back();
276 return frame;
277 }
278
279 Frame* frame = free_queue.front();
280 free_queue.pop();
281 return frame;
282 }
283
284 void ReleaseRenderFrame(Frame* frame) {
285 std::unique_lock lock{swap_chain_lock};
286 present_queue.push_front(frame);
287 present_cv.notify_one();
288
289 DebugNotifyNextFrame();
290 }
291
292 Frame* TryGetPresentFrame(int timeout_ms) {
293 DebugWaitForNextFrame();
294
295 std::unique_lock lock{swap_chain_lock};
296 // wait for new entries in the present_queue
297 present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
298 [&] { return !present_queue.empty(); });
299 if (present_queue.empty()) {
300 // timed out waiting for a frame to draw so return the previous frame
301 return previous_frame;
302 }
303
304 // free the previous frame and add it back to the free queue
305 if (previous_frame) {
306 free_queue.push(previous_frame);
307 }
308
309 // the newest entries are pushed to the front of the queue
310 Frame* frame = present_queue.front();
311 present_queue.pop_front();
312 // remove all old entries from the present queue and move them back to the free_queue
313 for (auto f : present_queue) {
314 free_queue.push(f);
315 }
316 present_queue.clear();
317 previous_frame = frame;
318 return frame;
319 }
320
321private:
322 std::mutex debug_synch_mutex;
323 std::condition_variable debug_synch_condition;
324 std::atomic_int frame_for_debug{};
325 const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
326
327 /// Signal that a new frame is available (called from GPU thread)
328 void DebugNotifyNextFrame() {
329 if (!has_debug_tool) {
330 return;
331 }
332 frame_for_debug++;
333 std::lock_guard lock{debug_synch_mutex};
334 debug_synch_condition.notify_one();
335 }
336
337 /// Wait for a new frame to be available (called from presentation thread)
338 void DebugWaitForNextFrame() {
339 if (!has_debug_tool) {
340 return;
341 }
342 const int last_frame = frame_for_debug;
343 std::unique_lock lock{debug_synch_mutex};
344 debug_synch_condition.wait(lock,
345 [this, last_frame] { return frame_for_debug > last_frame; });
346 }
347};
348
298RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) 349RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
299 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, 350 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
300 frame_mailbox{std::make_unique<FrameMailbox>()} {} 351 frame_mailbox{std::make_unique<FrameMailbox>()} {}
@@ -311,11 +362,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
311 return; 362 return;
312 } 363 }
313 364
314 // Maintain the rasterizer's state as a priority
315 OpenGLState prev_state = OpenGLState::GetCurState();
316 state.AllDirty();
317 state.Apply();
318
319 PrepareRendertarget(framebuffer); 365 PrepareRendertarget(framebuffer);
320 RenderScreenshot(); 366 RenderScreenshot();
321 367
@@ -358,8 +404,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
358 frame->is_srgb = screen_info.display_srgb; 404 frame->is_srgb = screen_info.display_srgb;
359 frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); 405 frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
360 } 406 }
361 state.draw.draw_framebuffer = frame->render.handle; 407 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
362 state.Apply();
363 DrawScreen(layout); 408 DrawScreen(layout);
364 // Create a fence for the frontend to wait on and swap this frame to OffTex 409 // Create a fence for the frontend to wait on and swap this frame to OffTex
365 frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); 410 frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
@@ -368,10 +413,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
368 m_current_frame++; 413 m_current_frame++;
369 rasterizer->TickFrame(); 414 rasterizer->TickFrame();
370 } 415 }
371
372 // Restore the rasterizer state
373 prev_state.AllDirty();
374 prev_state.Apply();
375} 416}
376 417
377void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { 418void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@@ -442,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() {
442 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 483 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
443 0.0f); 484 0.0f);
444 485
445 // Link shaders and get variable locations 486 // Create shader programs
446 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 487 OGLShader vertex_shader;
447 state.draw.shader_program = shader.handle; 488 vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
448 state.AllDirty(); 489
449 state.Apply(); 490 OGLShader fragment_shader;
491 fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
492
493 vertex_program.Create(true, false, vertex_shader.handle);
494 fragment_program.Create(true, false, fragment_shader.handle);
495
496 // Create program pipeline
497 program_manager.Create();
450 498
451 // Generate VBO handle for drawing 499 // Generate VBO handle for drawing
452 vertex_buffer.Create(); 500 vertex_buffer.Create();
453 501
454 // Generate VAO
455 vertex_array.Create();
456 state.draw.vertex_array = vertex_array.handle;
457
458 // Attach vertex data to VAO 502 // Attach vertex data to VAO
459 glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); 503 glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
460 glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
461 offsetof(ScreenRectVertex, position));
462 glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
463 offsetof(ScreenRectVertex, tex_coord));
464 glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
465 glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
466 glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
467 glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
468 glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
469 sizeof(ScreenRectVertex));
470 504
471 // Allocate textures for the screen 505 // Allocate textures for the screen
472 screen_info.texture.resource.Create(GL_TEXTURE_2D); 506 screen_info.texture.resource.Create(GL_TEXTURE_2D);
@@ -499,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() {
499 if (rasterizer) { 533 if (rasterizer) {
500 return; 534 return;
501 } 535 }
502 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 536 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
537 program_manager, state_tracker);
503} 538}
504 539
505void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 540void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -538,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
538 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); 573 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
539} 574}
540 575
541void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, 576void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
542 float h) { 577 if (renderer_settings.set_background_color) {
578 // Update background color before drawing
579 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
580 0.0f);
581 }
582
583 // Set projection matrix
584 const std::array ortho_matrix =
585 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
586 glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
587 std::data(ortho_matrix));
588
543 const auto& texcoords = screen_info.display_texcoords; 589 const auto& texcoords = screen_info.display_texcoords;
544 auto left = texcoords.left; 590 auto left = texcoords.left;
545 auto right = texcoords.right; 591 auto right = texcoords.right;
@@ -571,46 +617,79 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
571 static_cast<f32>(screen_info.texture.height); 617 static_cast<f32>(screen_info.texture.height);
572 } 618 }
573 619
620 const auto& screen = layout.screen;
574 const std::array vertices = { 621 const std::array vertices = {
575 ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), 622 ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
576 ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), 623 ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
577 ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), 624 ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
578 ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), 625 ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
579 }; 626 };
580
581 state.textures[0] = screen_info.display_texture;
582 state.framebuffer_srgb.enabled = screen_info.display_srgb;
583 state.AllDirty();
584 state.Apply();
585 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); 627 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
586 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
587 // Restore default state
588 state.framebuffer_srgb.enabled = false;
589 state.textures[0] = 0;
590 state.AllDirty();
591 state.Apply();
592}
593 628
594void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { 629 // TODO: Signal state tracker about these changes
595 if (renderer_settings.set_background_color) { 630 state_tracker.NotifyScreenDrawVertexArray();
596 // Update background color before drawing 631 state_tracker.NotifyPolygonModes();
597 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 632 state_tracker.NotifyViewport0();
598 0.0f); 633 state_tracker.NotifyScissor0();
634 state_tracker.NotifyColorMask0();
635 state_tracker.NotifyBlend0();
636 state_tracker.NotifyFramebuffer();
637 state_tracker.NotifyFrontFace();
638 state_tracker.NotifyCullTest();
639 state_tracker.NotifyDepthTest();
640 state_tracker.NotifyStencilTest();
641 state_tracker.NotifyPolygonOffset();
642 state_tracker.NotifyRasterizeEnable();
643 state_tracker.NotifyFramebufferSRGB();
644 state_tracker.NotifyLogicOp();
645 state_tracker.NotifyClipControl();
646 state_tracker.NotifyAlphaTest();
647
648 program_manager.UseVertexShader(vertex_program.handle);
649 program_manager.UseGeometryShader(0);
650 program_manager.UseFragmentShader(fragment_program.handle);
651 program_manager.BindGraphicsPipeline();
652
653 glEnable(GL_CULL_FACE);
654 if (screen_info.display_srgb) {
655 glEnable(GL_FRAMEBUFFER_SRGB);
656 } else {
657 glDisable(GL_FRAMEBUFFER_SRGB);
599 } 658 }
659 glDisable(GL_COLOR_LOGIC_OP);
660 glDisable(GL_DEPTH_TEST);
661 glDisable(GL_STENCIL_TEST);
662 glDisable(GL_POLYGON_OFFSET_FILL);
663 glDisable(GL_RASTERIZER_DISCARD);
664 glDisable(GL_ALPHA_TEST);
665 glDisablei(GL_BLEND, 0);
666 glDisablei(GL_SCISSOR_TEST, 0);
667 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
668 glCullFace(GL_BACK);
669 glFrontFace(GL_CW);
670 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
671 glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
672 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
673 static_cast<GLfloat>(layout.height));
674 glDepthRangeIndexed(0, 0.0, 0.0);
675
676 glEnableVertexAttribArray(PositionLocation);
677 glEnableVertexAttribArray(TexCoordLocation);
678 glVertexAttribDivisor(PositionLocation, 0);
679 glVertexAttribDivisor(TexCoordLocation, 0);
680 glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
681 offsetof(ScreenRectVertex, position));
682 glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
683 offsetof(ScreenRectVertex, tex_coord));
684 glVertexAttribBinding(PositionLocation, 0);
685 glVertexAttribBinding(TexCoordLocation, 0);
686 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
687
688 glBindTextureUnit(0, screen_info.display_texture);
689 glBindSampler(0, 0);
600 690
601 const auto& screen = layout.screen;
602
603 glViewport(0, 0, layout.width, layout.height);
604 glClear(GL_COLOR_BUFFER_BIT); 691 glClear(GL_COLOR_BUFFER_BIT);
605 692 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
606 // Set projection matrix
607 const std::array ortho_matrix =
608 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
609 glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
610
611 DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
612 static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
613 static_cast<float>(screen.GetHeight()));
614} 693}
615 694
616void RendererOpenGL::TryPresent(int timeout_ms) { 695void RendererOpenGL::TryPresent(int timeout_ms) {
@@ -653,13 +732,14 @@ void RendererOpenGL::RenderScreenshot() {
653 return; 732 return;
654 } 733 }
655 734
735 GLint old_read_fb;
736 GLint old_draw_fb;
737 glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
738 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
739
656 // Draw the current frame to the screenshot framebuffer 740 // Draw the current frame to the screenshot framebuffer
657 screenshot_framebuffer.Create(); 741 screenshot_framebuffer.Create();
658 GLuint old_read_fb = state.draw.read_framebuffer; 742 glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
659 GLuint old_draw_fb = state.draw.draw_framebuffer;
660 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
661 state.AllDirty();
662 state.Apply();
663 743
664 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 744 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
665 745
@@ -676,12 +756,11 @@ void RendererOpenGL::RenderScreenshot() {
676 renderer_settings.screenshot_bits); 756 renderer_settings.screenshot_bits);
677 757
678 screenshot_framebuffer.Release(); 758 screenshot_framebuffer.Release();
679 state.draw.read_framebuffer = old_read_fb;
680 state.draw.draw_framebuffer = old_draw_fb;
681 state.AllDirty();
682 state.Apply();
683 glDeleteRenderbuffers(1, &renderbuffer); 759 glDeleteRenderbuffers(1, &renderbuffer);
684 760
761 glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
762 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
763
685 renderer_settings.screenshot_complete_callback(); 764 renderer_settings.screenshot_complete_callback();
686 renderer_settings.screenshot_requested = false; 765 renderer_settings.screenshot_requested = false;
687} 766}
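The relocated FrameMailbox now keeps the render and presentation threads in lock-step when HasDebugTool() reports an attached GPU debugger: ReleaseRenderFrame bumps a counter and TryGetPresentFrame waits for it to advance. The underlying pattern is independent of the renderer types; a self-contained sketch of the same idea:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    // Minimal sketch of the lock-step gate: the producer bumps a counter after each
    // frame, the consumer blocks until the counter moves past the value it last saw.
    class FrameGate {
    public:
        void NotifyNextFrame() {
            frame_counter++;
            std::lock_guard lock{mutex};
            condition.notify_one();
        }

        void WaitForNextFrame() {
            const int last_frame = frame_counter;
            std::unique_lock lock{mutex};
            condition.wait(lock, [this, last_frame] { return frame_counter > last_frame; });
        }

    private:
        std::mutex mutex;
        std::condition_variable condition;
        std::atomic_int frame_counter{0};
    };

As in the patch, the wait is skipped entirely when no debug tool is detected, so the regular timeout-based wait on present_cv keeps presentation from stalling emulation.
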
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index d45e69cbc..33073ce5b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,7 +10,8 @@
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_shader_manager.h"
14#include "video_core/renderer_opengl/gl_state_tracker.h"
14 15
15namespace Core { 16namespace Core {
16class System; 17class System;
@@ -76,8 +77,6 @@ private:
76 /// Draws the emulated screens to the emulator window. 77 /// Draws the emulated screens to the emulator window.
77 void DrawScreen(const Layout::FramebufferLayout& layout); 78 void DrawScreen(const Layout::FramebufferLayout& layout);
78 79
79 void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
80
81 void RenderScreenshot(); 80 void RenderScreenshot();
82 81
83 /// Loads framebuffer from emulated memory into the active OpenGL texture. 82 /// Loads framebuffer from emulated memory into the active OpenGL texture.
@@ -93,17 +92,20 @@ private:
93 Core::Frontend::EmuWindow& emu_window; 92 Core::Frontend::EmuWindow& emu_window;
94 Core::System& system; 93 Core::System& system;
95 94
96 OpenGLState state; 95 StateTracker state_tracker{system};
97 96
98 // OpenGL object IDs 97 // OpenGL object IDs
99 OGLVertexArray vertex_array;
100 OGLBuffer vertex_buffer; 98 OGLBuffer vertex_buffer;
101 OGLProgram shader; 99 OGLProgram vertex_program;
100 OGLProgram fragment_program;
102 OGLFramebuffer screenshot_framebuffer; 101 OGLFramebuffer screenshot_framebuffer;
103 102
104 /// Display information for Switch screen 103 /// Display information for Switch screen
105 ScreenInfo screen_info; 104 ScreenInfo screen_info;
106 105
106 /// Global dummy shader pipeline
107 GLShader::ProgramManager program_manager;
108
107 /// OpenGL framebuffer data 109 /// OpenGL framebuffer data
108 std::vector<u8> gl_framebuffer_data; 110 std::vector<u8> gl_framebuffer_data;
109 111
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index ac99e6385..b751086fa 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -9,6 +9,7 @@
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
12#include "video_core/renderer_opengl/utils.h" 13#include "video_core/renderer_opengl/utils.h"
13 14
14namespace OpenGL { 15namespace OpenGL {
@@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry {
20 GLsizei stride{}; 21 GLsizei stride{};
21}; 22};
22 23
23VertexArrayPushBuffer::VertexArrayPushBuffer() = default; 24VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
25 : state_tracker{state_tracker} {}
24 26
25VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; 27VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
26 28
27void VertexArrayPushBuffer::Setup(GLuint vao_) { 29void VertexArrayPushBuffer::Setup() {
28 vao = vao_;
29 index_buffer = nullptr; 30 index_buffer = nullptr;
30 vertex_buffers.clear(); 31 vertex_buffers.clear();
31} 32}
@@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint*
41 42
42void VertexArrayPushBuffer::Bind() { 43void VertexArrayPushBuffer::Bind() {
43 if (index_buffer) { 44 if (index_buffer) {
44 glVertexArrayElementBuffer(vao, *index_buffer); 45 state_tracker.BindIndexBuffer(*index_buffer);
45 } 46 }
46 47
47 // TODO(Rodrigo): Find a way to ARB_multi_bind this
48 for (const auto& entry : vertex_buffers) { 48 for (const auto& entry : vertex_buffers) {
49 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, 49 glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
50 entry.stride);
51 } 50 }
52} 51}
53 52
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 3ad7c02d4..47ee3177b 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,12 +11,14 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class StateTracker;
15
14class VertexArrayPushBuffer final { 16class VertexArrayPushBuffer final {
15public: 17public:
16 explicit VertexArrayPushBuffer(); 18 explicit VertexArrayPushBuffer(StateTracker& state_tracker);
17 ~VertexArrayPushBuffer(); 19 ~VertexArrayPushBuffer();
18 20
19 void Setup(GLuint vao_); 21 void Setup();
20 22
21 void SetIndexBuffer(const GLuint* buffer); 23 void SetIndexBuffer(const GLuint* buffer);
22 24
@@ -28,7 +30,8 @@ public:
28private: 30private:
29 struct Entry; 31 struct Entry;
30 32
31 GLuint vao{}; 33 StateTracker& state_tracker;
34
32 const GLuint* index_buffer{}; 35 const GLuint* index_buffer{};
33 std::vector<Entry> vertex_buffers; 36 std::vector<Entry> vertex_buffers;
34}; 37};
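VertexArrayPushBuffer no longer targets an explicit VAO: Setup() takes no handle, vertex buffers are flushed with glBindVertexBuffer against the currently bound vertex format, and index buffers go through StateTracker::BindIndexBuffer. A hedged usage sketch; the offset and stride arguments are assumed from the Entry fields shown in utils.cpp:

    // Sketch of the deferred-binding flow. Buffer handles are passed by pointer
    // because they may only be assigned later, when staged uploads are flushed.
    VertexArrayPushBuffer push_buffer{state_tracker};
    push_buffer.Setup();                                    // no VAO parameter anymore
    push_buffer.SetIndexBuffer(&index_buffer_handle);       // optional
    push_buffer.SetVertexBuffer(0, &vertex_buffer_handle, /*offset=*/0, /*stride=*/32);
    // ... upload vertex/index data, which may fill in the handles above ...
    push_buffer.Bind();  // glBindVertexBuffer + StateTracker::BindIndexBuffer
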
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 4e3ff231e..2bb376555 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
112 const auto& clip = regs.view_volume_clip_control; 112 const auto& clip = regs.view_volume_clip_control;
113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; 113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
114 114
115 Maxwell::Cull::FrontFace front_face = regs.cull.front_face; 115 Maxwell::FrontFace front_face = regs.front_face;
116 if (regs.screen_y_control.triangle_rast_flip != 0 && 116 if (regs.screen_y_control.triangle_rast_flip != 0 &&
117 regs.viewport_transform[0].scale_y > 0.0f) { 117 regs.viewport_transform[0].scale_y > 0.0f) {
118 if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) 118 if (front_face == Maxwell::FrontFace::CounterClockWise)
119 front_face = Maxwell::Cull::FrontFace::ClockWise; 119 front_face = Maxwell::FrontFace::ClockWise;
120 else if (front_face == Maxwell::Cull::FrontFace::ClockWise) 120 else if (front_face == Maxwell::FrontFace::ClockWise)
121 front_face = Maxwell::Cull::FrontFace::CounterClockWise; 121 front_face = Maxwell::FrontFace::CounterClockWise;
122 } 122 }
123 123
124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; 124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
125 return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, 125 return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled,
126 depth_clamp_enabled, gl_ndc, regs.cull.cull_face, 126 depth_clamp_enabled, gl_ndc, regs.cull_face, front_face);
127 front_face);
128} 127}
129 128
130} // Anonymous namespace 129} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 87056ef37..4c8ba7f90 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -171,8 +171,8 @@ struct FixedPipelineState {
171 171
172 struct Rasterizer { 172 struct Rasterizer {
173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, 173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
174 bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, 174 bool ndc_minus_one_to_one, Maxwell::CullFace cull_face,
175 Maxwell::Cull::FrontFace front_face) 175 Maxwell::FrontFace front_face)
176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, 176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, 177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
178 cull_face{cull_face}, front_face{front_face} {} 178 cull_face{cull_face}, front_face{front_face} {}
@@ -182,8 +182,8 @@ struct FixedPipelineState {
182 bool depth_bias_enable; 182 bool depth_bias_enable;
183 bool depth_clamp_enable; 183 bool depth_clamp_enable;
184 bool ndc_minus_one_to_one; 184 bool ndc_minus_one_to_one;
185 Maxwell::Cull::CullFace cull_face; 185 Maxwell::CullFace cull_face;
186 Maxwell::Cull::FrontFace front_face; 186 Maxwell::FrontFace front_face;
187 187
188 std::size_t Hash() const noexcept; 188 std::size_t Hash() const noexcept;
189 189
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ef66dd141..f93447610 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -120,11 +120,12 @@ struct FormatTuple {
120 {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI 120 {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI
121 {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U 121 {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U
122 {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U 122 {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
123 {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle) 123 {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle)
124 {vk::Format::eR8Unorm, Attachable | Storage}, // R8U 124 {vk::Format::eR8Unorm, Attachable | Storage}, // R8U
125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI 125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI
126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F 126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U 127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
128 {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
128 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI 129 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
129 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F 130 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
130 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI 131 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -256,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
256 return vk::ShaderStageFlagBits::eGeometry; 257 return vk::ShaderStageFlagBits::eGeometry;
257 case Tegra::Engines::ShaderType::Fragment: 258 case Tegra::Engines::ShaderType::Fragment:
258 return vk::ShaderStageFlagBits::eFragment; 259 return vk::ShaderStageFlagBits::eFragment;
260 case Tegra::Engines::ShaderType::Compute:
261 return vk::ShaderStageFlagBits::eCompute;
259 } 262 }
260 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); 263 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
261 return {}; 264 return {};
@@ -331,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
331 return vk::Format::eR16G16B16Unorm; 334 return vk::Format::eR16G16B16Unorm;
332 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 335 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
333 return vk::Format::eR16G16B16A16Unorm; 336 return vk::Format::eR16G16B16A16Unorm;
337 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
338 return vk::Format::eA2B10G10R10UnormPack32;
334 default: 339 default:
335 break; 340 break;
336 } 341 }
@@ -364,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
364 return vk::Format::eR8G8B8A8Uint; 369 return vk::Format::eR8G8B8A8Uint;
365 case Maxwell::VertexAttribute::Size::Size_32: 370 case Maxwell::VertexAttribute::Size::Size_32:
366 return vk::Format::eR32Uint; 371 return vk::Format::eR32Uint;
372 case Maxwell::VertexAttribute::Size::Size_32_32:
373 return vk::Format::eR32G32Uint;
374 case Maxwell::VertexAttribute::Size::Size_32_32_32:
375 return vk::Format::eR32G32B32Uint;
367 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 376 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
368 return vk::Format::eR32G32B32A32Uint; 377 return vk::Format::eR32G32B32A32Uint;
369 default: 378 default:
@@ -371,8 +380,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
371 } 380 }
372 case Maxwell::VertexAttribute::Type::UnsignedScaled: 381 case Maxwell::VertexAttribute::Type::UnsignedScaled:
373 switch (size) { 382 switch (size) {
383 case Maxwell::VertexAttribute::Size::Size_8:
384 return vk::Format::eR8Uscaled;
374 case Maxwell::VertexAttribute::Size::Size_8_8: 385 case Maxwell::VertexAttribute::Size::Size_8_8:
375 return vk::Format::eR8G8Uscaled; 386 return vk::Format::eR8G8Uscaled;
387 case Maxwell::VertexAttribute::Size::Size_8_8_8:
388 return vk::Format::eR8G8B8Uscaled;
389 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
390 return vk::Format::eR8G8B8A8Uscaled;
391 case Maxwell::VertexAttribute::Size::Size_16:
392 return vk::Format::eR16Uscaled;
393 case Maxwell::VertexAttribute::Size::Size_16_16:
394 return vk::Format::eR16G16Uscaled;
395 case Maxwell::VertexAttribute::Size::Size_16_16_16:
396 return vk::Format::eR16G16B16Uscaled;
397 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
398 return vk::Format::eR16G16B16A16Uscaled;
376 default: 399 default:
377 break; 400 break;
378 } 401 }
@@ -572,24 +595,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
572 return {}; 595 return {};
573} 596}
574 597
575vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { 598vk::FrontFace FrontFace(Maxwell::FrontFace front_face) {
576 switch (front_face) { 599 switch (front_face) {
577 case Maxwell::Cull::FrontFace::ClockWise: 600 case Maxwell::FrontFace::ClockWise:
578 return vk::FrontFace::eClockwise; 601 return vk::FrontFace::eClockwise;
579 case Maxwell::Cull::FrontFace::CounterClockWise: 602 case Maxwell::FrontFace::CounterClockWise:
580 return vk::FrontFace::eCounterClockwise; 603 return vk::FrontFace::eCounterClockwise;
581 } 604 }
582 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); 605 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
583 return {}; 606 return {};
584} 607}
585 608
586vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { 609vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) {
587 switch (cull_face) { 610 switch (cull_face) {
588 case Maxwell::Cull::CullFace::Front: 611 case Maxwell::CullFace::Front:
589 return vk::CullModeFlagBits::eFront; 612 return vk::CullModeFlagBits::eFront;
590 case Maxwell::Cull::CullFace::Back: 613 case Maxwell::CullFace::Back:
591 return vk::CullModeFlagBits::eBack; 614 return vk::CullModeFlagBits::eBack;
592 case Maxwell::Cull::CullFace::FrontAndBack: 615 case Maxwell::CullFace::FrontAndBack:
593 return vk::CullModeFlagBits::eFrontAndBack; 616 return vk::CullModeFlagBits::eFrontAndBack;
594 } 617 }
595 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); 618 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
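The maxwell_to_vk.cpp hunks extend VertexFormat with the scaled 8/16-bit attribute sizes plus the 10_10_10_2 and 32-bit unsigned cases, and teach ShaderStage about compute. A small sketch of the lookup these helpers perform, using the enum spellings from the patch (the Maxwell alias follows the header's usual convention):

    using Maxwell = Tegra::Engines::Maxwell3D::Regs;

    // With this patch, a 16x2 unsigned-scaled attribute resolves instead of hitting
    // the unimplemented default case.
    const vk::Format format = MaxwellToVK::VertexFormat(
        Maxwell::VertexAttribute::Type::UnsignedScaled,
        Maxwell::VertexAttribute::Size::Size_16_16);
    // format == vk::Format::eR16G16Uscaled
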
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e9678b7b..24f6ab544 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
54 54
55vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); 55vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
56 56
57vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); 57vk::FrontFace FrontFace(Maxwell::FrontFace front_face);
58 58
59vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); 59vk::CullModeFlags CullFace(Maxwell::CullFace cull_face);
60 60
61vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); 61vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
62 62
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index ddc62bc97..42bb01418 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -27,6 +27,7 @@
27#include "video_core/renderer_vulkan/vk_rasterizer.h" 27#include "video_core/renderer_vulkan/vk_rasterizer.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h" 28#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_state_tracker.h"
30#include "video_core/renderer_vulkan/vk_swapchain.h" 31#include "video_core/renderer_vulkan/vk_swapchain.h"
31 32
32namespace Vulkan { 33namespace Vulkan {
@@ -177,10 +178,13 @@ bool RendererVulkan::Init() {
177 swapchain = std::make_unique<VKSwapchain>(surface, *device); 178 swapchain = std::make_unique<VKSwapchain>(surface, *device);
178 swapchain->Create(framebuffer.width, framebuffer.height, false); 179 swapchain->Create(framebuffer.width, framebuffer.height, false);
179 180
180 scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); 181 state_tracker = std::make_unique<StateTracker>(system);
182
183 scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
181 184
182 rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, 185 rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
183 *resource_manager, *memory_manager, *scheduler); 186 *resource_manager, *memory_manager,
187 *state_tracker, *scheduler);
184 188
185 blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, 189 blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
186 *resource_manager, *memory_manager, *swapchain, 190 *resource_manager, *memory_manager, *swapchain,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index f513397f0..3da08d2e4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <optional> 8#include <optional>
8#include <vector> 9#include <vector>
10
9#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
10#include "video_core/renderer_vulkan/declarations.h" 12#include "video_core/renderer_vulkan/declarations.h"
11 13
@@ -15,6 +17,7 @@ class System;
15 17
16namespace Vulkan { 18namespace Vulkan {
17 19
20class StateTracker;
18class VKBlitScreen; 21class VKBlitScreen;
19class VKDevice; 22class VKDevice;
20class VKFence; 23class VKFence;
@@ -61,6 +64,7 @@ private:
61 std::unique_ptr<VKSwapchain> swapchain; 64 std::unique_ptr<VKSwapchain> swapchain;
62 std::unique_ptr<VKMemoryManager> memory_manager; 65 std::unique_ptr<VKMemoryManager> memory_manager;
63 std::unique_ptr<VKResourceManager> resource_manager; 66 std::unique_ptr<VKResourceManager> resource_manager;
67 std::unique_ptr<StateTracker> state_tracker;
64 std::unique_ptr<VKScheduler> scheduler; 68 std::unique_ptr<VKScheduler> scheduler;
65 std::unique_ptr<VKBlitScreen> blit_screen; 69 std::unique_ptr<VKBlitScreen> blit_screen;
66}; 70};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 9d5b8de7a..60f57d83e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate
73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; 73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
74 u32 binding = 0; 74 u32 binding = 0;
75 u32 offset = 0; 75 u32 offset = 0;
76 FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); 76 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
77 if (template_entries.empty()) { 77 if (template_entries.empty()) {
78 // If the shader doesn't use descriptor sets, skip template creation. 78 // If the shader doesn't use descriptor sets, skip template creation.
79 return UniqueDescriptorUpdateTemplate{}; 79 return UniqueDescriptorUpdateTemplate{};
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
107 features.occlusionQueryPrecise = true; 107 features.occlusionQueryPrecise = true;
108 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
109 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
110 features.shaderStorageImageReadWithoutFormat = 110 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
111 is_shader_storage_img_read_without_format_supported;
112 features.shaderStorageImageWriteWithoutFormat = true; 111 features.shaderStorageImageWriteWithoutFormat = true;
113 features.textureCompressionASTC_LDR = is_optimal_astc_supported; 112 features.textureCompressionASTC_LDR = is_optimal_astc_supported;
114 113
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
148 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); 147 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
149 } 148 }
150 149
150 vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
151 if (ext_transform_feedback) {
152 transform_feedback.transformFeedback = true;
153 transform_feedback.geometryStreams = true;
154 SetNext(next, transform_feedback);
155 } else {
156 LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
157 }
158
151 if (!ext_depth_range_unrestricted) { 159 if (!ext_depth_range_unrestricted) {
152 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 160 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
153 } 161 }
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
385 } 393 }
386 }; 394 };
387 395
388 extensions.reserve(14); 396 extensions.reserve(15);
389 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 397 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
390 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 398 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
391 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 399 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
397 405
398 [[maybe_unused]] const bool nsight = 406 [[maybe_unused]] const bool nsight =
399 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 407 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
400 bool khr_shader_float16_int8{}; 408 bool has_khr_shader_float16_int8{};
401 bool ext_subgroup_size_control{}; 409 bool has_ext_subgroup_size_control{};
410 bool has_ext_transform_feedback{};
402 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 411 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
403 Test(extension, khr_uniform_buffer_standard_layout, 412 Test(extension, khr_uniform_buffer_standard_layout,
404 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 413 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
405 Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 414 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
415 false);
406 Test(extension, ext_depth_range_unrestricted, 416 Test(extension, ext_depth_range_unrestricted,
407 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 417 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
408 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 418 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
409 Test(extension, ext_shader_viewport_index_layer, 419 Test(extension, ext_shader_viewport_index_layer,
410 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); 420 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
411 Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, 421 Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
422 false);
423 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
412 false); 424 false);
413 if (Settings::values.renderer_debug) { 425 if (Settings::values.renderer_debug) {
414 Test(extension, nv_device_diagnostic_checkpoints, 426 Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
416 } 428 }
417 } 429 }
418 430
419 if (khr_shader_float16_int8) { 431 if (has_khr_shader_float16_int8) {
420 is_float16_supported = 432 is_float16_supported =
421 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; 433 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
422 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 434 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
423 } 435 }
424 436
425 if (ext_subgroup_size_control) { 437 if (has_ext_subgroup_size_control) {
426 const auto features = 438 const auto features =
427 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); 439 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
428 const auto properties = 440 const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
439 is_warp_potentially_bigger = true; 451 is_warp_potentially_bigger = true;
440 } 452 }
441 453
454 if (has_ext_transform_feedback) {
455 const auto features =
456 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
457 const auto properties =
458 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
459
460 if (features.transformFeedback && features.geometryStreams &&
461 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
462 properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
463 extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
464 ext_transform_feedback = true;
465 }
466 }
467
442 return extensions; 468 return extensions;
443} 469}
444 470
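The optional extensions probed above (float16/int8, subgroup size control and now transform feedback) are only pushed onto the enabled list after their feature structs have been queried. Below is a minimal sketch of what a GetFeatures-style helper boils down to, assuming Vulkan 1.1 (or VK_KHR_get_physical_device_properties2) entry points are available through the dynamic dispatcher; the helper is written from scratch here and only mirrors the call sites in the hunk above. GetProperties works the same way through vkGetPhysicalDeviceProperties2.

#include <vulkan/vulkan.hpp>

// Queries an extension feature struct through the pNext chain of
// vkGetPhysicalDeviceFeatures2, e.g. Feature = vk::PhysicalDeviceTransformFeedbackFeaturesEXT.
template <typename Feature>
Feature GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
    vk::PhysicalDeviceFeatures2 features2;
    Feature extension_features; // sType is filled in by vulkan.hpp's default constructor
    features2.pNext = &extension_features;
    physical.getFeatures2(&features2, dldi);
    return extension_features;
}

// Usage mirroring the transform feedback check above:
//   const auto features = GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
//   if (features.transformFeedback && features.geometryStreams) { /* enable the extension */ }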
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
467 493
468void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { 494void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
469 const auto supported_features{physical.getFeatures(dldi)}; 495 const auto supported_features{physical.getFeatures(dldi)};
470 is_shader_storage_img_read_without_format_supported = 496 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
471 supported_features.shaderStorageImageReadWithoutFormat;
472 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); 497 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
473} 498}
474 499
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
510 vk::Format::eR32G32Sfloat, 535 vk::Format::eR32G32Sfloat,
511 vk::Format::eR32G32Uint, 536 vk::Format::eR32G32Uint,
512 vk::Format::eR16G16B16A16Uint, 537 vk::Format::eR16G16B16A16Uint,
538 vk::Format::eR16G16B16A16Snorm,
513 vk::Format::eR16G16B16A16Unorm, 539 vk::Format::eR16G16B16A16Unorm,
514 vk::Format::eR16G16Unorm, 540 vk::Format::eR16G16Unorm,
515 vk::Format::eR16G16Snorm, 541 vk::Format::eR16G16Snorm,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns true if Shader storage Image Read Without Format supported.
126 bool IsShaderStorageImageReadWithoutFormatSupported() const {
127 return is_shader_storage_img_read_without_format_supported;
128 }
129
130 /// Returns true if ASTC is natively supported. 125 /// Returns true if ASTC is natively supported.
131 bool IsOptimalAstcSupported() const { 126 bool IsOptimalAstcSupported() const {
132 return is_optimal_astc_supported; 127 return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
147 return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; 142 return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
148 } 143 }
149 144
145 /// Returns true if formatless image load is supported.
146 bool IsFormatlessImageLoadSupported() const {
147 return is_formatless_image_load_supported;
148 }
149
150 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. 150
151 bool IsKhrUniformBufferStandardLayoutSupported() const { 151 bool IsKhrUniformBufferStandardLayoutSupported() const {
152 return khr_uniform_buffer_standard_layout; 152 return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
167 return ext_shader_viewport_index_layer; 167 return ext_shader_viewport_index_layer;
168 } 168 }
169 169
170 /// Returns true if the device supports VK_EXT_transform_feedback.
171 bool IsExtTransformFeedbackSupported() const {
172 return ext_transform_feedback;
173 }
174
170 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. 175 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
171 bool IsNvDeviceDiagnosticCheckpoints() const { 176 bool IsNvDeviceDiagnosticCheckpoints() const {
172 return nv_device_diagnostic_checkpoints; 177 return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
214 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
215 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
216 221
217 const vk::PhysicalDevice physical; ///< Physical device. 222 const vk::PhysicalDevice physical; ///< Physical device.
218 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 223 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
219 vk::PhysicalDeviceProperties properties; ///< Device properties. 224 vk::PhysicalDeviceProperties properties; ///< Device properties.
220 UniqueDevice logical; ///< Logical device. 225 UniqueDevice logical; ///< Logical device.
221 vk::Queue graphics_queue; ///< Main graphics queue. 226 vk::Queue graphics_queue; ///< Main graphics queue.
222 vk::Queue present_queue; ///< Main present queue. 227 vk::Queue present_queue; ///< Main present queue.
223 u32 graphics_family{}; ///< Main graphics queue family index. 228 u32 graphics_family{}; ///< Main graphics queue family index.
224 u32 present_family{}; ///< Main present queue family index. 229 u32 present_family{}; ///< Main present queue family index.
225 vk::DriverIdKHR driver_id{}; ///< Driver ID. 230 vk::DriverIdKHR driver_id{}; ///< Driver ID.
226 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 231 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
227 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 232 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
228 bool is_float16_supported{}; ///< Support for float16 arithmetics. 233 bool is_float16_supported{}; ///< Support for float16 arithmetics.
229 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 234 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
235 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
230 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 236 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
231 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 237 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 238 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 239 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
240 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
234 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 241 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
235 bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
236 ///< image read without format
237 242
238 // Telemetry parameters 243 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 244 std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index b155dfb49..6a02403c1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat
97 u32 offset = 0; 97 u32 offset = 0;
98 for (const auto& stage : program) { 98 for (const auto& stage : program) {
99 if (stage) { 99 if (stage) {
100 FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, 100 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
101 template_entries);
102 } 101 }
103 } 102 }
104 if (template_entries.empty()) { 103 if (template_entries.empty()) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7ddf7d3ee..557b9d662 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType;
36 36
37namespace { 37namespace {
38 38
39// C++20's using enum
40constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer;
41constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer;
42constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer;
43constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler;
44constexpr auto eStorageImage = vk::DescriptorType::eStorageImage;
45
39constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 46constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
40 VideoCommon::Shader::CompileDepth::FullDecompile}; 47 VideoCommon::Shader::CompileDepth::FullDecompile};
41 48
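The constexpr aliases above are a pre-C++20 stand-in for the feature the comment names. With a C++20 compiler the same effect comes from a real using-enum declaration; sketch only:

#include <vulkan/vulkan.hpp>

namespace {
// vk::DescriptorType is a scoped enum, so this makes eUniformBuffer, eStorageBuffer,
// eUniformTexelBuffer, eCombinedImageSampler and eStorageImage visible unqualified.
using enum vk::DescriptorType;
} // namespace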
@@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
119 } 126 }
120} 127}
121 128
129template <vk::DescriptorType descriptor_type, class Container>
130void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding,
131 vk::ShaderStageFlags stage_flags, const Container& container) {
132 const u32 num_entries = static_cast<u32>(std::size(container));
133 for (std::size_t i = 0; i < num_entries; ++i) {
134 u32 count = 1;
135 if constexpr (descriptor_type == eCombinedImageSampler) {
136 // Combined image samplers can be arrayed.
137 count = container[i].Size();
138 }
139 bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr);
140 }
141}
142
122u32 FillDescriptorLayout(const ShaderEntries& entries, 143u32 FillDescriptorLayout(const ShaderEntries& entries,
123 std::vector<vk::DescriptorSetLayoutBinding>& bindings, 144 std::vector<vk::DescriptorSetLayoutBinding>& bindings,
124 Maxwell::ShaderProgram program_type, u32 base_binding) { 145 Maxwell::ShaderProgram program_type, u32 base_binding) {
125 const ShaderType stage = GetStageFromProgram(program_type); 146 const ShaderType stage = GetStageFromProgram(program_type);
126 const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); 147 const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
127 148
128 u32 binding = base_binding; 149 u32 binding = base_binding;
129 const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { 150 AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers);
130 for (std::size_t i = 0; i < num_entries; ++i) { 151 AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers);
131 bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); 152 AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers);
132 } 153 AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers);
133 }; 154 AddBindings<eStorageImage>(bindings, binding, flags, entries.images);
134 AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
135 AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
136 AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
137 AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
138 AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
139 return binding; 155 return binding;
140} 156}
141 157
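For reference, a self-contained usage sketch of the templated AddBindings introduced above. SamplerEntry is a stand-in for the real ShaderEntries member; only its Size() accessor matters, and the helper is restated locally so the snippet compiles on its own.

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.hpp>

struct SamplerEntry {
    std::uint32_t array_size = 1;
    std::uint32_t Size() const { return array_size; } // number of elements in the sampler array
};

template <vk::DescriptorType descriptor_type, class Container>
void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, std::uint32_t& binding,
                 vk::ShaderStageFlags stage_flags, const Container& container) {
    for (const auto& entry : container) {
        std::uint32_t count = 1;
        if constexpr (descriptor_type == vk::DescriptorType::eCombinedImageSampler) {
            count = entry.Size(); // arrayed samplers keep one binding with N descriptors
        }
        bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr);
    }
}

int main() {
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    std::uint32_t binding = 0;
    const std::vector<SamplerEntry> samplers{{1}, {4}}; // the second sampler is a [4] array
    AddBindings<vk::DescriptorType::eCombinedImageSampler>(
        bindings, binding, vk::ShaderStageFlagBits::eFragment, samplers);
    // bindings now holds two entries: binding 0 with descriptorCount 1, binding 1 with 4.
}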
@@ -145,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
145 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
146 ProgramCode program_code, u32 main_offset) 162 ProgramCode program_code, u32 main_offset)
147 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
148 program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, 164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
149 shader_ir{this->program_code, main_offset, compiler_settings, locker}, 165 shader_ir{this->program_code, main_offset, compiler_settings, registry},
150 entries{GenerateShaderEntries(shader_ir)} {} 166 entries{GenerateShaderEntries(shader_ir)} {}
151 167
152CachedShader::~CachedShader() = default; 168CachedShader::~CachedShader() = default;
@@ -163,24 +179,19 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
163VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 179VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
164 const VKDevice& device, VKScheduler& scheduler, 180 const VKDevice& device, VKScheduler& scheduler,
165 VKDescriptorPool& descriptor_pool, 181 VKDescriptorPool& descriptor_pool,
166 VKUpdateDescriptorQueue& update_descriptor_queue) 182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 VKRenderPassCache& renderpass_cache)
167 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 184 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
168 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 185 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
169 renderpass_cache(device) {} 186 renderpass_cache{renderpass_cache} {}
170 187
171VKPipelineCache::~VKPipelineCache() = default; 188VKPipelineCache::~VKPipelineCache() = default;
172 189
173std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 190std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
174 const auto& gpu = system.GPU().Maxwell3D(); 191 const auto& gpu = system.GPU().Maxwell3D();
175 auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
176 if (!dirty) {
177 return last_shaders;
178 }
179 dirty = false;
180 192
181 std::array<Shader, Maxwell::MaxShaderProgram> shaders; 193 std::array<Shader, Maxwell::MaxShaderProgram> shaders;
182 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 194 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
183 const auto& shader_config = gpu.regs.shader_config[index];
184 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 195 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
185 196
186 // Skip stages that are not enabled 197 // Skip stages that are not enabled
@@ -262,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
262 specialization.workgroup_size = key.workgroup_size; 273 specialization.workgroup_size = key.workgroup_size;
263 specialization.shared_memory_size = key.shared_memory_size; 274 specialization.shared_memory_size = key.shared_memory_size;
264 275
265 const SPIRVShader spirv_shader{ 276 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
266 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), 277 shader->GetRegistry(), specialization),
267 shader->GetEntries()}; 278 shader->GetEntries()};
268 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, 279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
269 update_descriptor_queue, spirv_shader); 280 update_descriptor_queue, spirv_shader);
270 return *entry; 281 return *entry;
@@ -313,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
313 const auto& gpu = system.GPU().Maxwell3D(); 324 const auto& gpu = system.GPU().Maxwell3D();
314 325
315 Specialization specialization; 326 Specialization specialization;
316 specialization.primitive_topology = fixed_state.input_assembly.topology; 327 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
317 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
318 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 328 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
319 specialization.point_size = fixed_state.input_assembly.point_size; 329 specialization.point_size = fixed_state.input_assembly.point_size;
320 } 330 }
@@ -322,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
322 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 332 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
323 } 333 }
324 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 334 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
325 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
326 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
327 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
328 335
329 SPIRVProgram program; 336 SPIRVProgram program;
330 std::vector<vk::DescriptorSetLayoutBinding> bindings; 337 std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -345,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
345 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
346 const auto program_type = GetShaderType(program_enum); 353 const auto program_type = GetShaderType(program_enum);
347 const auto& entries = shader->GetEntries(); 354 const auto& entries = shader->GetEntries();
348 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), 355 program[stage] = {
349 entries}; 356 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
357 entries};
350 358
351 if (program_enum == Maxwell::ShaderProgram::VertexA) { 359 if (program_enum == Maxwell::ShaderProgram::VertexA) {
352 // VertexB was combined with VertexA, so we skip the VertexB iteration 360 // VertexB was combined with VertexA, so we skip the VertexB iteration
@@ -361,32 +369,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
361 return {std::move(program), std::move(bindings)}; 369 return {std::move(program), std::move(bindings)};
362} 370}
363 371
364void FillDescriptorUpdateTemplateEntries( 372template <vk::DescriptorType descriptor_type, class Container>
365 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, 373void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding,
366 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { 374 u32& offset, const Container& container) {
367 static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 375 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
368 const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { 376 const u32 count = static_cast<u32>(std::size(container));
369 const u32 count = static_cast<u32>(count_); 377
370 if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && 378 if constexpr (descriptor_type == eCombinedImageSampler) {
371 device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { 379 for (u32 i = 0; i < count; ++i) {
372 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to 380 const u32 num_samplers = container[i].Size();
373 // crash. 381 template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset,
374 for (u32 i = 0; i < count; ++i) { 382 entry_size);
375 template_entries.emplace_back(binding + i, 0, 1, descriptor_type, 383 ++binding;
376 offset + i * entry_size, entry_size); 384 offset += num_samplers * entry_size;
377 }
378 } else if (count != 0) {
379 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
380 } 385 }
381 offset += count * entry_size; 386 return;
382 binding += count; 387 }
383 };
384 388
385 AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); 389 if constexpr (descriptor_type == eUniformTexelBuffer) {
386 AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); 390 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
387 AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); 391 // crash.
388 AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); 392 for (u32 i = 0; i < count; ++i) {
389 AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); 393 template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
394 offset + i * entry_size, entry_size);
395 }
396 } else if (count > 0) {
397 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
398 }
399 offset += count * entry_size;
400 binding += count;
401}
402
403void FillDescriptorUpdateTemplateEntries(
404 const ShaderEntries& entries, u32& binding, u32& offset,
405 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
406 AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers);
407 AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers);
408 AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers);
409 AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers);
410 AddEntry<eStorageImage>(template_entries, offset, binding, entries.images);
390} 411}
391 412
392} // namespace Vulkan 413} // namespace Vulkan
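The Nvidia workaround above still emits one update-template entry per uniform texel buffer, while every other descriptor type is grouped into a single entry with descriptorCount == N. A sketch of that split; DescriptorUpdateEntry is an assumed stand-in for yuzu's packed update payload element.

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.hpp>

struct DescriptorUpdateEntry { // stand-in for one packed element of the update data
    std::uint64_t raw[3];
};

int main() {
    constexpr std::uint32_t entry_size = sizeof(DescriptorUpdateEntry);
    std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
    std::uint32_t binding = 5;             // first texel buffer binding (illustrative)
    std::uint32_t offset = 5 * entry_size; // byte offset of its payload
    const std::uint32_t count = 3;         // three uniform texel buffers in this stage

    // One entry per descriptor instead of a single entry with descriptorCount == 3,
    // so each write done through vkUpdateDescriptorSetWithTemplate touches one buffer.
    for (std::uint32_t i = 0; i < count; ++i) {
        template_entries.emplace_back(binding + i, 0, 1, vk::DescriptorType::eUniformTexelBuffer,
                                      offset + i * entry_size, entry_size);
    }
    binding += count;              // continue with the next descriptor group
    offset += count * entry_size;
}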
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 8678fc9c3..c4c112290 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -25,7 +25,7 @@
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h" 26#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 27#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
28#include "video_core/shader/const_buffer_locker.h" 28#include "video_core/shader/registry.h"
29#include "video_core/shader/shader_ir.h" 29#include "video_core/shader/shader_ir.h"
30#include "video_core/surface.h" 30#include "video_core/surface.h"
31 31
@@ -132,6 +132,10 @@ public:
132 return shader_ir; 132 return shader_ir;
133 } 133 }
134 134
135 const VideoCommon::Shader::Registry& GetRegistry() const {
136 return registry;
137 }
138
135 const VideoCommon::Shader::ShaderIR& GetIR() const { 139 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir; 140 return shader_ir;
137 } 141 }
@@ -147,7 +151,7 @@ private:
147 GPUVAddr gpu_addr{}; 151 GPUVAddr gpu_addr{};
148 VAddr cpu_addr{}; 152 VAddr cpu_addr{};
149 ProgramCode program_code; 153 ProgramCode program_code;
150 VideoCommon::Shader::ConstBufferLocker locker; 154 VideoCommon::Shader::Registry registry;
151 VideoCommon::Shader::ShaderIR shader_ir; 155 VideoCommon::Shader::ShaderIR shader_ir;
152 ShaderEntries entries; 156 ShaderEntries entries;
153}; 157};
@@ -157,7 +161,8 @@ public:
157 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 161 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
158 const VKDevice& device, VKScheduler& scheduler, 162 const VKDevice& device, VKScheduler& scheduler,
159 VKDescriptorPool& descriptor_pool, 163 VKDescriptorPool& descriptor_pool,
160 VKUpdateDescriptorQueue& update_descriptor_queue); 164 VKUpdateDescriptorQueue& update_descriptor_queue,
165 VKRenderPassCache& renderpass_cache);
161 ~VKPipelineCache(); 166 ~VKPipelineCache();
162 167
163 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 168 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
180 VKScheduler& scheduler; 185 VKScheduler& scheduler;
181 VKDescriptorPool& descriptor_pool; 186 VKDescriptorPool& descriptor_pool;
182 VKUpdateDescriptorQueue& update_descriptor_queue; 187 VKUpdateDescriptorQueue& update_descriptor_queue;
183 188 VKRenderPassCache& renderpass_cache;
184 VKRenderPassCache renderpass_cache;
185 189
186 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 190 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
187 191
@@ -194,7 +198,7 @@ private:
194}; 198};
195 199
196void FillDescriptorUpdateTemplateEntries( 200void FillDescriptorUpdateTemplateEntries(
197 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, 201 const ShaderEntries& entries, u32& binding, u32& offset,
198 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); 202 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
199 203
200} // namespace Vulkan 204} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 3bf86da87..58c69b786 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,7 @@
36#include "video_core/renderer_vulkan/vk_sampler_cache.h" 36#include "video_core/renderer_vulkan/vk_sampler_cache.h"
37#include "video_core/renderer_vulkan/vk_scheduler.h" 37#include "video_core/renderer_vulkan/vk_scheduler.h"
38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
39#include "video_core/renderer_vulkan/vk_state_tracker.h"
39#include "video_core/renderer_vulkan/vk_texture_cache.h" 40#include "video_core/renderer_vulkan/vk_texture_cache.h"
40#include "video_core/renderer_vulkan/vk_update_descriptor.h" 41#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41 42
@@ -105,17 +106,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag
105 106
106template <typename Engine, typename Entry> 107template <typename Engine, typename Entry>
107Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 108Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
108 std::size_t stage) { 109 std::size_t stage, std::size_t index = 0) {
109 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 110 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
110 if (entry.IsBindless()) { 111 if (entry.IsBindless()) {
111 const Tegra::Texture::TextureHandle tex_handle = 112 const Tegra::Texture::TextureHandle tex_handle =
112 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); 113 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
113 return engine.GetTextureInfo(tex_handle); 114 return engine.GetTextureInfo(tex_handle);
114 } 115 }
116 const auto& gpu_profile = engine.AccessGuestDriverProfile();
117 const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
118 const u32 offset = entry.GetOffset() + entry_offset;
115 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 119 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
116 return engine.GetStageTexture(stage_type, entry.GetOffset()); 120 return engine.GetStageTexture(stage_type, offset);
117 } else { 121 } else {
118 return engine.GetTexture(entry.GetOffset()); 122 return engine.GetTexture(offset);
119 } 123 }
120} 124}
121 125
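The new index parameter above turns one sampler entry into a run of consecutive texture handles in the const buffer. A small sketch of the offset arithmetic; the 4-byte handler size mirrors GuestDriverProfile's default but is an assumption here.

#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
    const std::uint32_t base_offset = 0x20; // entry.GetOffset()
    const std::uint32_t handler_size = 4;   // gpu_profile.GetTextureHandlerSize()
    for (std::size_t index = 0; index < 3; ++index) {
        const std::uint32_t offset =
            base_offset + static_cast<std::uint32_t>(index * handler_size);
        std::cout << "sampler[" << index << "] handle read from const buffer offset 0x" << std::hex
                  << offset << std::dec << '\n';
    }
}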
@@ -277,17 +281,19 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
277RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, 281RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
278 VKScreenInfo& screen_info, const VKDevice& device, 282 VKScreenInfo& screen_info, const VKDevice& device,
279 VKResourceManager& resource_manager, 283 VKResourceManager& resource_manager,
280 VKMemoryManager& memory_manager, VKScheduler& scheduler) 284 VKMemoryManager& memory_manager, StateTracker& state_tracker,
285 VKScheduler& scheduler)
281 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, 286 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
282 screen_info{screen_info}, device{device}, resource_manager{resource_manager}, 287 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
283 memory_manager{memory_manager}, scheduler{scheduler}, 288 memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
284 staging_pool(device, memory_manager, scheduler), descriptor_pool(device), 289 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
285 update_descriptor_queue(device, scheduler), 290 update_descriptor_queue(device, scheduler), renderpass_cache(device),
286 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 291 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
287 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 292 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
288 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, 293 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
289 staging_pool), 294 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), 295 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
296 renderpass_cache),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 297 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device), query_cache(system, *this, device, scheduler) { 298 sampler_cache(device), query_cache(system, *this, device, scheduler) {
293 scheduler.SetQueryCache(query_cache); 299 scheduler.SetQueryCache(query_cache);
@@ -342,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
342 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
343 } 349 }
344 350
351 BeginTransformFeedback();
352
345 const auto pipeline_layout = pipeline.GetLayout(); 353 const auto pipeline_layout = pipeline.GetLayout();
346 const auto descriptor_set = pipeline.CommitDescriptorSet(); 354 const auto descriptor_set = pipeline.CommitDescriptorSet();
347 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 355 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -351,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
351 } 359 }
352 draw_params.Draw(cmdbuf, dld); 360 draw_params.Draw(cmdbuf, dld);
353 }); 361 });
362
363 EndTransformFeedback();
354} 364}
355 365
356void RasterizerVulkan::Clear() { 366void RasterizerVulkan::Clear() {
357 MICROPROFILE_SCOPE(Vulkan_Clearing); 367 MICROPROFILE_SCOPE(Vulkan_Clearing);
358 368
359 query_cache.UpdateCounters();
360
361 const auto& gpu = system.GPU().Maxwell3D(); 369 const auto& gpu = system.GPU().Maxwell3D();
362 if (!system.GPU().Maxwell3D().ShouldExecute()) { 370 if (!system.GPU().Maxwell3D().ShouldExecute()) {
363 return; 371 return;
364 } 372 }
365 373
374 sampled_views.clear();
375 image_views.clear();
376
377 query_cache.UpdateCounters();
378
366 const auto& regs = gpu.regs; 379 const auto& regs = gpu.regs;
367 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 380 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
368 regs.clear_buffers.A; 381 regs.clear_buffers.A;
@@ -371,52 +384,54 @@ void RasterizerVulkan::Clear() {
371 if (!use_color && !use_depth && !use_stencil) { 384 if (!use_color && !use_depth && !use_stencil) {
372 return; 385 return;
373 } 386 }
374 // Clearing images requires to be out of a renderpass
375 scheduler.RequestOutsideRenderPassOperationContext();
376 387
377 // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. 388 [[maybe_unused]] const auto texceptions = UpdateAttachments();
389 DEBUG_ASSERT(texceptions.none());
390 SetupImageTransitions(0, color_attachments, zeta_attachment);
378 391
379 if (use_color) { 392 const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
380 View color_view; 393 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
381 { 394 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
382 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
383 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
384 }
385 395
386 color_view->Transition(vk::ImageLayout::eTransferDstOptimal, 396 const auto& scissor = regs.scissor_test[0];
387 vk::PipelineStageFlagBits::eTransfer, 397 const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
388 vk::AccessFlagBits::eTransferWrite); 398 vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
399 scissor_extent.width = std::min(scissor_extent.width, render_area.width);
400 scissor_extent.height = std::min(scissor_extent.height, render_area.height);
389 401
402 const u32 layer = regs.clear_buffers.layer;
403 const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
404
405 if (use_color) {
390 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], 406 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
391 regs.clear_color[2], regs.clear_color[3]}; 407 regs.clear_color[2], regs.clear_color[3]};
392 const vk::ClearColorValue clear(clear_color); 408 const vk::ClearValue clear_value{clear_color};
393 scheduler.Record([image = color_view->GetImage(), 409 const u32 color_attachment = regs.clear_buffers.RT;
394 subresource = color_view->GetImageSubresourceRange(), 410 scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
395 clear](auto cmdbuf, auto& dld) { 411 const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
396 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, 412 clear_value);
397 dld); 413 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
398 }); 414 });
399 } 415 }
400 if (use_depth || use_stencil) {
401 View zeta_surface;
402 {
403 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
404 zeta_surface = texture_cache.GetDepthBufferSurface(false);
405 }
406 416
407 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, 417 if (!use_depth && !use_stencil) {
408 vk::PipelineStageFlagBits::eTransfer, 418 return;
409 vk::AccessFlagBits::eTransferWrite); 419 }
410 420 vk::ImageAspectFlags aspect_flags;
411 const vk::ClearDepthStencilValue clear(regs.clear_depth, 421 if (use_depth) {
412 static_cast<u32>(regs.clear_stencil)); 422 aspect_flags |= vk::ImageAspectFlagBits::eDepth;
413 scheduler.Record([image = zeta_surface->GetImage(), 423 }
414 subresource = zeta_surface->GetImageSubresourceRange(), 424 if (use_stencil) {
415 clear](auto cmdbuf, auto& dld) { 425 aspect_flags |= vk::ImageAspectFlagBits::eStencil;
416 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
417 subresource, dld);
418 });
419 } 426 }
427
428 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
429 clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
430 const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
431 const vk::ClearValue clear_value{clear_zeta};
432 const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
433 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
434 });
420} 435}
421 436
422void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 437void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
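The rewritten Clear() stays inside the render pass and uses vkCmdClearAttachments instead of transitioning the target and calling vkCmdClearColorImage. A minimal sketch of that path for a single color target, assuming the caller has already clamped the clear area to the render area and that the command buffer is inside the bound render pass:

#include <array>
#include <cstdint>
#include <vulkan/vulkan.hpp>

void ClearColorAttachment(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld,
                          std::uint32_t color_attachment, vk::Rect2D clear_area,
                          std::uint32_t layer) {
    const std::array<float, 4> clear_color{0.0f, 0.0f, 0.0f, 1.0f};
    const vk::ClearValue clear_value{vk::ClearColorValue(clear_color)};
    const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
                                         clear_value);
    const vk::ClearRect clear_rect(clear_area, layer, 1); // one layer, like the hunk above
    cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
}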
@@ -533,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
533 548
534 // Verify that the cached surface is the same size and format as the requested framebuffer 549 // Verify that the cached surface is the same size and format as the requested framebuffer
535 const auto& params{surface->GetSurfaceParams()}; 550 const auto& params{surface->GetSurfaceParams()};
536 const auto& pixel_format{
537 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
538 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 551 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
539 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 552 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
540 553
@@ -545,6 +558,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
545 return true; 558 return true;
546} 559}
547 560
561void RasterizerVulkan::SetupDirtyFlags() {
562 state_tracker.Initialize();
563}
564
548void RasterizerVulkan::FlushWork() { 565void RasterizerVulkan::FlushWork() {
549 static constexpr u32 DRAWS_TO_DISPATCH = 4096; 566 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
550 567
@@ -568,9 +585,9 @@ void RasterizerVulkan::FlushWork() {
568 585
569RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { 586RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
570 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 587 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
571 auto& dirty = system.GPU().Maxwell3D().dirty; 588 auto& dirty = system.GPU().Maxwell3D().dirty.flags;
572 const bool update_rendertargets = dirty.render_settings; 589 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
573 dirty.render_settings = false; 590 dirty[VideoCommon::Dirty::RenderTargets] = false;
574 591
575 texture_cache.GuardRenderTargets(true); 592 texture_cache.GuardRenderTargets(true);
576 593
@@ -720,13 +737,51 @@ void RasterizerVulkan::SetupImageTransitions(
720} 737}
721 738
722void RasterizerVulkan::UpdateDynamicStates() { 739void RasterizerVulkan::UpdateDynamicStates() {
723 auto& gpu = system.GPU().Maxwell3D(); 740 auto& regs = system.GPU().Maxwell3D().regs;
724 UpdateViewportsState(gpu); 741 UpdateViewportsState(regs);
725 UpdateScissorsState(gpu); 742 UpdateScissorsState(regs);
726 UpdateDepthBias(gpu); 743 UpdateDepthBias(regs);
727 UpdateBlendConstants(gpu); 744 UpdateBlendConstants(regs);
728 UpdateDepthBounds(gpu); 745 UpdateDepthBounds(regs);
729 UpdateStencilFaces(gpu); 746 UpdateStencilFaces(regs);
747}
748
749void RasterizerVulkan::BeginTransformFeedback() {
750 const auto& regs = system.GPU().Maxwell3D().regs;
751 if (regs.tfb_enabled == 0) {
752 return;
753 }
754
755 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
757 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
758
759 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
760 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
761 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
762
763 const auto& binding = regs.tfb_bindings[0];
764 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
765 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
766
767 const GPUVAddr gpu_addr = binding.Address();
768 const std::size_t size = binding.buffer_size;
769 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
770
771 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
772 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
773 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
774 });
775}
776
777void RasterizerVulkan::EndTransformFeedback() {
778 const auto& regs = system.GPU().Maxwell3D().regs;
779 if (regs.tfb_enabled == 0) {
780 return;
781 }
782
783 scheduler.Record(
784 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
730} 785}
731 786
732void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 787void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
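BeginTransformFeedback and EndTransformFeedback only handle binding 0 for now. A compact sketch of the commands they record, with buffer, offset and size standing in for the buffer cache upload done in the real code:

#include <vulkan/vulkan.hpp>

void RecordTransformFeedback(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld,
                             vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) {
    cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
    cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); // no counter buffers
    // ... record the draw between begin and end ...
    cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld);
}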
@@ -836,14 +891,16 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
836 MICROPROFILE_SCOPE(Vulkan_Textures); 891 MICROPROFILE_SCOPE(Vulkan_Textures);
837 const auto& gpu = system.GPU().Maxwell3D(); 892 const auto& gpu = system.GPU().Maxwell3D();
838 for (const auto& entry : entries.samplers) { 893 for (const auto& entry : entries.samplers) {
839 const auto texture = GetTextureInfo(gpu, entry, stage); 894 for (std::size_t i = 0; i < entry.Size(); ++i) {
840 SetupTexture(texture, entry); 895 const auto texture = GetTextureInfo(gpu, entry, stage, i);
896 SetupTexture(texture, entry);
897 }
841 } 898 }
842} 899}
843 900
844void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 901void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
845 MICROPROFILE_SCOPE(Vulkan_Images); 902 MICROPROFILE_SCOPE(Vulkan_Images);
846 const auto& gpu = system.GPU().KeplerCompute(); 903 const auto& gpu = system.GPU().Maxwell3D();
847 for (const auto& entry : entries.images) { 904 for (const auto& entry : entries.images) {
848 const auto tic = GetTextureInfo(gpu, entry, stage).tic; 905 const auto tic = GetTextureInfo(gpu, entry, stage).tic;
849 SetupImage(tic, entry); 906 SetupImage(tic, entry);
@@ -886,8 +943,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
886 MICROPROFILE_SCOPE(Vulkan_Textures); 943 MICROPROFILE_SCOPE(Vulkan_Textures);
887 const auto& gpu = system.GPU().KeplerCompute(); 944 const auto& gpu = system.GPU().KeplerCompute();
888 for (const auto& entry : entries.samplers) { 945 for (const auto& entry : entries.samplers) {
889 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); 946 for (std::size_t i = 0; i < entry.Size(); ++i) {
890 SetupTexture(texture, entry); 947 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
948 SetupTexture(texture, entry);
949 }
891 } 950 }
892} 951}
893 952
@@ -902,6 +961,13 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
902 961
903void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, 962void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
904 const Tegra::Engines::ConstBufferInfo& buffer) { 963 const Tegra::Engines::ConstBufferInfo& buffer) {
964 if (!buffer.enabled) {
965 // Set values to zero to unbind buffers
966 update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
967 sizeof(float));
968 return;
969 }
970
905 // Align the size to avoid bad std140 interactions 971 // Align the size to avoid bad std140 interactions
906 const std::size_t size = 972 const std::size_t size =
907 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 973 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
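SetupConstBuffer now has two paths: disabled buffers are replaced by a tiny dummy binding from GetEmptyBuffer, and enabled ones are padded up to a 16-byte multiple before being bound. A sketch of that size logic; AlignUp mirrors Common::AlignUp.

#include <cstddef>
#include <iostream>

constexpr std::size_t AlignUp(std::size_t value, std::size_t alignment) {
    return (value + alignment - 1) / alignment * alignment;
}

int main() {
    const bool enabled = true;
    const std::size_t raw_size = 100; // bytes reported for this const buffer
    // Disabled buffers bind sizeof(float) bytes of an empty buffer; enabled buffers
    // are padded to a multiple of 4 * sizeof(float) to avoid bad std140 interactions.
    const std::size_t bound_size = enabled ? AlignUp(raw_size, 4 * sizeof(float)) : sizeof(float);
    std::cout << "binding " << bound_size << " bytes\n"; // prints 112 for the enabled path
}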
@@ -972,12 +1038,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
972 image_views.push_back(ImageView{std::move(view), image_layout}); 1038 image_views.push_back(ImageView{std::move(view), image_layout});
973} 1039}
974 1040
975void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { 1041void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
976 if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { 1042 if (!state_tracker.TouchViewports()) {
977 return; 1043 return;
978 } 1044 }
979 gpu.dirty.viewport_transform = false;
980 const auto& regs = gpu.regs;
981 const std::array viewports{ 1045 const std::array viewports{
982 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), 1046 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
983 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), 1047 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
@@ -992,12 +1056,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
992 }); 1056 });
993} 1057}
994 1058
995void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { 1059void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
996 if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { 1060 if (!state_tracker.TouchScissors()) {
997 return; 1061 return;
998 } 1062 }
999 gpu.dirty.scissor_test = false;
1000 const auto& regs = gpu.regs;
1001 const std::array scissors = { 1063 const std::array scissors = {
1002 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 1064 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1003 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 1065 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
@@ -1010,46 +1072,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
1010 }); 1072 });
1011} 1073}
1012 1074
1013void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { 1075void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
1014 if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { 1076 if (!state_tracker.TouchDepthBias()) {
1015 return; 1077 return;
1016 } 1078 }
1017 gpu.dirty.polygon_offset = false;
1018 const auto& regs = gpu.regs;
1019 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, 1079 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1020 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { 1080 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
1021 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); 1081 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
1022 }); 1082 });
1023} 1083}
1024 1084
1025void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { 1085void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
1026 if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { 1086 if (!state_tracker.TouchBlendConstants()) {
1027 return; 1087 return;
1028 } 1088 }
1029 gpu.dirty.blend_state = false; 1089 const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
1030 const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, 1090 regs.blend_color.a};
1031 gpu.regs.blend_color.b, gpu.regs.blend_color.a};
1032 scheduler.Record([blend_color](auto cmdbuf, auto& dld) { 1091 scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
1033 cmdbuf.setBlendConstants(blend_color.data(), dld); 1092 cmdbuf.setBlendConstants(blend_color.data(), dld);
1034 }); 1093 });
1035} 1094}
1036 1095
1037void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { 1096void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
1038 if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { 1097 if (!state_tracker.TouchDepthBounds()) {
1039 return; 1098 return;
1040 } 1099 }
1041 gpu.dirty.depth_bounds_values = false;
1042 const auto& regs = gpu.regs;
1043 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( 1100 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1044 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); 1101 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
1045} 1102}
1046 1103
1047void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { 1104void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
1048 if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { 1105 if (!state_tracker.TouchStencilProperties()) {
1049 return; 1106 return;
1050 } 1107 }
1051 gpu.dirty.stencil_test = false;
1052 const auto& regs = gpu.regs;
1053 if (regs.stencil_two_side_enable) { 1108 if (regs.stencil_two_side_enable) {
1054 // Separate values per face 1109 // Separate values per face
1055 scheduler.Record( 1110 scheduler.Record(
@@ -1100,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1100 // This implementation assumes that all attributes are used in the shader. 1155 // This implementation assumes that all attributes are used in the shader.
1101 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1156 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1102 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 1157 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1103 DEBUG_ASSERT(end > start); 1158 DEBUG_ASSERT(end >= start);
1104 1159
1105 size += (end - start + 1) * regs.vertex_array[index].enable; 1160 size += (end - start + 1) * regs.vertex_array[index].enable;
1106 } 1161 }
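The dirty-flag checks in this file now go through the injected StateTracker instead of the scheduler's per-command-buffer booleans. Its implementation is not part of this diff, so the class below is only an assumed sketch of the touch pattern the call sites imply: Touch* returns true when the state must be re-recorded and clears the flag, and InvalidateCommandBufferState re-arms everything when a new command buffer starts.

#include <utility>

class StateTracker {
public:
    bool TouchViewports() {
        // True when the guest changed the viewports or a new command buffer began.
        return std::exchange(viewports_dirty, false);
    }

    void InvalidateCommandBufferState() {
        viewports_dirty = true; // every dynamic state must be recorded again
    }

private:
    bool viewports_dirty = true;
};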
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4dc8af6e8..3185868e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> {
96 96
97namespace Vulkan { 97namespace Vulkan {
98 98
99class StateTracker;
99class BufferBindings; 100class BufferBindings;
100 101
101struct ImageView { 102struct ImageView {
@@ -108,7 +109,7 @@ public:
108 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, 109 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
109 VKScreenInfo& screen_info, const VKDevice& device, 110 VKScreenInfo& screen_info, const VKDevice& device,
110 VKResourceManager& resource_manager, VKMemoryManager& memory_manager, 111 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
111 VKScheduler& scheduler); 112 StateTracker& state_tracker, VKScheduler& scheduler);
112 ~RasterizerVulkan() override; 113 ~RasterizerVulkan() override;
113 114
114 void Draw(bool is_indexed, bool is_instanced) override; 115 void Draw(bool is_indexed, bool is_instanced) override;
@@ -127,6 +128,7 @@ public:
127 const Tegra::Engines::Fermi2D::Config& copy_config) override; 128 const Tegra::Engines::Fermi2D::Config& copy_config) override;
128 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 129 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
129 u32 pixel_stride) override; 130 u32 pixel_stride) override;
131 void SetupDirtyFlags() override;
130 132
131 /// Maximum supported size that a constbuffer can have in bytes. 133 /// Maximum supported size that a constbuffer can have in bytes.
132 static constexpr std::size_t MaxConstbufferSize = 0x10000; 134 static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -167,6 +169,10 @@ private:
167 169
168 void UpdateDynamicStates(); 170 void UpdateDynamicStates();
169 171
172 void BeginTransformFeedback();
173
174 void EndTransformFeedback();
175
170 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); 176 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
171 177
172 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 178 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -215,12 +221,12 @@ private:
215 221
216 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 222 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
217 223
218 void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); 224 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
219 void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); 225 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
220 void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); 226 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
221 void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); 227 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
222 void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); 228 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
223 void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); 229 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
224 230
225 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; 231 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
226 232
@@ -241,11 +247,13 @@ private:
241 const VKDevice& device; 247 const VKDevice& device;
242 VKResourceManager& resource_manager; 248 VKResourceManager& resource_manager;
243 VKMemoryManager& memory_manager; 249 VKMemoryManager& memory_manager;
250 StateTracker& state_tracker;
244 VKScheduler& scheduler; 251 VKScheduler& scheduler;
245 252
246 VKStagingBufferPool staging_pool; 253 VKStagingBufferPool staging_pool;
247 VKDescriptorPool descriptor_pool; 254 VKDescriptorPool descriptor_pool;
248 VKUpdateDescriptorQueue update_descriptor_queue; 255 VKUpdateDescriptorQueue update_descriptor_queue;
256 VKRenderPassCache renderpass_cache;
249 QuadArrayPass quad_array_pass; 257 QuadArrayPass quad_array_pass;
250 Uint8Pass uint8_pass; 258 Uint8Pass uint8_pass;
251 259
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 92bd6c344..b61d4fe63 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -2,6 +2,12 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include <mutex>
7#include <optional>
8#include <thread>
9#include <utility>
10
5#include "common/assert.h" 11#include "common/assert.h"
6#include "common/microprofile.h" 12#include "common/microprofile.h"
7#include "video_core/renderer_vulkan/declarations.h" 13#include "video_core/renderer_vulkan/declarations.h"
@@ -9,6 +15,7 @@
9#include "video_core/renderer_vulkan/vk_query_cache.h" 15#include "video_core/renderer_vulkan/vk_query_cache.h"
10#include "video_core/renderer_vulkan/vk_resource_manager.h" 16#include "video_core/renderer_vulkan/vk_resource_manager.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_state_tracker.h"
12 19
13namespace Vulkan { 20namespace Vulkan {
14 21
@@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
29 last = nullptr; 36 last = nullptr;
30} 37}
31 38
32VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) 39VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
33 : device{device}, resource_manager{resource_manager}, next_fence{ 40 StateTracker& state_tracker)
34 &resource_manager.CommitFence()} { 41 : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
42 next_fence{&resource_manager.CommitFence()} {
35 AcquireNewChunk(); 43 AcquireNewChunk();
36 AllocateNewContext(); 44 AllocateNewContext();
37 worker_thread = std::thread(&VKScheduler::WorkerThread, this); 45 worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() {
157 165
158void VKScheduler::InvalidateState() { 166void VKScheduler::InvalidateState() {
159 state.graphics_pipeline = nullptr; 167 state.graphics_pipeline = nullptr;
160 state.viewports = false; 168 state_tracker.InvalidateCommandBufferState();
161 state.scissors = false;
162 state.depth_bias = false;
163 state.blend_constants = false;
164 state.depth_bounds = false;
165 state.stencil_values = false;
166} 169}
167 170
168void VKScheduler::EndPendingOperations() { 171void VKScheduler::EndPendingOperations() {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 62fd7858b..c7cc291c3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -17,6 +17,7 @@
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20class StateTracker;
20class VKDevice; 21class VKDevice;
21class VKFence; 22class VKFence;
22class VKQueryCache; 23class VKQueryCache;
@@ -43,7 +44,8 @@ private:
43/// OpenGL-like operations on Vulkan command buffers. 44/// OpenGL-like operations on Vulkan command buffers.
44class VKScheduler { 45class VKScheduler {
45public: 46public:
46 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); 47 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
48 StateTracker& state_tracker);
47 ~VKScheduler(); 49 ~VKScheduler();
48 50
49 /// Sends the current execution context to the GPU. 51 /// Sends the current execution context to the GPU.
@@ -74,36 +76,6 @@ public:
74 query_cache = &query_cache_; 76 query_cache = &query_cache_;
75 } 77 }
76 78
77 /// Returns true when viewports have been set in the current command buffer.
78 bool TouchViewports() {
79 return std::exchange(state.viewports, true);
80 }
81
82 /// Returns true when scissors have been set in the current command buffer.
83 bool TouchScissors() {
84 return std::exchange(state.scissors, true);
85 }
86
87 /// Returns true when depth bias have been set in the current command buffer.
88 bool TouchDepthBias() {
89 return std::exchange(state.depth_bias, true);
90 }
91
92 /// Returns true when blend constants have been set in the current command buffer.
93 bool TouchBlendConstants() {
94 return std::exchange(state.blend_constants, true);
95 }
96
97 /// Returns true when depth bounds have been set in the current command buffer.
98 bool TouchDepthBounds() {
99 return std::exchange(state.depth_bounds, true);
100 }
101
102 /// Returns true when stencil values have been set in the current command buffer.
103 bool TouchStencilValues() {
104 return std::exchange(state.stencil_values, true);
105 }
106
107 /// Send work to a separate thread. 79 /// Send work to a separate thread.
108 template <typename T> 80 template <typename T>
109 void Record(T&& command) { 81 void Record(T&& command) {
@@ -217,6 +189,8 @@ private:
217 189
218 const VKDevice& device; 190 const VKDevice& device;
219 VKResourceManager& resource_manager; 191 VKResourceManager& resource_manager;
192 StateTracker& state_tracker;
193
220 VKQueryCache* query_cache = nullptr; 194 VKQueryCache* query_cache = nullptr;
221 195
222 vk::CommandBuffer current_cmdbuf; 196 vk::CommandBuffer current_cmdbuf;
@@ -226,12 +200,6 @@ private:
226 struct State { 200 struct State {
227 std::optional<vk::RenderPassBeginInfo> renderpass; 201 std::optional<vk::RenderPassBeginInfo> renderpass;
228 vk::Pipeline graphics_pipeline; 202 vk::Pipeline graphics_pipeline;
229 bool viewports = false;
230 bool scissors = false;
231 bool depth_bias = false;
232 bool blend_constants = false;
233 bool depth_bounds = false;
234 bool stencil_values = false;
235 } state; 203 } state;
236 204
237 std::unique_ptr<CommandChunk> chunk; 205 std::unique_ptr<CommandChunk> chunk;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 2da622d15..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <functional> 5#include <functional>
6#include <limits> 6#include <limits>
7#include <map> 7#include <map>
8#include <optional>
8#include <type_traits> 9#include <type_traits>
10#include <unordered_map>
9#include <utility> 11#include <utility>
10 12
11#include <fmt/format.h> 13#include <fmt/format.h>
@@ -24,6 +26,7 @@
24#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
25#include "video_core/shader/node.h" 27#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h"
27 30
28namespace Vulkan { 31namespace Vulkan {
29 32
@@ -69,8 +72,9 @@ struct TexelBuffer {
69 72
70struct SampledImage { 73struct SampledImage {
71 Id image_type{}; 74 Id image_type{};
72 Id sampled_image_type{}; 75 Id sampler_type{};
73 Id sampler{}; 76 Id sampler_pointer_type{};
77 Id variable{};
74}; 78};
75 79
76struct StorageImage { 80struct StorageImage {
@@ -92,6 +96,12 @@ struct VertexIndices {
92 std::optional<u32> clip_distances; 96 std::optional<u32> clip_distances;
93}; 97};
94 98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
95spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const Sampler& sampler) {
96 ASSERT(!sampler.IsBuffer()); 106 ASSERT(!sampler.IsBuffer());
97 switch (sampler.GetType()) { 107 switch (sampler.GetType()) {
@@ -265,9 +275,13 @@ bool IsPrecise(Operation operand) {
265class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
266public: 276public:
267 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 277 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
268 const Specialization& specialization) 278 const Registry& registry, const Specialization& specialization)
269 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, 279 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
270 specialization{specialization} { 280 registry{registry}, specialization{specialization} {
281 if (stage != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
283 }
284
271 AddCapability(spv::Capability::Shader); 285 AddCapability(spv::Capability::Shader);
272 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); 286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
273 AddCapability(spv::Capability::ImageQuery); 287 AddCapability(spv::Capability::ImageQuery);
@@ -285,6 +299,15 @@ public:
285 AddExtension("SPV_KHR_variable_pointers"); 299 AddExtension("SPV_KHR_variable_pointers");
286 AddExtension("SPV_KHR_shader_draw_parameters"); 300 AddExtension("SPV_KHR_shader_draw_parameters");
287 301
302 if (!transform_feedback.empty()) {
303 if (device.IsExtTransformFeedbackSupported()) {
304 AddCapability(spv::Capability::TransformFeedback);
305 } else {
306 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
307 "supported on this device");
308 }
309 }
310
288 if (ir.UsesLayer() || ir.UsesViewportIndex()) { 311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
289 if (ir.UsesViewportIndex()) { 312 if (ir.UsesViewportIndex()) {
290 AddCapability(spv::Capability::MultiViewport); 313 AddCapability(spv::Capability::MultiViewport);
@@ -295,7 +318,7 @@ public:
295 } 318 }
296 } 319 }
297 320
298 if (device.IsShaderStorageImageReadWithoutFormatSupported()) { 321 if (device.IsFormatlessImageLoadSupported()) {
299 AddCapability(spv::Capability::StorageImageReadWithoutFormat); 322 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
300 } 323 }
301 324
@@ -317,25 +340,29 @@ public:
317 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 340 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
318 header.common2.threads_per_input_primitive); 341 header.common2.threads_per_input_primitive);
319 break; 342 break;
320 case ShaderType::TesselationEval: 343 case ShaderType::TesselationEval: {
344 const auto& info = registry.GetGraphicsInfo();
321 AddCapability(spv::Capability::Tessellation); 345 AddCapability(spv::Capability::Tessellation);
322 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); 346 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
323 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); 347 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
324 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); 348 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
325 AddExecutionMode(main, specialization.tessellation.clockwise 349 AddExecutionMode(main, info.tessellation_clockwise
326 ? spv::ExecutionMode::VertexOrderCw 350 ? spv::ExecutionMode::VertexOrderCw
327 : spv::ExecutionMode::VertexOrderCcw); 351 : spv::ExecutionMode::VertexOrderCcw);
328 break; 352 break;
329 case ShaderType::Geometry: 353 }
354 case ShaderType::Geometry: {
355 const auto& info = registry.GetGraphicsInfo();
330 AddCapability(spv::Capability::Geometry); 356 AddCapability(spv::Capability::Geometry);
331 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); 357 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
332 AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); 358 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
333 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); 359 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
334 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 360 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
335 header.common4.max_output_vertices); 361 header.common4.max_output_vertices);
336 // TODO(Rodrigo): Where can we get this info from? 362 // TODO(Rodrigo): Where can we get this info from?
337 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); 363 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
338 break; 364 break;
365 }
339 case ShaderType::Fragment: 366 case ShaderType::Fragment:
340 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); 367 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
341 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); 368 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -544,7 +571,8 @@ private:
544 if (stage != ShaderType::Geometry) { 571 if (stage != ShaderType::Geometry) {
545 return; 572 return;
546 } 573 }
547 const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 574 const auto& info = registry.GetGraphicsInfo();
575 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
548 DeclareInputVertexArray(num_input); 576 DeclareInputVertexArray(num_input);
549 DeclareOutputVertex(); 577 DeclareOutputVertex();
550 } 578 }
@@ -741,12 +769,34 @@ private:
741 } 769 }
742 770
743 void DeclareOutputAttributes() { 771 void DeclareOutputAttributes() {
772 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
773 return;
774 }
775
776 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
744 for (const auto index : ir.GetOutputAttributes()) { 777 for (const auto index : ir.GetOutputAttributes()) {
745 if (!IsGenericAttribute(index)) { 778 if (!IsGenericAttribute(index)) {
746 continue; 779 continue;
747 } 780 }
748 const u32 location = GetGenericAttributeLocation(index); 781 DeclareOutputAttribute(index);
749 Id type = t_float4; 782 }
783 }
784
785 void DeclareOutputAttribute(Attribute::Index index) {
786 static constexpr std::string_view swizzle = "xyzw";
787
788 const u32 location = GetGenericAttributeLocation(index);
789 u8 element = 0;
790 while (element < 4) {
791 const std::size_t remainder = 4 - element;
792
793 std::size_t num_components = remainder;
794 const std::optional tfb = GetTransformFeedbackInfo(index, element);
795 if (tfb) {
796 num_components = tfb->components;
797 }
798
799 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
750 Id varying_default = v_varying_default; 800 Id varying_default = v_varying_default;
751 if (IsOutputAttributeArray()) { 801 if (IsOutputAttributeArray()) {
752 const u32 num = GetNumOutputVertices(); 802 const u32 num = GetNumOutputVertices();
@@ -759,15 +809,47 @@ private:
759 } 809 }
760 type = TypePointer(spv::StorageClass::Output, type); 810 type = TypePointer(spv::StorageClass::Output, type);
761 811
812 std::string name = fmt::format("out_attr{}", location);
813 if (num_components < 4 || element > 0) {
814 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
815 }
816
762 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); 817 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
763 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); 818 Name(AddGlobalVariable(id), name);
764 output_attributes.emplace(index, id); 819
820 GenericVaryingDescription description;
821 description.id = id;
822 description.first_element = element;
823 description.is_scalar = num_components == 1;
824 for (u32 i = 0; i < num_components; ++i) {
825 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
826 output_attributes.emplace(offset, description);
827 }
765 interfaces.push_back(id); 828 interfaces.push_back(id);
766 829
767 Decorate(id, spv::Decoration::Location, location); 830 Decorate(id, spv::Decoration::Location, location);
831 if (element > 0) {
832 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
833 }
834 if (tfb && device.IsExtTransformFeedbackSupported()) {
835 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
836 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 }
839
840 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
768 } 841 }
769 } 842 }
770 843
844 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
845 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
846 const auto it = transform_feedback.find(location);
847 if (it == transform_feedback.end()) {
848 return {};
849 }
850 return it->second;
851 }
852
771 u32 DeclareConstantBuffers(u32 binding) { 853 u32 DeclareConstantBuffers(u32 binding) {
772 for (const auto& [index, size] : ir.GetConstantBuffers()) { 854 for (const auto& [index, size] : ir.GetConstantBuffers()) {
773 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo 855 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
@@ -833,16 +915,20 @@ private:
833 constexpr int sampled = 1; 915 constexpr int sampled = 1;
834 constexpr auto format = spv::ImageFormat::Unknown; 916 constexpr auto format = spv::ImageFormat::Unknown;
835 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); 917 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
836 const Id sampled_image_type = TypeSampledImage(image_type); 918 const Id sampler_type = TypeSampledImage(image_type);
837 const Id pointer_type = 919 const Id sampler_pointer_type =
838 TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); 920 TypePointer(spv::StorageClass::UniformConstant, sampler_type);
921 const Id type = sampler.IsIndexed()
922 ? TypeArray(sampler_type, Constant(t_uint, sampler.Size()))
923 : sampler_type;
924 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
839 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); 925 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
840 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); 926 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));
841 Decorate(id, spv::Decoration::Binding, binding++); 927 Decorate(id, spv::Decoration::Binding, binding++);
842 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); 928 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
843 929
844 sampled_images.emplace(sampler.GetIndex(), 930 sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type,
845 SampledImage{image_type, sampled_image_type, id}); 931 sampler_pointer_type, id});
846 } 932 }
847 return binding; 933 return binding;
848 } 934 }
@@ -893,7 +979,7 @@ private:
893 u32 GetNumInputVertices() const { 979 u32 GetNumInputVertices() const {
894 switch (stage) { 980 switch (stage) {
895 case ShaderType::Geometry: 981 case ShaderType::Geometry:
896 return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 982 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
897 case ShaderType::TesselationControl: 983 case ShaderType::TesselationControl:
898 case ShaderType::TesselationEval: 984 case ShaderType::TesselationEval:
899 return NumInputPatches; 985 return NumInputPatches;
@@ -1341,8 +1427,14 @@ private:
1341 } 1427 }
1342 default: 1428 default:
1343 if (IsGenericAttribute(attribute)) { 1429 if (IsGenericAttribute(attribute)) {
1344 const Id composite = output_attributes.at(attribute); 1430 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1345 return {ArrayPass(t_out_float, composite, {element}), Type::Float}; 1431 const GenericVaryingDescription description = output_attributes.at(offset);
1432 const Id composite = description.id;
1433 std::vector<u32> indices;
1434 if (!description.is_scalar) {
1435 indices.push_back(element - description.first_element);
1436 }
1437 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1346 } 1438 }
1347 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", 1439 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1348 static_cast<u32>(attribute)); 1440 static_cast<u32>(attribute));
@@ -1525,7 +1617,12 @@ private:
1525 ASSERT(!meta.sampler.IsBuffer()); 1617 ASSERT(!meta.sampler.IsBuffer());
1526 1618
1527 const auto& entry = sampled_images.at(meta.sampler.GetIndex()); 1619 const auto& entry = sampled_images.at(meta.sampler.GetIndex());
1528 return OpLoad(entry.sampled_image_type, entry.sampler); 1620 Id sampler = entry.variable;
1621 if (meta.sampler.IsIndexed()) {
1622 const Id index = AsInt(Visit(meta.index));
1623 sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
1624 }
1625 return OpLoad(entry.sampler_type, sampler);
1529 } 1626 }
1530 1627
1531 Id GetTextureImage(Operation operation) { 1628 Id GetTextureImage(Operation operation) {
@@ -1783,7 +1880,7 @@ private:
1783 } 1880 }
1784 1881
1785 Expression ImageLoad(Operation operation) { 1882 Expression ImageLoad(Operation operation) {
1786 if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { 1883 if (!device.IsFormatlessImageLoadSupported()) {
1787 return {v_float_zero, Type::Float}; 1884 return {v_float_zero, Type::Float};
1788 } 1885 }
1789 1886
@@ -2211,16 +2308,14 @@ private:
2211 switch (specialization.attribute_types.at(location)) { 2308 switch (specialization.attribute_types.at(location)) {
2212 case Maxwell::VertexAttribute::Type::SignedNorm: 2309 case Maxwell::VertexAttribute::Type::SignedNorm:
2213 case Maxwell::VertexAttribute::Type::UnsignedNorm: 2310 case Maxwell::VertexAttribute::Type::UnsignedNorm:
2311 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2312 case Maxwell::VertexAttribute::Type::SignedScaled:
2214 case Maxwell::VertexAttribute::Type::Float: 2313 case Maxwell::VertexAttribute::Type::Float:
2215 return {Type::Float, t_in_float, t_in_float4}; 2314 return {Type::Float, t_in_float, t_in_float4};
2216 case Maxwell::VertexAttribute::Type::SignedInt: 2315 case Maxwell::VertexAttribute::Type::SignedInt:
2217 return {Type::Int, t_in_int, t_in_int4}; 2316 return {Type::Int, t_in_int, t_in_int4};
2218 case Maxwell::VertexAttribute::Type::UnsignedInt: 2317 case Maxwell::VertexAttribute::Type::UnsignedInt:
2219 return {Type::Uint, t_in_uint, t_in_uint4}; 2318 return {Type::Uint, t_in_uint, t_in_uint4};
2220 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2221 case Maxwell::VertexAttribute::Type::SignedScaled:
2222 UNIMPLEMENTED();
2223 return {Type::Float, t_in_float, t_in_float4};
2224 default: 2319 default:
2225 UNREACHABLE(); 2320 UNREACHABLE();
2226 return {Type::Float, t_in_float, t_in_float4}; 2321 return {Type::Float, t_in_float, t_in_float4};
@@ -2250,11 +2345,11 @@ private:
2250 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { 2345 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2251 switch (type) { 2346 switch (type) {
2252 case Type::Float: 2347 case Type::Float:
2253 return {nullptr, t_float2, t_float3, t_float4}; 2348 return {t_float, t_float2, t_float3, t_float4};
2254 case Type::Int: 2349 case Type::Int:
2255 return {nullptr, t_int2, t_int3, t_int4}; 2350 return {t_int, t_int2, t_int3, t_int4};
2256 case Type::Uint: 2351 case Type::Uint:
2257 return {nullptr, t_uint2, t_uint3, t_uint4}; 2352 return {t_uint, t_uint2, t_uint3, t_uint4};
2258 default: 2353 default:
2259 UNIMPLEMENTED(); 2354 UNIMPLEMENTED();
2260 return {}; 2355 return {};
@@ -2487,7 +2582,9 @@ private:
2487 const ShaderIR& ir; 2582 const ShaderIR& ir;
2488 const ShaderType stage; 2583 const ShaderType stage;
2489 const Tegra::Shader::Header header; 2584 const Tegra::Shader::Header header;
2585 const Registry& registry;
2490 const Specialization& specialization; 2586 const Specialization& specialization;
2587 std::unordered_map<u8, VaryingTFB> transform_feedback;
2491 2588
2492 const Id t_void = Name(TypeVoid(), "void"); 2589 const Id t_void = Name(TypeVoid(), "void");
2493 2590
@@ -2576,7 +2673,7 @@ private:
2576 Id shared_memory{}; 2673 Id shared_memory{};
2577 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; 2674 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2578 std::map<Attribute::Index, Id> input_attributes; 2675 std::map<Attribute::Index, Id> input_attributes;
2579 std::map<Attribute::Index, Id> output_attributes; 2676 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2580 std::map<u32, Id> constant_buffers; 2677 std::map<u32, Id> constant_buffers;
2581 std::map<GlobalMemoryBase, Id> global_buffers; 2678 std::map<GlobalMemoryBase, Id> global_buffers;
2582 std::map<u32, TexelBuffer> texel_buffers; 2679 std::map<u32, TexelBuffer> texel_buffers;
@@ -2862,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
2862} 2959}
2863 2960
2864std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 2961std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
2865 ShaderType stage, const Specialization& specialization) { 2962 ShaderType stage, const VideoCommon::Shader::Registry& registry,
2866 return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); 2963 const Specialization& specialization) {
2964 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
2867} 2965}
2868 2966
2869} // namespace Vulkan 2967} // namespace Vulkan
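A note on the DeclareOutputAttribute rework above: when transform feedback captures only part of a generic varying, the decompiler now carves the four components of a location into consecutive sub-vectors, names each one after the swizzle it covers, and decorates every piece with Location/Component (plus the Xfb decorations when the extension is available). Below is a minimal, standalone sketch of that partitioning; CaptureInfo, SplitVarying and the capture map are illustrative stand-ins, not types from the repository.

#include <cstdint>
#include <map>
#include <string>
#include <string_view>
#include <vector>

#include <fmt/format.h>

struct CaptureInfo {
    std::uint32_t components; // components consumed by this transform feedback capture
};

// captures is keyed by the element (0..3) inside the location, mirroring how the
// real code keys its transform_feedback map by "attribute * 4 + element".
std::vector<std::string> SplitVarying(std::uint32_t location,
                                      const std::map<std::uint32_t, CaptureInfo>& captures) {
    static constexpr std::string_view swizzle = "xyzw";
    std::vector<std::string> names;
    std::uint32_t element = 0;
    while (element < 4) {
        std::uint32_t num_components = 4 - element; // default: take the rest of the vector
        if (const auto it = captures.find(element); it != captures.end()) {
            num_components = it->second.components; // shrink to what the capture needs
        }
        std::string name = fmt::format("out_attr{}", location);
        if (num_components < 4 || element > 0) {
            name += fmt::format("_{}", swizzle.substr(element, num_components));
        }
        names.push_back(std::move(name));
        element += num_components;
    }
    return names;
}

With a three-component capture at element 0 this yields out_attr0_xyz and out_attr0_w, matching the naming visible in the diff; storing one descriptor per component is what lets the output_attributes lookup later index the correct sub-vector.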
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
18#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h" 19#include "video_core/shader/shader_ir.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
91 u32 shared_memory_size{}; 92 u32 shared_memory_size{};
92 93
93 // Graphics specific 94 // Graphics specific
94 Maxwell::PrimitiveTopology primitive_topology{};
95 std::optional<float> point_size{}; 95 std::optional<float> point_size{};
96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
97 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
98
99 // Tessellation specific
100 struct {
101 Maxwell::TessellationPrimitive primitive{};
102 Maxwell::TessellationSpacing spacing{};
103 bool clockwise{};
104 } tessellation;
105}; 98};
106// Old gcc versions don't consider this trivially copyable. 99// Old gcc versions don't consider this trivially copyable.
107// static_assert(std::is_trivially_copyable_v<Specialization>); 100// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
114ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 107ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
115 108
116std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 109std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
117 Tegra::Engines::ShaderType stage, const Specialization& specialization); 110 Tegra::Engines::ShaderType stage,
111 const VideoCommon::Shader::Registry& registry,
112 const Specialization& specialization);
118 113
119} // namespace Vulkan 114} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 171d78afc..374959f82 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -73,7 +73,8 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
73VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { 73VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
74 const auto usage = 74 const auto usage =
75 vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | 75 vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
76 vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer; 76 vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
77 vk::BufferUsageFlagBits::eIndexBuffer;
77 const u32 log2 = Common::Log2Ceil64(size); 78 const u32 log2 = Common::Log2Ceil64(size);
78 const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, 79 const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
79 nullptr); 80 nullptr);
@@ -99,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) {
99} 100}
100 101
101u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { 102u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
102 static constexpr u64 epochs_to_destroy = 180;
103 static constexpr std::size_t deletions_per_tick = 16; 103 static constexpr std::size_t deletions_per_tick = 16;
104 104
105 auto& staging = cache[log2]; 105 auto& staging = cache[log2];
@@ -107,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
107 const std::size_t old_size = entries.size(); 107 const std::size_t old_size = entries.size();
108 108
109 const auto is_deleteable = [this](const auto& entry) { 109 const auto is_deleteable = [this](const auto& entry) {
110 static constexpr u64 epochs_to_destroy = 180;
110 return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); 111 return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
111 }; 112 };
112 const std::size_t begin_offset = staging.delete_index; 113 const std::size_t begin_offset = staging.delete_index;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
new file mode 100644
index 000000000..94a89e388
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -0,0 +1,99 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <iterator>
8
9#include "common/common_types.h"
10#include "core/core.h"
11#include "video_core/dirty_flags.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/gpu.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h"
15
16#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
17#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
18
19namespace Vulkan {
20
21namespace {
22
23using namespace Dirty;
24using namespace VideoCommon::Dirty;
25using Tegra::Engines::Maxwell3D;
26using Regs = Maxwell3D::Regs;
27using Tables = Maxwell3D::DirtyState::Tables;
28using Table = Maxwell3D::DirtyState::Table;
29using Flags = Maxwell3D::DirtyState::Flags;
30
31Flags MakeInvalidationFlags() {
32 Flags flags{};
33 flags[Viewports] = true;
34 flags[Scissors] = true;
35 flags[DepthBias] = true;
36 flags[BlendConstants] = true;
37 flags[DepthBounds] = true;
38 flags[StencilProperties] = true;
39 return flags;
40}
41
42void SetupDirtyViewports(Tables& tables) {
43 FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
44 FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
45 tables[0][OFF(viewport_transform_enabled)] = Viewports;
46}
47
48void SetupDirtyScissors(Tables& tables) {
49 FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
50}
51
52void SetupDirtyDepthBias(Tables& tables) {
53 auto& table = tables[0];
54 table[OFF(polygon_offset_units)] = DepthBias;
55 table[OFF(polygon_offset_clamp)] = DepthBias;
56 table[OFF(polygon_offset_factor)] = DepthBias;
57}
58
59void SetupDirtyBlendConstants(Tables& tables) {
60 FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
61}
62
63void SetupDirtyDepthBounds(Tables& tables) {
64 FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
65}
66
67void SetupDirtyStencilProperties(Tables& tables) {
68 auto& table = tables[0];
69 table[OFF(stencil_two_side_enable)] = StencilProperties;
70 table[OFF(stencil_front_func_ref)] = StencilProperties;
71 table[OFF(stencil_front_mask)] = StencilProperties;
72 table[OFF(stencil_front_func_mask)] = StencilProperties;
73 table[OFF(stencil_back_func_ref)] = StencilProperties;
74 table[OFF(stencil_back_mask)] = StencilProperties;
75 table[OFF(stencil_back_func_mask)] = StencilProperties;
76}
77
78} // Anonymous namespace
79
80StateTracker::StateTracker(Core::System& system)
81 : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
82
83void StateTracker::Initialize() {
84 auto& dirty = system.GPU().Maxwell3D().dirty;
85 auto& tables = dirty.tables;
86 SetupDirtyRenderTargets(tables);
87 SetupDirtyViewports(tables);
88 SetupDirtyScissors(tables);
89 SetupDirtyDepthBias(tables);
90 SetupDirtyBlendConstants(tables);
91 SetupDirtyDepthBounds(tables);
92 SetupDirtyStencilProperties(tables);
93}
94
95void StateTracker::InvalidateCommandBufferState() {
96 system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
97}
98
99} // namespace Vulkan
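The tracker above plugs into the shared Maxwell3D dirty-flag tables: each SetupDirty* helper points a block of register indices at one flag, so a guest write into, for example, the viewport_transform range marks Viewports dirty. A rough, standalone sketch of that mechanism follows; the sizes and the FillBlock body are assumptions for illustration (the real definitions live in video_core/dirty_flags.h and maxwell_3d.h), but it shows the table-lookup-per-register-write idea.

#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NumRegisters = 0x4000;           // illustrative register count
using Table = std::array<std::uint8_t, NumRegisters>;
using Flags = std::bitset<0x100>;                      // illustrative flag count

// Assumed behaviour of FillBlock: tag a contiguous range of registers with one flag.
void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
    for (std::size_t i = begin; i < begin + num; ++i) {
        table[i] = flag;
    }
}

// Called for every register write; an entry of zero means nothing cares about it.
void OnRegisterWrite(const Table& table, Flags& flags, std::size_t reg_index) {
    if (const std::uint8_t flag = table[reg_index]; flag != 0) {
        flags[flag] = true;
    }
}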
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
new file mode 100644
index 000000000..03bc415b2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -0,0 +1,79 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <limits>
9
10#include "common/common_types.h"
11#include "core/core.h"
12#include "video_core/dirty_flags.h"
13#include "video_core/engines/maxwell_3d.h"
14
15namespace Vulkan {
16
17namespace Dirty {
18
19enum : u8 {
20 First = VideoCommon::Dirty::LastCommonEntry,
21
22 Viewports,
23 Scissors,
24 DepthBias,
25 BlendConstants,
26 DepthBounds,
27 StencilProperties,
28
29 Last
30};
31static_assert(Last <= std::numeric_limits<u8>::max());
32
33} // namespace Dirty
34
35class StateTracker {
36public:
37 explicit StateTracker(Core::System& system);
38
39 void Initialize();
40
41 void InvalidateCommandBufferState();
42
43 bool TouchViewports() {
44 return Exchange(Dirty::Viewports, false);
45 }
46
47 bool TouchScissors() {
48 return Exchange(Dirty::Scissors, false);
49 }
50
51 bool TouchDepthBias() {
52 return Exchange(Dirty::DepthBias, false);
53 }
54
55 bool TouchBlendConstants() {
56 return Exchange(Dirty::BlendConstants, false);
57 }
58
59 bool TouchDepthBounds() {
60 return Exchange(Dirty::DepthBounds, false);
61 }
62
63 bool TouchStencilProperties() {
64 return Exchange(Dirty::StencilProperties, false);
65 }
66
67private:
68 bool Exchange(std::size_t id, bool new_value) const noexcept {
69 auto& flags = system.GPU().Maxwell3D().dirty.flags;
70 const bool is_dirty = flags[id];
71 flags[id] = new_value;
72 return is_dirty;
73 }
74
75 Core::System& system;
76 Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
77};
78
79} // namespace Vulkan
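On the consumer side, the Touch* helpers replace the per-command-buffer booleans removed from VKScheduler: reading a flag clears it, so dynamic state is only re-recorded when something actually changed or after InvalidateCommandBufferState raises every flag again for a fresh command buffer. A hypothetical call-site sketch (UpdateViewportsSketch is not a real function in the rasterizer):

#include "video_core/renderer_vulkan/vk_state_tracker.h"

namespace Vulkan {

void UpdateViewportsSketch(StateTracker& state_tracker) {
    if (!state_tracker.TouchViewports()) {
        return; // viewports untouched since they were last recorded
    }
    // ...read regs.viewport_transform and Record() a vkCmdSetViewport call here...
}

} // namespace Vulkan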
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index f47b691a8..9e73fa9cd 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -141,11 +141,6 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
141 141
142 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; 142 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
143 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; 143 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
144 extent = ChooseSwapExtent(capabilities, width, height);
145
146 current_width = extent.width;
147 current_height = extent.height;
148 current_srgb = srgb;
149 144
150 u32 requested_image_count{capabilities.minImageCount + 1}; 145 u32 requested_image_count{capabilities.minImageCount + 1};
151 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 146 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -153,10 +148,9 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
153 } 148 }
154 149
155 vk::SwapchainCreateInfoKHR swapchain_ci( 150 vk::SwapchainCreateInfoKHR swapchain_ci(
156 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, 151 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1,
157 extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, 152 vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform,
158 capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, 153 vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {});
159 {});
160 154
161 const u32 graphics_family{device.GetGraphicsFamily()}; 155 const u32 graphics_family{device.GetGraphicsFamily()};
162 const u32 present_family{device.GetPresentFamily()}; 156 const u32 present_family{device.GetPresentFamily()};
@@ -169,9 +163,18 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
169 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; 163 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
170 } 164 }
171 165
166 // Request the size again to reduce the possibility of a TOCTOU race condition.
167 const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld);
168 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
169 // Don't add code between this query and the swapchain creation.
172 const auto dev{device.GetLogical()}; 170 const auto dev{device.GetLogical()};
173 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); 171 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
174 172
173 extent = swapchain_ci.imageExtent;
174 current_width = extent.width;
175 current_height = extent.height;
176 current_srgb = srgb;
177
175 images = dev.getSwapchainImagesKHR(*swapchain, dld); 178 images = dev.getSwapchainImagesKHR(*swapchain, dld);
176 image_count = static_cast<u32>(images.size()); 179 image_count = static_cast<u32>(images.size());
177 image_format = surface_format.format; 180 image_format = surface_format.format;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 51b0d38a6..26175921b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -22,6 +22,7 @@
22#include "video_core/renderer_vulkan/vk_device.h" 22#include "video_core/renderer_vulkan/vk_device.h"
23#include "video_core/renderer_vulkan/vk_memory_manager.h" 23#include "video_core/renderer_vulkan/vk_memory_manager.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 24#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h"
25#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
26#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
27#include "video_core/surface.h" 28#include "video_core/surface.h"
@@ -51,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
51 return vk::ImageType::e2D; 52 return vk::ImageType::e2D;
52 case SurfaceTarget::Texture3D: 53 case SurfaceTarget::Texture3D:
53 return vk::ImageType::e3D; 54 return vk::ImageType::e3D;
55 case SurfaceTarget::TextureBuffer:
56 UNREACHABLE();
57 return {};
54 } 58 }
55 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); 59 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
56 return {}; 60 return {};
@@ -272,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
272 276
273 for (u32 level = 0; level < params.num_levels; ++level) { 277 for (u32 level = 0; level < params.num_levels; ++level) {
274 vk::BufferImageCopy copy = GetBufferImageCopy(level); 278 vk::BufferImageCopy copy = GetBufferImageCopy(level);
275 const auto& dld = device.GetDispatchLoader();
276 if (image->GetAspectMask() == 279 if (image->GetAspectMask() ==
277 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { 280 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
278 vk::BufferImageCopy depth = copy; 281 vk::BufferImageCopy depth = copy;
@@ -421,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
421 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, 424 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
422 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); 425 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
423 426
424 const auto& dld{device.GetDispatchLoader()};
425 const vk::ImageSubresourceLayers src_subresource( 427 const vk::ImageSubresourceLayers src_subresource(
426 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); 428 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
427 const vk::ImageSubresourceLayers dst_subresource( 429 const vk::ImageSubresourceLayers dst_subresource(
@@ -457,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
457 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); 459 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
458 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; 460 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
459 461
460 const auto& dld{device.GetDispatchLoader()};
461 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, 462 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
462 is_linear](auto cmdbuf, auto& dld) { 463 is_linear](auto cmdbuf, auto& dld) {
463 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, 464 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/shader/const_buffer_locker.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Engines::SamplerDescriptor;
16
17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
18 : stage{shader_stage} {}
19
20ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
21 Tegra::Engines::ConstBufferEngineInterface& engine)
22 : stage{shader_stage}, engine{&engine} {}
23
24ConstBufferLocker::~ConstBufferLocker() = default;
25
26std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
27 const std::pair<u32, u32> key = {buffer, offset};
28 const auto iter = keys.find(key);
29 if (iter != keys.end()) {
30 return iter->second;
31 }
32 if (!engine) {
33 return std::nullopt;
34 }
35 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
36 keys.emplace(key, value);
37 return value;
38}
39
40std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
41 const u32 key = offset;
42 const auto iter = bound_samplers.find(key);
43 if (iter != bound_samplers.end()) {
44 return iter->second;
45 }
46 if (!engine) {
47 return std::nullopt;
48 }
49 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
50 bound_samplers.emplace(key, value);
51 return value;
52}
53
54std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
55 u32 buffer, u32 offset) {
56 const std::pair key = {buffer, offset};
57 const auto iter = bindless_samplers.find(key);
58 if (iter != bindless_samplers.end()) {
59 return iter->second;
60 }
61 if (!engine) {
62 return std::nullopt;
63 }
64 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
65 bindless_samplers.emplace(key, value);
66 return value;
67}
68
69std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
70 if (bound_buffer_saved) {
71 return bound_buffer;
72 }
73 if (!engine) {
74 return std::nullopt;
75 }
76 bound_buffer_saved = true;
77 bound_buffer = engine->GetBoundBuffer();
78 return bound_buffer;
79}
80
81void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
82 keys.insert_or_assign({buffer, offset}, value);
83}
84
85void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
86 bound_samplers.insert_or_assign(offset, sampler);
87}
88
89void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
90 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
91}
92
93void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
94 bound_buffer_saved = true;
95 bound_buffer = buffer;
96}
97
98bool ConstBufferLocker::IsConsistent() const {
99 if (!engine) {
100 return false;
101 }
102 return std::all_of(keys.begin(), keys.end(),
103 [this](const auto& pair) {
104 const auto [cbuf, offset] = pair.first;
105 const auto value = pair.second;
106 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
107 }) &&
108 std::all_of(bound_samplers.begin(), bound_samplers.end(),
109 [this](const auto& sampler) {
110 const auto [key, value] = sampler;
111 return value == engine->AccessBoundSampler(stage, key);
112 }) &&
113 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
114 [this](const auto& sampler) {
115 const auto [cbuf, offset] = sampler.first;
116 const auto value = sampler.second;
117 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
118 });
119}
120
121bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
122 return std::tie(keys, bound_samplers, bindless_samplers) ==
123 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
124}
125
126} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <unordered_map>
9#include "common/common_types.h"
10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
14
15namespace VideoCommon::Shader {
16
17using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
18using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
19using BindlessSamplerMap =
20 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
21
22/**
23 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
24 * compiler. with it, the shader can obtain required data from GPU state and store it for disk
25 * shader compilation.
26 */
27class ConstBufferLocker {
28public:
29 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
30
31 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
32 Tegra::Engines::ConstBufferEngineInterface& engine);
33
34 ~ConstBufferLocker();
35
36 /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
37 /// not it will obtain it from maxwell3d and register it.
38 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
39
40 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
41
42 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
43
44 std::optional<u32> ObtainBoundBuffer();
45
46 /// Inserts a key.
47 void InsertKey(u32 buffer, u32 offset, u32 value);
48
49 /// Inserts a bound sampler key.
50 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
51
52 /// Inserts a bindless sampler key.
53 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
54
55 /// Set the bound buffer for this locker.
56 void SetBoundBuffer(u32 buffer);
57
58 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
59 /// the same value, false otherwise;
60 bool IsConsistent() const;
61
62 /// Returns true if the keys are equal to the other ones in the locker.
63 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
64
65 /// Gives an getter to the const buffer keys in the database.
66 const KeyMap& GetKeys() const {
67 return keys;
68 }
69
70 /// Gets samplers database.
71 const BoundSamplerMap& GetBoundSamplers() const {
72 return bound_samplers;
73 }
74
75 /// Gets bindless samplers database.
76 const BindlessSamplerMap& GetBindlessSamplers() const {
77 return bindless_samplers;
78 }
79
80 /// Gets bound buffer used on this shader
81 u32 GetBoundBuffer() const {
82 return bound_buffer;
83 }
84
85 /// Obtains access to the guest driver's profile.
86 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
87 if (engine) {
88 return &engine->AccessGuestDriverProfile();
89 }
90 return nullptr;
91 }
92
93private:
94 const Tegra::Engines::ShaderType stage;
95 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
96 KeyMap keys;
97 BoundSamplerMap bound_samplers;
98 BindlessSamplerMap bindless_samplers;
99 bool bound_buffer_saved{};
100 u32 bound_buffer{};
101};
102
103} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/shader/ast.h" 14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h" 15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
17 18
18namespace VideoCommon::Shader { 19namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
64}; 65};
65 66
66struct CFGRebuildState { 67struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 68 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
68 : program_code{program_code}, locker{locker}, start{start} {} 69 : program_code{program_code}, registry{registry}, start{start} {}
69 70
70 const ProgramCode& program_code; 71 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 72 Registry& registry;
72 u32 start{}; 73 u32 start{};
73 std::vector<BlockInfo> block_info; 74 std::vector<BlockInfo> block_info;
74 std::list<u32> inspect_queries; 75 std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
438 const s32 pc_target = offset + result.relative_position; 439 const s32 pc_target = offset + result.relative_position;
439 std::vector<CaseBranch> branches; 440 std::vector<CaseBranch> branches;
440 for (u32 i = 0; i < result.entries; i++) { 441 for (u32 i = 0; i < result.entries; i++) {
441 auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); 442 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
442 if (!key) { 443 if (!key) {
443 return {ParseResult::AbnormalFlow, parse_info}; 444 return {ParseResult::AbnormalFlow, parse_info};
444 } 445 }
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
656 657
657std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 658std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
658 const CompilerSettings& settings, 659 const CompilerSettings& settings,
659 ConstBufferLocker& locker) { 660 Registry& registry) {
660 auto result_out = std::make_unique<ShaderCharacteristics>(); 661 auto result_out = std::make_unique<ShaderCharacteristics>();
661 if (settings.depth == CompileDepth::BruteForce) { 662 if (settings.depth == CompileDepth::BruteForce) {
662 result_out->settings.depth = CompileDepth::BruteForce; 663 result_out->settings.depth = CompileDepth::BruteForce;
663 return result_out; 664 return result_out;
664 } 665 }
665 666
666 CFGRebuildState state{program_code, start_address, locker}; 667 CFGRebuildState state{program_code, start_address, registry};
667 // Inspect Code and generate blocks 668 // Inspect Code and generate blocks
668 state.labels.clear(); 669 state.labels.clear();
669 state.labels.emplace(start_address); 670 state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h" 14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
111 112
112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
113 const CompilerSettings& settings, 114 const CompilerSettings& settings,
114 ConstBufferLocker& locker); 115 Registry& registry);
115 116
116} // namespace VideoCommon::Shader 117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
34 return (absolute_offset % SchedPeriod) == 0; 34 return (absolute_offset % SchedPeriod) == 0;
35} 35}
36 36
37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, 37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
38 const std::list<Sampler>& used_samplers) { 38 const std::list<Sampler>& used_samplers) {
39 if (gpu_driver == nullptr) { 39 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
40 LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
41 return;
42 }
43 if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
44 return; 40 return;
45 } 41 }
46 u32 count{}; 42 u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
53 bound_offsets.emplace_back(sampler.GetOffset()); 49 bound_offsets.emplace_back(sampler.GetOffset());
54 } 50 }
55 if (count > 1) { 51 if (count > 1) {
56 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); 52 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
57 } 53 }
58} 54}
59 55
60std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, 56std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
61 VideoCore::GuestDriverProfile* gpu_driver, 57 VideoCore::GuestDriverProfile& gpu_driver,
62 const std::list<Sampler>& used_samplers) { 58 const std::list<Sampler>& used_samplers) {
63 if (gpu_driver == nullptr) {
64 LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
65 return std::nullopt;
66 }
67 const u32 base_offset = sampler_to_deduce.GetOffset(); 59 const u32 base_offset = sampler_to_deduce.GetOffset();
68 u32 max_offset{std::numeric_limits<u32>::max()}; 60 u32 max_offset{std::numeric_limits<u32>::max()};
69 for (const auto& sampler : used_samplers) { 61 for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
77 if (max_offset == std::numeric_limits<u32>::max()) { 69 if (max_offset == std::numeric_limits<u32>::max()) {
78 return std::nullopt; 70 return std::nullopt;
79 } 71 }
80 return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); 72 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
81} 73}
82 74
83} // Anonymous namespace 75} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
149 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 141 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
150 142
151 decompiled = false; 143 decompiled = false;
152 auto info = ScanFlow(program_code, main_offset, settings, locker); 144 auto info = ScanFlow(program_code, main_offset, settings, registry);
153 auto& shader_info = *info; 145 auto& shader_info = *info;
154 coverage_begin = shader_info.start; 146 coverage_begin = shader_info.start;
155 coverage_end = shader_info.end; 147 coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
364 356
365void ShaderIR::PostDecode() { 357void ShaderIR::PostDecode() {
366 // Deduce texture handler size if needed 358 // Deduce texture handler size if needed
367 auto gpu_driver = locker.AccessGuestDriverProfile(); 359 auto gpu_driver = registry.AccessGuestDriverProfile();
368 DeduceTextureHandlerSize(gpu_driver, used_samplers); 360 DeduceTextureHandlerSize(gpu_driver, used_samplers);
369 // Deduce Indexed Samplers 361 // Deduce Indexed Samplers
370 if (!uses_indexed_samplers) { 362 if (!uses_indexed_samplers) {
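
For reference, the deduction retained above treats sampler offsets as 32-bit word offsets into the const buffer: the byte distance to the next known sampler, divided by the driver's texture handler size, bounds how many entries an indexed sampler can span. A minimal standalone sketch assuming those units (the function name and example values are illustrative, not emulator API):

    #include <cstdint>

    // Sketch of the deduction above; offsets are assumed to be u32 word offsets into
    // the const buffer and handler_size_bytes a byte count reported by the driver profile.
    std::uint32_t DeducedIndexedSamplerSize(std::uint32_t base_offset, std::uint32_t next_offset,
                                            std::uint32_t handler_size_bytes) {
        // Byte span between two consecutive sampler handles, divided by the size of one
        // handle, gives an upper bound on the number of entries the indexed sampler spans.
        return ((next_offset - base_offset) * 4) / handler_size_bytes;
    }

    // Example: handles at word offsets 0x120 and 0x128 with an 8-byte handler give
    // ((0x128 - 0x120) * 4) / 8 = 4 entries.
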
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF(instr.bfe.negate_b);
21
22 Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
23 op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); 21 Node op_b = [&] {
24 22 switch (opcode->get().GetId()) {
25 switch (opcode->get().GetId()) { 23 case OpCode::Id::BFE_R:
26 case OpCode::Id::BFE_IMM: { 24 return GetRegister(instr.gpr20);
27 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 25 case OpCode::Id::BFE_C:
28 "Condition codes generation in BFE is not implemented"); 26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
29 34
30 const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); 35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
31 const Node outer_shift_imm =
32 Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
33 36
34 const Node inner_shift = 37 const bool is_signed = instr.bfe.is_signed;
35 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
36 const Node outer_shift =
37 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
38 38
39 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); 39 // using reverse parallel method in
40 SetRegister(bb, instr.gpr0, outer_shift); 40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
 41 break; 41 // Note: revisit later if a faster method can be implemented.
42 } 42 if (instr.bfe.brev) {
43 default: 43 const auto swap = [&](u32 s, u32 mask) {
44 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); 44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
45 } 65 }
46 66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
47 return pc; 74 return pc;
48} 75}
49 76
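
The swaps added above follow the reverse-parallel method from the linked bit hacks page. For reference only, the same five swap steps applied to a plain 32-bit value (an illustrative sketch, not emulator code) look like this:

    #include <cstdint>

    // Classic reverse-parallel bit reversal: swap odd/even bits, then bit pairs,
    // nibbles, bytes, and finally the two half-words.
    std::uint32_t ReverseBits32(std::uint32_t v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
        v = (v >> 16) | (v << 16);
        return v;
    }
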
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
359 if (sampler_info) { 360 if (sampler_info) {
360 return *sampler_info; 361 return *sampler_info;
361 } 362 }
362 const auto sampler = 363 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
363 buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); 364 : registry.ObtainBoundSampler(offset);
364 if (!sampler) { 365 if (!sampler) {
365 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
366 return SamplerInfo{TextureType::Texture2D, false, false, false}; 367 return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 206961909..fbd7e9a17 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
12 12
13using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
15 16
16u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
@@ -63,15 +64,18 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
63 } 64 }
64 }(); 65 }();
65 66
66 op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); 67 op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
68 instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
67 69
68 const Node original_b = op_b; 70 const Node original_b = op_b;
69 op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); 71 op_b = SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b),
72 is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
70 73
 71 // TODO(Rodrigo): Use an appropriate sign for this operation 74 // sign_a and sign_b were already checked for mismatch above, so either one can be used here.
72 Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); 75 Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
73 if (is_psl) { 76 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 77 product =
78 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
75 } 79 }
76 SetTemporary(bb, 0, product); 80 SetTemporary(bb, 0, product);
77 product = GetTemporary(0); 81 product = GetTemporary(0);
@@ -88,12 +92,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
88 return BitfieldExtract(original_c, 16, 16); 92 return BitfieldExtract(original_c, 16, 16);
89 case Tegra::Shader::XmadMode::CBcc: { 93 case Tegra::Shader::XmadMode::CBcc: {
90 const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, 94 const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
91 NO_PRECISE, original_b, Immediate(16)); 95 original_b, Immediate(16));
92 return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, 96 return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
93 shifted_b); 97 }
98 case Tegra::Shader::XmadMode::CSfu: {
99 const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
100 op_a, Immediate(0));
101 const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
102 op_b, Immediate(0));
103 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
104
105 const Node comp_minus_a = GetPredicateComparisonInteger(
106 PredCondition::NotEqual, is_signed_a,
107 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
108 Immediate(0x80000000)),
109 Immediate(0));
110 const Node comp_minus_b = GetPredicateComparisonInteger(
111 PredCondition::NotEqual, is_signed_b,
112 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
113 Immediate(0x80000000)),
114 Immediate(0));
115
116 Node new_c = Operation(
117 OperationCode::Select, comp_minus_a,
118 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
119 original_c);
120 new_c = Operation(
121 OperationCode::Select, comp_minus_b,
122 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
123 std::move(new_c));
124
125 return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
94 } 126 }
95 default: 127 default:
96 UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value())); 128 UNREACHABLE();
97 return Immediate(0); 129 return Immediate(0);
98 } 130 }
99 }(); 131 }();
@@ -102,18 +134,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
102 op_c = GetTemporary(1); 134 op_c = GetTemporary(1);
103 135
 104 // TODO(Rodrigo): Use an appropriate sign for this operation 136 // TODO(Rodrigo): Use an appropriate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 137 Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
106 SetTemporary(bb, 2, sum); 138 SetTemporary(bb, 2, sum);
107 sum = GetTemporary(2); 139 sum = GetTemporary(2);
108 if (is_merge) { 140 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 141 const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
110 const Node b = 142 Immediate(0), Immediate(16));
111 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); 143 const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
112 sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); 144 Immediate(16));
145 sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
113 } 146 }
114 147
115 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); 148 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
116 SetRegister(bb, instr.gpr0, sum); 149 SetRegister(bb, instr.gpr0, std::move(sum));
117 150
118 return pc; 151 return pc;
119} 152}
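
For the unsigned case, the decode above models XMAD as: take a 16-bit half of A and a half of B, multiply, optionally shift the product left by 16 (PSL), add the mode-selected C operand, and optionally merge the low half of the result with B's low half (MRG). A hedged scalar sketch with hypothetical names, leaving out the CBcc/CSfu operand selection handled in the decoder:

    #include <cstdint>

    // Illustrative scalar model only; 'c' is assumed to be the operand already
    // produced by the mode selection (CLo/CHi/CBcc/CSfu) above.
    std::uint32_t XmadUnsigned(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                               bool high_a, bool high_b, bool psl, bool merge) {
        const std::uint32_t a16 = (high_a ? a >> 16 : a) & 0xFFFFu;
        const std::uint32_t b16 = (high_b ? b >> 16 : b) & 0xFFFFu;
        std::uint32_t product = a16 * b16;
        if (psl) {
            product <<= 16; // product-shift-left variant
        }
        std::uint32_t sum = product + c;
        if (merge) {
            // MRG: keep the low 16 bits of the sum, place B's low half on top.
            sum = (sum & 0xFFFFu) | (b << 16);
        }
        return sum;
    }
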
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a0a7b9111..a1828546e 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -299,7 +299,7 @@ private:
299 u32 index{}; ///< Emulated index given for this sampler. 299 u32 index{}; ///< Emulated index given for this sampler.
300 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. 300 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
301 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 301 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
302 u32 size{}; ///< Size of the sampler if indexed. 302 u32 size{1}; ///< Size of the sampler.
303 303
304 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 304 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
305 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. 305 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
68 return OperationCode::UBitwiseXor; 68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot: 69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot; 70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
71 case OperationCode::IBitfieldInsert: 73 case OperationCode::IBitfieldInsert:
72 return OperationCode::UBitfieldInsert; 74 return OperationCode::UBitfieldInsert;
73 case OperationCode::IBitCount: 75 case OperationCode::IBitCount:
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27 auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
28
29 GraphicsInfo info;
30 info.tfb_layouts = graphics.regs.tfb_layouts;
31 info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
32 info.primitive_topology = graphics.regs.draw.topology;
33 info.tessellation_primitive = graphics.regs.tess_mode.prim;
34 info.tessellation_spacing = graphics.regs.tess_mode.spacing;
35 info.tfb_enabled = graphics.regs.tfb_enabled;
36 info.tessellation_clockwise = graphics.regs.tess_mode.cw;
37 return info;
38}
39
40ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
41 if (shader_stage != ShaderType::Compute) {
42 return {};
43 }
44 auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
45 const auto& launch = compute.launch_description;
46
47 ComputeInfo info;
48 info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
49 info.local_memory_size_in_words = launch.local_pos_alloc;
50 info.shared_memory_size_in_words = launch.shared_alloc;
51 return info;
52}
53
54} // Anonymous namespace
55
56Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
57 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
58 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
59
60Registry::Registry(Tegra::Engines::ShaderType shader_stage,
61 Tegra::Engines::ConstBufferEngineInterface& engine)
62 : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
63 graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
64 shader_stage, engine)} {}
65
66Registry::~Registry() = default;
67
68std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
69 const std::pair<u32, u32> key = {buffer, offset};
70 const auto iter = keys.find(key);
71 if (iter != keys.end()) {
72 return iter->second;
73 }
74 if (!engine) {
75 return std::nullopt;
76 }
77 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
78 keys.emplace(key, value);
79 return value;
80}
81
82std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
83 const u32 key = offset;
84 const auto iter = bound_samplers.find(key);
85 if (iter != bound_samplers.end()) {
86 return iter->second;
87 }
88 if (!engine) {
89 return std::nullopt;
90 }
91 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
92 bound_samplers.emplace(key, value);
93 return value;
94}
95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) {
98 const std::pair key = {buffer, offset};
99 const auto iter = bindless_samplers.find(key);
100 if (iter != bindless_samplers.end()) {
101 return iter->second;
102 }
103 if (!engine) {
104 return std::nullopt;
105 }
106 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
107 bindless_samplers.emplace(key, value);
108 return value;
109}
110
111void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
112 keys.insert_or_assign({buffer, offset}, value);
113}
114
115void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
116 bound_samplers.insert_or_assign(offset, sampler);
117}
118
119void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
120 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
121}
122
123bool Registry::IsConsistent() const {
124 if (!engine) {
125 return true;
126 }
127 return std::all_of(keys.begin(), keys.end(),
128 [this](const auto& pair) {
129 const auto [cbuf, offset] = pair.first;
130 const auto value = pair.second;
131 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
132 }) &&
133 std::all_of(bound_samplers.begin(), bound_samplers.end(),
134 [this](const auto& sampler) {
135 const auto [key, value] = sampler;
136 return value == engine->AccessBoundSampler(stage, key);
137 }) &&
138 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
139 [this](const auto& sampler) {
140 const auto [cbuf, offset] = sampler.first;
141 const auto value = sampler.second;
142 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
143 });
144}
145
146bool Registry::HasEqualKeys(const Registry& rhs) const {
147 return std::tie(keys, bound_samplers, bindless_samplers) ==
148 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
149}
150
151const GraphicsInfo& Registry::GetGraphicsInfo() const {
152 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
153 return graphics_info;
154}
155
156const ComputeInfo& Registry::GetComputeInfo() const {
157 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
158 return compute_info;
159}
160
161} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26
27struct GraphicsInfo {
28 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
31 tfb_layouts{};
32 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
33 Maxwell::PrimitiveTopology primitive_topology{};
34 Maxwell::TessellationPrimitive tessellation_primitive{};
35 Maxwell::TessellationSpacing tessellation_spacing{};
36 bool tfb_enabled = false;
37 bool tessellation_clockwise = false;
38};
39static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
40 std::is_standard_layout_v<GraphicsInfo>);
41
42struct ComputeInfo {
43 std::array<u32, 3> workgroup_size{};
44 u32 shared_memory_size_in_words = 0;
45 u32 local_memory_size_in_words = 0;
46};
47static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
48
49struct SerializedRegistryInfo {
50 VideoCore::GuestDriverProfile guest_driver_profile;
51 u32 bound_buffer = 0;
52 GraphicsInfo graphics;
53 ComputeInfo compute;
54};
55
56/**
 57 * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
58 * With it, the shader can obtain required data from GPU state and store it for disk shader
59 * compilation.
60 */
61class Registry {
62public:
63 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
64
65 explicit Registry(Tegra::Engines::ShaderType shader_stage,
66 Tegra::Engines::ConstBufferEngineInterface& engine);
67
68 ~Registry();
69
 70 /// Retrieves a key from the registry. If the key is registered, its stored value is returned;
 71 /// otherwise it is read from Maxwell3D and registered.
72 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
73
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77
78 /// Inserts a key.
79 void InsertKey(u32 buffer, u32 offset, u32 value);
80
81 /// Inserts a bound sampler key.
82 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
83
84 /// Inserts a bindless sampler key.
85 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
86
87 /// Checks keys and samplers against engine's current const buffers.
88 /// Returns true if they are the same value, false otherwise.
89 bool IsConsistent() const;
90
91 /// Returns true if the keys are equal to the other ones in the registry.
92 bool HasEqualKeys(const Registry& rhs) const;
93
94 /// Returns graphics information from this shader
95 const GraphicsInfo& GetGraphicsInfo() const;
96
97 /// Returns compute information from this shader
98 const ComputeInfo& GetComputeInfo() const;
99
 100 /// Returns the const buffer keys stored in the database.
101 const KeyMap& GetKeys() const {
102 return keys;
103 }
104
105 /// Gets samplers database.
106 const BoundSamplerMap& GetBoundSamplers() const {
107 return bound_samplers;
108 }
109
110 /// Gets bindless samplers database.
111 const BindlessSamplerMap& GetBindlessSamplers() const {
112 return bindless_samplers;
113 }
114
 115 /// Gets the bound buffer used by this shader.
116 u32 GetBoundBuffer() const {
117 return bound_buffer;
118 }
119
120 /// Obtains access to the guest driver's profile.
121 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
122 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
123 }
124
125private:
126 const Tegra::Engines::ShaderType stage;
127 VideoCore::GuestDriverProfile stored_guest_driver_profile;
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys;
130 BoundSamplerMap bound_samplers;
131 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer;
133 GraphicsInfo graphics_info;
134 ComputeInfo compute_info;
135};
136
137} // namespace VideoCommon::Shader
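
To illustrate how the new class is meant to be used (a sketch only; maxwell3d, program_code, main_offset and settings are assumed to exist in the caller), a Registry is built either from the live engine or from serialized state and is then handed to ShaderIR, which reads const buffer keys and sampler descriptors through it while decoding:

    using VideoCommon::Shader::Registry;
    using VideoCommon::Shader::ShaderIR;

    // Runtime path: snapshot GPU state from the live engine.
    Registry registry{Tegra::Engines::ShaderType::Vertex, maxwell3d};
    ShaderIR ir{program_code, main_offset, settings, registry};

    // Disk shader cache path: rebuild the same interface from serialized data instead,
    // via Registry{stage, SerializedRegistryInfo{...}}.
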
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..425927777 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node_helper.h" 13#include "video_core/shader/node_helper.h"
14#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 25using Tegra::Shader::Register;
25 26
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 27ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 ConstBufferLocker& locker) 28 Registry& registry)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 29 : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
29 Decode(); 30 Decode();
30 PostDecode(); 31 PostDecode();
31} 32}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..dde036b40 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
18#include "video_core/engines/shader_header.h" 18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h" 19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h" 20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/const_buffer_locker.h"
22#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
22#include "video_core/shader/registry.h"
23 23
24namespace VideoCommon::Shader { 24namespace VideoCommon::Shader {
25 25
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
69class ShaderIR final { 69class ShaderIR final {
70public: 70public:
71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
72 ConstBufferLocker& locker); 72 Registry& registry);
73 ~ShaderIR(); 73 ~ShaderIR();
74 74
75 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 75 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
414 const ProgramCode& program_code; 414 const ProgramCode& program_code;
415 const u32 main_offset; 415 const u32 main_offset;
416 const CompilerSettings settings; 416 const CompilerSettings settings;
417 ConstBufferLocker& locker; 417 Registry& registry;
418 418
419 bool decompiled{}; 419 bool decompiled{};
420 bool disable_flow_stack{}; 420 bool disable_flow_stack{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index face8c943..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); 81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track}; 82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { 83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 auto bound_buffer = locker.ObtainBoundBuffer(); 84 const u32 bound_buffer = registry.GetBoundBuffer();
85 if (!bound_buffer) { 85 if (bound_buffer != cbuf->GetIndex()) {
86 return {}; 86 return {};
87 } 87 }
88 if (*bound_buffer != cbuf->GetIndex()) { 88 const auto pair = DecoupleIndirectRead(*operation);
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) { 89 if (!pair) {
93 return {}; 90 return {};
94 } 91 }
95 auto [gpr, base_offset] = *pair; 92 auto [gpr, base_offset] = *pair;
96 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); 93 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
97 auto gpu_driver = locker.AccessGuestDriverProfile(); 94 const auto& gpu_driver = registry.AccessGuestDriverProfile();
98 if (gpu_driver == nullptr) {
99 return {};
100 }
101 const u32 bindless_cv = NewCustomVariable(); 95 const u32 bindless_cv = NewCustomVariable();
102 const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, 96 const Node op =
103 Immediate(gpu_driver->GetTextureHandlerSize())); 97 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
104 98
105 const Node cv_node = GetCustomVariable(bindless_cv); 99 const Node cv_node = GetCustomVariable(bindless_cv);
106 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); 100 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
@@ -157,13 +151,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
157 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 151 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
158 return {}; 152 return {};
159 } 153 }
160 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same 154 s64 current_cursor = cursor;
161 // register that it uses as operand 155 while (current_cursor > 0) {
162 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 156 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
163 if (!source) { 157 // register that it uses as operand
164 return {}; 158 const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
159 current_cursor = new_cursor;
160 if (!source) {
161 continue;
162 }
163 const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
164 if (base_address != nullptr) {
165 return {base_address, index, offset};
166 }
165 } 167 }
166 return TrackCbuf(source, code, new_cursor); 168 return {};
167 } 169 }
168 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 170 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
169 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { 171 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
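
A small worked example of the grouping above, with hypothetical values: if one buffer's layout lists locations {32, 33, 34, 35, 48} with a 20-byte stride, the first four entries share base attribute 32 (Generic 0) and collapse into a single varying with components = 4 at byte offset 0, while location 48 (Generic 4) becomes a 1-component varying at byte offset 16. The accumulated highest offset, (4 + 1) * 4 = 20 bytes, then matches the declared stride, which is what the final UNIMPLEMENTED_IF check verifies.
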
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9707c353d..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
111 return PixelFormat::RGBA16F; 111 return PixelFormat::RGBA16F;
112 case Tegra::RenderTargetFormat::RGBA16_UNORM: 112 case Tegra::RenderTargetFormat::RGBA16_UNORM:
113 return PixelFormat::RGBA16U; 113 return PixelFormat::RGBA16U;
114 case Tegra::RenderTargetFormat::RGBA16_SNORM:
115 return PixelFormat::RGBA16S;
114 case Tegra::RenderTargetFormat::RGBA16_UINT: 116 case Tegra::RenderTargetFormat::RGBA16_UINT:
115 return PixelFormat::RGBA16UI; 117 return PixelFormat::RGBA16UI;
116 case Tegra::RenderTargetFormat::RGBA32_FLOAT: 118 case Tegra::RenderTargetFormat::RGBA32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index d88109e5a..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,82 +25,83 @@ enum class PixelFormat {
25 R8UI = 7, 25 R8UI = 7,
26 RGBA16F = 8, 26 RGBA16F = 8,
27 RGBA16U = 9, 27 RGBA16U = 9,
28 RGBA16UI = 10, 28 RGBA16S = 10,
29 R11FG11FB10F = 11, 29 RGBA16UI = 11,
30 RGBA32UI = 12, 30 R11FG11FB10F = 12,
31 DXT1 = 13, 31 RGBA32UI = 13,
32 DXT23 = 14, 32 DXT1 = 14,
33 DXT45 = 15, 33 DXT23 = 15,
34 DXN1 = 16, // This is also known as BC4 34 DXT45 = 16,
35 DXN2UNORM = 17, 35 DXN1 = 17, // This is also known as BC4
36 DXN2SNORM = 18, 36 DXN2UNORM = 18,
37 BC7U = 19, 37 DXN2SNORM = 19,
38 BC6H_UF16 = 20, 38 BC7U = 20,
39 BC6H_SF16 = 21, 39 BC6H_UF16 = 21,
40 ASTC_2D_4X4 = 22, 40 BC6H_SF16 = 22,
41 BGRA8 = 23, 41 ASTC_2D_4X4 = 23,
42 RGBA32F = 24, 42 BGRA8 = 24,
43 RG32F = 25, 43 RGBA32F = 25,
44 R32F = 26, 44 RG32F = 26,
45 R16F = 27, 45 R32F = 27,
46 R16U = 28, 46 R16F = 28,
47 R16S = 29, 47 R16U = 29,
48 R16UI = 30, 48 R16S = 30,
49 R16I = 31, 49 R16UI = 31,
50 RG16 = 32, 50 R16I = 32,
51 RG16F = 33, 51 RG16 = 33,
52 RG16UI = 34, 52 RG16F = 34,
53 RG16I = 35, 53 RG16UI = 35,
54 RG16S = 36, 54 RG16I = 36,
55 RGB32F = 37, 55 RG16S = 37,
56 RGBA8_SRGB = 38, 56 RGB32F = 38,
57 RG8U = 39, 57 RGBA8_SRGB = 39,
58 RG8S = 40, 58 RG8U = 40,
59 RG32UI = 41, 59 RG8S = 41,
60 RGBX16F = 42, 60 RG32UI = 42,
61 R32UI = 43, 61 RGBX16F = 43,
62 R32I = 44, 62 R32UI = 44,
63 ASTC_2D_8X8 = 45, 63 R32I = 45,
64 ASTC_2D_8X5 = 46, 64 ASTC_2D_8X8 = 46,
65 ASTC_2D_5X4 = 47, 65 ASTC_2D_8X5 = 47,
66 BGRA8_SRGB = 48, 66 ASTC_2D_5X4 = 48,
67 DXT1_SRGB = 49, 67 BGRA8_SRGB = 49,
68 DXT23_SRGB = 50, 68 DXT1_SRGB = 50,
69 DXT45_SRGB = 51, 69 DXT23_SRGB = 51,
70 BC7U_SRGB = 52, 70 DXT45_SRGB = 52,
71 R4G4B4A4U = 53, 71 BC7U_SRGB = 53,
72 ASTC_2D_4X4_SRGB = 54, 72 R4G4B4A4U = 54,
73 ASTC_2D_8X8_SRGB = 55, 73 ASTC_2D_4X4_SRGB = 55,
74 ASTC_2D_8X5_SRGB = 56, 74 ASTC_2D_8X8_SRGB = 56,
75 ASTC_2D_5X4_SRGB = 57, 75 ASTC_2D_8X5_SRGB = 57,
76 ASTC_2D_5X5 = 58, 76 ASTC_2D_5X4_SRGB = 58,
77 ASTC_2D_5X5_SRGB = 59, 77 ASTC_2D_5X5 = 59,
78 ASTC_2D_10X8 = 60, 78 ASTC_2D_5X5_SRGB = 60,
79 ASTC_2D_10X8_SRGB = 61, 79 ASTC_2D_10X8 = 61,
80 ASTC_2D_6X6 = 62, 80 ASTC_2D_10X8_SRGB = 62,
81 ASTC_2D_6X6_SRGB = 63, 81 ASTC_2D_6X6 = 63,
82 ASTC_2D_10X10 = 64, 82 ASTC_2D_6X6_SRGB = 64,
83 ASTC_2D_10X10_SRGB = 65, 83 ASTC_2D_10X10 = 65,
84 ASTC_2D_12X12 = 66, 84 ASTC_2D_10X10_SRGB = 66,
85 ASTC_2D_12X12_SRGB = 67, 85 ASTC_2D_12X12 = 67,
86 ASTC_2D_8X6 = 68, 86 ASTC_2D_12X12_SRGB = 68,
87 ASTC_2D_8X6_SRGB = 69, 87 ASTC_2D_8X6 = 69,
88 ASTC_2D_6X5 = 70, 88 ASTC_2D_8X6_SRGB = 70,
89 ASTC_2D_6X5_SRGB = 71, 89 ASTC_2D_6X5 = 71,
90 E5B9G9R9F = 72, 90 ASTC_2D_6X5_SRGB = 72,
91 E5B9G9R9F = 73,
91 92
92 MaxColorFormat, 93 MaxColorFormat,
93 94
94 // Depth formats 95 // Depth formats
95 Z32F = 73, 96 Z32F = 74,
96 Z16 = 74, 97 Z16 = 75,
97 98
98 MaxDepthFormat, 99 MaxDepthFormat,
99 100
100 // DepthStencil formats 101 // DepthStencil formats
101 Z24S8 = 75, 102 Z24S8 = 76,
102 S8Z24 = 76, 103 S8Z24 = 77,
103 Z32FS8 = 77, 104 Z32FS8 = 78,
104 105
105 MaxDepthStencilFormat, 106 MaxDepthStencilFormat,
106 107
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
138 0, // R8UI 139 0, // R8UI
139 0, // RGBA16F 140 0, // RGBA16F
140 0, // RGBA16U 141 0, // RGBA16U
142 0, // RGBA16S
141 0, // RGBA16UI 143 0, // RGBA16UI
142 0, // R11FG11FB10F 144 0, // R11FG11FB10F
143 0, // RGBA32UI 145 0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
235 1, // R8UI 237 1, // R8UI
236 1, // RGBA16F 238 1, // RGBA16F
237 1, // RGBA16U 239 1, // RGBA16U
240 1, // RGBA16S
238 1, // RGBA16UI 241 1, // RGBA16UI
239 1, // R11FG11FB10F 242 1, // R11FG11FB10F
240 1, // RGBA32UI 243 1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
324 1, // R8UI 327 1, // R8UI
325 1, // RGBA16F 328 1, // RGBA16F
326 1, // RGBA16U 329 1, // RGBA16U
330 1, // RGBA16S
327 1, // RGBA16UI 331 1, // RGBA16UI
328 1, // R11FG11FB10F 332 1, // R11FG11FB10F
329 1, // RGBA32UI 333 1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
413 8, // R8UI 417 8, // R8UI
414 64, // RGBA16F 418 64, // RGBA16F
415 64, // RGBA16U 419 64, // RGBA16U
420 64, // RGBA16S
416 64, // RGBA16UI 421 64, // RGBA16UI
417 32, // R11FG11FB10F 422 32, // R11FG11FB10F
418 128, // RGBA32UI 423 128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
517 SurfaceCompression::None, // R8UI 522 SurfaceCompression::None, // R8UI
518 SurfaceCompression::None, // RGBA16F 523 SurfaceCompression::None, // RGBA16F
519 SurfaceCompression::None, // RGBA16U 524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
520 SurfaceCompression::None, // RGBA16UI 526 SurfaceCompression::None, // RGBA16UI
521 SurfaceCompression::None, // R11FG11FB10F 527 SurfaceCompression::None, // R11FG11FB10F
522 SurfaceCompression::None, // RGBA32UI 528 SurfaceCompression::None, // RGBA32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index cc3ad8417..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 75> DefinitionTable = {{ 44constexpr std::array<Table, 76> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, 61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, 62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
63 63
64 {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
64 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, 65 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
65 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, 66 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
66 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, 67 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index f00839313..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
113 params.height = tic.Height(); 113 params.height = tic.Height();
114 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
115 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
116 if (params.target == SurfaceTarget::TextureCubemap || 116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
117 params.target == SurfaceTarget::TextureCubeArray) { 117 params.depth = 1;
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
118 params.depth *= 6; 120 params.depth *= 6;
119 } 121 }
120 params.num_levels = tic.max_mip_level + 1; 122 params.num_levels = tic.max_mip_level + 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c70e4aec2..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -22,6 +22,7 @@
22#include "core/core.h" 22#include "core/core.h"
23#include "core/memory.h" 23#include "core/memory.h"
24#include "core/settings.h" 24#include "core/settings.h"
25#include "video_core/dirty_flags.h"
25#include "video_core/engines/fermi_2d.h" 26#include "video_core/engines/fermi_2d.h"
26#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
27#include "video_core/gpu.h" 28#include "video_core/gpu.h"
@@ -103,6 +104,11 @@ public:
103 if (!cache_addr) { 104 if (!cache_addr) {
104 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
105 } 106 }
107
108 if (!IsTypeCompatible(tic.texture_type, entry)) {
109 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
110 }
111
106 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
107 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
108 if (guard_samplers) { 114 if (guard_samplers) {
@@ -142,11 +148,10 @@ public:
142 TView GetDepthBufferSurface(bool preserve_contents) { 148 TView GetDepthBufferSurface(bool preserve_contents) {
143 std::lock_guard lock{mutex}; 149 std::lock_guard lock{mutex};
144 auto& maxwell3d = system.GPU().Maxwell3D(); 150 auto& maxwell3d = system.GPU().Maxwell3D();
145 151 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
146 if (!maxwell3d.dirty.depth_buffer) {
147 return depth_buffer.view; 152 return depth_buffer.view;
148 } 153 }
149 maxwell3d.dirty.depth_buffer = false; 154 maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
150 155
151 const auto& regs{maxwell3d.regs}; 156 const auto& regs{maxwell3d.regs};
152 const auto gpu_addr{regs.zeta.Address()}; 157 const auto gpu_addr{regs.zeta.Address()};
@@ -175,10 +180,10 @@ public:
175 std::lock_guard lock{mutex}; 180 std::lock_guard lock{mutex};
176 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 181 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
177 auto& maxwell3d = system.GPU().Maxwell3D(); 182 auto& maxwell3d = system.GPU().Maxwell3D();
178 if (!maxwell3d.dirty.render_target[index]) { 183 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
179 return render_targets[index].view; 184 return render_targets[index].view;
180 } 185 }
181 maxwell3d.dirty.render_target[index] = false; 186 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
182 187
183 const auto& regs{maxwell3d.regs}; 188 const auto& regs{maxwell3d.regs};
184 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 189 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -320,14 +325,14 @@ protected:
320 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 325 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
321 326
322 void ManageRenderTargetUnregister(TSurface& surface) { 327 void ManageRenderTargetUnregister(TSurface& surface) {
323 auto& maxwell3d = system.GPU().Maxwell3D(); 328 auto& dirty = system.GPU().Maxwell3D().dirty;
324 const u32 index = surface->GetRenderTarget(); 329 const u32 index = surface->GetRenderTarget();
325 if (index == DEPTH_RT) { 330 if (index == DEPTH_RT) {
326 maxwell3d.dirty.depth_buffer = true; 331 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
327 } else { 332 } else {
328 maxwell3d.dirty.render_target[index] = true; 333 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
329 } 334 }
330 maxwell3d.dirty.render_settings = true; 335 dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
331 } 336 }
332 337
333 void Register(TSurface surface) { 338 void Register(TSurface surface) {
@@ -914,13 +919,15 @@ private:
914 params.width = 1; 919 params.width = 1;
915 params.height = 1; 920 params.height = 1;
916 params.depth = 1; 921 params.depth = 1;
922 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
923 params.depth = 6;
924 }
917 params.pitch = 4; 925 params.pitch = 4;
918 params.num_levels = 1; 926 params.num_levels = 1;
919 params.emulated_levels = 1; 927 params.emulated_levels = 1;
920 params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; 928 params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
921 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 929 params.type = VideoCore::Surface::SurfaceType::ColorTexture;
922 auto surface = CreateSurface(0ULL, params); 930 auto surface = CreateSurface(0ULL, params);
923 invalid_memory.clear();
924 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); 931 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
925 surface->UploadTexture(invalid_memory); 932 surface->UploadTexture(invalid_memory);
926 surface->MarkAsModified(false, Tick()); 933 surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
1082 return siblings_table[static_cast<std::size_t>(format)]; 1089 return siblings_table[static_cast<std::size_t>(format)];
1083 } 1090 }
1084 1091
 1092 /// Returns true if the shader sampler entry is compatible with the TIC texture type.
1093 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
1094 const VideoCommon::Shader::Sampler& entry) {
1095 const auto shader_type = entry.GetType();
1096 switch (tic_type) {
1097 case Tegra::Texture::TextureType::Texture1D:
1098 case Tegra::Texture::TextureType::Texture1DArray:
1099 return shader_type == Tegra::Shader::TextureType::Texture1D;
1100 case Tegra::Texture::TextureType::Texture1DBuffer:
1101 // TODO(Rodrigo): Assume as valid for now
1102 return true;
1103 case Tegra::Texture::TextureType::Texture2D:
1104 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1105 return shader_type == Tegra::Shader::TextureType::Texture2D;
1106 case Tegra::Texture::TextureType::Texture2DArray:
1107 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1108 shader_type == Tegra::Shader::TextureType::TextureCube;
1109 case Tegra::Texture::TextureType::Texture3D:
1110 return shader_type == Tegra::Shader::TextureType::Texture3D;
1111 case Tegra::Texture::TextureType::TextureCubeArray:
1112 case Tegra::Texture::TextureType::TextureCubemap:
1113 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1114 return true;
1115 }
1116 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
1117 }
1118 UNREACHABLE();
1119 return true;
1120 }
1121
1085 struct FramebufferTargetInfo { 1122 struct FramebufferTargetInfo {
1086 TSurface target; 1123 TSurface target;
1087 TView view; 1124 TView view;
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 33bd31865..062b4f252 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -17,26 +17,37 @@
17 17
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstdint>
21#include <cstring> 20#include <cstring>
22#include <vector> 21#include <vector>
23 22
23#include "common/common_types.h"
24
24#include "video_core/textures/astc.h" 25#include "video_core/textures/astc.h"
25 26
27namespace {
28
29/// Count the number of bits set in a number.
30constexpr u32 Popcnt(u32 n) {
31 u32 c = 0;
32 for (; n; c++) {
33 n &= n - 1;
34 }
35 return c;
36}
37
38} // Anonymous namespace
39
26class InputBitStream { 40class InputBitStream {
27public: 41public:
28 explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) 42 explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
29 : m_CurByte(ptr), m_NextBit(start_offset % 8) {} 43 : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 44
31 ~InputBitStream() = default; 45 std::size_t GetBitsRead() const {
32
33 int GetBitsRead() const {
34 return m_BitsRead; 46 return m_BitsRead;
35 } 47 }
36 48
37 int ReadBit() { 49 u32 ReadBit() {
38 50 u32 bit = *m_CurByte >> m_NextBit++;
39 int bit = *m_CurByte >> m_NextBit++;
40 while (m_NextBit >= 8) { 51 while (m_NextBit >= 8) {
41 m_NextBit -= 8; 52 m_NextBit -= 8;
42 m_CurByte++; 53 m_CurByte++;
@@ -46,57 +57,66 @@ public:
46 return bit & 1; 57 return bit & 1;
47 } 58 }
48 59
49 unsigned int ReadBits(unsigned int nBits) { 60 u32 ReadBits(std::size_t nBits) {
50 unsigned int ret = 0; 61 u32 ret = 0;
51 for (unsigned int i = 0; i < nBits; i++) { 62 for (std::size_t i = 0; i < nBits; ++i) {
63 ret |= (ReadBit() & 1) << i;
64 }
65 return ret;
66 }
67
68 template <std::size_t nBits>
69 u32 ReadBits() {
70 u32 ret = 0;
71 for (std::size_t i = 0; i < nBits; ++i) {
52 ret |= (ReadBit() & 1) << i; 72 ret |= (ReadBit() & 1) << i;
53 } 73 }
54 return ret; 74 return ret;
55 } 75 }
56 76
57private: 77private:
58 const unsigned char* m_CurByte; 78 const u8* m_CurByte;
59 int m_NextBit = 0; 79 std::size_t m_NextBit = 0;
60 int m_BitsRead = 0; 80 std::size_t m_BitsRead = 0;
61}; 81};
62 82
63class OutputBitStream { 83class OutputBitStream {
64public: 84public:
65 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 85 explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0)
66 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 86 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
67 87
68 ~OutputBitStream() = default; 88 ~OutputBitStream() = default;
69 89
70 int GetBitsWritten() const { 90 s32 GetBitsWritten() const {
71 return m_BitsWritten; 91 return m_BitsWritten;
72 } 92 }
73 93
74 void WriteBitsR(unsigned int val, unsigned int nBits) { 94 void WriteBitsR(u32 val, u32 nBits) {
75 for (unsigned int i = 0; i < nBits; i++) { 95 for (u32 i = 0; i < nBits; i++) {
76 WriteBit((val >> (nBits - i - 1)) & 1); 96 WriteBit((val >> (nBits - i - 1)) & 1);
77 } 97 }
78 } 98 }
79 99
80 void WriteBits(unsigned int val, unsigned int nBits) { 100 void WriteBits(u32 val, u32 nBits) {
81 for (unsigned int i = 0; i < nBits; i++) { 101 for (u32 i = 0; i < nBits; i++) {
82 WriteBit((val >> i) & 1); 102 WriteBit((val >> i) & 1);
83 } 103 }
84 } 104 }
85 105
86private: 106private:
87 void WriteBit(int b) { 107 void WriteBit(s32 b) {
88 108
89 if (done) 109 if (done)
90 return; 110 return;
91 111
92 const unsigned int mask = 1 << m_NextBit++; 112 const u32 mask = 1 << m_NextBit++;
93 113
94 // clear the bit 114 // clear the bit
95 *m_CurByte &= static_cast<unsigned char>(~mask); 115 *m_CurByte &= static_cast<u8>(~mask);
96 116
97 // Write the bit, if necessary 117 // Write the bit, if necessary
98 if (b) 118 if (b)
99 *m_CurByte |= static_cast<unsigned char>(mask); 119 *m_CurByte |= static_cast<u8>(mask);
100 120
101 // Next byte? 121 // Next byte?
102 if (m_NextBit >= 8) { 122 if (m_NextBit >= 8) {
@@ -107,10 +127,10 @@ private:
107 done = done || ++m_BitsWritten >= m_NumBits; 127 done = done || ++m_BitsWritten >= m_NumBits;
108 } 128 }
109 129
110 int m_BitsWritten = 0; 130 s32 m_BitsWritten = 0;
111 const int m_NumBits; 131 const s32 m_NumBits;
112 unsigned char* m_CurByte; 132 u8* m_CurByte;
113 int m_NextBit = 0; 133 s32 m_NextBit = 0;
114 134
115 bool done = false; 135 bool done = false;
116}; 136};
@@ -123,20 +143,20 @@ public:
123 Bits(const Bits&) = delete; 143 Bits(const Bits&) = delete;
124 Bits& operator=(const Bits&) = delete; 144 Bits& operator=(const Bits&) = delete;
125 145
126 uint8_t operator[](uint32_t bitPos) const { 146 u8 operator[](u32 bitPos) const {
127 return static_cast<uint8_t>((m_Bits >> bitPos) & 1); 147 return static_cast<u8>((m_Bits >> bitPos) & 1);
128 } 148 }
129 149
130 IntType operator()(uint32_t start, uint32_t end) const { 150 IntType operator()(u32 start, u32 end) const {
131 if (start == end) { 151 if (start == end) {
132 return (*this)[start]; 152 return (*this)[start];
133 } else if (start > end) { 153 } else if (start > end) {
134 uint32_t t = start; 154 u32 t = start;
135 start = end; 155 start = end;
136 end = t; 156 end = t;
137 } 157 }
138 158
139 uint64_t mask = (1 << (end - start + 1)) - 1; 159 u64 mask = (1 << (end - start + 1)) - 1;
140 return (m_Bits >> start) & static_cast<IntType>(mask); 160 return (m_Bits >> start) & static_cast<IntType>(mask);
141 } 161 }
142 162
@@ -144,273 +164,236 @@ private:
144 const IntType& m_Bits; 164 const IntType& m_Bits;
145}; 165};
146 166
147enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; 167enum class IntegerEncoding { JustBits, Qus32, Trit };
148
149class IntegerEncodedValue {
150private:
151 const EIntegerEncoding m_Encoding;
152 const uint32_t m_NumBits;
153 uint32_t m_BitValue;
154 union {
155 uint32_t m_QuintValue;
156 uint32_t m_TritValue;
157 };
158 168
159public: 169struct IntegerEncodedValue {
160 // Jank, but we're not doing any heavy lifting in this class, so it's 170 constexpr IntegerEncodedValue() = default;
161 // probably OK. It allows us to use these in std::vectors...
162 IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
163 new (this) IntegerEncodedValue(other);
164 return *this;
165 }
166 171
167 IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) 172 constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
168 : m_Encoding(encoding), m_NumBits(numBits) {} 173 : encoding{encoding_}, num_bits{num_bits_} {}
169 174
170 EIntegerEncoding GetEncoding() const { 175 constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
171 return m_Encoding; 176 return encoding == other.encoding && num_bits == other.num_bits;
172 }
173 uint32_t BaseBitLength() const {
174 return m_NumBits;
175 }
176
177 uint32_t GetBitValue() const {
178 return m_BitValue;
179 }
180 void SetBitValue(uint32_t val) {
181 m_BitValue = val;
182 }
183
184 uint32_t GetTritValue() const {
185 return m_TritValue;
186 }
187 void SetTritValue(uint32_t val) {
188 m_TritValue = val;
189 }
190
191 uint32_t GetQuintValue() const {
192 return m_QuintValue;
193 }
194 void SetQuintValue(uint32_t val) {
195 m_QuintValue = val;
196 }
197
198 bool MatchesEncoding(const IntegerEncodedValue& other) const {
199 return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
200 } 177 }
201 178
202 // Returns the number of bits required to encode nVals values. 179 // Returns the number of bits required to encode nVals values.
203 uint32_t GetBitLength(uint32_t nVals) const { 180 u32 GetBitLength(u32 nVals) const {
204 uint32_t totalBits = m_NumBits * nVals; 181 u32 totalBits = num_bits * nVals;
205 if (m_Encoding == eIntegerEncoding_Trit) { 182 if (encoding == IntegerEncoding::Trit) {
206 totalBits += (nVals * 8 + 4) / 5; 183 totalBits += (nVals * 8 + 4) / 5;
207 } else if (m_Encoding == eIntegerEncoding_Quint) { 184 } else if (encoding == IntegerEncoding::Qus32) {
208 totalBits += (nVals * 7 + 2) / 3; 185 totalBits += (nVals * 7 + 2) / 3;
209 } 186 }
210 return totalBits; 187 return totalBits;
211 } 188 }
212 189
213 // Count the number of bits set in a number. 190 IntegerEncoding encoding{};
214 static inline uint32_t Popcnt(uint32_t n) { 191 u32 num_bits = 0;
215 uint32_t c; 192 u32 bit_value = 0;
216 for (c = 0; n; c++) { 193 union {
217 n &= n - 1; 194 u32 qus32_value = 0;
195 u32 trit_value;
196 };
197};
198
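GetBitLength accounts for the packing used by the integer sequence encoding: every value carries num_bits of mantissa, and on top of that a trit block stores 5 values in 8 shared bits while a quint block stores 3 values in 7 shared bits, hence the ceil(8n/5) and ceil(7n/3) terms. For example, a trit encoding with 3 mantissa bits needs, for 12 values:

    // 12 * 3 mantissa bits  +  (12 * 8 + 4) / 5 = 20 trit bits  ->  56 bits total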
199static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
200 u32 nBitsPerValue) {
201 // Implement the algorithm in section C.2.12
202 u32 m[5];
203 u32 t[5];
204 u32 T;
205
206 // Read the trit encoded block according to
207 // table C.2.14
208 m[0] = bits.ReadBits(nBitsPerValue);
209 T = bits.ReadBits<2>();
210 m[1] = bits.ReadBits(nBitsPerValue);
211 T |= bits.ReadBits<2>() << 2;
212 m[2] = bits.ReadBits(nBitsPerValue);
213 T |= bits.ReadBit() << 4;
214 m[3] = bits.ReadBits(nBitsPerValue);
215 T |= bits.ReadBits<2>() << 5;
216 m[4] = bits.ReadBits(nBitsPerValue);
217 T |= bits.ReadBit() << 7;
218
219 u32 C = 0;
220
221 Bits<u32> Tb(T);
222 if (Tb(2, 4) == 7) {
223 C = (Tb(5, 7) << 2) | Tb(0, 1);
224 t[4] = t[3] = 2;
225 } else {
226 C = Tb(0, 4);
227 if (Tb(5, 6) == 3) {
228 t[4] = 2;
229 t[3] = Tb[7];
230 } else {
231 t[4] = Tb[7];
232 t[3] = Tb(5, 6);
218 } 233 }
219 return c;
220 } 234 }
221 235
222 // Returns a new instance of this struct that corresponds to the 236 Bits<u32> Cb(C);
223 // can take no more than maxval values 237 if (Cb(0, 1) == 3) {
224 static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { 238 t[2] = 2;
225 while (maxVal > 0) { 239 t[1] = Cb[4];
226 uint32_t check = maxVal + 1; 240 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
227 241 } else if (Cb(2, 3) == 3) {
228 // Is maxVal a power of two? 242 t[2] = 2;
229 if (!(check & (check - 1))) { 243 t[1] = 2;
230 return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); 244 t[0] = Cb(0, 1);
231 } 245 } else {
232 246 t[2] = Cb[4];
233 // Is maxVal of the type 3*2^n - 1? 247 t[1] = Cb(2, 3);
234 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { 248 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
235 return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); 249 }
236 }
237 250
238 // Is maxVal of the type 5*2^n - 1? 251 for (std::size_t i = 0; i < 5; ++i) {
239 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { 252 IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue);
240 return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); 253 val.bit_value = m[i];
241 } 254 val.trit_value = t[i];
255 }
256}
242 257
243 // Apparently it can't be represented with a bounded integer sequence... 258static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
244 // just iterate. 259 u32 nBitsPerValue) {
245 maxVal--; 260 // Implement the algorithm in section C.2.12
261 u32 m[3];
262 u32 q[3];
263 u32 Q;
264
265 // Read the trit encoded block according to
266 // table C.2.15
267 m[0] = bits.ReadBits(nBitsPerValue);
268 Q = bits.ReadBits<3>();
269 m[1] = bits.ReadBits(nBitsPerValue);
270 Q |= bits.ReadBits<2>() << 3;
271 m[2] = bits.ReadBits(nBitsPerValue);
272 Q |= bits.ReadBits<2>() << 5;
273
274 Bits<u32> Qb(Q);
275 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
276 q[0] = q[1] = 4;
277 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
278 } else {
279 u32 C = 0;
280 if (Qb(1, 2) == 3) {
281 q[2] = 4;
282 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
283 } else {
284 q[2] = Qb(5, 6);
285 C = Qb(0, 4);
246 } 286 }
247 return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
248 }
249
250 // Fills result with the values that are encoded in the given
251 // bitstream. We must know beforehand what the maximum possible
252 // value is, and how many values we're decoding.
253 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
254 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
255 // Determine encoding parameters
256 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
257
258 // Start decoding
259 uint32_t nValsDecoded = 0;
260 while (nValsDecoded < nValues) {
261 switch (val.GetEncoding()) {
262 case eIntegerEncoding_Quint:
263 DecodeQuintBlock(bits, result, val.BaseBitLength());
264 nValsDecoded += 3;
265 break;
266 287
267 case eIntegerEncoding_Trit: 288 Bits<u32> Cb(C);
268 DecodeTritBlock(bits, result, val.BaseBitLength()); 289 if (Cb(0, 2) == 5) {
269 nValsDecoded += 5; 290 q[1] = 4;
270 break; 291 q[0] = Cb(3, 4);
271 292 } else {
272 case eIntegerEncoding_JustBits: 293 q[1] = Cb(3, 4);
273 val.SetBitValue(bits.ReadBits(val.BaseBitLength())); 294 q[0] = Cb(0, 2);
274 result.push_back(val);
275 nValsDecoded++;
276 break;
277 }
278 } 295 }
279 } 296 }
280 297
281private: 298 for (std::size_t i = 0; i < 3; ++i) {
282 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 299 IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue);
283 uint32_t nBitsPerValue) { 300 val.bit_value = m[i];
284 // Implement the algorithm in section C.2.12 301 val.qus32_value = q[i];
285 uint32_t m[5]; 302 }
286 uint32_t t[5]; 303}
287 uint32_t T; 304
288 305// Returns a new instance of this struct that corresponds to the
289 // Read the trit encoded block according to 306// can take no more than maxval values
290 // table C.2.14 307static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) {
291 m[0] = bits.ReadBits(nBitsPerValue); 308 while (maxVal > 0) {
292 T = bits.ReadBits(2); 309 u32 check = maxVal + 1;
293 m[1] = bits.ReadBits(nBitsPerValue); 310
294 T |= bits.ReadBits(2) << 2; 311 // Is maxVal a power of two?
295 m[2] = bits.ReadBits(nBitsPerValue); 312 if (!(check & (check - 1))) {
296 T |= bits.ReadBit() << 4; 313 return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal));
297 m[3] = bits.ReadBits(nBitsPerValue);
298 T |= bits.ReadBits(2) << 5;
299 m[4] = bits.ReadBits(nBitsPerValue);
300 T |= bits.ReadBit() << 7;
301
302 uint32_t C = 0;
303
304 Bits<uint32_t> Tb(T);
305 if (Tb(2, 4) == 7) {
306 C = (Tb(5, 7) << 2) | Tb(0, 1);
307 t[4] = t[3] = 2;
308 } else {
309 C = Tb(0, 4);
310 if (Tb(5, 6) == 3) {
311 t[4] = 2;
312 t[3] = Tb[7];
313 } else {
314 t[4] = Tb[7];
315 t[3] = Tb(5, 6);
316 }
317 } 314 }
318 315
319 Bits<uint32_t> Cb(C); 316 // Is maxVal of the type 3*2^n - 1?
320 if (Cb(0, 1) == 3) { 317 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
321 t[2] = 2; 318 return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1));
322 t[1] = Cb[4];
323 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
324 } else if (Cb(2, 3) == 3) {
325 t[2] = 2;
326 t[1] = 2;
327 t[0] = Cb(0, 1);
328 } else {
329 t[2] = Cb[4];
330 t[1] = Cb(2, 3);
331 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
332 } 319 }
333 320
334 for (uint32_t i = 0; i < 5; i++) { 321 // Is maxVal of the type 5*2^n - 1?
335 IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); 322 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
336 val.SetBitValue(m[i]); 323 return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1));
337 val.SetTritValue(t[i]);
338 result.push_back(val);
339 } 324 }
325
326 // Apparently it can't be represented with a bounded integer sequence...
327 // just iterate.
328 maxVal--;
340 } 329 }
330 return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
331}
341 332
342 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 333static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
343 uint32_t nBitsPerValue) { 334 std::array<IntegerEncodedValue, 256> encodings{};
344 // Implement the algorithm in section C.2.12 335 for (std::size_t i = 0; i < encodings.size(); ++i) {
345 uint32_t m[3]; 336 encodings[i] = CreateEncoding(static_cast<u32>(i));
346 uint32_t q[3]; 337 }
347 uint32_t Q; 338 return encodings;
348 339}
349 // Read the trit encoded block according to
350 // table C.2.15
351 m[0] = bits.ReadBits(nBitsPerValue);
352 Q = bits.ReadBits(3);
353 m[1] = bits.ReadBits(nBitsPerValue);
354 Q |= bits.ReadBits(2) << 3;
355 m[2] = bits.ReadBits(nBitsPerValue);
356 Q |= bits.ReadBits(2) << 5;
357
358 Bits<uint32_t> Qb(Q);
359 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
360 q[0] = q[1] = 4;
361 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
362 } else {
363 uint32_t C = 0;
364 if (Qb(1, 2) == 3) {
365 q[2] = 4;
366 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
367 } else {
368 q[2] = Qb(5, 6);
369 C = Qb(0, 4);
370 }
371 340
372 Bits<uint32_t> Cb(C); 341static constexpr std::array EncodingsValues = MakeEncodedValues();
373 if (Cb(0, 2) == 5) { 342
374 q[1] = 4; 343// Fills result with the values that are encoded in the given
375 q[0] = Cb(3, 4); 344// bitstream. We must know beforehand what the maximum possible
376 } else { 345// value is, and how many values we're decoding.
377 q[1] = Cb(3, 4); 346static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits,
378 q[0] = Cb(0, 2); 347 u32 maxRange, u32 nValues) {
379 } 348 // Determine encoding parameters
380 } 349 IntegerEncodedValue val = EncodingsValues[maxRange];
350
351 // Start decoding
352 u32 nValsDecoded = 0;
353 while (nValsDecoded < nValues) {
354 switch (val.encoding) {
355 case IntegerEncoding::Qus32:
356 DecodeQus32Block(bits, result, val.num_bits);
357 nValsDecoded += 3;
358 break;
359
360 case IntegerEncoding::Trit:
361 DecodeTritBlock(bits, result, val.num_bits);
362 nValsDecoded += 5;
363 break;
381 364
382 for (uint32_t i = 0; i < 3; i++) { 365 case IntegerEncoding::JustBits:
383 IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); 366 val.bit_value = bits.ReadBits(val.num_bits);
384 val.m_BitValue = m[i];
385 val.m_QuintValue = q[i];
386 result.push_back(val); 367 result.push_back(val);
368 nValsDecoded++;
369 break;
387 } 370 }
388 } 371 }
389}; 372}
390 373
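DecodeIntegerSequence looks up the encoding for a given maximum value in EncodingsValues, which is simply CreateEncoding precomputed for 0..255: ranges of the form 2^n use plain bits, 3*2^n uses a trit plus n bits, and 5*2^n uses a quint plus n bits. A few illustrative entries, computed from the code above:

    // EncodingsValues[7]  -> just bits, 3   (7  + 1 = 2^3)
    // EncodingsValues[5]  -> trit,      1   (5  + 1 = 3 * 2^1)
    // EncodingsValues[11] -> trit,      2   (11 + 1 = 3 * 2^2)
    // EncodingsValues[9]  -> quint,     1   (9  + 1 = 5 * 2^1)
    // EncodingsValues[19] -> quint,     2   (19 + 1 = 5 * 2^2)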
391namespace ASTCC { 374namespace ASTCC {
392 375
393struct TexelWeightParams { 376struct TexelWeightParams {
394 uint32_t m_Width = 0; 377 u32 m_Width = 0;
395 uint32_t m_Height = 0; 378 u32 m_Height = 0;
396 bool m_bDualPlane = false; 379 bool m_bDualPlane = false;
397 uint32_t m_MaxWeight = 0; 380 u32 m_MaxWeight = 0;
398 bool m_bError = false; 381 bool m_bError = false;
399 bool m_bVoidExtentLDR = false; 382 bool m_bVoidExtentLDR = false;
400 bool m_bVoidExtentHDR = false; 383 bool m_bVoidExtentHDR = false;
401 384
402 uint32_t GetPackedBitSize() const { 385 u32 GetPackedBitSize() const {
403 // How many indices do we have? 386 // How many indices do we have?
404 uint32_t nIdxs = m_Height * m_Width; 387 u32 nIdxs = m_Height * m_Width;
405 if (m_bDualPlane) { 388 if (m_bDualPlane) {
406 nIdxs *= 2; 389 nIdxs *= 2;
407 } 390 }
408 391
409 return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); 392 return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
410 } 393 }
411 394
412 uint32_t GetNumWeightValues() const { 395 u32 GetNumWeightValues() const {
413 uint32_t ret = m_Width * m_Height; 396 u32 ret = m_Width * m_Height;
414 if (m_bDualPlane) { 397 if (m_bDualPlane) {
415 ret *= 2; 398 ret *= 2;
416 } 399 }
@@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
422 TexelWeightParams params; 405 TexelWeightParams params;
423 406
424 // Read the entire block mode all at once 407 // Read the entire block mode all at once
425 uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); 408 u16 modeBits = static_cast<u16>(strm.ReadBits<11>());
426 409
427 // Does this match the void extent block mode? 410 // Does this match the void extent block mode?
428 if ((modeBits & 0x01FF) == 0x1FC) { 411 if ((modeBits & 0x01FF) == 0x1FC) {
@@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
457 // of the block mode. Layout is determined by a number 440 // of the block mode. Layout is determined by a number
458 // between 0 and 9 corresponding to table C.2.8 of the 441 // between 0 and 9 corresponding to table C.2.8 of the
459 // ASTC spec. 442 // ASTC spec.
460 uint32_t layout = 0; 443 u32 layout = 0;
461 444
462 if ((modeBits & 0x1) || (modeBits & 0x2)) { 445 if ((modeBits & 0x1) || (modeBits & 0x2)) {
463 // layout is in [0-4] 446 // layout is in [0-4]
@@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
509 assert(layout < 10); 492 assert(layout < 10);
510 493
511 // Determine R 494 // Determine R
512 uint32_t R = !!(modeBits & 0x10); 495 u32 R = !!(modeBits & 0x10);
513 if (layout < 5) { 496 if (layout < 5) {
514 R |= (modeBits & 0x3) << 1; 497 R |= (modeBits & 0x3) << 1;
515 } else { 498 } else {
@@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
520 // Determine width & height 503 // Determine width & height
521 switch (layout) { 504 switch (layout) {
522 case 0: { 505 case 0: {
523 uint32_t A = (modeBits >> 5) & 0x3; 506 u32 A = (modeBits >> 5) & 0x3;
524 uint32_t B = (modeBits >> 7) & 0x3; 507 u32 B = (modeBits >> 7) & 0x3;
525 params.m_Width = B + 4; 508 params.m_Width = B + 4;
526 params.m_Height = A + 2; 509 params.m_Height = A + 2;
527 break; 510 break;
528 } 511 }
529 512
530 case 1: { 513 case 1: {
531 uint32_t A = (modeBits >> 5) & 0x3; 514 u32 A = (modeBits >> 5) & 0x3;
532 uint32_t B = (modeBits >> 7) & 0x3; 515 u32 B = (modeBits >> 7) & 0x3;
533 params.m_Width = B + 8; 516 params.m_Width = B + 8;
534 params.m_Height = A + 2; 517 params.m_Height = A + 2;
535 break; 518 break;
536 } 519 }
537 520
538 case 2: { 521 case 2: {
539 uint32_t A = (modeBits >> 5) & 0x3; 522 u32 A = (modeBits >> 5) & 0x3;
540 uint32_t B = (modeBits >> 7) & 0x3; 523 u32 B = (modeBits >> 7) & 0x3;
541 params.m_Width = A + 2; 524 params.m_Width = A + 2;
542 params.m_Height = B + 8; 525 params.m_Height = B + 8;
543 break; 526 break;
544 } 527 }
545 528
546 case 3: { 529 case 3: {
547 uint32_t A = (modeBits >> 5) & 0x3; 530 u32 A = (modeBits >> 5) & 0x3;
548 uint32_t B = (modeBits >> 7) & 0x1; 531 u32 B = (modeBits >> 7) & 0x1;
549 params.m_Width = A + 2; 532 params.m_Width = A + 2;
550 params.m_Height = B + 6; 533 params.m_Height = B + 6;
551 break; 534 break;
552 } 535 }
553 536
554 case 4: { 537 case 4: {
555 uint32_t A = (modeBits >> 5) & 0x3; 538 u32 A = (modeBits >> 5) & 0x3;
556 uint32_t B = (modeBits >> 7) & 0x1; 539 u32 B = (modeBits >> 7) & 0x1;
557 params.m_Width = B + 2; 540 params.m_Width = B + 2;
558 params.m_Height = A + 2; 541 params.m_Height = A + 2;
559 break; 542 break;
560 } 543 }
561 544
562 case 5: { 545 case 5: {
563 uint32_t A = (modeBits >> 5) & 0x3; 546 u32 A = (modeBits >> 5) & 0x3;
564 params.m_Width = 12; 547 params.m_Width = 12;
565 params.m_Height = A + 2; 548 params.m_Height = A + 2;
566 break; 549 break;
567 } 550 }
568 551
569 case 6: { 552 case 6: {
570 uint32_t A = (modeBits >> 5) & 0x3; 553 u32 A = (modeBits >> 5) & 0x3;
571 params.m_Width = A + 2; 554 params.m_Width = A + 2;
572 params.m_Height = 12; 555 params.m_Height = 12;
573 break; 556 break;
@@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
586 } 569 }
587 570
588 case 9: { 571 case 9: {
589 uint32_t A = (modeBits >> 5) & 0x3; 572 u32 A = (modeBits >> 5) & 0x3;
590 uint32_t B = (modeBits >> 9) & 0x3; 573 u32 B = (modeBits >> 9) & 0x3;
591 params.m_Width = A + 6; 574 params.m_Width = A + 6;
592 params.m_Height = B + 6; 575 params.m_Height = B + 6;
593 break; 576 break;
594 } 577 }
595 578
596 default: 579 default:
597 assert(!"Don't know this layout..."); 580 assert(false && "Don't know this layout...");
598 params.m_bError = true; 581 params.m_bError = true;
599 break; 582 break;
600 } 583 }
@@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
605 bool H = (layout != 9) && (modeBits & 0x200); 588 bool H = (layout != 9) && (modeBits & 0x200);
606 589
607 if (H) { 590 if (H) {
608 const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; 591 const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31};
609 params.m_MaxWeight = maxWeights[R - 2]; 592 params.m_MaxWeight = maxWeights[R - 2];
610 } else { 593 } else {
611 const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; 594 const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7};
612 params.m_MaxWeight = maxWeights[R - 2]; 595 params.m_MaxWeight = maxWeights[R - 2];
613 } 596 }
614 597
@@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
617 return params; 600 return params;
618} 601}
619 602
620static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
621 uint32_t blockHeight) { 604 u32 blockHeight) {
622 // Don't actually care about the void extent, just read the bits... 605 // Don't actually care about the void extent, just read the bits...
623 for (int i = 0; i < 4; ++i) { 606 for (s32 i = 0; i < 4; ++i) {
624 strm.ReadBits(13); 607 strm.ReadBits<13>();
625 } 608 }
626 609
627 // Decode the RGBA components and renormalize them to the range [0, 255] 610 // Decode the RGBA components and renormalize them to the range [0, 255]
628 uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); 611 u16 r = static_cast<u16>(strm.ReadBits<16>());
629 uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); 612 u16 g = static_cast<u16>(strm.ReadBits<16>());
630 uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); 613 u16 b = static_cast<u16>(strm.ReadBits<16>());
631 uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); 614 u16 a = static_cast<u16>(strm.ReadBits<16>());
632 615
633 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | 616 u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
634 (static_cast<uint32_t>(a) & 0xFF00) << 16; 617 (static_cast<u32>(a) & 0xFF00) << 16;
635 618
636 for (uint32_t j = 0; j < blockHeight; j++) { 619 for (u32 j = 0; j < blockHeight; j++) {
637 for (uint32_t i = 0; i < blockWidth; i++) { 620 for (u32 i = 0; i < blockWidth; i++) {
638 outBuf[j * blockWidth + i] = rgba; 621 outBuf[j * blockWidth + i] = rgba;
639 } 622 }
640 } 623 }
641} 624}
642 625
643static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { 626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
644 for (uint32_t j = 0; j < blockHeight; j++) { 627 for (u32 j = 0; j < blockHeight; j++) {
645 for (uint32_t i = 0; i < blockWidth; i++) { 628 for (u32 i = 0; i < blockWidth; i++) {
646 outBuf[j * blockWidth + i] = 0xFFFF00FF; 629 outBuf[j * blockWidth + i] = 0xFFFF00FF;
647 } 630 }
648 } 631 }
@@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh
651// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] 634// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
652// is the same as [(numBits - 1):0] and repeats all the way down. 635// is the same as [(numBits - 1):0] and repeats all the way down.
653template <typename IntType> 636template <typename IntType>
654static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { 637static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
655 if (numBits == 0) 638 if (numBits == 0)
656 return 0; 639 return 0;
657 if (toBit == 0) 640 if (toBit == 0)
658 return 0; 641 return 0;
659 IntType v = val & static_cast<IntType>((1 << numBits) - 1); 642 IntType v = val & static_cast<IntType>((1 << numBits) - 1);
660 IntType res = v; 643 IntType res = v;
661 uint32_t reslen = numBits; 644 u32 reslen = numBits;
662 while (reslen < toBit) { 645 while (reslen < toBit) {
663 uint32_t comp = 0; 646 u32 comp = 0;
664 if (numBits > toBit - reslen) { 647 if (numBits > toBit - reslen) {
665 uint32_t newshift = toBit - reslen; 648 u32 newshift = toBit - reslen;
666 comp = numBits - newshift; 649 comp = numBits - newshift;
667 numBits = newshift; 650 numBits = newshift;
668 } 651 }
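Replicate stretches an n-bit value to toBit bits by repeating its bit pattern from the most significant end down, which is how quantized values are widened to 8-bit color or 6-bit weight precision. A worked instance of the code above:

    // Replicate(0b101, 3, 8) -> 0b10110110 == 182   ("101" repeated and truncated to 8 bits)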
@@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
675 658
676class Pixel { 659class Pixel {
677protected: 660protected:
678 using ChannelType = int16_t; 661 using ChannelType = s16;
679 uint8_t m_BitDepth[4] = {8, 8, 8, 8}; 662 u8 m_BitDepth[4] = {8, 8, 8, 8};
680 int16_t color[4] = {}; 663 s16 color[4] = {};
681 664
682public: 665public:
683 Pixel() = default; 666 Pixel() = default;
684 Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) 667 Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8)
685 : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, 668 : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)},
686 color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), 669 color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
687 static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} 670 static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
688 671
@@ -691,22 +674,22 @@ public:
691 // significant bits when going from larger to smaller bit depth 674 // significant bits when going from larger to smaller bit depth
692 // or by repeating the most significant bits when going from 675 // or by repeating the most significant bits when going from
693 // smaller to larger bit depths. 676 // smaller to larger bit depths.
694 void ChangeBitDepth(const uint8_t (&depth)[4]) { 677 void ChangeBitDepth(const u8 (&depth)[4]) {
695 for (uint32_t i = 0; i < 4; i++) { 678 for (u32 i = 0; i < 4; i++) {
696 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); 679 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
697 m_BitDepth[i] = depth[i]; 680 m_BitDepth[i] = depth[i];
698 } 681 }
699 } 682 }
700 683
701 template <typename IntType> 684 template <typename IntType>
702 static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { 685 static float ConvertChannelToFloat(IntType channel, u8 bitDepth) {
703 float denominator = static_cast<float>((1 << bitDepth) - 1); 686 float denominator = static_cast<float>((1 << bitDepth) - 1);
704 return static_cast<float>(channel) / denominator; 687 return static_cast<float>(channel) / denominator;
705 } 688 }
706 689
707 // Changes the bit depth of a single component. See the comment 690 // Changes the bit depth of a single component. See the comment
708 // above for how we do this. 691 // above for how we do this.
709 static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { 692 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) {
710 assert(newDepth <= 8); 693 assert(newDepth <= 8);
711 assert(oldDepth <= 8); 694 assert(oldDepth <= 8);
712 695
@@ -722,16 +705,15 @@ public:
722 if (newDepth == 0) { 705 if (newDepth == 0) {
723 return 0xFF; 706 return 0xFF;
724 } else { 707 } else {
725 uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); 708 u8 bitsWasted = static_cast<u8>(oldDepth - newDepth);
726 uint16_t v = static_cast<uint16_t>(val); 709 u16 v = static_cast<u16>(val);
727 v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); 710 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
728 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), 711 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
729 static_cast<uint16_t>((1 << newDepth) - 1)); 712 return static_cast<u8>(v);
730 return static_cast<uint8_t>(v);
731 } 713 }
732 } 714 }
733 715
734 assert(!"We shouldn't get here."); 716 assert(false && "We shouldn't get here.");
735 return 0; 717 return 0;
736 } 718 }
737 719
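The branch shown above handles reducing bit depth: the value is rounded by adding half of the dropped range before shifting, then clamped to the new maximum. For example, taking an 8-bit channel value of 200 down to 5 bits:

    // bitsWasted = 3;  v = (200 + (1 << 2)) >> 3 = 25;  clamped to 0..31  ->  25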
@@ -759,15 +741,15 @@ public:
759 ChannelType& B() { 741 ChannelType& B() {
760 return color[3]; 742 return color[3];
761 } 743 }
762 const ChannelType& Component(uint32_t idx) const { 744 const ChannelType& Component(u32 idx) const {
763 return color[idx]; 745 return color[idx];
764 } 746 }
765 ChannelType& Component(uint32_t idx) { 747 ChannelType& Component(u32 idx) {
766 return color[idx]; 748 return color[idx];
767 } 749 }
768 750
769 void GetBitDepth(uint8_t (&outDepth)[4]) const { 751 void GetBitDepth(u8 (&outDepth)[4]) const {
770 for (int i = 0; i < 4; i++) { 752 for (s32 i = 0; i < 4; i++) {
771 outDepth[i] = m_BitDepth[i]; 753 outDepth[i] = m_BitDepth[i];
772 } 754 }
773 } 755 }
@@ -776,12 +758,12 @@ public:
776 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume 758 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
777 // that the architecture is little-endian, so the alpha channel will end 759 // that the architecture is little-endian, so the alpha channel will end
778 // up in the most-significant byte. 760 // up in the most-significant byte.
779 uint32_t Pack() const { 761 u32 Pack() const {
780 Pixel eightBit(*this); 762 Pixel eightBit(*this);
781 const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; 763 const u8 eightBitDepth[4] = {8, 8, 8, 8};
782 eightBit.ChangeBitDepth(eightBitDepth); 764 eightBit.ChangeBitDepth(eightBitDepth);
783 765
784 uint32_t r = 0; 766 u32 r = 0;
785 r |= eightBit.A(); 767 r |= eightBit.A();
786 r <<= 8; 768 r <<= 8;
787 r |= eightBit.B(); 769 r |= eightBit.B();
@@ -794,7 +776,7 @@ public:
794 776
795 // Clamps the pixel to the range [0,255] 777 // Clamps the pixel to the range [0,255]
796 void ClampByte() { 778 void ClampByte() {
797 for (uint32_t i = 0; i < 4; i++) { 779 for (u32 i = 0; i < 4; i++) {
798 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); 780 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
799 } 781 }
800 } 782 }
@@ -804,24 +786,24 @@ public:
804 } 786 }
805}; 787};
806 788
807static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, 789static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
808 const uint32_t nPartitions, const uint32_t nBitsForColorData) { 790 const u32 nBitsForColorData) {
809 // First figure out how many color values we have 791 // First figure out how many color values we have
810 uint32_t nValues = 0; 792 u32 nValues = 0;
811 for (uint32_t i = 0; i < nPartitions; i++) { 793 for (u32 i = 0; i < nPartitions; i++) {
812 nValues += ((modes[i] >> 2) + 1) << 1; 794 nValues += ((modes[i] >> 2) + 1) << 1;
813 } 795 }
814 796
815 // Then based on the number of values and the remaining number of bits, 797 // Then based on the number of values and the remaining number of bits,
816 // figure out the max value for each of them... 798 // figure out the max value for each of them...
817 uint32_t range = 256; 799 u32 range = 256;
818 while (--range > 0) { 800 while (--range > 0) {
819 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); 801 IntegerEncodedValue val = EncodingsValues[range];
820 uint32_t bitLength = val.GetBitLength(nValues); 802 u32 bitLength = val.GetBitLength(nValues);
821 if (bitLength <= nBitsForColorData) { 803 if (bitLength <= nBitsForColorData) {
822 // Find the smallest possible range that matches the given encoding 804 // Find the smallest possible range that matches the given encoding
823 while (--range > 0) { 805 while (--range > 0) {
824 IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); 806 IntegerEncodedValue newval = EncodingsValues[range];
825 if (!newval.MatchesEncoding(val)) { 807 if (!newval.MatchesEncoding(val)) {
826 break; 808 break;
827 } 809 }
@@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
835 817
836 // We now have enough to decode our integer sequence. 818 // We now have enough to decode our integer sequence.
837 std::vector<IntegerEncodedValue> decodedColorValues; 819 std::vector<IntegerEncodedValue> decodedColorValues;
820 decodedColorValues.reserve(32);
821
838 InputBitStream colorStream(data); 822 InputBitStream colorStream(data);
839 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 823 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
840 824
841 // Once we have the decoded values, we need to dequantize them to the 0-255 range 825 // Once we have the decoded values, we need to dequantize them to the 0-255 range
842 // This procedure is outlined in ASTC spec C.2.13 826 // This procedure is outlined in ASTC spec C.2.13
843 uint32_t outIdx = 0; 827 u32 outIdx = 0;
844 for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { 828 for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) {
845 // Have we already decoded all that we need? 829 // Have we already decoded all that we need?
846 if (outIdx >= nValues) { 830 if (outIdx >= nValues) {
@@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
848 } 832 }
849 833
850 const IntegerEncodedValue& val = *itr; 834 const IntegerEncodedValue& val = *itr;
851 uint32_t bitlen = val.BaseBitLength(); 835 u32 bitlen = val.num_bits;
852 uint32_t bitval = val.GetBitValue(); 836 u32 bitval = val.bit_value;
853 837
854 assert(bitlen >= 1); 838 assert(bitlen >= 1);
855 839
856 uint32_t A = 0, B = 0, C = 0, D = 0; 840 u32 A = 0, B = 0, C = 0, D = 0;
857 // A is just the lsb replicated 9 times. 841 // A is just the lsb replicated 9 times.
858 A = Replicate(bitval & 1, 1, 9); 842 A = Replicate(bitval & 1, 1, 9);
859 843
860 switch (val.GetEncoding()) { 844 switch (val.encoding) {
861 // Replicate bits 845 // Replicate bits
862 case eIntegerEncoding_JustBits: 846 case IntegerEncoding::JustBits:
863 out[outIdx++] = Replicate(bitval, bitlen, 8); 847 out[outIdx++] = Replicate(bitval, bitlen, 8);
864 break; 848 break;
865 849
866 // Use algorithm in C.2.13 850 // Use algorithm in C.2.13
867 case eIntegerEncoding_Trit: { 851 case IntegerEncoding::Trit: {
868 852
869 D = val.GetTritValue(); 853 D = val.trit_value;
870 854
871 switch (bitlen) { 855 switch (bitlen) {
872 case 1: { 856 case 1: {
@@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
876 case 2: { 860 case 2: {
877 C = 93; 861 C = 93;
878 // B = b000b0bb0 862 // B = b000b0bb0
879 uint32_t b = (bitval >> 1) & 1; 863 u32 b = (bitval >> 1) & 1;
880 B = (b << 8) | (b << 4) | (b << 2) | (b << 1); 864 B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
881 } break; 865 } break;
882 866
883 case 3: { 867 case 3: {
884 C = 44; 868 C = 44;
885 // B = cb000cbcb 869 // B = cb000cbcb
886 uint32_t cb = (bitval >> 1) & 3; 870 u32 cb = (bitval >> 1) & 3;
887 B = (cb << 7) | (cb << 2) | cb; 871 B = (cb << 7) | (cb << 2) | cb;
888 } break; 872 } break;
889 873
890 case 4: { 874 case 4: {
891 C = 22; 875 C = 22;
892 // B = dcb000dcb 876 // B = dcb000dcb
893 uint32_t dcb = (bitval >> 1) & 7; 877 u32 dcb = (bitval >> 1) & 7;
894 B = (dcb << 6) | dcb; 878 B = (dcb << 6) | dcb;
895 } break; 879 } break;
896 880
897 case 5: { 881 case 5: {
898 C = 11; 882 C = 11;
899 // B = edcb000ed 883 // B = edcb000ed
900 uint32_t edcb = (bitval >> 1) & 0xF; 884 u32 edcb = (bitval >> 1) & 0xF;
901 B = (edcb << 5) | (edcb >> 2); 885 B = (edcb << 5) | (edcb >> 2);
902 } break; 886 } break;
903 887
904 case 6: { 888 case 6: {
905 C = 5; 889 C = 5;
906 // B = fedcb000f 890 // B = fedcb000f
907 uint32_t fedcb = (bitval >> 1) & 0x1F; 891 u32 fedcb = (bitval >> 1) & 0x1F;
908 B = (fedcb << 4) | (fedcb >> 4); 892 B = (fedcb << 4) | (fedcb >> 4);
909 } break; 893 } break;
910 894
911 default: 895 default:
912 assert(!"Unsupported trit encoding for color values!"); 896 assert(false && "Unsupported trit encoding for color values!");
913 break; 897 break;
914 } // switch(bitlen) 898 } // switch(bitlen)
915 } // case eIntegerEncoding_Trit 899 } // case IntegerEncoding::Trit
916 break; 900 break;
917 901
918 case eIntegerEncoding_Quint: { 902 case IntegerEncoding::Qus32: {
919 903
920 D = val.GetQuintValue(); 904 D = val.qus32_value;
921 905
922 switch (bitlen) { 906 switch (bitlen) {
923 case 1: { 907 case 1: {
@@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
927 case 2: { 911 case 2: {
928 C = 54; 912 C = 54;
929 // B = b0000bb00 913 // B = b0000bb00
930 uint32_t b = (bitval >> 1) & 1; 914 u32 b = (bitval >> 1) & 1;
931 B = (b << 8) | (b << 3) | (b << 2); 915 B = (b << 8) | (b << 3) | (b << 2);
932 } break; 916 } break;
933 917
934 case 3: { 918 case 3: {
935 C = 26; 919 C = 26;
936 // B = cb0000cbc 920 // B = cb0000cbc
937 uint32_t cb = (bitval >> 1) & 3; 921 u32 cb = (bitval >> 1) & 3;
938 B = (cb << 7) | (cb << 1) | (cb >> 1); 922 B = (cb << 7) | (cb << 1) | (cb >> 1);
939 } break; 923 } break;
940 924
941 case 4: { 925 case 4: {
942 C = 13; 926 C = 13;
943 // B = dcb0000dc 927 // B = dcb0000dc
944 uint32_t dcb = (bitval >> 1) & 7; 928 u32 dcb = (bitval >> 1) & 7;
945 B = (dcb << 6) | (dcb >> 1); 929 B = (dcb << 6) | (dcb >> 1);
946 } break; 930 } break;
947 931
948 case 5: { 932 case 5: {
949 C = 6; 933 C = 6;
950 // B = edcb0000e 934 // B = edcb0000e
951 uint32_t edcb = (bitval >> 1) & 0xF; 935 u32 edcb = (bitval >> 1) & 0xF;
952 B = (edcb << 5) | (edcb >> 3); 936 B = (edcb << 5) | (edcb >> 3);
953 } break; 937 } break;
954 938
955 default: 939 default:
956 assert(!"Unsupported quint encoding for color values!"); 940 assert(false && "Unsupported quint encoding for color values!");
957 break; 941 break;
958 } // switch(bitlen) 942 } // switch(bitlen)
959 } // case eIntegerEncoding_Quint 943 } // case IntegerEncoding::Qus32
960 break; 944 break;
961 } // switch(val.GetEncoding()) 945 } // switch(val.encoding)
962 946
963 if (val.GetEncoding() != eIntegerEncoding_JustBits) { 947 if (val.encoding != IntegerEncoding::JustBits) {
964 uint32_t T = D * C + B; 948 u32 T = D * C + B;
965 T ^= A; 949 T ^= A;
966 T = (A & 0x80) | (T >> 2); 950 T = (A & 0x80) | (T >> 2);
967 out[outIdx++] = T; 951 out[outIdx++] = T;
@@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
969 } 953 }
970 954
971 // Make sure that each of our values is in the proper range... 955 // Make sure that each of our values is in the proper range...
972 for (uint32_t i = 0; i < nValues; i++) { 956 for (u32 i = 0; i < nValues; i++) {
973 assert(out[i] <= 255); 957 assert(out[i] <= 255);
974 } 958 }
975} 959}
976 960
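The C.2.13 dequantization above maps every (mantissa, trit/quint) pair onto 0..255 via T = D * C + B, T ^= A, T = (A & 0x80) | (T >> 2), where A replicates the mantissa's low bit and B, C come from the per-bit-length tables. A worked trit case with two mantissa bits, following the code exactly:

    // bitval = 0b10, D = 2  ->  A = 0, b = 1, B = 0b100010110 = 278, C = 93
    // T = 2 * 93 + 278 = 464;  T ^= 0;  (0 & 0x80) | (464 >> 2) = 116  ->  out value 116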
977static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { 961static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
978 uint32_t bitval = val.GetBitValue(); 962 u32 bitval = val.bit_value;
979 uint32_t bitlen = val.BaseBitLength(); 963 u32 bitlen = val.num_bits;
980 964
981 uint32_t A = Replicate(bitval & 1, 1, 7); 965 u32 A = Replicate(bitval & 1, 1, 7);
982 uint32_t B = 0, C = 0, D = 0; 966 u32 B = 0, C = 0, D = 0;
983 967
984 uint32_t result = 0; 968 u32 result = 0;
985 switch (val.GetEncoding()) { 969 switch (val.encoding) {
986 case eIntegerEncoding_JustBits: 970 case IntegerEncoding::JustBits:
987 result = Replicate(bitval, bitlen, 6); 971 result = Replicate(bitval, bitlen, 6);
988 break; 972 break;
989 973
990 case eIntegerEncoding_Trit: { 974 case IntegerEncoding::Trit: {
991 D = val.GetTritValue(); 975 D = val.trit_value;
992 assert(D < 3); 976 assert(D < 3);
993 977
994 switch (bitlen) { 978 switch (bitlen) {
995 case 0: { 979 case 0: {
996 uint32_t results[3] = {0, 32, 63}; 980 u32 results[3] = {0, 32, 63};
997 result = results[D]; 981 result = results[D];
998 } break; 982 } break;
999 983
@@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1003 987
1004 case 2: { 988 case 2: {
1005 C = 23; 989 C = 23;
1006 uint32_t b = (bitval >> 1) & 1; 990 u32 b = (bitval >> 1) & 1;
1007 B = (b << 6) | (b << 2) | b; 991 B = (b << 6) | (b << 2) | b;
1008 } break; 992 } break;
1009 993
1010 case 3: { 994 case 3: {
1011 C = 11; 995 C = 11;
1012 uint32_t cb = (bitval >> 1) & 3; 996 u32 cb = (bitval >> 1) & 3;
1013 B = (cb << 5) | cb; 997 B = (cb << 5) | cb;
1014 } break; 998 } break;
1015 999
1016 default: 1000 default:
1017 assert(!"Invalid trit encoding for texel weight"); 1001 assert(false && "Invalid trit encoding for texel weight");
1018 break; 1002 break;
1019 } 1003 }
1020 } break; 1004 } break;
1021 1005
1022 case eIntegerEncoding_Quint: { 1006 case IntegerEncoding::Qus32: {
1023 D = val.GetQuintValue(); 1007 D = val.qus32_value;
1024 assert(D < 5); 1008 assert(D < 5);
1025 1009
1026 switch (bitlen) { 1010 switch (bitlen) {
1027 case 0: { 1011 case 0: {
1028 uint32_t results[5] = {0, 16, 32, 47, 63}; 1012 u32 results[5] = {0, 16, 32, 47, 63};
1029 result = results[D]; 1013 result = results[D];
1030 } break; 1014 } break;
1031 1015
@@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1035 1019
1036 case 2: { 1020 case 2: {
1037 C = 13; 1021 C = 13;
1038 uint32_t b = (bitval >> 1) & 1; 1022 u32 b = (bitval >> 1) & 1;
1039 B = (b << 6) | (b << 1); 1023 B = (b << 6) | (b << 1);
1040 } break; 1024 } break;
1041 1025
1042 default: 1026 default:
1043 assert(!"Invalid quint encoding for texel weight"); 1027 assert(false && "Invalid quint encoding for texel weight");
1044 break; 1028 break;
1045 } 1029 }
1046 } break; 1030 } break;
1047 } 1031 }
1048 1032
1049 if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { 1033 if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) {
1050 // Decode the value... 1034 // Decode the value...
1051 result = D * C + B; 1035 result = D * C + B;
1052 result ^= A; 1036 result ^= A;
@@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1063 return result; 1047 return result;
1064} 1048}
1065 1049
1066static void UnquantizeTexelWeights(uint32_t out[2][144], 1050static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights,
1067 const std::vector<IntegerEncodedValue>& weights, 1051 const TexelWeightParams& params, const u32 blockWidth,
1068 const TexelWeightParams& params, const uint32_t blockWidth, 1052 const u32 blockHeight) {
1069 const uint32_t blockHeight) { 1053 u32 weightIdx = 0;
1070 uint32_t weightIdx = 0; 1054 u32 unquantized[2][144];
1071 uint32_t unquantized[2][144];
1072 1055
1073 for (auto itr = weights.begin(); itr != weights.end(); ++itr) { 1056 for (auto itr = weights.begin(); itr != weights.end(); ++itr) {
1074 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); 1057 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
@@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
1086 } 1069 }
1087 1070
1088 // Do infill if necessary (Section C.2.18) ... 1071 // Do infill if necessary (Section C.2.18) ...
1089 uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); 1072 u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
1090 uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); 1073 u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
1091 1074
1092 const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; 1075 const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U;
1093 for (uint32_t plane = 0; plane < kPlaneScale; plane++) 1076 for (u32 plane = 0; plane < kPlaneScale; plane++)
1094 for (uint32_t t = 0; t < blockHeight; t++) 1077 for (u32 t = 0; t < blockHeight; t++)
1095 for (uint32_t s = 0; s < blockWidth; s++) { 1078 for (u32 s = 0; s < blockWidth; s++) {
1096 uint32_t cs = Ds * s; 1079 u32 cs = Ds * s;
1097 uint32_t ct = Dt * t; 1080 u32 ct = Dt * t;
1098 1081
1099 uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; 1082 u32 gs = (cs * (params.m_Width - 1) + 32) >> 6;
1100 uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; 1083 u32 gt = (ct * (params.m_Height - 1) + 32) >> 6;
1101 1084
1102 uint32_t js = gs >> 4; 1085 u32 js = gs >> 4;
1103 uint32_t fs = gs & 0xF; 1086 u32 fs = gs & 0xF;
1104 1087
1105 uint32_t jt = gt >> 4; 1088 u32 jt = gt >> 4;
1106 uint32_t ft = gt & 0x0F; 1089 u32 ft = gt & 0x0F;
1107 1090
1108 uint32_t w11 = (fs * ft + 8) >> 4; 1091 u32 w11 = (fs * ft + 8) >> 4;
1109 uint32_t w10 = ft - w11; 1092 u32 w10 = ft - w11;
1110 uint32_t w01 = fs - w11; 1093 u32 w01 = fs - w11;
1111 uint32_t w00 = 16 - fs - ft + w11; 1094 u32 w00 = 16 - fs - ft + w11;
1112 1095
1113 uint32_t v0 = js + jt * params.m_Width; 1096 u32 v0 = js + jt * params.m_Width;
1114 1097
1115#define FIND_TEXEL(tidx, bidx) \ 1098#define FIND_TEXEL(tidx, bidx) \
1116 uint32_t p##bidx = 0; \ 1099 u32 p##bidx = 0; \
1117 do { \ 1100 do { \
1118 if ((tidx) < (params.m_Width * params.m_Height)) { \ 1101 if ((tidx) < (params.m_Width * params.m_Height)) { \
1119 p##bidx = unquantized[plane][(tidx)]; \ 1102 p##bidx = unquantized[plane][(tidx)]; \
@@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
1133} 1116}
1134 1117
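The Ds/Dt scale factors and the w00..w11 terms implement the weight-grid infill of section C.2.18: each texel's weight is a 4.4 fixed-point bilinear blend of the four nearest grid weights (the four w values always sum to 16). The blend itself is performed by the remainder of the FIND_TEXEL sequence, outside this hunk; per the spec it amounts to:

    // weight = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4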
1135// Transfers a bit as described in C.2.14 1118// Transfers a bit as described in C.2.14
1136static inline void BitTransferSigned(int32_t& a, int32_t& b) { 1119static inline void BitTransferSigned(s32& a, s32& b) {
1137 b >>= 1; 1120 b >>= 1;
1138 b |= a & 0x80; 1121 b |= a & 0x80;
1139 a >>= 1; 1122 a >>= 1;
@@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) {
1144 1127
1145// Adds more precision to the blue channel as described 1128// Adds more precision to the blue channel as described
1146// in C.2.14 1129// in C.2.14
1147static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { 1130static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) {
1148 return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), 1131 return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1),
1149 static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); 1132 static_cast<s16>((g + b) >> 1), static_cast<s16>(b));
1150} 1133}
1151 1134
1152// Partition selection functions as specified in 1135// Partition selection functions as specified in
1153// C.2.21 1136// C.2.21
1154static inline uint32_t hash52(uint32_t p) { 1137static inline u32 hash52(u32 p) {
1155 p ^= p >> 15; 1138 p ^= p >> 15;
1156 p -= p << 17; 1139 p -= p << 17;
1157 p += p << 7; 1140 p += p << 7;
@@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) {
1165 return p; 1148 return p;
1166} 1149}
1167 1150
1168static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, 1151static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) {
1169 int32_t partitionCount, int32_t smallBlock) {
1170 if (1 == partitionCount) 1152 if (1 == partitionCount)
1171 return 0; 1153 return 0;
1172 1154
@@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1178 1160
1179 seed += (partitionCount - 1) * 1024; 1161 seed += (partitionCount - 1) * 1024;
1180 1162
1181 uint32_t rnum = hash52(static_cast<uint32_t>(seed)); 1163 u32 rnum = hash52(static_cast<u32>(seed));
1182 uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); 1164 u8 seed1 = static_cast<u8>(rnum & 0xF);
1183 uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); 1165 u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF);
1184 uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); 1166 u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF);
1185 uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); 1167 u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF);
1186 uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); 1168 u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF);
1187 uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); 1169 u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF);
1188 uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); 1170 u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF);
1189 uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); 1171 u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF);
1190 uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); 1172 u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF);
1191 uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); 1173 u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF);
1192 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); 1174 u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF);
1193 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); 1175 u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF);
1194 1176
1195 seed1 = static_cast<uint8_t>(seed1 * seed1); 1177 seed1 = static_cast<u8>(seed1 * seed1);
1196 seed2 = static_cast<uint8_t>(seed2 * seed2); 1178 seed2 = static_cast<u8>(seed2 * seed2);
1197 seed3 = static_cast<uint8_t>(seed3 * seed3); 1179 seed3 = static_cast<u8>(seed3 * seed3);
1198 seed4 = static_cast<uint8_t>(seed4 * seed4); 1180 seed4 = static_cast<u8>(seed4 * seed4);
1199 seed5 = static_cast<uint8_t>(seed5 * seed5); 1181 seed5 = static_cast<u8>(seed5 * seed5);
1200 seed6 = static_cast<uint8_t>(seed6 * seed6); 1182 seed6 = static_cast<u8>(seed6 * seed6);
1201 seed7 = static_cast<uint8_t>(seed7 * seed7); 1183 seed7 = static_cast<u8>(seed7 * seed7);
1202 seed8 = static_cast<uint8_t>(seed8 * seed8); 1184 seed8 = static_cast<u8>(seed8 * seed8);
1203 seed9 = static_cast<uint8_t>(seed9 * seed9); 1185 seed9 = static_cast<u8>(seed9 * seed9);
1204 seed10 = static_cast<uint8_t>(seed10 * seed10); 1186 seed10 = static_cast<u8>(seed10 * seed10);
1205 seed11 = static_cast<uint8_t>(seed11 * seed11); 1187 seed11 = static_cast<u8>(seed11 * seed11);
1206 seed12 = static_cast<uint8_t>(seed12 * seed12); 1188 seed12 = static_cast<u8>(seed12 * seed12);
1207 1189
1208 int32_t sh1, sh2, sh3; 1190 s32 sh1, sh2, sh3;
1209 if (seed & 1) { 1191 if (seed & 1) {
1210 sh1 = (seed & 2) ? 4 : 5; 1192 sh1 = (seed & 2) ? 4 : 5;
1211 sh2 = (partitionCount == 3) ? 6 : 5; 1193 sh2 = (partitionCount == 3) ? 6 : 5;
@@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1215 } 1197 }
1216 sh3 = (seed & 0x10) ? sh1 : sh2; 1198 sh3 = (seed & 0x10) ? sh1 : sh2;
1217 1199
1218 seed1 = static_cast<uint8_t>(seed1 >> sh1); 1200 seed1 = static_cast<u8>(seed1 >> sh1);
1219 seed2 = static_cast<uint8_t>(seed2 >> sh2); 1201 seed2 = static_cast<u8>(seed2 >> sh2);
1220 seed3 = static_cast<uint8_t>(seed3 >> sh1); 1202 seed3 = static_cast<u8>(seed3 >> sh1);
1221 seed4 = static_cast<uint8_t>(seed4 >> sh2); 1203 seed4 = static_cast<u8>(seed4 >> sh2);
1222 seed5 = static_cast<uint8_t>(seed5 >> sh1); 1204 seed5 = static_cast<u8>(seed5 >> sh1);
1223 seed6 = static_cast<uint8_t>(seed6 >> sh2); 1205 seed6 = static_cast<u8>(seed6 >> sh2);
1224 seed7 = static_cast<uint8_t>(seed7 >> sh1); 1206 seed7 = static_cast<u8>(seed7 >> sh1);
1225 seed8 = static_cast<uint8_t>(seed8 >> sh2); 1207 seed8 = static_cast<u8>(seed8 >> sh2);
1226 seed9 = static_cast<uint8_t>(seed9 >> sh3); 1208 seed9 = static_cast<u8>(seed9 >> sh3);
1227 seed10 = static_cast<uint8_t>(seed10 >> sh3); 1209 seed10 = static_cast<u8>(seed10 >> sh3);
1228 seed11 = static_cast<uint8_t>(seed11 >> sh3); 1210 seed11 = static_cast<u8>(seed11 >> sh3);
1229 seed12 = static_cast<uint8_t>(seed12 >> sh3); 1211 seed12 = static_cast<u8>(seed12 >> sh3);
1230 1212
1231 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 1213 s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1232 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 1214 s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
1233 int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 1215 s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
1234 int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 1216 s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
1235 1217
1236 a &= 0x3F; 1218 a &= 0x3F;
1237 b &= 0x3F; 1219 b &= 0x3F;
@@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1252 return 3; 1234 return 3;
1253} 1235}
1254 1236
1255static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, 1237static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) {
1256 int32_t smallBlock) {
1257 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); 1238 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
1258} 1239}
1259 1240
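Select2DPartition is the 2D entry point for the hash-based partition selection above. A hedged usage sketch (the actual call site lies outside this hunk): the seed is the 10-bit partition index read in DecompressBlock, and smallBlock follows the spec's rule of fewer than 31 texels per block:

    bool smallBlock = (blockWidth * blockHeight) < 31;
    u32 part = Select2DPartition(partitionIndex, x, y, nPartitions, smallBlock);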
1260// Section C.2.14 1241// Section C.2.14
1261static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, 1242static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1262 uint32_t colorEndpointMode) { 1243 u32 colorEndpos32Mode) {
1263#define READ_UINT_VALUES(N) \ 1244#define READ_UINT_VALUES(N) \
1264 uint32_t v[N]; \ 1245 u32 v[N]; \
1265 for (uint32_t i = 0; i < N; i++) { \ 1246 for (u32 i = 0; i < N; i++) { \
1266 v[i] = *(colorValues++); \ 1247 v[i] = *(colorValues++); \
1267 } 1248 }
1268 1249
1269#define READ_INT_VALUES(N) \ 1250#define READ_INT_VALUES(N) \
1270 int32_t v[N]; \ 1251 s32 v[N]; \
1271 for (uint32_t i = 0; i < N; i++) { \ 1252 for (u32 i = 0; i < N; i++) { \
1272 v[i] = static_cast<int32_t>(*(colorValues++)); \ 1253 v[i] = static_cast<s32>(*(colorValues++)); \
1273 } 1254 }
1274 1255
1275 switch (colorEndpointMode) { 1256 switch (colorEndpos32Mode) {
1276 case 0: { 1257 case 0: {
1277 READ_UINT_VALUES(2) 1258 READ_UINT_VALUES(2)
1278 ep1 = Pixel(0xFF, v[0], v[0], v[0]); 1259 ep1 = Pixel(0xFF, v[0], v[0], v[0]);
@@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1281 1262
1282 case 1: { 1263 case 1: {
1283 READ_UINT_VALUES(2) 1264 READ_UINT_VALUES(2)
1284 uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); 1265 u32 L0 = (v[0] >> 2) | (v[1] & 0xC0);
1285 uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); 1266 u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
1286 ep1 = Pixel(0xFF, L0, L0, L0); 1267 ep1 = Pixel(0xFF, L0, L0, L0);
1287 ep2 = Pixel(0xFF, L1, L1, L1); 1268 ep2 = Pixel(0xFF, L1, L1, L1);
1288 } break; 1269 } break;
@@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1371 } break; 1352 } break;
1372 1353
1373 default: 1354 default:
1374 assert(!"Unsupported color endpoint mode (is it HDR?)"); 1355 assert(false && "Unsupported color endpoint mode (is it HDR?)");
1375 break; 1356 break;
1376 } 1357 }
1377 1358
@@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1379#undef READ_INT_VALUES 1360#undef READ_INT_VALUES
1380} 1361}
1381 1362
1382static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, 1363static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
1383 const uint32_t blockHeight, uint32_t* outBuf) { 1364 u32* outBuf) {
1384 InputBitStream strm(inBuf); 1365 InputBitStream strm(inBuf);
1385 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1366 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1386 1367
1387 // Was there an error? 1368 // Was there an error?
1388 if (weightParams.m_bError) { 1369 if (weightParams.m_bError) {
1389 assert(!"Invalid block mode"); 1370 assert(false && "Invalid block mode");
1390 FillError(outBuf, blockWidth, blockHeight); 1371 FillError(outBuf, blockWidth, blockHeight);
1391 return; 1372 return;
1392 } 1373 }
@@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1397 } 1378 }
1398 1379
1399 if (weightParams.m_bVoidExtentHDR) { 1380 if (weightParams.m_bVoidExtentHDR) {
1400 assert(!"HDR void extent blocks are unsupported!"); 1381 assert(false && "HDR void extent blocks are unsupported!");
1401 FillError(outBuf, blockWidth, blockHeight); 1382 FillError(outBuf, blockWidth, blockHeight);
1402 return; 1383 return;
1403 } 1384 }
1404 1385
1405 if (weightParams.m_Width > blockWidth) { 1386 if (weightParams.m_Width > blockWidth) {
1406 assert(!"Texel weight grid width should be smaller than block width"); 1387 assert(false && "Texel weight grid width should be smaller than block width");
1407 FillError(outBuf, blockWidth, blockHeight); 1388 FillError(outBuf, blockWidth, blockHeight);
1408 return; 1389 return;
1409 } 1390 }
1410 1391
1411 if (weightParams.m_Height > blockHeight) { 1392 if (weightParams.m_Height > blockHeight) {
1412 assert(!"Texel weight grid height should be smaller than block height"); 1393 assert(false && "Texel weight grid height should be smaller than block height");
1413 FillError(outBuf, blockWidth, blockHeight); 1394 FillError(outBuf, blockWidth, blockHeight);
1414 return; 1395 return;
1415 } 1396 }
1416 1397
1417 // Read num partitions 1398 // Read num partitions
1418 uint32_t nPartitions = strm.ReadBits(2) + 1; 1399 u32 nPartitions = strm.ReadBits<2>() + 1;
1419 assert(nPartitions <= 4); 1400 assert(nPartitions <= 4);
1420 1401
1421 if (nPartitions == 4 && weightParams.m_bDualPlane) { 1402 if (nPartitions == 4 && weightParams.m_bDualPlane) {
1422 assert(!"Dual plane mode is incompatible with four partition blocks"); 1403 assert(false && "Dual plane mode is incompatible with four partition blocks");
1423 FillError(outBuf, blockWidth, blockHeight); 1404 FillError(outBuf, blockWidth, blockHeight);
1424 return; 1405 return;
1425 } 1406 }
1426 1407
1427 // Based on the number of partitions, read the color endpoint mode for 1408 // Based on the number of partitions, read the color endpos32 mode for
1428 // each partition. 1409 // each partition.
1429 1410
1430 // Determine partitions, partition index, and color endpoint modes 1411 // Determine partitions, partition index, and color endpos32 modes
1431 int32_t planeIdx = -1; 1412 s32 planeIdx = -1;
1432 uint32_t partitionIndex; 1413 u32 partitionIndex;
1433 uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; 1414 u32 colorEndpos32Mode[4] = {0, 0, 0, 0};
1434 1415
1435 // Define color data. 1416 // Define color data.
1436 uint8_t colorEndpointData[16]; 1417 u8 colorEndpos32Data[16];
1437 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1418 memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data));
1438 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1419 OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0);
1439 1420
1440 // Read extra config data... 1421 // Read extra config data...
1441 uint32_t baseCEM = 0; 1422 u32 baseCEM = 0;
1442 if (nPartitions == 1) { 1423 if (nPartitions == 1) {
1443 colorEndpointMode[0] = strm.ReadBits(4); 1424 colorEndpos32Mode[0] = strm.ReadBits<4>();
1444 partitionIndex = 0; 1425 partitionIndex = 0;
1445 } else { 1426 } else {
1446 partitionIndex = strm.ReadBits(10); 1427 partitionIndex = strm.ReadBits<10>();
1447 baseCEM = strm.ReadBits(6); 1428 baseCEM = strm.ReadBits<6>();
1448 } 1429 }
1449 uint32_t baseMode = (baseCEM & 3); 1430 u32 baseMode = (baseCEM & 3);
1450 1431
1451 // Remaining bits are color endpoint data... 1432 // Remaining bits are color endpos32 data...
1452 uint32_t nWeightBits = weightParams.GetPackedBitSize(); 1433 u32 nWeightBits = weightParams.GetPackedBitSize();
1453 int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); 1434 s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead());
1454 1435
1455 // Consider extra bits prior to texel data... 1436 // Consider extra bits prior to texel data...
1456 uint32_t extraCEMbits = 0; 1437 u32 extraCEMbits = 0;
1457 if (baseMode) { 1438 if (baseMode) {
1458 switch (nPartitions) { 1439 switch (nPartitions) {
1459 case 2: 1440 case 2:
@@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1473 remainingBits -= extraCEMbits; 1454 remainingBits -= extraCEMbits;
1474 1455
1475 // Do we have a dual plane situation? 1456 // Do we have a dual plane situation?
1476 uint32_t planeSelectorBits = 0; 1457 u32 planeSelectorBits = 0;
1477 if (weightParams.m_bDualPlane) { 1458 if (weightParams.m_bDualPlane) {
1478 planeSelectorBits = 2; 1459 planeSelectorBits = 2;
1479 } 1460 }
1480 remainingBits -= planeSelectorBits; 1461 remainingBits -= planeSelectorBits;
1481 1462
1482 // Read color data... 1463 // Read color data...
1483 uint32_t colorDataBits = remainingBits; 1464 u32 colorDataBits = remainingBits;
1484 while (remainingBits > 0) { 1465 while (remainingBits > 0) {
1485 uint32_t nb = std::min(remainingBits, 8); 1466 u32 nb = std::min(remainingBits, 8);
1486 uint32_t b = strm.ReadBits(nb); 1467 u32 b = strm.ReadBits(nb);
1487 colorEndpointStream.WriteBits(b, nb); 1468 colorEndpos32Stream.WriteBits(b, nb);
1488 remainingBits -= 8; 1469 remainingBits -= 8;
1489 } 1470 }
1490 1471
@@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1493 1474
1494 // Read the rest of the CEM 1475 // Read the rest of the CEM
1495 if (baseMode) { 1476 if (baseMode) {
1496 uint32_t extraCEM = strm.ReadBits(extraCEMbits); 1477 u32 extraCEM = strm.ReadBits(extraCEMbits);
1497 uint32_t CEM = (extraCEM << 6) | baseCEM; 1478 u32 CEM = (extraCEM << 6) | baseCEM;
1498 CEM >>= 2; 1479 CEM >>= 2;
1499 1480
1500 bool C[4] = {0}; 1481 bool C[4] = {0};
1501 for (uint32_t i = 0; i < nPartitions; i++) { 1482 for (u32 i = 0; i < nPartitions; i++) {
1502 C[i] = CEM & 1; 1483 C[i] = CEM & 1;
1503 CEM >>= 1; 1484 CEM >>= 1;
1504 } 1485 }
1505 1486
1506 uint8_t M[4] = {0}; 1487 u8 M[4] = {0};
1507 for (uint32_t i = 0; i < nPartitions; i++) { 1488 for (u32 i = 0; i < nPartitions; i++) {
1508 M[i] = CEM & 3; 1489 M[i] = CEM & 3;
1509 CEM >>= 2; 1490 CEM >>= 2;
1510 assert(M[i] <= 3); 1491 assert(M[i] <= 3);
1511 } 1492 }
1512 1493
1513 for (uint32_t i = 0; i < nPartitions; i++) { 1494 for (u32 i = 0; i < nPartitions; i++) {
1514 colorEndpointMode[i] = baseMode; 1495 colorEndpos32Mode[i] = baseMode;
1515 if (!(C[i])) 1496 if (!(C[i]))
1516 colorEndpointMode[i] -= 1; 1497 colorEndpos32Mode[i] -= 1;
1517 colorEndpointMode[i] <<= 2; 1498 colorEndpos32Mode[i] <<= 2;
1518 colorEndpointMode[i] |= M[i]; 1499 colorEndpos32Mode[i] |= M[i];
1519 } 1500 }
1520 } else if (nPartitions > 1) { 1501 } else if (nPartitions > 1) {
1521 uint32_t CEM = baseCEM >> 2; 1502 u32 CEM = baseCEM >> 2;
1522 for (uint32_t i = 0; i < nPartitions; i++) { 1503 for (u32 i = 0; i < nPartitions; i++) {
1523 colorEndpointMode[i] = CEM; 1504 colorEndpos32Mode[i] = CEM;
1524 } 1505 }
1525 } 1506 }
1526 1507
1527 // Make sure everything up till here is sane. 1508 // Make sure everything up till here is sane.
1528 for (uint32_t i = 0; i < nPartitions; i++) { 1509 for (u32 i = 0; i < nPartitions; i++) {
1529 assert(colorEndpointMode[i] < 16); 1510 assert(colorEndpos32Mode[i] < 16);
1530 } 1511 }
1531 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); 1512 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
1532 1513
1533 // Decode both color data and texel weight data 1514 // Decode both color data and texel weight data
1534 uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions 1515 u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions
1535 DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, 1516 DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions,
1536 colorDataBits); 1517 colorDataBits);
1537 1518
1538 Pixel endpoints[4][2]; 1519 Pixel endpos32s[4][2];
1539 const uint32_t* colorValuesPtr = colorValues; 1520 const u32* colorValuesPtr = colorValues;
1540 for (uint32_t i = 0; i < nPartitions; i++) { 1521 for (u32 i = 0; i < nPartitions; i++) {
1541 ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); 1522 ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]);
1542 } 1523 }
1543 1524
1544 // Read the texel weight data.. 1525 // Read the texel weight data..
1545 uint8_t texelWeightData[16]; 1526 u8 texelWeightData[16];
1546 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1527 memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
1547 1528
1548 // Reverse everything 1529 // Reverse everything
1549 for (uint32_t i = 0; i < 8; i++) { 1530 for (u32 i = 0; i < 8; i++) {
1550// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits 1531// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
1551#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 1532#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
1552 unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); 1533 u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i]));
1553 unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); 1534 u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i]));
1554#undef REVERSE_BYTE 1535#undef REVERSE_BYTE
1555 1536
1556 texelWeightData[i] = b; 1537 texelWeightData[i] = b;
@@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1558 } 1539 }
1559 1540
1560 // Make sure that higher non-texel bits are set to zero 1541 // Make sure that higher non-texel bits are set to zero
1561 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1542 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1562 texelWeightData[clearByteStart - 1] = 1543 texelWeightData[clearByteStart - 1] =
1563 texelWeightData[clearByteStart - 1] & 1544 texelWeightData[clearByteStart - 1] &
1564 static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1545 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1546 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1566 1547
1567 std::vector<IntegerEncodedValue> texelWeightValues; 1548 std::vector<IntegerEncodedValue> texelWeightValues;
1549 texelWeightValues.reserve(64);
1550
1568 InputBitStream weightStream(texelWeightData); 1551 InputBitStream weightStream(texelWeightData);
1569 1552
1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1553 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1571 weightParams.m_MaxWeight, 1554 weightParams.GetNumWeightValues());
1572 weightParams.GetNumWeightValues());
1573 1555
1574 // Blocks can be at most 12x12, so we can have as many as 144 weights 1556 // Blocks can be at most 12x12, so we can have as many as 144 weights
1575 uint32_t weights[2][144]; 1557 u32 weights[2][144];
1576 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); 1558 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
1577 1559
1578 // Now that we have endpoints and weights, we can interpolate and generate 1560 // Now that we have endpos32s and weights, we can s32erpolate and generate
1579 // the proper decoding... 1561 // the proper decoding...
1580 for (uint32_t j = 0; j < blockHeight; j++) 1562 for (u32 j = 0; j < blockHeight; j++)
1581 for (uint32_t i = 0; i < blockWidth; i++) { 1563 for (u32 i = 0; i < blockWidth; i++) {
1582 uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, 1564 u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions,
1583 (blockHeight * blockWidth) < 32); 1565 (blockHeight * blockWidth) < 32);
1584 assert(partition < nPartitions); 1566 assert(partition < nPartitions);
1585 1567
1586 Pixel p; 1568 Pixel p;
1587 for (uint32_t c = 0; c < 4; c++) { 1569 for (u32 c = 0; c < 4; c++) {
1588 uint32_t C0 = endpoints[partition][0].Component(c); 1570 u32 C0 = endpos32s[partition][0].Component(c);
1589 C0 = Replicate(C0, 8, 16); 1571 C0 = Replicate(C0, 8, 16);
1590 uint32_t C1 = endpoints[partition][1].Component(c); 1572 u32 C1 = endpos32s[partition][1].Component(c);
1591 C1 = Replicate(C1, 8, 16); 1573 C1 = Replicate(C1, 8, 16);
1592 1574
1593 uint32_t plane = 0; 1575 u32 plane = 0;
1594 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { 1576 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
1595 plane = 1; 1577 plane = 1;
1596 } 1578 }
1597 1579
1598 uint32_t weight = weights[plane][j * blockWidth + i]; 1580 u32 weight = weights[plane][j * blockWidth + i];
1599 uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; 1581 u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
1600 if (C == 65535) { 1582 if (C == 65535) {
1601 p.Component(c) = 255; 1583 p.Component(c) = 255;
1602 } else { 1584 } else {
1603 double Cf = static_cast<double>(C); 1585 double Cf = static_cast<double>(C);
1604 p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); 1586 p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5);
1605 } 1587 }
1606 } 1588 }
1607 1589
@@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1613 1595
1614namespace Tegra::Texture::ASTC { 1596namespace Tegra::Texture::ASTC {
1615 1597
1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 1598std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
1617 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1599 u32 block_height) {
1618 uint32_t blockIdx = 0; 1600 u32 blockIdx = 0;
1619 std::size_t depth_offset = 0; 1601 std::size_t depth_offset = 0;
1620 std::vector<uint8_t> outData(height * width * depth * 4); 1602 std::vector<u8> outData(height * width * depth * 4);
1621 for (uint32_t k = 0; k < depth; k++) { 1603 for (u32 k = 0; k < depth; k++) {
1622 for (uint32_t j = 0; j < height; j += block_height) { 1604 for (u32 j = 0; j < height; j += block_height) {
1623 for (uint32_t i = 0; i < width; i += block_width) { 1605 for (u32 i = 0; i < width; i += block_width) {
1624 1606
1625 const uint8_t* blockPtr = data + blockIdx * 16; 1607 const u8* blockPtr = data + blockIdx * 16;
1626 1608
1627 // Blocks can be at most 12x12 1609 // Blocks can be at most 12x12
1628 uint32_t uncompData[144]; 1610 u32 uncompData[144];
1629 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1611 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1630 1612
1631 uint32_t decompWidth = std::min(block_width, width - i); 1613 u32 decompWidth = std::min(block_width, width - i);
1632 uint32_t decompHeight = std::min(block_height, height - j); 1614 u32 decompHeight = std::min(block_height, height - j);
1633 1615
1634 uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1616 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
1635 for (uint32_t jj = 0; jj < decompHeight; jj++) { 1617 for (u32 jj = 0; jj < decompHeight; jj++) {
1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1618 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1637 } 1619 }
1638 1620
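
The texel loop at the end of DecompressBlock blends the two partition endpoints with the unquantized 0..64 weight and then rescales the 16-bit result back to 8 bits. Below is a minimal, self-contained sketch of that blend using standard fixed-width types instead of the codebase's u8/u32 aliases; Replicate8To16 and BlendComponent are illustrative names rather than helpers from the decoder, and the 8-bit-input assumption covers only the case exercised by this loop.

// Sketch of the per-texel endpoint blend, assuming 8-bit endpoint components
// and a weight already unquantized to the 0..64 range used by ASTC.
#include <cstdint>

// 8-bit -> 16-bit bit replication; the special case of the decoder's
// Replicate(value, 8, 16) call above.
constexpr uint32_t Replicate8To16(uint32_t v) {
    return (v << 8) | v;
}

// Blend two 8-bit endpoint components with a 0..64 weight and convert the
// 16-bit result back to 8 bits, mirroring how the loop fills p.Component(c).
constexpr uint8_t BlendComponent(uint32_t c0, uint32_t c1, uint32_t weight) {
    const uint32_t e0 = Replicate8To16(c0);
    const uint32_t e1 = Replicate8To16(c1);
    const uint32_t c = (e0 * (64 - weight) + e1 * weight + 32) / 64;
    if (c == 65535) {
        return 255; // saturated component; skip the floating-point rescale
    }
    const double cf = static_cast<double>(c);
    return static_cast<uint8_t>(255.0 * (cf / 65536.0) + 0.5);
}

// Example: blending 0x10 and 0xF0 at the midpoint weight of 32 yields 0x80.
static_assert(BlendComponent(0x10, 0xF0, 32) == 0x80);

The +32 term rounds the division by 64 to nearest, and the 65535 short-circuit simply avoids the floating-point rescale for a fully saturated component.
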
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 8e82c6748..7edc4abe1 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,6 +8,7 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/settings.h"
11 12
12namespace Tegra::Texture { 13namespace Tegra::Texture {
13 14
@@ -294,6 +295,14 @@ enum class TextureMipmapFilter : u32 {
294 Linear = 3, 295 Linear = 3,
295}; 296};
296 297
298enum class Anisotropy {
299 Default,
300 Filter2x,
301 Filter4x,
302 Filter8x,
303 Filter16x,
304};
305
297struct TSCEntry { 306struct TSCEntry {
298 union { 307 union {
299 struct { 308 struct {
@@ -328,7 +337,22 @@ struct TSCEntry {
328 }; 337 };
329 338
330 float GetMaxAnisotropy() const { 339 float GetMaxAnisotropy() const {
331 return static_cast<float>(1U << max_anisotropy); 340 const u32 min_value = [] {
341 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
342 default:
343 case Anisotropy::Default:
344 return 1U;
345 case Anisotropy::Filter2x:
346 return 2U;
347 case Anisotropy::Filter4x:
348 return 4U;
349 case Anisotropy::Filter8x:
350 return 8U;
351 case Anisotropy::Filter16x:
352 return 16U;
353 }
354 }();
355 return static_cast<float>(std::max(1U << max_anisotropy, min_value));
332 } 356 }
333 357
334 float GetMinLod() const { 358 float GetMinLod() const {
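
The reworked GetMaxAnisotropy treats the new setting as a floor: the sampler's own power-of-two anisotropy is kept unless the user-selected level is higher. The sketch below captures that relationship for setting indices 0..4, where the explicit switch above reduces to a power-of-two shift; EffectiveMaxAnisotropy and its parameters are illustrative names, not engine code.

// Sketch of how the user setting interacts with the anisotropy requested by
// the guest, mirroring TSCEntry::GetMaxAnisotropy above. setting_index stands
// in for Settings::values.max_anisotropy (0 = Default .. 4 = 16x) and
// tsc_max_anisotropy for the power-of-two exponent in the sampler descriptor.
#include <algorithm>
#include <cstdint>

float EffectiveMaxAnisotropy(uint32_t setting_index, uint32_t tsc_max_anisotropy) {
    // For indices 0..4 this shift matches the switch above: 1, 2, 4, 8, 16.
    const uint32_t user_floor = 1U << setting_index;
    // The guest encodes its requested anisotropy as a power of two.
    const uint32_t guest_value = 1U << tsc_max_anisotropy;
    // The setting only ever raises the level; it never lowers what the game
    // configured for the sampler itself.
    return static_cast<float>(std::max(guest_value, user_floor));
}

For example, with the 16x setting a sampler that asked for 2x ends up at 16.0f, while the Default setting leaves the guest-provided value untouched.
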
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index b841e63fa..d34b47b3f 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -42,6 +42,9 @@ add_executable(yuzu
42 configuration/configure_graphics.cpp 42 configuration/configure_graphics.cpp
43 configuration/configure_graphics.h 43 configuration/configure_graphics.h
44 configuration/configure_graphics.ui 44 configuration/configure_graphics.ui
45 configuration/configure_graphics_advanced.cpp
46 configuration/configure_graphics_advanced.h
47 configuration/configure_graphics_advanced.ui
45 configuration/configure_hotkeys.cpp 48 configuration/configure_hotkeys.cpp
46 configuration/configure_hotkeys.h 49 configuration/configure_hotkeys.h
47 configuration/configure_hotkeys.ui 50 configuration/configure_hotkeys.ui
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index d0f574147..3b9ab38dd 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -539,7 +539,7 @@ void Config::ReadDebuggingValues() {
539void Config::ReadServiceValues() { 539void Config::ReadServiceValues() {
540 qt_config->beginGroup(QStringLiteral("Services")); 540 qt_config->beginGroup(QStringLiteral("Services"));
541 Settings::values.bcat_backend = 541 Settings::values.bcat_backend =
542 ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("boxcat")) 542 ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("null"))
543 .toString() 543 .toString()
544 .toStdString(); 544 .toStdString();
545 Settings::values.bcat_boxcat_local = 545 Settings::values.bcat_boxcat_local =
@@ -631,6 +631,7 @@ void Config::ReadRendererValues() {
631 Settings::values.resolution_factor = 631 Settings::values.resolution_factor =
632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); 632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
633 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); 633 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
634 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
634 Settings::values.use_frame_limit = 635 Settings::values.use_frame_limit =
635 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 636 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
636 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 637 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
@@ -681,6 +682,8 @@ void Config::ReadSystemValues() {
681 682
682 Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); 683 Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt();
683 684
685 Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
686
684 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); 687 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
685 if (rng_seed_enabled) { 688 if (rng_seed_enabled) {
686 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); 689 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -697,6 +700,8 @@ void Config::ReadSystemValues() {
697 Settings::values.custom_rtc = std::nullopt; 700 Settings::values.custom_rtc = std::nullopt;
698 } 701 }
699 702
703 Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt();
704
700 qt_config->endGroup(); 705 qt_config->endGroup();
701} 706}
702 707
@@ -1067,6 +1072,7 @@ void Config::SaveRendererValues() {
1067 WriteSetting(QStringLiteral("resolution_factor"), 1072 WriteSetting(QStringLiteral("resolution_factor"),
1068 static_cast<double>(Settings::values.resolution_factor), 1.0); 1073 static_cast<double>(Settings::values.resolution_factor), 1.0);
1069 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); 1074 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1075 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
1070 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1076 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
1071 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); 1077 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
1072 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, 1078 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
@@ -1112,6 +1118,7 @@ void Config::SaveSystemValues() {
1112 WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); 1118 WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false);
1113 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); 1119 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
1114 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); 1120 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
1121 WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
1115 1122
1116 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); 1123 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
1117 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); 1124 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
@@ -1123,6 +1130,8 @@ void Config::SaveSystemValues() {
1123 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), 1130 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
1124 0); 1131 0);
1125 1132
1133 WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1);
1134
1126 qt_config->endGroup(); 1135 qt_config->endGroup();
1127} 1136}
1128 1137
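
The three new keys are persisted through the same ReadSetting/WriteSetting wrappers as the existing options, which ultimately go through QSettings. Below is a minimal sketch of reading them back with plain QSettings, assuming the groups and defaults shown above; NewValues and ReadNewValues are hypothetical names used only for illustration, and the real code path stays inside the Config class.

// Sketch of reading the new keys from qt-config.ini with plain QSettings.
#include <QSettings>
#include <QString>

struct NewValues {
    int max_anisotropy; // Renderer group, default 0 ("Default")
    int region_index;   // System group, default 1
    int sound_index;    // System group, default 1
};

NewValues ReadNewValues(QSettings& qt_config) {
    NewValues values{};

    qt_config.beginGroup(QStringLiteral("Renderer"));
    values.max_anisotropy = qt_config.value(QStringLiteral("max_anisotropy"), 0).toInt();
    qt_config.endGroup();

    qt_config.beginGroup(QStringLiteral("System"));
    values.region_index = qt_config.value(QStringLiteral("region_index"), 1).toInt();
    values.sound_index = qt_config.value(QStringLiteral("sound_index"), 1).toInt();
    qt_config.endGroup();

    return values;
}
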
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui
index 67b990f1a..9aec1bd09 100644
--- a/src/yuzu/configuration/configure.ui
+++ b/src/yuzu/configuration/configure.ui
@@ -83,6 +83,11 @@
83 <string>Graphics</string> 83 <string>Graphics</string>
84 </attribute> 84 </attribute>
85 </widget> 85 </widget>
86 <widget class="ConfigureGraphicsAdvanced" name="graphicsAdvancedTab">
87 <attribute name="title">
88 <string>GraphicsAdvanced</string>
89 </attribute>
90 </widget>
86 <widget class="ConfigureAudio" name="audioTab"> 91 <widget class="ConfigureAudio" name="audioTab">
87 <attribute name="title"> 92 <attribute name="title">
88 <string>Audio</string> 93 <string>Audio</string>
@@ -160,6 +165,12 @@
160 <container>1</container> 165 <container>1</container>
161 </customwidget> 166 </customwidget>
162 <customwidget> 167 <customwidget>
168 <class>ConfigureGraphicsAdvanced</class>
169 <extends>QWidget</extends>
170 <header>configuration/configure_graphics_advanced.h</header>
171 <container>1</container>
172 </customwidget>
173 <customwidget>
163 <class>ConfigureWeb</class> 174 <class>ConfigureWeb</class>
164 <extends>QWidget</extends> 175 <extends>QWidget</extends>
165 <header>configuration/configure_web.h</header> 176 <header>configuration/configure_web.h</header>
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index db3b19352..df4473b46 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -41,6 +41,7 @@ void ConfigureDialog::ApplyConfiguration() {
41 ui->inputTab->ApplyConfiguration(); 41 ui->inputTab->ApplyConfiguration();
42 ui->hotkeysTab->ApplyConfiguration(registry); 42 ui->hotkeysTab->ApplyConfiguration(registry);
43 ui->graphicsTab->ApplyConfiguration(); 43 ui->graphicsTab->ApplyConfiguration();
44 ui->graphicsAdvancedTab->ApplyConfiguration();
44 ui->audioTab->ApplyConfiguration(); 45 ui->audioTab->ApplyConfiguration();
45 ui->debugTab->ApplyConfiguration(); 46 ui->debugTab->ApplyConfiguration();
46 ui->webTab->ApplyConfiguration(); 47 ui->webTab->ApplyConfiguration();
@@ -76,7 +77,7 @@ void ConfigureDialog::PopulateSelectionList() {
76 const std::array<std::pair<QString, QList<QWidget*>>, 5> items{ 77 const std::array<std::pair<QString, QList<QWidget*>>, 5> items{
77 {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}}, 78 {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}},
78 {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, 79 {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}},
79 {tr("Graphics"), {ui->graphicsTab}}, 80 {tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}},
80 {tr("Audio"), {ui->audioTab}}, 81 {tr("Audio"), {ui->audioTab}},
81 {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}}, 82 {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}},
82 }; 83 };
@@ -105,6 +106,7 @@ void ConfigureDialog::UpdateVisibleTabs() {
105 {ui->inputTab, tr("Input")}, 106 {ui->inputTab, tr("Input")},
106 {ui->hotkeysTab, tr("Hotkeys")}, 107 {ui->hotkeysTab, tr("Hotkeys")},
107 {ui->graphicsTab, tr("Graphics")}, 108 {ui->graphicsTab, tr("Graphics")},
109 {ui->graphicsAdvancedTab, tr("Advanced")},
108 {ui->audioTab, tr("Audio")}, 110 {ui->audioTab, tr("Audio")},
109 {ui->debugTab, tr("Debug")}, 111 {ui->debugTab, tr("Debug")},
110 {ui->webTab, tr("Web")}, 112 {ui->webTab, tr("Web")},
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index fe64c7d81..a821c7b3c 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -100,13 +100,8 @@ void ConfigureGraphics::SetConfiguration() {
100 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); 100 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
101 ui->use_disk_shader_cache->setEnabled(runtime_lock); 101 ui->use_disk_shader_cache->setEnabled(runtime_lock);
102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
103 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
104 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); 103 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
105 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); 104 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
106 ui->use_vsync->setEnabled(runtime_lock);
107 ui->use_vsync->setChecked(Settings::values.use_vsync);
108 ui->force_30fps_mode->setEnabled(runtime_lock);
109 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
110 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 105 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
111 Settings::values.bg_blue)); 106 Settings::values.bg_blue));
112 UpdateDeviceComboBox(); 107 UpdateDeviceComboBox();
@@ -119,11 +114,8 @@ void ConfigureGraphics::ApplyConfiguration() {
119 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 114 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
120 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); 115 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
121 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 116 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
122 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
123 Settings::values.use_asynchronous_gpu_emulation = 117 Settings::values.use_asynchronous_gpu_emulation =
124 ui->use_asynchronous_gpu_emulation->isChecked(); 118 ui->use_asynchronous_gpu_emulation->isChecked();
125 Settings::values.use_vsync = ui->use_vsync->isChecked();
126 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
127 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 119 Settings::values.bg_red = static_cast<float>(bg_color.redF());
128 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 120 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
129 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 121 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 9acc7dd93..c816d6108 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,30 +85,6 @@
85 </widget> 85 </widget>
86 </item> 86 </item>
87 <item> 87 <item>
88 <widget class="QCheckBox" name="use_vsync">
89 <property name="toolTip">
90 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
91 </property>
92 <property name="text">
93 <string>Use VSync (OpenGL only)</string>
94 </property>
95 </widget>
96 </item>
97 <item>
98 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
99 <property name="text">
100 <string>Use accurate GPU emulation (slow)</string>
101 </property>
102 </widget>
103 </item>
104 <item>
105 <widget class="QCheckBox" name="force_30fps_mode">
106 <property name="text">
107 <string>Force 30 FPS mode</string>
108 </property>
109 </widget>
110 </item>
111 <item>
112 <layout class="QHBoxLayout" name="horizontalLayout_2"> 88 <layout class="QHBoxLayout" name="horizontalLayout_2">
113 <item> 89 <item>
114 <widget class="QLabel" name="label"> 90 <widget class="QLabel" name="label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
new file mode 100644
index 000000000..b9f429f84
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/settings.h"
7#include "ui_configure_graphics_advanced.h"
8#include "yuzu/configuration/configure_graphics_advanced.h"
9
10ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
11 : QWidget(parent), ui(new Ui::ConfigureGraphicsAdvanced) {
12
13 ui->setupUi(this);
14
15 SetConfiguration();
16}
17
18ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
19
20void ConfigureGraphicsAdvanced::SetConfiguration() {
21 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
22 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
23 ui->use_vsync->setEnabled(runtime_lock);
24 ui->use_vsync->setChecked(Settings::values.use_vsync);
25 ui->force_30fps_mode->setEnabled(runtime_lock);
26 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
27 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
28 ui->anisotropic_filtering_combobox->setCurrentIndex(Settings::values.max_anisotropy);
29}
30
31void ConfigureGraphicsAdvanced::ApplyConfiguration() {
32 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
33 Settings::values.use_vsync = ui->use_vsync->isChecked();
34 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
35 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
36}
37
38void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
39 if (event->type() == QEvent::LanguageChange) {
40 RetranslateUI();
41 }
42
43 QWidget::changeEvent(event);
44}
45
46void ConfigureGraphicsAdvanced::RetranslateUI() {
47 ui->retranslateUi(this);
48}
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
new file mode 100644
index 000000000..bbc9d4355
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -0,0 +1,30 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureGraphicsAdvanced;
12}
13
14class ConfigureGraphicsAdvanced : public QWidget {
15 Q_OBJECT
16
17public:
18 explicit ConfigureGraphicsAdvanced(QWidget* parent = nullptr);
19 ~ConfigureGraphicsAdvanced() override;
20
21 void ApplyConfiguration();
22
23private:
24 void changeEvent(QEvent* event) override;
25 void RetranslateUI();
26
27 void SetConfiguration();
28
29 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
30};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
new file mode 100644
index 000000000..42eec278e
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -0,0 +1,111 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureGraphicsAdvanced</class>
4 <widget class="QWidget" name="ConfigureGraphicsAdvanced">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>321</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Form</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout_1">
17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout_2">
19 <item>
20 <widget class="QGroupBox" name="groupBox_1">
21 <property name="title">
22 <string>Advanced Graphics Settings</string>
23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_3">
25 <item>
26 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
27 <property name="text">
28 <string>Use accurate GPU emulation (slow)</string>
29 </property>
30 </widget>
31 </item>
32 <item>
33 <widget class="QCheckBox" name="use_vsync">
34 <property name="toolTip">
35 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
36 </property>
37 <property name="text">
38 <string>Use VSync (OpenGL only)</string>
39 </property>
40 </widget>
41 </item>
42 <item>
43 <widget class="QCheckBox" name="force_30fps_mode">
44 <property name="text">
45 <string>Force 30 FPS mode</string>
46 </property>
47 </widget>
48 </item>
49 <item>
50 <layout class="QHBoxLayout" name="horizontalLayout_1">
51 <item>
52 <widget class="QLabel" name="af_label">
53 <property name="text">
54 <string>Anisotropic Filtering:</string>
55 </property>
56 </widget>
57 </item>
58 <item>
59 <widget class="QComboBox" name="anisotropic_filtering_combobox">
60 <item>
61 <property name="text">
62 <string>Default</string>
63 </property>
64 </item>
65 <item>
66 <property name="text">
67 <string>2x</string>
68 </property>
69 </item>
70 <item>
71 <property name="text">
72 <string>4x</string>
73 </property>
74 </item>
75 <item>
76 <property name="text">
77 <string>8x</string>
78 </property>
79 </item>
80 <item>
81 <property name="text">
82 <string>16x</string>
83 </property>
84 </item>
85 </widget>
86 </item>
87 </layout>
88 </item>
89 </layout>
90 </widget>
91 </item>
92 </layout>
93 </item>
94 <item>
95 <spacer name="verticalSpacer">
96 <property name="orientation">
97 <enum>Qt::Vertical</enum>
98 </property>
99 <property name="sizeHint" stdset="0">
100 <size>
101 <width>20</width>
102 <height>40</height>
103 </size>
104 </property>
105 </spacer>
106 </item>
107 </layout>
108 </widget>
109 <resources/>
110 <connections/>
111</ui>
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index e1b52f8d9..f49cd4c8f 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -56,6 +56,8 @@ void ConfigureSystem::SetConfiguration() {
56 enabled = !Core::System::GetInstance().IsPoweredOn(); 56 enabled = !Core::System::GetInstance().IsPoweredOn();
57 57
58 ui->combo_language->setCurrentIndex(Settings::values.language_index); 58 ui->combo_language->setCurrentIndex(Settings::values.language_index);
59 ui->combo_region->setCurrentIndex(Settings::values.region_index);
60 ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
59 61
60 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); 62 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
61 ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); 63 ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value());
@@ -81,6 +83,8 @@ void ConfigureSystem::ApplyConfiguration() {
81 } 83 }
82 84
83 Settings::values.language_index = ui->combo_language->currentIndex(); 85 Settings::values.language_index = ui->combo_language->currentIndex();
86 Settings::values.region_index = ui->combo_region->currentIndex();
87 Settings::values.sound_index = ui->combo_sound->currentIndex();
84 88
85 if (ui->rng_seed_checkbox->isChecked()) { 89 if (ui->rng_seed_checkbox->isChecked()) {
86 Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); 90 Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16);
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index 1eab3781d..d8fa2d2cc 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -36,5 +36,6 @@ private:
36 bool enabled = false; 36 bool enabled = false;
37 37
38 int language_index = 0; 38 int language_index = 0;
39 int region_index = 0;
39 int sound_index = 0; 40 int sound_index = 0;
40}; 41};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 65745a2f8..4e2c7e76e 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -22,14 +22,14 @@
22 <string>System Settings</string> 22 <string>System Settings</string>
23 </property> 23 </property>
24 <layout class="QGridLayout" name="gridLayout"> 24 <layout class="QGridLayout" name="gridLayout">
25 <item row="1" column="0"> 25 <item row="2" column="0">
26 <widget class="QLabel" name="label_sound"> 26 <widget class="QLabel" name="label_sound">
27 <property name="text"> 27 <property name="text">
28 <string>Sound output mode</string> 28 <string>Sound output mode</string>
29 </property> 29 </property>
30 </widget> 30 </widget>
31 </item> 31 </item>
32 <item row="2" column="0"> 32 <item row="3" column="0">
33 <widget class="QLabel" name="label_console_id"> 33 <widget class="QLabel" name="label_console_id">
34 <property name="text"> 34 <property name="text">
35 <string>Console ID:</string> 35 <string>Console ID:</string>
@@ -128,14 +128,60 @@
128 </item> 128 </item>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item row="4" column="0"> 131 <item row="1" column="0">
132 <widget class="QLabel" name="label_region">
133 <property name="text">
134 <string>Region:</string>
135 </property>
136 </widget>
137 </item>
138 <item row="1" column="1">
139 <widget class="QComboBox" name="combo_region">
140 <item>
141 <property name="text">
142 <string>Japan</string>
143 </property>
144 </item>
145 <item>
146 <property name="text">
147 <string>USA</string>
148 </property>
149 </item>
150 <item>
151 <property name="text">
152 <string>Europe</string>
153 </property>
154 </item>
155 <item>
156 <property name="text">
157 <string>Australia</string>
158 </property>
159 </item>
160 <item>
161 <property name="text">
162 <string>China</string>
163 </property>
164 </item>
165 <item>
166 <property name="text">
167 <string>Korea</string>
168 </property>
169 </item>
170 <item>
171 <property name="text">
172 <string>Taiwan</string>
173 </property>
174 </item>
175 </widget>
176 </item>
177 <item row="5" column="0">
132 <widget class="QCheckBox" name="rng_seed_checkbox"> 178 <widget class="QCheckBox" name="rng_seed_checkbox">
133 <property name="text"> 179 <property name="text">
134 <string>RNG Seed</string> 180 <string>RNG Seed</string>
135 </property> 181 </property>
136 </widget> 182 </widget>
137 </item> 183 </item>
138 <item row="1" column="1"> 184 <item row="2" column="1">
139 <widget class="QComboBox" name="combo_sound"> 185 <widget class="QComboBox" name="combo_sound">
140 <item> 186 <item>
141 <property name="text"> 187 <property name="text">
@@ -161,7 +207,7 @@
161 </property> 207 </property>
162 </widget> 208 </widget>
163 </item> 209 </item>
164 <item row="2" column="1"> 210 <item row="3" column="1">
165 <widget class="QPushButton" name="button_regenerate_console_id"> 211 <widget class="QPushButton" name="button_regenerate_console_id">
166 <property name="sizePolicy"> 212 <property name="sizePolicy">
167 <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> 213 <sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -177,14 +223,14 @@
177 </property> 223 </property>
178 </widget> 224 </widget>
179 </item> 225 </item>
180 <item row="3" column="0"> 226 <item row="4" column="0">
181 <widget class="QCheckBox" name="custom_rtc_checkbox"> 227 <widget class="QCheckBox" name="custom_rtc_checkbox">
182 <property name="text"> 228 <property name="text">
183 <string>Custom RTC</string> 229 <string>Custom RTC</string>
184 </property> 230 </property>
185 </widget> 231 </widget>
186 </item> 232 </item>
187 <item row="3" column="1"> 233 <item row="4" column="1">
188 <widget class="QDateTimeEdit" name="custom_rtc_edit"> 234 <widget class="QDateTimeEdit" name="custom_rtc_edit">
189 <property name="minimumDate"> 235 <property name="minimumDate">
190 <date> 236 <date>
@@ -198,7 +244,7 @@
198 </property> 244 </property>
199 </widget> 245 </widget>
200 </item> 246 </item>
201 <item row="4" column="1"> 247 <item row="5" column="1">
202 <widget class="QLineEdit" name="rng_seed_edit"> 248 <widget class="QLineEdit" name="rng_seed_edit">
203 <property name="sizePolicy"> 249 <property name="sizePolicy">
204 <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> 250 <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 3f1a94627..c1ea25fb8 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -116,7 +116,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
116 116
117 constexpr std::size_t BaseRegister = 29; 117 constexpr std::size_t BaseRegister = 29;
118 auto& memory = Core::System::GetInstance().Memory(); 118 auto& memory = Core::System::GetInstance().Memory();
119 u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; 119 u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister];
120 120
121 while (base_pointer != 0) { 121 while (base_pointer != 0) {
122 const u64 lr = memory.Read64(base_pointer + sizeof(u64)); 122 const u64 lr = memory.Read64(base_pointer + sizeof(u64));
@@ -240,7 +240,7 @@ QString WaitTreeThread::GetText() const {
240 break; 240 break;
241 } 241 }
242 242
243 const auto& context = thread.GetContext(); 243 const auto& context = thread.GetContext64();
244 const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") 244 const QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
245 .arg(context.pc, 8, 16, QLatin1Char{'0'}) 245 .arg(context.pc, 8, 16, QLatin1Char{'0'})
246 .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); 246 .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'});
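
The callstack view now takes the frame pointer from the AArch64 context explicitly via GetContext64 and then follows the saved frame records through guest memory. Below is a rough sketch of that walk, assuming the standard AArch64 frame record layout where x29 points at a pair of {previous frame pointer, saved link register}; WalkCallstack and ReadWord are hypothetical stand-ins, with ReadWord playing the role of Memory::Read64.

// Sketch of a frame-pointer walk over guest memory, under the layout
// assumption stated above.
#include <cstdint>
#include <functional>
#include <vector>

std::vector<uint64_t> WalkCallstack(uint64_t frame_pointer,
                                    const std::function<uint64_t(uint64_t)>& ReadWord) {
    std::vector<uint64_t> return_addresses;
    while (frame_pointer != 0) {
        // The saved link register sits one word above the saved frame pointer.
        const uint64_t lr = ReadWord(frame_pointer + sizeof(uint64_t));
        if (lr == 0) {
            break;
        }
        return_addresses.push_back(lr);
        // Follow the chain to the caller's frame record.
        frame_pointer = ReadWord(frame_pointer);
    }
    return return_addresses;
}
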
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 4f2bfab48..2a6483370 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"(
34QProgressBar {} 34QProgressBar {}
35QProgressBar::chunk {})"; 35QProgressBar::chunk {})";
36 36
37constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"(
38QProgressBar {
39 background-color: black;
40 border: 2px solid white;
41 border-radius: 4px;
42 padding: 2px;
43}
44QProgressBar::chunk {
45 background-color: #0ab9e6;
46 width: 1px;
47})";
48
49constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( 37constexpr char PROGRESSBAR_STYLE_BUILD[] = R"(
50QProgressBar { 38QProgressBar {
51 background-color: black; 39 background-color: black;
@@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent)
100 88
101 stage_translations = { 89 stage_translations = {
102 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, 90 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")},
103 {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")},
104 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, 91 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")},
105 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, 92 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")},
106 }; 93 };
107 progressbar_style = { 94 progressbar_style = {
108 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, 95 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE},
109 {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE},
110 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, 96 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD},
111 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, 97 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE},
112 }; 98 };
@@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
192 } 178 }
193 179
194 // update labels and progress bar 180 // update labels and progress bar
195 if (stage == VideoCore::LoadCallbackStage::Decompile || 181 if (stage == VideoCore::LoadCallbackStage::Build) {
196 stage == VideoCore::LoadCallbackStage::Build) {
197 ui->stage->setText(stage_translations[stage].arg(value).arg(total)); 182 ui->stage->setText(stage_translations[stage].arg(value).arg(total));
198 } else { 183 } else {
199 ui->stage->setText(stage_translations[stage]); 184 ui->stage->setText(stage_translations[stage]);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index b77c12baf..f4cd905c9 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -381,6 +381,8 @@ void Config::ReadValues() {
381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
382 Settings::values.aspect_ratio = 382 Settings::values.aspect_ratio =
383 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 383 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
384 Settings::values.max_anisotropy =
385 static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0));
384 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 386 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
385 Settings::values.frame_limit = 387 Settings::values.frame_limit =
386 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -450,7 +452,7 @@ void Config::ReadValues() {
450 Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", ""); 452 Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", "");
451 453
452 // Services 454 // Services
453 Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "boxcat"); 455 Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "null");
454 Settings::values.bcat_boxcat_local = 456 Settings::values.bcat_boxcat_local =
455 sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false); 457 sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false);
456} 458}
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index df7473858..d63d7a58e 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -84,7 +84,7 @@ touch_device=
84# from any cemuhook compatible motion program. 84# from any cemuhook compatible motion program.
85 85
86# IPv4 address of the udp input server (Default "127.0.0.1") 86# IPv4 address of the udp input server (Default "127.0.0.1")
87udp_input_address= 87udp_input_address=127.0.0.1
88 88
89# Port of the udp input server. (Default 26760) 89# Port of the udp input server. (Default 26760)
90udp_input_port= 90udp_input_port=
@@ -126,6 +126,10 @@ resolution_factor =
126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
127aspect_ratio = 127aspect_ratio =
128 128
129# Anisotropic filtering
130# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
131max_anisotropy =
132
129# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 133# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
130# 0 (default): Off, 1: On 134# 0 (default): Off, 1: On
131use_vsync = 135use_vsync =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 0ac93b62a..ee2591c8f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -120,6 +120,8 @@ void Config::ReadValues() {
120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
121 Settings::values.aspect_ratio = 121 Settings::values.aspect_ratio =
122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
123 Settings::values.max_anisotropy =
124 static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0));
123 Settings::values.use_frame_limit = false; 125 Settings::values.use_frame_limit = false;
124 Settings::values.frame_limit = 100; 126 Settings::values.frame_limit = 100;
125 Settings::values.use_disk_shader_cache = 127 Settings::values.use_disk_shader_cache =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 8d93f7b88..ca203b64d 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -30,6 +30,10 @@ resolution_factor =
30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
31aspect_ratio = 31aspect_ratio =
32 32
33# Anisotropic filtering
34# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
35max_anisotropy =
36
33# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 37# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
34# 0 (default): Off, 1: On 38# 0 (default): Off, 1: On
35use_vsync = 39use_vsync =