summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/core/hle/kernel/svc.cpp 3
-rw-r--r-- src/core/hle/service/nvdrv/interface.cpp 18
-rw-r--r-- src/core/hle/service/nvdrv/interface.h 2
-rw-r--r-- src/core/hle/service/sm/sm.cpp 2
-rw-r--r-- src/core/hle/service/sm/sm.h 2
-rw-r--r-- src/video_core/CMakeLists.txt 3
-rw-r--r-- src/video_core/command_processor.cpp 3
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h 36
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 5
-rw-r--r-- src/video_core/engines/shader_bytecode.h 19
-rw-r--r-- src/video_core/engines/shader_header.h 11
-rw-r--r-- src/video_core/memory_manager.cpp 7
-rw-r--r-- src/video_core/memory_manager.h 3
-rw-r--r-- src/video_core/morton.cpp 353
-rw-r--r-- src/video_core/morton.h 21
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 36
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 193
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 444
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 14
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 9
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 57
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 10
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 2
-rw-r--r-- src/video_core/utils.h 164
31 files changed, 940 insertions, 519 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index b8b6b4d49..f287f7c97 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -671,7 +671,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
671 break; 671 break;
672 } 672 }
673 default: 673 default:
674 UNIMPLEMENTED(); 674 LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
675 return ERR_INVALID_ENUM_VALUE;
675 } 676 }
676 677
677 return RESULT_SUCCESS; 678 return RESULT_SUCCESS;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index ac3859353..602086eed 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -88,6 +88,20 @@ void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) {
88 rb.Push(RESULT_SUCCESS); 88 rb.Push(RESULT_SUCCESS);
89} 89}
90 90
91void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
92 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
93 IPC::ResponseBuilder rb{ctx, 2};
94 rb.Push(RESULT_SUCCESS);
95}
96
97void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
98 // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
99 // retail hardware.
100 LOG_DEBUG(Service_NVDRV, "called");
101 IPC::ResponseBuilder rb{ctx, 2};
102 rb.Push(RESULT_SUCCESS);
103}
104
91NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) 105NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
92 : ServiceFramework(name), nvdrv(std::move(nvdrv)) { 106 : ServiceFramework(name), nvdrv(std::move(nvdrv)) {
93 static const FunctionInfo functions[] = { 107 static const FunctionInfo functions[] = {
@@ -97,10 +111,10 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
97 {3, &NVDRV::Initialize, "Initialize"}, 111 {3, &NVDRV::Initialize, "Initialize"},
98 {4, &NVDRV::QueryEvent, "QueryEvent"}, 112 {4, &NVDRV::QueryEvent, "QueryEvent"},
99 {5, nullptr, "MapSharedMem"}, 113 {5, nullptr, "MapSharedMem"},
100 {6, nullptr, "GetStatus"}, 114 {6, &NVDRV::GetStatus, "GetStatus"},
101 {7, nullptr, "ForceSetClientPID"}, 115 {7, nullptr, "ForceSetClientPID"},
102 {8, &NVDRV::SetClientPID, "SetClientPID"}, 116 {8, &NVDRV::SetClientPID, "SetClientPID"},
103 {9, nullptr, "DumpGraphicsMemoryInfo"}, 117 {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
104 {10, nullptr, "InitializeDevtools"}, 118 {10, nullptr, "InitializeDevtools"},
105 {11, &NVDRV::Ioctl, "Ioctl2"}, 119 {11, &NVDRV::Ioctl, "Ioctl2"},
106 {12, nullptr, "Ioctl3"}, 120 {12, nullptr, "Ioctl3"},
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index d340893c2..5a1e4baa7 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -24,6 +24,8 @@ private:
24 void QueryEvent(Kernel::HLERequestContext& ctx); 24 void QueryEvent(Kernel::HLERequestContext& ctx);
25 void SetClientPID(Kernel::HLERequestContext& ctx); 25 void SetClientPID(Kernel::HLERequestContext& ctx);
26 void FinishInitialize(Kernel::HLERequestContext& ctx); 26 void FinishInitialize(Kernel::HLERequestContext& ctx);
27 void GetStatus(Kernel::HLERequestContext& ctx);
28 void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx);
27 29
28 std::shared_ptr<Module> nvdrv; 30 std::shared_ptr<Module> nvdrv;
29 31
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index c1b2f33b9..9ca8483a5 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -63,7 +63,7 @@ ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> ServiceManager::RegisterService
63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)); 63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port));
64} 64}
65 65
66ResultCode ServiceManager::UnregisterService(std::string name) { 66ResultCode ServiceManager::UnregisterService(const std::string& name) {
67 CASCADE_CODE(ValidateServiceName(name)); 67 CASCADE_CODE(ValidateServiceName(name));
68 68
69 const auto iter = registered_services.find(name); 69 const auto iter = registered_services.find(name);
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index c4714b3e3..bef25433e 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -50,7 +50,7 @@ public:
50 50
51 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name, 51 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name,
52 unsigned int max_sessions); 52 unsigned int max_sessions);
53 ResultCode UnregisterService(std::string name); 53 ResultCode UnregisterService(const std::string& name);
54 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name); 54 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name);
55 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name); 55 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name);
56 56
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a780215c1..3f906a517 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(video_core STATIC
21 macro_interpreter.h 21 macro_interpreter.h
22 memory_manager.cpp 22 memory_manager.cpp
23 memory_manager.h 23 memory_manager.h
24 morton.cpp
25 morton.h
24 rasterizer_cache.cpp 26 rasterizer_cache.cpp
25 rasterizer_cache.h 27 rasterizer_cache.h
26 rasterizer_interface.h 28 rasterizer_interface.h
@@ -62,7 +64,6 @@ add_library(video_core STATIC
62 textures/decoders.cpp 64 textures/decoders.cpp
63 textures/decoders.h 65 textures/decoders.h
64 textures/texture.h 66 textures/texture.h
65 utils.h
66 video_core.cpp 67 video_core.cpp
67 video_core.h 68 video_core.h
68) 69)
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 28e8c13aa..8b9c548cc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { 34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
35 MICROPROFILE_SCOPE(ProcessCommandLists); 35 MICROPROFILE_SCOPE(ProcessCommandLists);
36 36
37 // On entering GPU code, assume all memory may be touched by the ARM core.
38 maxwell_3d->dirty_flags.OnMemoryWrite();
39
37 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { 40 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
38 LOG_TRACE(HW_GPU, 41 LOG_TRACE(HW_GPU,
39 "Processing method {:08X} on subchannel {} value " 42 "Processing method {:08X} on subchannel {} value "
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 74e44c7fe..8d0700d13 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,8 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
9 11
@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
47 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
48 50
49 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54
50 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
51 // We have to invalidate the destination region to evict any outdated surfaces from the 56 // We have to invalidate the destination region to evict any outdated surfaces from the
52 // cache. We do this before actually writing the new data because the destination address 57 // cache. We do this before actually writing the new data because the destination address
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 585290d9f..2adbc9eaf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -3,8 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/core.h"
6#include "core/memory.h" 7#include "core/memory.h"
7#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
9 11
10namespace Tegra::Engines { 12namespace Tegra::Engines {
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
47 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 49 rasterizer.InvalidateRegion(dest_address, sizeof(u32));
48 50
49 Memory::Write32(dest_address, data); 51 Memory::Write32(dest_address, data);
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
50 53
51 state.write_offset++; 54 state.write_offset++;
52} 55}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2bc534be3..f0a5470b9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
135 135
136 if (regs.reg_array[method] != value) { 136 if (regs.reg_array[method] != value) {
137 regs.reg_array[method] = value; 137 regs.reg_array[method] = value;
138 // Vertex format
138 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 139 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
139 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { 140 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
140 dirty_flags.vertex_attrib_format = true; 141 dirty_flags.vertex_attrib_format = true;
141 } 142 }
143
144 // Vertex buffer
145 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
146 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
147 dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
148 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
149 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
150 dirty_flags.vertex_array |=
151 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
152 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
153 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
154 dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
155 }
142 } 156 }
143 157
144 switch (method) { 158 switch (method) {
@@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
270 query_result.timestamp = CoreTiming::GetTicks(); 284 query_result.timestamp = CoreTiming::GetTicks();
271 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 285 Memory::WriteBlock(*address, &query_result, sizeof(query_result));
272 } 286 }
287 dirty_flags.OnMemoryWrite();
273 break; 288 break;
274 } 289 }
275 default: 290 default:
@@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
346 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 361 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
347 362
348 Memory::Write32(*address, value); 363 Memory::Write32(*address, value);
364 dirty_flags.OnMemoryWrite();
349 365
350 // Increment the current buffer position. 366 // Increment the current buffer position.
351 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 367 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index c5dcdeb31..9324d9710 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -590,10 +590,18 @@ public:
590 590
591 float clear_color[4]; 591 float clear_color[4];
592 float clear_depth; 592 float clear_depth;
593
593 INSERT_PADDING_WORDS(0x3); 594 INSERT_PADDING_WORDS(0x3);
595
594 s32 clear_stencil; 596 s32 clear_stencil;
595 597
596 INSERT_PADDING_WORDS(0x17); 598 INSERT_PADDING_WORDS(0x7);
599
600 u32 polygon_offset_point_enable;
601 u32 polygon_offset_line_enable;
602 u32 polygon_offset_fill_enable;
603
604 INSERT_PADDING_WORDS(0xD);
597 605
598 std::array<ScissorTest, NumViewports> scissor_test; 606 std::array<ScissorTest, NumViewports> scissor_test;
599 607
@@ -728,6 +736,7 @@ public:
728 u32 frag_color_clamp; 736 u32 frag_color_clamp;
729 737
730 union { 738 union {
739 BitField<0, 1, u32> y_negate;
731 BitField<4, 1, u32> triangle_rast_flip; 740 BitField<4, 1, u32> triangle_rast_flip;
732 } screen_y_control; 741 } screen_y_control;
733 742
@@ -761,7 +770,11 @@ public:
761 } 770 }
762 } tsc; 771 } tsc;
763 772
764 INSERT_PADDING_WORDS(0x3); 773 INSERT_PADDING_WORDS(0x1);
774
775 float polygon_offset_factor;
776
777 INSERT_PADDING_WORDS(0x1);
765 778
766 struct { 779 struct {
767 u32 tic_address_high; 780 u32 tic_address_high;
@@ -786,7 +799,9 @@ public:
786 799
787 u32 framebuffer_srgb; 800 u32 framebuffer_srgb;
788 801
789 INSERT_PADDING_WORDS(0x12); 802 float polygon_offset_units;
803
804 INSERT_PADDING_WORDS(0x11);
790 805
791 union { 806 union {
792 BitField<2, 1, u32> coord_origin; 807 BitField<2, 1, u32> coord_origin;
@@ -863,7 +878,9 @@ public:
863 878
864 INSERT_PADDING_WORDS(0x7); 879 INSERT_PADDING_WORDS(0x7);
865 880
866 INSERT_PADDING_WORDS(0x20); 881 INSERT_PADDING_WORDS(0x1F);
882
883 float polygon_offset_clamp;
867 884
868 struct { 885 struct {
869 u32 is_instanced[NumVertexArrays]; 886 u32 is_instanced[NumVertexArrays];
@@ -1050,6 +1067,11 @@ public:
1050 1067
1051 struct DirtyFlags { 1068 struct DirtyFlags {
1052 bool vertex_attrib_format = true; 1069 bool vertex_attrib_format = true;
1070 u32 vertex_array = 0xFFFFFFFF;
1071
1072 void OnMemoryWrite() {
1073 vertex_array = 0xFFFFFFFF;
1074 }
1053 }; 1075 };
1054 1076
1055 DirtyFlags dirty_flags; 1077 DirtyFlags dirty_flags;
@@ -1142,6 +1164,9 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
1142ASSERT_REG_POSITION(clear_color[0], 0x360); 1164ASSERT_REG_POSITION(clear_color[0], 0x360);
1143ASSERT_REG_POSITION(clear_depth, 0x364); 1165ASSERT_REG_POSITION(clear_depth, 0x364);
1144ASSERT_REG_POSITION(clear_stencil, 0x368); 1166ASSERT_REG_POSITION(clear_stencil, 0x368);
1167ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1168ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1169ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
1145ASSERT_REG_POSITION(scissor_test, 0x380); 1170ASSERT_REG_POSITION(scissor_test, 0x380);
1146ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); 1171ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1147ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1172ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
@@ -1180,6 +1205,7 @@ ASSERT_REG_POSITION(point_size, 0x546);
1180ASSERT_REG_POSITION(zeta_enable, 0x54E); 1205ASSERT_REG_POSITION(zeta_enable, 0x54E);
1181ASSERT_REG_POSITION(multisample_control, 0x54F); 1206ASSERT_REG_POSITION(multisample_control, 0x54F);
1182ASSERT_REG_POSITION(tsc, 0x557); 1207ASSERT_REG_POSITION(tsc, 0x557);
1208ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1183ASSERT_REG_POSITION(tic, 0x55D); 1209ASSERT_REG_POSITION(tic, 0x55D);
1184ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); 1210ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
1185ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); 1211ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1187,11 +1213,13 @@ ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
1187ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); 1213ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
1188ASSERT_REG_POSITION(stencil_back_func_func, 0x569); 1214ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
1189ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); 1215ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
1216ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
1190ASSERT_REG_POSITION(point_coord_replace, 0x581); 1217ASSERT_REG_POSITION(point_coord_replace, 0x581);
1191ASSERT_REG_POSITION(code_address, 0x582); 1218ASSERT_REG_POSITION(code_address, 0x582);
1192ASSERT_REG_POSITION(draw, 0x585); 1219ASSERT_REG_POSITION(draw, 0x585);
1193ASSERT_REG_POSITION(primitive_restart, 0x591); 1220ASSERT_REG_POSITION(primitive_restart, 0x591);
1194ASSERT_REG_POSITION(index_array, 0x5F2); 1221ASSERT_REG_POSITION(index_array, 0x5F2);
1222ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1195ASSERT_REG_POSITION(instanced_arrays, 0x620); 1223ASSERT_REG_POSITION(instanced_arrays, 0x620);
1196ASSERT_REG_POSITION(cull, 0x646); 1224ASSERT_REG_POSITION(cull, 0x646);
1197ASSERT_REG_POSITION(pixel_center_integer, 0x649); 1225ASSERT_REG_POSITION(pixel_center_integer, 0x649);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index b8a78cf82..a34e884fe 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,7 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h"
6#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
54 return; 56 return;
55 } 57 }
56 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61
57 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
58 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
59 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7e8449bc4..b9faaf8e0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -82,6 +82,8 @@ union Attribute {
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
85 ClipDistances0123 = 44,
86 ClipDistances4567 = 45,
85 PointCoord = 46, 87 PointCoord = 46,
86 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex 88 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
87 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 89 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
@@ -366,6 +368,11 @@ enum class HalfPrecision : u64 {
366 FMZ = 2, 368 FMZ = 2,
367}; 369};
368 370
371enum class R2pMode : u64 {
372 Pr = 0,
373 Cc = 1,
374};
375
369enum class IpaInterpMode : u64 { 376enum class IpaInterpMode : u64 {
370 Linear = 0, 377 Linear = 0,
371 Perspective = 1, 378 Perspective = 1,
@@ -855,6 +862,12 @@ union Instruction {
855 } hsetp2; 862 } hsetp2;
856 863
857 union { 864 union {
865 BitField<40, 1, R2pMode> mode;
866 BitField<41, 2, u64> byte;
867 BitField<20, 7, u64> immediate_mask;
868 } r2p;
869
870 union {
858 BitField<39, 3, u64> pred39; 871 BitField<39, 3, u64> pred39;
859 BitField<42, 1, u64> neg_pred; 872 BitField<42, 1, u64> neg_pred;
860 BitField<43, 1, u64> neg_a; 873 BitField<43, 1, u64> neg_a;
@@ -1256,6 +1269,7 @@ public:
1256 BFE_C, 1269 BFE_C,
1257 BFE_R, 1270 BFE_R,
1258 BFE_IMM, 1271 BFE_IMM,
1272 BFI_IMM_R,
1259 BRA, 1273 BRA,
1260 PBK, 1274 PBK,
1261 LD_A, 1275 LD_A,
@@ -1381,6 +1395,7 @@ public:
1381 PSETP, 1395 PSETP,
1382 PSET, 1396 PSET,
1383 CSETP, 1397 CSETP,
1398 R2P_IMM,
1384 XMAD_IMM, 1399 XMAD_IMM,
1385 XMAD_CR, 1400 XMAD_CR,
1386 XMAD_RC, 1401 XMAD_RC,
@@ -1396,6 +1411,7 @@ public:
1396 ArithmeticHalf, 1411 ArithmeticHalf,
1397 ArithmeticHalfImmediate, 1412 ArithmeticHalfImmediate,
1398 Bfe, 1413 Bfe,
1414 Bfi,
1399 Shift, 1415 Shift,
1400 Ffma, 1416 Ffma,
1401 Hfma2, 1417 Hfma2,
@@ -1410,6 +1426,7 @@ public:
1410 HalfSetPredicate, 1426 HalfSetPredicate,
1411 PredicateSetPredicate, 1427 PredicateSetPredicate,
1412 PredicateSetRegister, 1428 PredicateSetRegister,
1429 RegisterSetPredicate,
1413 Conversion, 1430 Conversion,
1414 Xmad, 1431 Xmad,
1415 Unknown, 1432 Unknown,
@@ -1613,6 +1630,7 @@ private:
1613 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), 1630 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
1614 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), 1631 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
1615 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), 1632 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
1633 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
1616 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), 1634 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
1617 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), 1635 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
1618 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), 1636 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
@@ -1647,6 +1665,7 @@ private:
1647 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), 1665 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
1648 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 1666 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
1649 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), 1667 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
1668 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
1650 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), 1669 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
1651 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), 1670 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
1652 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), 1671 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a0e015c4b..99c34649f 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -62,7 +62,16 @@ struct Header {
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32] 63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor 64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 65 union {
66 BitField<0, 8, u16> clip_distances;
67 BitField<8, 1, u16> point_sprite_s;
68 BitField<9, 1, u16> point_sprite_t;
69 BitField<10, 1, u16> fog_coordinate;
70 BitField<12, 1, u16> tessellation_eval_point_u;
71 BitField<13, 1, u16> tessellation_eval_point_v;
72 BitField<14, 1, u16> instance_id;
73 BitField<15, 1, u16> vertex_id;
74 };
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] 75 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved 76 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA 77 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 77a20bb84..47247f097 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,6 +9,13 @@
9 9
10namespace Tegra { 10namespace Tegra {
11 11
12MemoryManager::MemoryManager() {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
15 // Undertale using 0 for a render target.
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
17}
18
12GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
13 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
14 21
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 4eb338aa2..fb03497ca 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -18,7 +18,7 @@ using GPUVAddr = u64;
18 18
19class MemoryManager final { 19class MemoryManager final {
20public: 20public:
21 MemoryManager() = default; 21 MemoryManager();
22 22
23 GPUVAddr AllocateSpace(u64 size, u64 align); 23 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
@@ -37,6 +37,7 @@ private:
37 enum class PageStatus : u64 { 37 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 38 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 39 Allocated = 0xFFFFFFFFFFFFFFFEULL,
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
40 }; 41 };
41 42
42 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
new file mode 100644
index 000000000..f14abba7d
--- /dev/null
+++ b/src/video_core/morton.cpp
@@ -0,0 +1,353 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace VideoCore {
15
16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat;
18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21
22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u8* buffer, std::size_t buffer_size, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
28 // pixel values.
29 const u32 tile_size_x{GetDefaultBlockWidth(format)};
30 const u32 tile_size_y{GetDefaultBlockHeight(format)};
31
32 if constexpr (morton_to_linear) {
33 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
34 stride, height, depth, block_height, block_depth);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
39 buffer, false, block_height, block_depth);
40 }
41}
42
// Table of MortonCopy<true, ...> instantiations (guest swizzled data -> linear buffer), one per
// pixel format. GetSwizzleFunction indexes it with static_cast<std::size_t>(format), so the entry
// order has to follow the numeric values of PixelFormat.
// NOTE(review): presumably this mirrors the enumerator order in video_core/surface.h - confirm
// whenever a format is added or reordered.
43static constexpr ConversionArray morton_to_linear_fns = {
44 // clang-format off
45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::G8R8U>,
69 MortonCopy<true, PixelFormat::G8R8S>,
70 MortonCopy<true, PixelFormat::BGRA8>,
71 MortonCopy<true, PixelFormat::RGBA32F>,
72 MortonCopy<true, PixelFormat::RG32F>,
73 MortonCopy<true, PixelFormat::R32F>,
74 MortonCopy<true, PixelFormat::R16F>,
75 MortonCopy<true, PixelFormat::R16U>,
76 MortonCopy<true, PixelFormat::R16S>,
77 MortonCopy<true, PixelFormat::R16UI>,
78 MortonCopy<true, PixelFormat::R16I>,
79 MortonCopy<true, PixelFormat::RG16>,
80 MortonCopy<true, PixelFormat::RG16F>,
81 MortonCopy<true, PixelFormat::RG16UI>,
82 MortonCopy<true, PixelFormat::RG16I>,
83 MortonCopy<true, PixelFormat::RG16S>,
84 MortonCopy<true, PixelFormat::RGB32F>,
85 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
86 MortonCopy<true, PixelFormat::RG8U>,
87 MortonCopy<true, PixelFormat::RG8S>,
88 MortonCopy<true, PixelFormat::RG32UI>,
89 MortonCopy<true, PixelFormat::R32UI>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
93 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
94 MortonCopy<true, PixelFormat::DXT1_SRGB>,
95 MortonCopy<true, PixelFormat::DXT23_SRGB>,
96 MortonCopy<true, PixelFormat::DXT45_SRGB>,
97 MortonCopy<true, PixelFormat::BC7U_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
103 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
105 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
106 MortonCopy<true, PixelFormat::Z32F>,
107 MortonCopy<true, PixelFormat::Z16>,
108 MortonCopy<true, PixelFormat::Z24S8>,
109 MortonCopy<true, PixelFormat::S8Z24>,
110 MortonCopy<true, PixelFormat::Z32FS8>,
111 // clang-format on
112};
113
// Table of MortonCopy<false, ...> instantiations (linear buffer -> guest swizzled data), indexed
// the same way as morton_to_linear_fns: by static_cast<std::size_t>(format).
// The nullptr slots sit at the positions of the ASTC formats, for which swizzling is not
// implemented (see the TODO below). Anything that fetches an entry from this table must be
// prepared to get nullptr back.
114static constexpr ConversionArray linear_to_morton_fns = {
115 // clang-format off
116 MortonCopy<false, PixelFormat::ABGR8U>,
117 MortonCopy<false, PixelFormat::ABGR8S>,
118 MortonCopy<false, PixelFormat::ABGR8UI>,
119 MortonCopy<false, PixelFormat::B5G6R5U>,
120 MortonCopy<false, PixelFormat::A2B10G10R10U>,
121 MortonCopy<false, PixelFormat::A1B5G5R5U>,
122 MortonCopy<false, PixelFormat::R8U>,
123 MortonCopy<false, PixelFormat::R8UI>,
124 MortonCopy<false, PixelFormat::RGBA16F>,
125 MortonCopy<false, PixelFormat::RGBA16U>,
126 MortonCopy<false, PixelFormat::RGBA16UI>,
127 MortonCopy<false, PixelFormat::R11FG11FB10F>,
128 MortonCopy<false, PixelFormat::RGBA32UI>,
129 MortonCopy<false, PixelFormat::DXT1>,
130 MortonCopy<false, PixelFormat::DXT23>,
131 MortonCopy<false, PixelFormat::DXT45>,
132 MortonCopy<false, PixelFormat::DXN1>,
133 MortonCopy<false, PixelFormat::DXN2UNORM>,
134 MortonCopy<false, PixelFormat::DXN2SNORM>,
135 MortonCopy<false, PixelFormat::BC7U>,
136 MortonCopy<false, PixelFormat::BC6H_UF16>,
137 MortonCopy<false, PixelFormat::BC6H_SF16>,
138 // TODO(Subv): Swizzling ASTC formats are not supported
139 nullptr,
140 MortonCopy<false, PixelFormat::G8R8U>,
141 MortonCopy<false, PixelFormat::G8R8S>,
142 MortonCopy<false, PixelFormat::BGRA8>,
143 MortonCopy<false, PixelFormat::RGBA32F>,
144 MortonCopy<false, PixelFormat::RG32F>,
145 MortonCopy<false, PixelFormat::R32F>,
146 MortonCopy<false, PixelFormat::R16F>,
147 MortonCopy<false, PixelFormat::R16U>,
148 MortonCopy<false, PixelFormat::R16S>,
149 MortonCopy<false, PixelFormat::R16UI>,
150 MortonCopy<false, PixelFormat::R16I>,
151 MortonCopy<false, PixelFormat::RG16>,
152 MortonCopy<false, PixelFormat::RG16F>,
153 MortonCopy<false, PixelFormat::RG16UI>,
154 MortonCopy<false, PixelFormat::RG16I>,
155 MortonCopy<false, PixelFormat::RG16S>,
156 MortonCopy<false, PixelFormat::RGB32F>,
157 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
158 MortonCopy<false, PixelFormat::RG8U>,
159 MortonCopy<false, PixelFormat::RG8S>,
160 MortonCopy<false, PixelFormat::RG32UI>,
161 MortonCopy<false, PixelFormat::R32UI>,
162 nullptr,
163 nullptr,
164 nullptr,
165 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
166 MortonCopy<false, PixelFormat::DXT1_SRGB>,
167 MortonCopy<false, PixelFormat::DXT23_SRGB>,
168 MortonCopy<false, PixelFormat::DXT45_SRGB>,
169 MortonCopy<false, PixelFormat::BC7U_SRGB>,
170 nullptr,
171 nullptr,
172 nullptr,
173 nullptr,
174 nullptr,
175 nullptr,
176 nullptr,
177 nullptr,
178 MortonCopy<false, PixelFormat::Z32F>,
179 MortonCopy<false, PixelFormat::Z16>,
180 MortonCopy<false, PixelFormat::Z24S8>,
181 MortonCopy<false, PixelFormat::S8Z24>,
182 MortonCopy<false, PixelFormat::Z32FS8>,
183 // clang-format on
184};
185
186constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
187 switch (mode) {
188 case MortonSwizzleMode::MortonToLinear:
189 return morton_to_linear_fns[static_cast<std::size_t>(format)];
190 case MortonSwizzleMode::LinearToMorton:
191 return linear_to_morton_fns[static_cast<std::size_t>(format)];
192 }
193}
194
/// 8x8 Z-Order coordinate from 2D coordinates.
///
/// Only the low three bits of each coordinate are used. The bits of x land on result bits
/// 0, 2 and 4 and the bits of y on result bits 1, 3 and 5, so the result is in [0, 63].
static u32 MortonInterleave(u32 x, u32 y) {
    // constexpr (like the tables in MortonInterleave128) so the tables are guaranteed to be
    // constant-initialized data.
    static constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
    static constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
    static_assert(sizeof(xlut) / sizeof(xlut[0]) == 8, "xlut must cover exactly x % 8");
    static_assert(sizeof(ylut) / sizeof(ylut[0]) == 8, "ylut must cover exactly y % 8");
    return xlut[x % 8] + ylut[y % 8];
}
201
202/// Calculates the offset of the position of the pixel in Morton order
203static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
204 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
205 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
206 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
207 // texels are laid out in a 2x2 subtile like this:
208 // 2 3
209 // 0 1
210 //
211 // The full 8x8 tile has the texels arranged like this:
212 //
213 // 42 43 46 47 58 59 62 63
214 // 40 41 44 45 56 57 60 61
215 // 34 35 38 39 50 51 54 55
216 // 32 33 36 37 48 49 52 53
217 // 10 11 14 15 26 27 30 31
218 // 08 09 12 13 24 25 28 29
219 // 02 03 06 07 18 19 22 23
220 // 00 01 04 05 16 17 20 21
221 //
222 // This pattern is what's called Z-order curve, or Morton order.
223
224 const unsigned int block_height = 8;
225 const unsigned int coarse_x = x & ~7;
226
227 u32 i = MortonInterleave(x, y);
228
229 const unsigned int offset = coarse_x * block_height;
230
231 return (i + offset) * bytes_per_pixel;
232}
233
/// 128x128 Z-Order coordinate from 2D coordinates.
///
/// Only the low seven bits of each coordinate are used. Bits 0..6 of x are placed on result bits
/// 0, 1, 3, 6, 11, 12, 13 and bits 0..6 of y on result bits 2, 4, 5, 7, 8, 9, 10, so the result
/// is in [0, 0x3fff].
static u32 MortonInterleave128(u32 x, u32 y) {
    // The tables are indexed with `% 128`, so each holds exactly 128 entries.
    static constexpr u32 xlut[] = {
        0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
        0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b,
        0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b,
        0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b,
        0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b,
        0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
        0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b,
        0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b,
        0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b,
        0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b,
        0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
        0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
        0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b,
        0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b,
        0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b,
        0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
    };
    static constexpr u32 ylut[] = {
        0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
        0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4,
        0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134,
        0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4,
        0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234,
        0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
        0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334,
        0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4,
        0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434,
        0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4,
        0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
        0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
        0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634,
        0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4,
        0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734,
        0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
    };
    static_assert(sizeof(xlut) / sizeof(xlut[0]) == 128, "xlut must cover exactly x % 128");
    static_assert(sizeof(ylut) / sizeof(ylut[0]) == 128, "ylut must cover exactly y % 128");
    return xlut[x % 128] + ylut[y % 128];
}
312
313static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
314 // Calculates the offset of the position of the pixel in Morton order
315 // Framebuffer images are split into 128x128 tiles.
316
317 constexpr u32 block_height = 128;
318 const u32 coarse_x = x & ~127;
319
320 const u32 i = MortonInterleave128(x, y);
321
322 const u32 offset = coarse_x * block_height;
323
324 return (i + offset) * bytes_per_pixel;
325}
326
327void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
328 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
329 std::size_t buffer_size, VAddr addr) {
330
331 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer,
332 buffer_size, addr);
333}
334
335void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
336 u8* morton_data, u8* linear_data, bool morton_to_linear) {
337 u8* data_ptrs[2];
338 for (u32 y = 0; y < height; ++y) {
339 for (u32 x = 0; x < width; ++x) {
340 const u32 coarse_y = y & ~127;
341 const u32 morton_offset =
342 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
343 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
344
345 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
346 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
347
348 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
349 }
350 }
351}
352
353} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
new file mode 100644
index 000000000..b9b9eca86
--- /dev/null
+++ b/src/video_core/morton.h
@@ -0,0 +1,21 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
16 std::size_t buffer_size, VAddr addr);
17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear);
20
21} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 075192c3f..46a6c0308 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
76 return std::make_tuple(uploaded_ptr, uploaded_offset); 76 return std::make_tuple(uploaded_ptr, uploaded_offset);
77} 77}
78 78
79void OGLBufferCache::Map(std::size_t max_size) { 79bool OGLBufferCache::Map(std::size_t max_size) {
80 bool invalidate; 80 bool invalidate;
81 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 81 std::tie(buffer_ptr, buffer_offset_base, invalidate) =
82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
85 if (invalidate) { 85 if (invalidate) {
86 InvalidateAll(); 86 InvalidateAll();
87 } 87 }
88 return invalidate;
88} 89}
89 90
90void OGLBufferCache::Unmap() { 91void OGLBufferCache::Unmap() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 91fca3f6c..c11acfb79 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -50,7 +50,7 @@ public:
50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset. 50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); 51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
52 52
53 void Map(std::size_t max_size); 53 bool Map(std::size_t max_size);
54 void Unmap(); 54 void Unmap();
55 55
56 GLuint GetHandle() const; 56 GLuint GetHandle() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 99004c9ad..98fb5a9aa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
176 } 176 }
177 state.draw.vertex_array = VAO.handle; 177 state.draw.vertex_array = VAO.handle;
178 state.ApplyVertexBufferState(); 178 state.ApplyVertexBufferState();
179
180 // Rebinding the VAO invalidates the vertex buffer bindings.
181 gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
179} 182}
180 183
181void RasterizerOpenGL::SetupVertexBuffer() { 184void RasterizerOpenGL::SetupVertexBuffer() {
182 MICROPROFILE_SCOPE(OpenGL_VB); 185 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
183 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
184 const auto& regs = gpu.regs; 186 const auto& regs = gpu.regs;
185 187
188 if (!gpu.dirty_flags.vertex_array)
189 return;
190
191 MICROPROFILE_SCOPE(OpenGL_VB);
192
186 // Upload all guest vertex arrays sequentially to our buffer 193 // Upload all guest vertex arrays sequentially to our buffer
187 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
195 if (~gpu.dirty_flags.vertex_array & (1u << index))
196 continue;
197
188 const auto& vertex_array = regs.vertex_array[index]; 198 const auto& vertex_array = regs.vertex_array[index];
189 if (!vertex_array.IsEnabled()) 199 if (!vertex_array.IsEnabled())
190 continue; 200 continue;
@@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
211 221
212 // Implicit set by glBindVertexBuffer. Stupid glstate handling... 222 // Implicit set by glBindVertexBuffer. Stupid glstate handling...
213 state.draw.vertex_buffer = buffer_cache.GetHandle(); 223 state.draw.vertex_buffer = buffer_cache.GetHandle();
224
225 gpu.dirty_flags.vertex_array = 0;
214} 226}
215 227
216DrawParameters RasterizerOpenGL::SetupDraw() { 228DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
600 return; 612 return;
601 613
602 MICROPROFILE_SCOPE(OpenGL_Drawing); 614 MICROPROFILE_SCOPE(OpenGL_Drawing);
603 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 615 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
604 const auto& regs = gpu.regs; 616 const auto& regs = gpu.regs;
605 617
606 ScopeAcquireGLContext acquire_context{emu_window}; 618 ScopeAcquireGLContext acquire_context{emu_window};
@@ -620,7 +632,7 @@ void RasterizerOpenGL::DrawArrays() {
620 SyncTransformFeedback(); 632 SyncTransformFeedback();
621 SyncPointState(); 633 SyncPointState();
622 CheckAlphaTests(); 634 CheckAlphaTests();
623 635 SyncPolygonOffset();
624 // TODO(bunnei): Sync framebuffer_scale uniform here 636 // TODO(bunnei): Sync framebuffer_scale uniform here
625 // TODO(bunnei): Sync scissorbox uniform(s) here 637 // TODO(bunnei): Sync scissorbox uniform(s) here
626 638
@@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
653 // Add space for at least 18 constant buffers 665 // Add space for at least 18 constant buffers
654 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 666 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
655 667
656 buffer_cache.Map(buffer_size); 668 bool invalidate = buffer_cache.Map(buffer_size);
669 if (invalidate) {
670 // As all cached buffers are invalidated, we need to recheck their state.
671 gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
672 }
657 673
658 SetupVertexFormat(); 674 SetupVertexFormat();
659 SetupVertexBuffer(); 675 SetupVertexBuffer();
@@ -1195,6 +1211,16 @@ void RasterizerOpenGL::SyncPointState() {
1195 state.point.size = regs.point_size; 1211 state.point.size = regs.point_size;
1196} 1212}
1197 1213
1214void RasterizerOpenGL::SyncPolygonOffset() {
1215 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1216 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1217 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1218 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1219 state.polygon_offset.units = regs.polygon_offset_units;
1220 state.polygon_offset.factor = regs.polygon_offset_factor;
1221 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1222}
1223
1198void RasterizerOpenGL::CheckAlphaTests() { 1224void RasterizerOpenGL::CheckAlphaTests() {
1199 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1225 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1200 1226
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f4354289c..dfb4616f2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -183,6 +183,9 @@ private:
183 /// Syncs Color Mask 183 /// Syncs Color Mask
184 void SyncColorMask(); 184 void SyncColorMask();
185 185
186 /// Syncs the polygon offsets
187 void SyncPolygonOffset();
188
186 /// Check asserts for alpha testing. 189 /// Check asserts for alpha testing.
187 void CheckAlphaTests(); 190 void CheckAlphaTests();
188 191
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 4f434fc31..d458f77e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
15#include "core/memory.h" 15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/morton.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 21#include "video_core/renderer_opengl/gl_state.h"
@@ -22,10 +23,11 @@
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25#include "video_core/utils.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
28 28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
29using VideoCore::Surface::ComponentTypeFromDepthFormat; 31using VideoCore::Surface::ComponentTypeFromDepthFormat;
30using VideoCore::Surface::ComponentTypeFromRenderTarget; 32using VideoCore::Surface::ComponentTypeFromRenderTarget;
31using VideoCore::Surface::ComponentTypeFromTexture; 33using VideoCore::Surface::ComponentTypeFromTexture;
@@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
370 return {0, actual_height, MipWidth(mip_level), 0}; 372 return {0, actual_height, MipWidth(mip_level), 0};
371} 373}
372 374
373template <bool morton_to_gl, PixelFormat format> 375void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
374void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
375 std::size_t gl_buffer_size, VAddr addr) {
376 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
377
378 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
379 // pixel values.
380 const u32 tile_size_x{GetDefaultBlockWidth(format)};
381 const u32 tile_size_y{GetDefaultBlockHeight(format)};
382
383 if (morton_to_gl) {
384 Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
385 stride, height, depth, block_height, block_depth);
386 } else {
387 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
388 (height + tile_size_y - 1) / tile_size_y, depth,
389 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
390 gl_buffer, false, block_height, block_depth);
391 }
392}
393
394using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
395 VideoCore::Surface::MaxPixelFormat>;
396
397static constexpr GLConversionArray morton_to_gl_fns = {
398 // clang-format off
399 MortonCopy<true, PixelFormat::ABGR8U>,
400 MortonCopy<true, PixelFormat::ABGR8S>,
401 MortonCopy<true, PixelFormat::ABGR8UI>,
402 MortonCopy<true, PixelFormat::B5G6R5U>,
403 MortonCopy<true, PixelFormat::A2B10G10R10U>,
404 MortonCopy<true, PixelFormat::A1B5G5R5U>,
405 MortonCopy<true, PixelFormat::R8U>,
406 MortonCopy<true, PixelFormat::R8UI>,
407 MortonCopy<true, PixelFormat::RGBA16F>,
408 MortonCopy<true, PixelFormat::RGBA16U>,
409 MortonCopy<true, PixelFormat::RGBA16UI>,
410 MortonCopy<true, PixelFormat::R11FG11FB10F>,
411 MortonCopy<true, PixelFormat::RGBA32UI>,
412 MortonCopy<true, PixelFormat::DXT1>,
413 MortonCopy<true, PixelFormat::DXT23>,
414 MortonCopy<true, PixelFormat::DXT45>,
415 MortonCopy<true, PixelFormat::DXN1>,
416 MortonCopy<true, PixelFormat::DXN2UNORM>,
417 MortonCopy<true, PixelFormat::DXN2SNORM>,
418 MortonCopy<true, PixelFormat::BC7U>,
419 MortonCopy<true, PixelFormat::BC6H_UF16>,
420 MortonCopy<true, PixelFormat::BC6H_SF16>,
421 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
422 MortonCopy<true, PixelFormat::G8R8U>,
423 MortonCopy<true, PixelFormat::G8R8S>,
424 MortonCopy<true, PixelFormat::BGRA8>,
425 MortonCopy<true, PixelFormat::RGBA32F>,
426 MortonCopy<true, PixelFormat::RG32F>,
427 MortonCopy<true, PixelFormat::R32F>,
428 MortonCopy<true, PixelFormat::R16F>,
429 MortonCopy<true, PixelFormat::R16U>,
430 MortonCopy<true, PixelFormat::R16S>,
431 MortonCopy<true, PixelFormat::R16UI>,
432 MortonCopy<true, PixelFormat::R16I>,
433 MortonCopy<true, PixelFormat::RG16>,
434 MortonCopy<true, PixelFormat::RG16F>,
435 MortonCopy<true, PixelFormat::RG16UI>,
436 MortonCopy<true, PixelFormat::RG16I>,
437 MortonCopy<true, PixelFormat::RG16S>,
438 MortonCopy<true, PixelFormat::RGB32F>,
439 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
440 MortonCopy<true, PixelFormat::RG8U>,
441 MortonCopy<true, PixelFormat::RG8S>,
442 MortonCopy<true, PixelFormat::RG32UI>,
443 MortonCopy<true, PixelFormat::R32UI>,
444 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
445 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
446 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
447 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
448 MortonCopy<true, PixelFormat::DXT1_SRGB>,
449 MortonCopy<true, PixelFormat::DXT23_SRGB>,
450 MortonCopy<true, PixelFormat::DXT45_SRGB>,
451 MortonCopy<true, PixelFormat::BC7U_SRGB>,
452 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
453 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
454 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
455 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
456 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
457 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
458 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
459 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
460 MortonCopy<true, PixelFormat::Z32F>,
461 MortonCopy<true, PixelFormat::Z16>,
462 MortonCopy<true, PixelFormat::Z24S8>,
463 MortonCopy<true, PixelFormat::S8Z24>,
464 MortonCopy<true, PixelFormat::Z32FS8>,
465 // clang-format on
466};
467
468static constexpr GLConversionArray gl_to_morton_fns = {
469 // clang-format off
470 MortonCopy<false, PixelFormat::ABGR8U>,
471 MortonCopy<false, PixelFormat::ABGR8S>,
472 MortonCopy<false, PixelFormat::ABGR8UI>,
473 MortonCopy<false, PixelFormat::B5G6R5U>,
474 MortonCopy<false, PixelFormat::A2B10G10R10U>,
475 MortonCopy<false, PixelFormat::A1B5G5R5U>,
476 MortonCopy<false, PixelFormat::R8U>,
477 MortonCopy<false, PixelFormat::R8UI>,
478 MortonCopy<false, PixelFormat::RGBA16F>,
479 MortonCopy<false, PixelFormat::RGBA16U>,
480 MortonCopy<false, PixelFormat::RGBA16UI>,
481 MortonCopy<false, PixelFormat::R11FG11FB10F>,
482 MortonCopy<false, PixelFormat::RGBA32UI>,
483 MortonCopy<false, PixelFormat::DXT1>,
484 MortonCopy<false, PixelFormat::DXT23>,
485 MortonCopy<false, PixelFormat::DXT45>,
486 MortonCopy<false, PixelFormat::DXN1>,
487 MortonCopy<false, PixelFormat::DXN2UNORM>,
488 MortonCopy<false, PixelFormat::DXN2SNORM>,
489 MortonCopy<false, PixelFormat::BC7U>,
490 MortonCopy<false, PixelFormat::BC6H_UF16>,
491 MortonCopy<false, PixelFormat::BC6H_SF16>,
492 // TODO(Subv): Swizzling ASTC formats are not supported
493 nullptr,
494 MortonCopy<false, PixelFormat::G8R8U>,
495 MortonCopy<false, PixelFormat::G8R8S>,
496 MortonCopy<false, PixelFormat::BGRA8>,
497 MortonCopy<false, PixelFormat::RGBA32F>,
498 MortonCopy<false, PixelFormat::RG32F>,
499 MortonCopy<false, PixelFormat::R32F>,
500 MortonCopy<false, PixelFormat::R16F>,
501 MortonCopy<false, PixelFormat::R16U>,
502 MortonCopy<false, PixelFormat::R16S>,
503 MortonCopy<false, PixelFormat::R16UI>,
504 MortonCopy<false, PixelFormat::R16I>,
505 MortonCopy<false, PixelFormat::RG16>,
506 MortonCopy<false, PixelFormat::RG16F>,
507 MortonCopy<false, PixelFormat::RG16UI>,
508 MortonCopy<false, PixelFormat::RG16I>,
509 MortonCopy<false, PixelFormat::RG16S>,
510 MortonCopy<false, PixelFormat::RGB32F>,
511 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
512 MortonCopy<false, PixelFormat::RG8U>,
513 MortonCopy<false, PixelFormat::RG8S>,
514 MortonCopy<false, PixelFormat::RG32UI>,
515 MortonCopy<false, PixelFormat::R32UI>,
516 nullptr,
517 nullptr,
518 nullptr,
519 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
520 MortonCopy<false, PixelFormat::DXT1_SRGB>,
521 MortonCopy<false, PixelFormat::DXT23_SRGB>,
522 MortonCopy<false, PixelFormat::DXT45_SRGB>,
523 MortonCopy<false, PixelFormat::BC7U_SRGB>,
524 nullptr,
525 nullptr,
526 nullptr,
527 nullptr,
528 nullptr,
529 nullptr,
530 nullptr,
531 nullptr,
532 MortonCopy<false, PixelFormat::Z32F>,
533 MortonCopy<false, PixelFormat::Z16>,
534 MortonCopy<false, PixelFormat::Z24S8>,
535 MortonCopy<false, PixelFormat::S8Z24>,
536 MortonCopy<false, PixelFormat::Z32FS8>,
537 // clang-format on
538};
539
540void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
541 std::vector<u8>& gl_buffer, u32 mip_level) { 376 std::vector<u8>& gl_buffer, u32 mip_level) {
542 u32 depth = params.MipDepth(mip_level); 377 u32 depth = params.MipDepth(mip_level);
543 if (params.target == SurfaceTarget::Texture2D) { 378 if (params.target == SurfaceTarget::Texture2D) {
@@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
550 const u64 layer_size = params.LayerMemorySize(); 385 const u64 layer_size = params.LayerMemorySize();
551 const u64 gl_size = params.LayerSizeGL(mip_level); 386 const u64 gl_size = params.LayerSizeGL(mip_level);
552 for (u32 i = 0; i < params.depth; i++) { 387 for (u32 i = 0; i < params.depth; i++) {
553 functions[static_cast<std::size_t>(params.pixel_format)]( 388 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
554 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 389 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
555 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, 390 params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
556 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 391 params.addr + offset);
557 offset += layer_size; 392 offset += layer_size;
558 offset_gl += gl_size; 393 offset_gl += gl_size;
559 } 394 }
560 } else { 395 } else {
561 const u64 offset = params.GetMipmapLevelOffset(mip_level); 396 const u64 offset = params.GetMipmapLevelOffset(mip_level);
562 functions[static_cast<std::size_t>(params.pixel_format)]( 397 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
563 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 398 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
564 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), 399 params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
565 gl_buffer.size(), params.addr + offset); 400 params.addr + offset);
566 } 401 }
567} 402}
568 403
@@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() {
996 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
997 params.block_width, static_cast<u32>(params.target)); 832 params.block_width, static_cast<u32>(params.target));
998 for (u32 i = 0; i < params.max_mip_level; i++) 833 for (u32 i = 0; i < params.max_mip_level; i++)
999 SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); 834 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
1000 } else { 835 } else {
1001 const auto texture_src_data{Memory::GetPointer(params.addr)}; 836 const auto texture_src_data{Memory::GetPointer(params.addr)};
1002 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 837 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() {
1035 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 870 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
1036 params.block_width, static_cast<u32>(params.target)); 871 params.block_width, static_cast<u32>(params.target));
1037 872
1038 SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); 873 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
1039 } else { 874 } else {
1040 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 875 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
1041 } 876 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 97b9028c5..7c0935a4e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -500,27 +500,42 @@ public:
500 const Register& buf_reg) { 500 const Register& buf_reg) {
501 const std::string dest = GetOutputAttribute(attribute); 501 const std::string dest = GetOutputAttribute(attribute);
502 const std::string src = GetRegisterAsFloat(val_reg); 502 const std::string src = GetRegisterAsFloat(val_reg);
503 if (dest.empty())
504 return;
503 505
504 if (!dest.empty()) { 506 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
505 // Can happen with unknown/unimplemented output attributes, in which case we ignore the 507 // instruction for now.
506 // instruction for now. 508 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
507 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { 509 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
508 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry 510 // shader. These instructions use a dirty register as buffer index, to avoid some
509 // shader. These instructions use a dirty register as buffer index, to avoid some 511 // drivers from complaining about out of boundary writes, guard them.
510 // drivers from complaining about out of boundary writes, guard them. 512 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
511 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + 513 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
512 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; 514 shader.AddLine("amem[" + buf_index + "][" +
513 shader.AddLine("amem[" + buf_index + "][" + 515 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
514 std::to_string(static_cast<u32>(attribute)) + ']' + 516 " = " + src + ';');
515 GetSwizzle(elem) + " = " + src + ';'); 517 return;
516 } else { 518 }
517 if (attribute == Attribute::Index::PointSize) { 519
518 fixed_pipeline_output_attributes_used.insert(attribute); 520 switch (attribute) {
519 shader.AddLine(dest + " = " + src + ';'); 521 case Attribute::Index::ClipDistances0123:
520 } else { 522 case Attribute::Index::ClipDistances4567: {
521 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); 523 const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem;
522 } 524 UNIMPLEMENTED_IF_MSG(
523 } 525 ((header.vtg.clip_distances >> index) & 1) == 0,
526 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
527
528 fixed_pipeline_output_attributes_used.insert(attribute);
529 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
530 break;
531 }
532 case Attribute::Index::PointSize:
533 fixed_pipeline_output_attributes_used.insert(attribute);
534 shader.AddLine(dest + " = " + src + ';');
535 break;
536 default:
537 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
538 break;
524 } 539 }
525 } 540 }
526 541
@@ -740,12 +755,19 @@ private:
740 void GenerateVertex() { 755 void GenerateVertex() {
741 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) 756 if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
742 return; 757 return;
758 bool clip_distances_declared = false;
759
743 declarations.AddLine("out gl_PerVertex {"); 760 declarations.AddLine("out gl_PerVertex {");
744 ++declarations.scope; 761 ++declarations.scope;
745 declarations.AddLine("vec4 gl_Position;"); 762 declarations.AddLine("vec4 gl_Position;");
746 for (auto& o : fixed_pipeline_output_attributes_used) { 763 for (auto& o : fixed_pipeline_output_attributes_used) {
747 if (o == Attribute::Index::PointSize) 764 if (o == Attribute::Index::PointSize)
748 declarations.AddLine("float gl_PointSize;"); 765 declarations.AddLine("float gl_PointSize;");
766 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
767 o == Attribute::Index::ClipDistances4567)) {
768 declarations.AddLine("float gl_ClipDistance[];");
769 clip_distances_declared = true;
770 }
749 } 771 }
750 --declarations.scope; 772 --declarations.scope;
751 declarations.AddLine("};"); 773 declarations.AddLine("};");
@@ -845,7 +867,8 @@ private:
845 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 867 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
846 // shader. 868 // shader.
847 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 869 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
848 return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; 870 // Config pack's first value is instance_id.
871 return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))";
849 case Attribute::Index::FrontFacing: 872 case Attribute::Index::FrontFacing:
850 // TODO(Subv): Find out what the values are for the other elements. 873 // TODO(Subv): Find out what the values are for the other elements.
851 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 874 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
@@ -916,6 +939,10 @@ private:
916 return "gl_PointSize"; 939 return "gl_PointSize";
917 case Attribute::Index::Position: 940 case Attribute::Index::Position:
918 return "position"; 941 return "position";
942 case Attribute::Index::ClipDistances0123:
943 case Attribute::Index::ClipDistances4567: {
944 return "gl_ClipDistance";
945 }
919 default: 946 default:
920 const u32 index{static_cast<u32>(attribute) - 947 const u32 index{static_cast<u32>(attribute) -
921 static_cast<u32>(Attribute::Index::Attribute_0)}; 948 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -1266,7 +1293,15 @@ private:
1266 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); 1293 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
1267 } 1294 }
1268 1295
1269 void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { 1296 void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
1297 const std::string& texture) {
1298 // Add an extra scope and declare the texture coords inside to prevent
1299 // overwriting them in case they are used as outputs of the texs instruction.
1300 shader.AddLine('{');
1301 ++shader.scope;
1302 shader.AddLine(coord);
1303 shader.AddLine("vec4 texture_tmp = " + texture + ';');
1304
1270 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 1305 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
1271 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 1306 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
1272 1307
@@ -1278,17 +1313,19 @@ private:
1278 1313
1279 if (written_components < 2) { 1314 if (written_components < 2) {
1280 // Write the first two swizzle components to gpr0 and gpr0+1 1315 // Write the first two swizzle components to gpr0 and gpr0+1
1281 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, 1316 regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
1282 written_components % 2); 1317 written_components % 2);
1283 } else { 1318 } else {
1284 ASSERT(instr.texs.HasTwoDestinations()); 1319 ASSERT(instr.texs.HasTwoDestinations());
1285 // Write the rest of the swizzle components to gpr28 and gpr28+1 1320 // Write the rest of the swizzle components to gpr28 and gpr28+1
1286 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, 1321 regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
1287 written_components % 2); 1322 written_components % 2);
1288 } 1323 }
1289 1324
1290 ++written_components; 1325 ++written_components;
1291 } 1326 }
1327 --shader.scope;
1328 shader.AddLine('}');
1292 } 1329 }
1293 1330
1294 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { 1331 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -1685,6 +1722,26 @@ private:
1685 1722
1686 break; 1723 break;
1687 } 1724 }
1725 case OpCode::Type::Bfi: {
1726 UNIMPLEMENTED_IF(instr.generates_cc);
1727
1728 const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> {
1729 switch (opcode->get().GetId()) {
1730 case OpCode::Id::BFI_IMM_R:
1731 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
1732 std::to_string(instr.alu.GetSignedImm20_20())};
1733 default:
1734 UNREACHABLE();
1735 }
1736 }();
1737 const std::string offset = '(' + packed_shift + " & 0xff)";
1738 const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
1739 const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
1740 regs.SetRegisterToInteger(
1741 instr.gpr0, false, 0,
1742 "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1);
1743 break;
1744 }
1688 case OpCode::Type::Shift: { 1745 case OpCode::Type::Shift: {
1689 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); 1746 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
1690 std::string op_b; 1747 std::string op_b;
@@ -2510,61 +2567,83 @@ private:
2510 const bool depth_compare = 2567 const bool depth_compare =
2511 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2568 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2512 u32 num_coordinates = TextureCoordinates(texture_type); 2569 u32 num_coordinates = TextureCoordinates(texture_type);
2513 if (depth_compare) 2570 u32 start_index = 0;
2514 num_coordinates += 1; 2571 std::string array_elem;
2572 if (is_array) {
2573 array_elem = regs.GetRegisterAsInteger(instr.gpr8);
2574 start_index = 1;
2575 }
2576 const auto process_mode = instr.tex.GetTextureProcessMode();
2577 u32 start_index_b = 0;
2578 std::string lod_value;
2579 if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
2580 process_mode != Tegra::Shader::TextureProcessMode::None) {
2581 start_index_b = 1;
2582 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2583 }
2584
2585 std::string depth_value;
2586 if (depth_compare) {
2587 depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
2588 }
2589
2590 bool depth_compare_extra = false;
2515 2591
2516 switch (num_coordinates) { 2592 switch (num_coordinates) {
2517 case 1: { 2593 case 1: {
2594 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2518 if (is_array) { 2595 if (is_array) {
2519 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2596 if (depth_compare) {
2520 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2597 coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
2521 coord = "vec2 coords = vec2(" + x + ", " + index + ");"; 2598 array_elem + ");";
2599 } else {
2600 coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
2601 }
2522 } else { 2602 } else {
2523 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2603 if (depth_compare) {
2524 coord = "float coords = " + x + ';'; 2604 coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
2605 } else {
2606 coord = "float coords = " + x + ';';
2607 }
2525 } 2608 }
2526 break; 2609 break;
2527 } 2610 }
2528 case 2: { 2611 case 2: {
2612 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2613 const std::string y =
2614 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2529 if (is_array) { 2615 if (is_array) {
2530 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2616 if (depth_compare) {
2531 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2617 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
2532 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2618 ", " + array_elem + ");";
2533 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; 2619 } else {
2620 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
2621 }
2534 } else { 2622 } else {
2535 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2623 if (depth_compare) {
2536 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2624 coord =
2537 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2625 "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
2626 } else {
2627 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2628 }
2538 } 2629 }
2539 break; 2630 break;
2540 } 2631 }
2541 case 3: { 2632 case 3: {
2542 if (depth_compare) { 2633 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2543 if (is_array) { 2634 const std::string y =
2544 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2635 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2545 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2636 const std::string z =
2546 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2637 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
2547 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2638 if (is_array) {
2548 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + 2639 depth_compare_extra = depth_compare;
2549 ");"; 2640 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2550 } else { 2641 array_elem + ");";
2551 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2552 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2553 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2554 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2555 }
2556 } else { 2642 } else {
2557 if (is_array) { 2643 if (depth_compare) {
2558 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2644 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2559 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2645 depth_value + ");";
2560 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2561 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
2562 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2563 ");";
2564 } else { 2646 } else {
2565 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2566 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2567 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2568 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; 2647 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2569 } 2648 }
2570 } 2649 }
@@ -2580,82 +2659,85 @@ private:
2580 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2659 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2581 texture_type = Tegra::Shader::TextureType::Texture2D; 2660 texture_type = Tegra::Shader::TextureType::Texture2D;
2582 } 2661 }
2583 // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
2584 // or lod.
2585 2662
2586 const std::string sampler = 2663 const std::string sampler =
2587 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2664 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2588 // Add an extra scope and declare the texture coords inside to prevent 2665 // Add an extra scope and declare the texture coords inside to prevent
2589 // overwriting them in case they are used as outputs of the texs instruction. 2666 // overwriting them in case they are used as outputs of the texs instruction.
2590 2667
2591 shader.AddLine("{"); 2668 shader.AddLine('{');
2592 ++shader.scope; 2669 ++shader.scope;
2593 shader.AddLine(coord); 2670 shader.AddLine(coord);
2594 std::string texture; 2671 std::string texture;
2595 2672
2596 switch (instr.tex.GetTextureProcessMode()) { 2673 switch (instr.tex.GetTextureProcessMode()) {
2597 case Tegra::Shader::TextureProcessMode::None: { 2674 case Tegra::Shader::TextureProcessMode::None: {
2598 texture = "texture(" + sampler + ", coords)"; 2675 if (!depth_compare_extra) {
2676 texture = "texture(" + sampler + ", coords)";
2677 } else {
2678 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2679 }
2599 break; 2680 break;
2600 } 2681 }
2601 case Tegra::Shader::TextureProcessMode::LZ: { 2682 case Tegra::Shader::TextureProcessMode::LZ: {
2602 texture = "textureLod(" + sampler + ", coords, 0.0)"; 2683 if (!depth_compare_extra) {
2684 texture = "textureLod(" + sampler + ", coords, 0.0)";
2685 } else {
2686 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2687 }
2603 break; 2688 break;
2604 } 2689 }
2605 case Tegra::Shader::TextureProcessMode::LB: 2690 case Tegra::Shader::TextureProcessMode::LB:
2606 case Tegra::Shader::TextureProcessMode::LBA: { 2691 case Tegra::Shader::TextureProcessMode::LBA: {
2607 const std::string bias = [&]() {
2608 if (depth_compare) {
2609 if (is_array)
2610 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 2);
2611 else
2612 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2613 } else {
2614 return regs.GetRegisterAsFloat(instr.gpr20);
2615 }
2616 }();
2617 shader.AddLine("float bias = " + bias + ';');
2618
2619 // TODO: Figure if A suffix changes the equation at all. 2692 // TODO: Figure if A suffix changes the equation at all.
2620 texture = "texture(" + sampler + ", coords, bias)"; 2693 if (!depth_compare_extra) {
2694 texture = "texture(" + sampler + ", coords, " + lod_value + ')';
2695 } else {
2696 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2697 LOG_WARNING(HW_GPU,
2698 "OpenGL Limitation: can't set bias value along depth compare");
2699 }
2621 break; 2700 break;
2622 } 2701 }
2623 case Tegra::Shader::TextureProcessMode::LL: 2702 case Tegra::Shader::TextureProcessMode::LL:
2624 case Tegra::Shader::TextureProcessMode::LLA: { 2703 case Tegra::Shader::TextureProcessMode::LLA: {
2625 const std::string lod = [&]() {
2626 if (num_coordinates <= 2) {
2627 return regs.GetRegisterAsFloat(instr.gpr20);
2628 } else {
2629 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2630 }
2631 }();
2632 shader.AddLine("float lod = " + lod + ';');
2633
2634 // TODO: Figure if A suffix changes the equation at all. 2704 // TODO: Figure if A suffix changes the equation at all.
2635 texture = "textureLod(" + sampler + ", coords, lod)"; 2705 if (!depth_compare_extra) {
2706 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2707 } else {
2708 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2709 LOG_WARNING(HW_GPU,
2710 "OpenGL Limitation: can't set lod value along depth compare");
2711 }
2636 break; 2712 break;
2637 } 2713 }
2638 default: { 2714 default: {
2639 texture = "texture(" + sampler + ", coords)"; 2715 if (!depth_compare_extra) {
2716 texture = "texture(" + sampler + ", coords)";
2717 } else {
2718 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2719 }
2640 UNIMPLEMENTED_MSG("Unhandled texture process mode {}", 2720 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2641 static_cast<u32>(instr.tex.GetTextureProcessMode())); 2721 static_cast<u32>(instr.tex.GetTextureProcessMode()));
2642 } 2722 }
2643 } 2723 }
2644 if (!depth_compare) { 2724 if (!depth_compare) {
2725 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2645 std::size_t dest_elem{}; 2726 std::size_t dest_elem{};
2646 for (std::size_t elem = 0; elem < 4; ++elem) { 2727 for (std::size_t elem = 0; elem < 4; ++elem) {
2647 if (!instr.tex.IsComponentEnabled(elem)) { 2728 if (!instr.tex.IsComponentEnabled(elem)) {
2648 // Skip disabled components 2729 // Skip disabled components
2649 continue; 2730 continue;
2650 } 2731 }
2651 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2732 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2733 dest_elem);
2652 ++dest_elem; 2734 ++dest_elem;
2653 } 2735 }
2654 } else { 2736 } else {
2655 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); 2737 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
2656 } 2738 }
2657 --shader.scope; 2739 --shader.scope;
2658 shader.AddLine("}"); 2740 shader.AddLine('}');
2659 break; 2741 break;
2660 } 2742 }
2661 case OpCode::Id::TEXS: { 2743 case OpCode::Id::TEXS: {
@@ -2668,41 +2750,76 @@ private:
2668 const bool depth_compare = 2750 const bool depth_compare =
2669 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2751 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2670 u32 num_coordinates = TextureCoordinates(texture_type); 2752 u32 num_coordinates = TextureCoordinates(texture_type);
2671 if (depth_compare) 2753 const auto process_mode = instr.texs.GetTextureProcessMode();
2672 num_coordinates += 1; 2754 std::string lod_value;
2673 2755 std::string coord;
2674 // Scope to avoid variable name overlaps. 2756 u32 lod_offset = 0;
2675 shader.AddLine('{'); 2757 if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
2676 ++shader.scope; 2758 if (num_coordinates > 2) {
2759 lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2760 lod_offset = 2;
2761 } else {
2762 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2763 lod_offset = 1;
2764 }
2765 }
2677 2766
2678 switch (num_coordinates) { 2767 switch (num_coordinates) {
2768 case 1: {
2769 coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
2770 break;
2771 }
2679 case 2: { 2772 case 2: {
2680 if (is_array) { 2773 if (is_array) {
2681 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2774 if (depth_compare) {
2682 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2775 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2683 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2776 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2684 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"); 2777 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2778 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2779 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2780 ");";
2781 } else {
2782 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2783 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2784 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2785 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
2786 }
2685 } else { 2787 } else {
2686 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2788 if (lod_offset != 0) {
2687 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2789 if (depth_compare) {
2688 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2790 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2791 const std::string y =
2792 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2793 const std::string z =
2794 regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
2795 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2796 } else {
2797 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2798 const std::string y =
2799 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2800 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2801 }
2802 } else {
2803 if (depth_compare) {
2804 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2805 const std::string y =
2806 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2807 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2808 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2809 } else {
2810 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2811 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2812 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2813 }
2814 }
2689 } 2815 }
2690 break; 2816 break;
2691 } 2817 }
2692 case 3: { 2818 case 3: {
2693 if (is_array) { 2819 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2694 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2820 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2695 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2821 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2696 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2822 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2697 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2698 shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2699 index + ");");
2700 } else {
2701 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2702 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2703 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2704 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
2705 }
2706 break; 2823 break;
2707 } 2824 }
2708 default: 2825 default:
@@ -2712,14 +2829,14 @@ private:
2712 // Fallback to interpreting as a 2D texture for now 2829 // Fallback to interpreting as a 2D texture for now
2713 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2830 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2714 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2831 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2715 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2832 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2716 texture_type = Tegra::Shader::TextureType::Texture2D; 2833 texture_type = Tegra::Shader::TextureType::Texture2D;
2717 is_array = false; 2834 is_array = false;
2718 } 2835 }
2719 const std::string sampler = 2836 const std::string sampler =
2720 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2837 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2721 std::string texture; 2838 std::string texture;
2722 switch (instr.texs.GetTextureProcessMode()) { 2839 switch (process_mode) {
2723 case Tegra::Shader::TextureProcessMode::None: { 2840 case Tegra::Shader::TextureProcessMode::None: {
2724 texture = "texture(" + sampler + ", coords)"; 2841 texture = "texture(" + sampler + ", coords)";
2725 break; 2842 break;
@@ -2733,8 +2850,7 @@ private:
2733 break; 2850 break;
2734 } 2851 }
2735 case Tegra::Shader::TextureProcessMode::LL: { 2852 case Tegra::Shader::TextureProcessMode::LL: {
2736 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2853 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2737 texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
2738 break; 2854 break;
2739 } 2855 }
2740 default: { 2856 default: {
@@ -2744,13 +2860,11 @@ private:
2744 } 2860 }
2745 } 2861 }
2746 if (!depth_compare) { 2862 if (!depth_compare) {
2747 WriteTexsInstruction(instr, texture); 2863 WriteTexsInstruction(instr, coord, texture);
2748 } else { 2864 } else {
2749 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 2865 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
2750 } 2866 }
2751 2867
2752 shader.AddLine('}');
2753 --shader.scope;
2754 break; 2868 break;
2755 } 2869 }
2756 case OpCode::Id::TLDS: { 2870 case OpCode::Id::TLDS: {
@@ -2772,11 +2886,12 @@ private:
2772 // Scope to avoid variable name overlaps. 2886 // Scope to avoid variable name overlaps.
2773 shader.AddLine('{'); 2887 shader.AddLine('{');
2774 ++shader.scope; 2888 ++shader.scope;
2889 std::string coords;
2775 2890
2776 switch (texture_type) { 2891 switch (texture_type) {
2777 case Tegra::Shader::TextureType::Texture1D: { 2892 case Tegra::Shader::TextureType::Texture1D: {
2778 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2893 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2779 shader.AddLine("int coords = " + x + ';'); 2894 coords = "float coords = " + x + ';';
2780 break; 2895 break;
2781 } 2896 }
2782 case Tegra::Shader::TextureType::Texture2D: { 2897 case Tegra::Shader::TextureType::Texture2D: {
@@ -2784,7 +2899,8 @@ private:
2784 2899
2785 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2900 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2786 const std::string y = regs.GetRegisterAsInteger(instr.gpr20); 2901 const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
2787 shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); 2902 // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
2903 coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
2788 extra_op_offset = 1; 2904 extra_op_offset = 1;
2789 break; 2905 break;
2790 } 2906 }
@@ -2812,7 +2928,7 @@ private:
2812 static_cast<u32>(instr.tlds.GetTextureProcessMode())); 2928 static_cast<u32>(instr.tlds.GetTextureProcessMode()));
2813 } 2929 }
2814 } 2930 }
2815 WriteTexsInstruction(instr, texture); 2931 WriteTexsInstruction(instr, coords, texture);
2816 2932
2817 --shader.scope; 2933 --shader.scope;
2818 shader.AddLine('}'); 2934 shader.AddLine('}');
@@ -2871,14 +2987,17 @@ private:
2871 2987
2872 const std::string texture = "textureGather(" + sampler + ", coords, " + 2988 const std::string texture = "textureGather(" + sampler + ", coords, " +
2873 std::to_string(instr.tld4.component) + ')'; 2989 std::to_string(instr.tld4.component) + ')';
2990
2874 if (!depth_compare) { 2991 if (!depth_compare) {
2992 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2875 std::size_t dest_elem{}; 2993 std::size_t dest_elem{};
2876 for (std::size_t elem = 0; elem < 4; ++elem) { 2994 for (std::size_t elem = 0; elem < 4; ++elem) {
2877 if (!instr.tex.IsComponentEnabled(elem)) { 2995 if (!instr.tex.IsComponentEnabled(elem)) {
2878 // Skip disabled components 2996 // Skip disabled components
2879 continue; 2997 continue;
2880 } 2998 }
2881 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2999 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
3000 dest_elem);
2882 ++dest_elem; 3001 ++dest_elem;
2883 } 3002 }
2884 } else { 3003 } else {
@@ -2899,6 +3018,7 @@ private:
2899 // Scope to avoid variable name overlaps. 3018 // Scope to avoid variable name overlaps.
2900 shader.AddLine('{'); 3019 shader.AddLine('{');
2901 ++shader.scope; 3020 ++shader.scope;
3021 std::string coords;
2902 3022
2903 const bool depth_compare = 3023 const bool depth_compare =
2904 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 3024 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -2908,20 +3028,19 @@ private:
2908 const std::string sampler = GetSampler( 3028 const std::string sampler = GetSampler(
2909 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); 3029 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
2910 if (!depth_compare) { 3030 if (!depth_compare) {
2911 shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); 3031 coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
2912 } else { 3032 } else {
2913 // Note: TLD4S coordinate encoding works just like TEXS's 3033 // Note: TLD4S coordinate encoding works just like TEXS's
2914 shader.AddLine( 3034 const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2915 "float op_y = " + regs.GetRegisterAsFloat(instr.gpr8.Value() + 1) + ';'); 3035 coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
2916 shader.AddLine("vec3 coords = vec3(" + op_a + ", op_y, " + op_b + ");");
2917 } 3036 }
2918 const std::string texture = "textureGather(" + sampler + ", coords, " + 3037 const std::string texture = "textureGather(" + sampler + ", coords, " +
2919 std::to_string(instr.tld4s.component) + ')'; 3038 std::to_string(instr.tld4s.component) + ')';
2920 3039
2921 if (!depth_compare) { 3040 if (!depth_compare) {
2922 WriteTexsInstruction(instr, texture); 3041 WriteTexsInstruction(instr, coords, texture);
2923 } else { 3042 } else {
2924 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 3043 WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
2925 } 3044 }
2926 3045
2927 --shader.scope; 3046 --shader.scope;
@@ -3217,6 +3336,34 @@ private:
3217 } 3336 }
3218 break; 3337 break;
3219 } 3338 }
3339 case OpCode::Type::RegisterSetPredicate: {
3340 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3341
3342 const std::string apply_mask = [&]() {
3343 switch (opcode->get().GetId()) {
3344 case OpCode::Id::R2P_IMM:
3345 return std::to_string(instr.r2p.immediate_mask);
3346 default:
3347 UNREACHABLE();
3348 }
3349 }();
3350 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3351 " >> " + std::to_string(instr.r2p.byte) + ')';
3352
3353 constexpr u64 programmable_preds = 7;
3354 for (u64 pred = 0; pred < programmable_preds; ++pred) {
3355 const auto shift = std::to_string(1 << pred);
3356
3357 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
3358 ++shader.scope;
3359
3360 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
3361
3362 --shader.scope;
3363 shader.AddLine('}');
3364 }
3365 break;
3366 }
3220 case OpCode::Type::FloatSet: { 3367 case OpCode::Type::FloatSet: {
3221 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), 3368 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8),
3222 instr.fset.abs_a != 0, instr.fset.neg_a != 0); 3369 instr.fset.abs_a != 0, instr.fset.neg_a != 0);
@@ -3254,6 +3401,10 @@ private:
3254 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, 3401 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
3255 1); 3402 1);
3256 } 3403 }
3404 if (instr.generates_cc.Value() != 0) {
3405 regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate);
3406 LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete");
3407 }
3257 break; 3408 break;
3258 } 3409 }
3259 case OpCode::Type::IntegerSet: { 3410 case OpCode::Type::IntegerSet: {
@@ -3507,6 +3658,11 @@ private:
3507 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); 3658 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
3508 break; 3659 break;
3509 } 3660 }
3661 case Tegra::Shader::SystemVariable::Ydirection: {
3662 // Config pack's third value is Y_NEGATE's state.
3663 regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
3664 break;
3665 }
3510 default: { 3666 default: {
3511 UNIMPLEMENTED_MSG("Unhandled system move: {}", 3667 UNIMPLEMENTED_MSG("Unhandled system move: {}",
3512 static_cast<u32>(instr.sys20.Value())); 3668 static_cast<u32>(instr.sys20.Value()));
@@ -3530,11 +3686,17 @@ private:
3530 "BRA with constant buffers are not implemented"); 3686 "BRA with constant buffers are not implemented");
3531 3687
3532 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 3688 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3533 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3534 "BRA condition code used: {}", static_cast<u32>(cc));
3535
3536 const u32 target = offset + instr.bra.GetBranchTarget(); 3689 const u32 target = offset + instr.bra.GetBranchTarget();
3537 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); 3690 if (cc != Tegra::Shader::ConditionCode::T) {
3691 const std::string condition_code = regs.GetConditionCode(cc);
3692 shader.AddLine("if (" + condition_code + "){");
3693 shader.scope++;
3694 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3695 shader.scope--;
3696 shader.AddLine('}');
3697 } else {
3698 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3699 }
3538 break; 3700 break;
3539 } 3701 }
3540 case OpCode::Id::IPA: { 3702 case OpCode::Id::IPA: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index eea090e52..23ed91e27 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -24,8 +24,7 @@ layout (location = 0) out vec4 position;
24 24
25layout(std140) uniform vs_config { 25layout(std140) uniform vs_config {
26 vec4 viewport_flip; 26 vec4 viewport_flip;
27 uvec4 instance_id; 27 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
28 uvec4 flip_stage;
29 uvec4 alpha_test; 28 uvec4 alpha_test;
30}; 29};
31)"; 30)";
@@ -63,7 +62,8 @@ void main() {
63 out += R"( 62 out += R"(
64 63
65 // Check if the flip stage is VertexB 64 // Check if the flip stage is VertexB
66 if (flip_stage[0] == 1) { 65 // Config pack's second value is flip_stage
66 if (config_pack[1] == 1) {
67 // Viewport can be flipped, which is unsupported by glViewport 67 // Viewport can be flipped, which is unsupported by glViewport
68 position.xy *= viewport_flip.xy; 68 position.xy *= viewport_flip.xy;
69 } 69 }
@@ -71,7 +71,7 @@ void main() {
71 71
72 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 72 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
73 // For now, this is here to bring order in lieu of proper emulation 73 // For now, this is here to bring order in lieu of proper emulation
74 if (flip_stage[0] == 1) { 74 if (config_pack[1] == 1) {
75 position.w = 1.0; 75 position.w = 1.0;
76 } 76 }
77} 77}
@@ -101,8 +101,7 @@ layout (location = 0) out vec4 position;
101 101
102layout (std140) uniform gs_config { 102layout (std140) uniform gs_config {
103 vec4 viewport_flip; 103 vec4 viewport_flip;
104 uvec4 instance_id; 104 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
105 uvec4 flip_stage;
106 uvec4 alpha_test; 105 uvec4 alpha_test;
107}; 106};
108 107
@@ -139,8 +138,7 @@ layout (location = 0) in vec4 position;
139 138
140layout (std140) uniform fs_config { 139layout (std140) uniform fs_config {
141 vec4 viewport_flip; 140 vec4 viewport_flip;
142 uvec4 instance_id; 141 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
143 uvec4 flip_stage;
144 uvec4 alpha_test; 142 uvec4 alpha_test;
145}; 143};
146 144
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8b8869ecb..6a30c28d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -27,16 +27,18 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
27 alpha_test.func = func; 27 alpha_test.func = func;
28 alpha_test.ref = regs.alpha_test_ref; 28 alpha_test.ref = regs.alpha_test_ref;
29 29
30 // We only assign the instance to the first component of the vector, the rest is just padding. 30 instance_id = state.current_instance;
31 instance_id[0] = state.current_instance;
32 31
33 // Assign in which stage the position has to be flipped 32 // Assign in which stage the position has to be flipped
34 // (the last stage before the fragment shader). 33 // (the last stage before the fragment shader).
35 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { 34 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
36 flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); 35 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
37 } else { 36 } else {
38 flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); 37 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
39 } 38 }
39
40 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
41 y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
40} 42}
41 43
42} // namespace OpenGL::GLShader 44} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 9a5d7e289..b757f5f44 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -21,8 +21,11 @@ using Tegra::Engines::Maxwell3D;
21struct MaxwellUniformData { 21struct MaxwellUniformData {
22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); 22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
23 alignas(16) GLvec4 viewport_flip; 23 alignas(16) GLvec4 viewport_flip;
24 alignas(16) GLuvec4 instance_id; 24 struct alignas(16) {
25 alignas(16) GLuvec4 flip_stage; 25 GLuint instance_id;
26 GLuint flip_stage;
27 GLfloat y_direction;
28 };
26 struct alignas(16) { 29 struct alignas(16) {
27 GLuint enabled; 30 GLuint enabled;
28 GLuint func; 31 GLuint func;
@@ -30,7 +33,7 @@ struct MaxwellUniformData {
30 GLuint padding; 33 GLuint padding;
31 } alpha_test; 34 } alpha_test;
32}; 35};
33static_assert(sizeof(MaxwellUniformData) == 64, "MaxwellUniformData structure size is incorrect"); 36static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
34static_assert(sizeof(MaxwellUniformData) < 16384, 37static_assert(sizeof(MaxwellUniformData) < 16384,
35 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); 38 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
36 39
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 934f4db78..b3bfad6a0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -92,6 +92,13 @@ OpenGLState::OpenGLState() {
92 92
93 point.size = 1; 93 point.size = 1;
94 fragment_color_clamp.enabled = false; 94 fragment_color_clamp.enabled = false;
95
96 polygon_offset.fill_enable = false;
97 polygon_offset.line_enable = false;
98 polygon_offset.point_enable = false;
99 polygon_offset.factor = 0.0f;
100 polygon_offset.units = 0.0f;
101 polygon_offset.clamp = 0.0f;
95} 102}
96 103
97void OpenGLState::ApplyDefaultState() { 104void OpenGLState::ApplyDefaultState() {
@@ -406,6 +413,55 @@ void OpenGLState::ApplyLogicOp() const {
406 } 413 }
407} 414}
408 415
416void OpenGLState::ApplyPolygonOffset() const {
417
418 const bool fill_enable_changed =
419 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
420 const bool line_enable_changed =
421 polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
422 const bool point_enable_changed =
423 polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
424 const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
425 const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
426 const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
427
428 if (fill_enable_changed) {
429 if (polygon_offset.fill_enable) {
430 glEnable(GL_POLYGON_OFFSET_FILL);
431 } else {
432 glDisable(GL_POLYGON_OFFSET_FILL);
433 }
434 }
435
436 if (line_enable_changed) {
437 if (polygon_offset.line_enable) {
438 glEnable(GL_POLYGON_OFFSET_LINE);
439 } else {
440 glDisable(GL_POLYGON_OFFSET_LINE);
441 }
442 }
443
444 if (point_enable_changed) {
445 if (polygon_offset.point_enable) {
446 glEnable(GL_POLYGON_OFFSET_POINT);
447 } else {
448 glDisable(GL_POLYGON_OFFSET_POINT);
449 }
450 }
451
452 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
453 (factor_changed || units_changed || clamp_changed)) {
454
455 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
456 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
457 } else {
458 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
459 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
460 "Unimplemented Depth polygon offset clamp.");
461 }
462 }
463}
464
409void OpenGLState::ApplyTextures() const { 465void OpenGLState::ApplyTextures() const {
410 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 466 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
411 const auto& texture_unit = texture_units[i]; 467 const auto& texture_unit = texture_units[i];
@@ -532,6 +588,7 @@ void OpenGLState::Apply() const {
532 ApplyLogicOp(); 588 ApplyLogicOp();
533 ApplyTextures(); 589 ApplyTextures();
534 ApplySamplers(); 590 ApplySamplers();
591 ApplyPolygonOffset();
535 cur_state = *this; 592 cur_state = *this;
536} 593}
537 594
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 032fc43f0..0bf19ed07 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -176,6 +176,15 @@ public:
176 float size; // GL_POINT_SIZE 176 float size; // GL_POINT_SIZE
177 } point; 177 } point;
178 178
179 struct {
180 bool point_enable;
181 bool line_enable;
182 bool fill_enable;
183 GLfloat units;
184 GLfloat factor;
185 GLfloat clamp;
186 } polygon_offset;
187
179 std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE 188 std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
180 189
181 OpenGLState(); 190 OpenGLState();
@@ -226,6 +235,7 @@ private:
226 void ApplyLogicOp() const; 235 void ApplyLogicOp() const;
227 void ApplyTextures() const; 236 void ApplyTextures() const;
228 void ApplySamplers() const; 237 void ApplySamplers() const;
238 void ApplyPolygonOffset() const;
229}; 239};
230 240
231} // namespace OpenGL 241} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1492e063a..4fd0d66c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -19,9 +19,9 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "core/telemetry_session.h" 20#include "core/telemetry_session.h"
21#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
22#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
24#include "video_core/utils.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27 27
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
deleted file mode 100644
index e0a14d48f..000000000
--- a/src/video_core/utils.h
+++ /dev/null
@@ -1,164 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore {
10
11// 8x8 Z-Order coordinate from 2D coordinates
12static inline u32 MortonInterleave(u32 x, u32 y) {
13 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
14 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
15 return xlut[x % 8] + ylut[y % 8];
16}
17
18/**
19 * Calculates the offset of the position of the pixel in Morton order
20 */
21static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
22 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
23 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
24 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
25 // texels are laid out in a 2x2 subtile like this:
26 // 2 3
27 // 0 1
28 //
29 // The full 8x8 tile has the texels arranged like this:
30 //
31 // 42 43 46 47 58 59 62 63
32 // 40 41 44 45 56 57 60 61
33 // 34 35 38 39 50 51 54 55
34 // 32 33 36 37 48 49 52 53
35 // 10 11 14 15 26 27 30 31
36 // 08 09 12 13 24 25 28 29
37 // 02 03 06 07 18 19 22 23
38 // 00 01 04 05 16 17 20 21
39 //
40 // This pattern is what's called Z-order curve, or Morton order.
41
42 const unsigned int block_height = 8;
43 const unsigned int coarse_x = x & ~7;
44
45 u32 i = VideoCore::MortonInterleave(x, y);
46
47 const unsigned int offset = coarse_x * block_height;
48
49 return (i + offset) * bytes_per_pixel;
50}
51
52static inline u32 MortonInterleave128(u32 x, u32 y) {
53 // 128x128 Z-Order coordinate from 2D coordinates
54 static constexpr u32 xlut[] = {
55 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
56 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
57 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
58 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
59 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
60 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
61 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
62 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
63 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
64 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
65 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
66 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
67 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
68 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
69 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
70 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
71 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
72 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
73 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
74 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
75 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
76 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
77 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
78 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
79 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
80 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
81 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
82 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
83 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
84 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
85 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
86 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
87 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
88 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
89 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
90 };
91 static constexpr u32 ylut[] = {
92 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
93 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
94 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
95 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
96 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
97 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
98 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
99 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
100 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
101 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
102 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
103 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
104 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
105 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
106 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
107 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
108 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
109 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
110 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
111 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
112 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
113 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
114 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
115 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
116 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
117 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
118 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
119 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
120 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
121 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
122 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
123 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
124 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
125 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
126 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
127 };
128 return xlut[x % 128] + ylut[y % 128];
129}
130
131static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
132 // Calculates the offset of the position of the pixel in Morton order
133 // Framebuffer images are split into 128x128 tiles.
134
135 const unsigned int block_height = 128;
136 const unsigned int coarse_x = x & ~127;
137
138 u32 i = MortonInterleave128(x, y);
139
140 const unsigned int offset = coarse_x * block_height;
141
142 return (i + offset) * bytes_per_pixel;
143}
144
145static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
146 u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
147 bool morton_to_gl) {
148 u8* data_ptrs[2];
149 for (unsigned y = 0; y < height; ++y) {
150 for (unsigned x = 0; x < width; ++x) {
151 const u32 coarse_y = y & ~127;
152 u32 morton_offset =
153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
154 u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
155
156 data_ptrs[morton_to_gl] = morton_data + morton_offset;
157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
158
159 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
160 }
161 }
162}
163
164} // namespace VideoCore