summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/kernel/svc.cpp3
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp18
-rw-r--r--src/core/hle/service/nvdrv/interface.h2
-rw-r--r--src/core/hle/service/sm/sm.cpp2
-rw-r--r--src/core/hle/service/sm/sm.h2
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/engines/shader_bytecode.h19
-rw-r--r--src/video_core/engines/shader_header.h11
-rw-r--r--src/video_core/memory_manager.cpp7
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/morton.cpp353
-rw-r--r--src/video_core/morton.h21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp193
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp418
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/utils.h164
16 files changed, 733 insertions, 488 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index b8b6b4d49..f287f7c97 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -671,7 +671,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
671 break; 671 break;
672 } 672 }
673 default: 673 default:
674 UNIMPLEMENTED(); 674 LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
675 return ERR_INVALID_ENUM_VALUE;
675 } 676 }
676 677
677 return RESULT_SUCCESS; 678 return RESULT_SUCCESS;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index ac3859353..602086eed 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -88,6 +88,20 @@ void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) {
88 rb.Push(RESULT_SUCCESS); 88 rb.Push(RESULT_SUCCESS);
89} 89}
90 90
91void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
92 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
93 IPC::ResponseBuilder rb{ctx, 2};
94 rb.Push(RESULT_SUCCESS);
95}
96
97void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
98 // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
99 // retail hardware.
100 LOG_DEBUG(Service_NVDRV, "called");
101 IPC::ResponseBuilder rb{ctx, 2};
102 rb.Push(RESULT_SUCCESS);
103}
104
91NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) 105NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
92 : ServiceFramework(name), nvdrv(std::move(nvdrv)) { 106 : ServiceFramework(name), nvdrv(std::move(nvdrv)) {
93 static const FunctionInfo functions[] = { 107 static const FunctionInfo functions[] = {
@@ -97,10 +111,10 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
97 {3, &NVDRV::Initialize, "Initialize"}, 111 {3, &NVDRV::Initialize, "Initialize"},
98 {4, &NVDRV::QueryEvent, "QueryEvent"}, 112 {4, &NVDRV::QueryEvent, "QueryEvent"},
99 {5, nullptr, "MapSharedMem"}, 113 {5, nullptr, "MapSharedMem"},
100 {6, nullptr, "GetStatus"}, 114 {6, &NVDRV::GetStatus, "GetStatus"},
101 {7, nullptr, "ForceSetClientPID"}, 115 {7, nullptr, "ForceSetClientPID"},
102 {8, &NVDRV::SetClientPID, "SetClientPID"}, 116 {8, &NVDRV::SetClientPID, "SetClientPID"},
103 {9, nullptr, "DumpGraphicsMemoryInfo"}, 117 {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
104 {10, nullptr, "InitializeDevtools"}, 118 {10, nullptr, "InitializeDevtools"},
105 {11, &NVDRV::Ioctl, "Ioctl2"}, 119 {11, &NVDRV::Ioctl, "Ioctl2"},
106 {12, nullptr, "Ioctl3"}, 120 {12, nullptr, "Ioctl3"},
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index d340893c2..5a1e4baa7 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -24,6 +24,8 @@ private:
24 void QueryEvent(Kernel::HLERequestContext& ctx); 24 void QueryEvent(Kernel::HLERequestContext& ctx);
25 void SetClientPID(Kernel::HLERequestContext& ctx); 25 void SetClientPID(Kernel::HLERequestContext& ctx);
26 void FinishInitialize(Kernel::HLERequestContext& ctx); 26 void FinishInitialize(Kernel::HLERequestContext& ctx);
27 void GetStatus(Kernel::HLERequestContext& ctx);
28 void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx);
27 29
28 std::shared_ptr<Module> nvdrv; 30 std::shared_ptr<Module> nvdrv;
29 31
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index c1b2f33b9..9ca8483a5 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -63,7 +63,7 @@ ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> ServiceManager::RegisterService
63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)); 63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port));
64} 64}
65 65
66ResultCode ServiceManager::UnregisterService(std::string name) { 66ResultCode ServiceManager::UnregisterService(const std::string& name) {
67 CASCADE_CODE(ValidateServiceName(name)); 67 CASCADE_CODE(ValidateServiceName(name));
68 68
69 const auto iter = registered_services.find(name); 69 const auto iter = registered_services.find(name);
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index c4714b3e3..bef25433e 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -50,7 +50,7 @@ public:
50 50
51 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name, 51 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name,
52 unsigned int max_sessions); 52 unsigned int max_sessions);
53 ResultCode UnregisterService(std::string name); 53 ResultCode UnregisterService(const std::string& name);
54 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name); 54 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name);
55 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name); 55 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name);
56 56
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a780215c1..3f906a517 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(video_core STATIC
21 macro_interpreter.h 21 macro_interpreter.h
22 memory_manager.cpp 22 memory_manager.cpp
23 memory_manager.h 23 memory_manager.h
24 morton.cpp
25 morton.h
24 rasterizer_cache.cpp 26 rasterizer_cache.cpp
25 rasterizer_cache.h 27 rasterizer_cache.h
26 rasterizer_interface.h 28 rasterizer_interface.h
@@ -62,7 +64,6 @@ add_library(video_core STATIC
62 textures/decoders.cpp 64 textures/decoders.cpp
63 textures/decoders.h 65 textures/decoders.h
64 textures/texture.h 66 textures/texture.h
65 utils.h
66 video_core.cpp 67 video_core.cpp
67 video_core.h 68 video_core.h
68) 69)
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7e8449bc4..b9faaf8e0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -82,6 +82,8 @@ union Attribute {
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
85 ClipDistances0123 = 44,
86 ClipDistances4567 = 45,
85 PointCoord = 46, 87 PointCoord = 46,
86 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex 88 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
87 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 89 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
@@ -366,6 +368,11 @@ enum class HalfPrecision : u64 {
366 FMZ = 2, 368 FMZ = 2,
367}; 369};
368 370
371enum class R2pMode : u64 {
372 Pr = 0,
373 Cc = 1,
374};
375
369enum class IpaInterpMode : u64 { 376enum class IpaInterpMode : u64 {
370 Linear = 0, 377 Linear = 0,
371 Perspective = 1, 378 Perspective = 1,
@@ -855,6 +862,12 @@ union Instruction {
855 } hsetp2; 862 } hsetp2;
856 863
857 union { 864 union {
865 BitField<40, 1, R2pMode> mode;
866 BitField<41, 2, u64> byte;
867 BitField<20, 7, u64> immediate_mask;
868 } r2p;
869
870 union {
858 BitField<39, 3, u64> pred39; 871 BitField<39, 3, u64> pred39;
859 BitField<42, 1, u64> neg_pred; 872 BitField<42, 1, u64> neg_pred;
860 BitField<43, 1, u64> neg_a; 873 BitField<43, 1, u64> neg_a;
@@ -1256,6 +1269,7 @@ public:
1256 BFE_C, 1269 BFE_C,
1257 BFE_R, 1270 BFE_R,
1258 BFE_IMM, 1271 BFE_IMM,
1272 BFI_IMM_R,
1259 BRA, 1273 BRA,
1260 PBK, 1274 PBK,
1261 LD_A, 1275 LD_A,
@@ -1381,6 +1395,7 @@ public:
1381 PSETP, 1395 PSETP,
1382 PSET, 1396 PSET,
1383 CSETP, 1397 CSETP,
1398 R2P_IMM,
1384 XMAD_IMM, 1399 XMAD_IMM,
1385 XMAD_CR, 1400 XMAD_CR,
1386 XMAD_RC, 1401 XMAD_RC,
@@ -1396,6 +1411,7 @@ public:
1396 ArithmeticHalf, 1411 ArithmeticHalf,
1397 ArithmeticHalfImmediate, 1412 ArithmeticHalfImmediate,
1398 Bfe, 1413 Bfe,
1414 Bfi,
1399 Shift, 1415 Shift,
1400 Ffma, 1416 Ffma,
1401 Hfma2, 1417 Hfma2,
@@ -1410,6 +1426,7 @@ public:
1410 HalfSetPredicate, 1426 HalfSetPredicate,
1411 PredicateSetPredicate, 1427 PredicateSetPredicate,
1412 PredicateSetRegister, 1428 PredicateSetRegister,
1429 RegisterSetPredicate,
1413 Conversion, 1430 Conversion,
1414 Xmad, 1431 Xmad,
1415 Unknown, 1432 Unknown,
@@ -1613,6 +1630,7 @@ private:
1613 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), 1630 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
1614 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), 1631 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
1615 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), 1632 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
1633 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
1616 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), 1634 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
1617 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), 1635 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
1618 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), 1636 INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
@@ -1647,6 +1665,7 @@ private:
1647 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), 1665 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
1648 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 1666 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
1649 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), 1667 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
1668 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
1650 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), 1669 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
1651 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), 1670 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
1652 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), 1671 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a0e015c4b..99c34649f 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -62,7 +62,16 @@ struct Header {
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32] 63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor 64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 65 union {
66 BitField<0, 8, u16> clip_distances;
67 BitField<8, 1, u16> point_sprite_s;
68 BitField<9, 1, u16> point_sprite_t;
69 BitField<10, 1, u16> fog_coordinate;
70 BitField<12, 1, u16> tessellation_eval_point_u;
71 BitField<13, 1, u16> tessellation_eval_point_v;
72 BitField<14, 1, u16> instance_id;
73 BitField<15, 1, u16> vertex_id;
74 };
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] 75 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved 76 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA 77 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 77a20bb84..47247f097 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,6 +9,13 @@
9 9
10namespace Tegra { 10namespace Tegra {
11 11
12MemoryManager::MemoryManager() {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
15 // Undertale using 0 for a render target.
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
17}
18
12GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
13 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
14 21
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 4eb338aa2..fb03497ca 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -18,7 +18,7 @@ using GPUVAddr = u64;
18 18
19class MemoryManager final { 19class MemoryManager final {
20public: 20public:
21 MemoryManager() = default; 21 MemoryManager();
22 22
23 GPUVAddr AllocateSpace(u64 size, u64 align); 23 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
@@ -37,6 +37,7 @@ private:
37 enum class PageStatus : u64 { 37 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 38 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 39 Allocated = 0xFFFFFFFFFFFFFFFEULL,
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
40 }; 41 };
41 42
42 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
new file mode 100644
index 000000000..f14abba7d
--- /dev/null
+++ b/src/video_core/morton.cpp
@@ -0,0 +1,353 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace VideoCore {
15
16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat;
18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21
22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u8* buffer, std::size_t buffer_size, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
28 // pixel values.
29 const u32 tile_size_x{GetDefaultBlockWidth(format)};
30 const u32 tile_size_y{GetDefaultBlockHeight(format)};
31
32 if constexpr (morton_to_linear) {
33 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
34 stride, height, depth, block_height, block_depth);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
39 buffer, false, block_height, block_depth);
40 }
41}
42
43static constexpr ConversionArray morton_to_linear_fns = {
44 // clang-format off
45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::G8R8U>,
69 MortonCopy<true, PixelFormat::G8R8S>,
70 MortonCopy<true, PixelFormat::BGRA8>,
71 MortonCopy<true, PixelFormat::RGBA32F>,
72 MortonCopy<true, PixelFormat::RG32F>,
73 MortonCopy<true, PixelFormat::R32F>,
74 MortonCopy<true, PixelFormat::R16F>,
75 MortonCopy<true, PixelFormat::R16U>,
76 MortonCopy<true, PixelFormat::R16S>,
77 MortonCopy<true, PixelFormat::R16UI>,
78 MortonCopy<true, PixelFormat::R16I>,
79 MortonCopy<true, PixelFormat::RG16>,
80 MortonCopy<true, PixelFormat::RG16F>,
81 MortonCopy<true, PixelFormat::RG16UI>,
82 MortonCopy<true, PixelFormat::RG16I>,
83 MortonCopy<true, PixelFormat::RG16S>,
84 MortonCopy<true, PixelFormat::RGB32F>,
85 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
86 MortonCopy<true, PixelFormat::RG8U>,
87 MortonCopy<true, PixelFormat::RG8S>,
88 MortonCopy<true, PixelFormat::RG32UI>,
89 MortonCopy<true, PixelFormat::R32UI>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
93 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
94 MortonCopy<true, PixelFormat::DXT1_SRGB>,
95 MortonCopy<true, PixelFormat::DXT23_SRGB>,
96 MortonCopy<true, PixelFormat::DXT45_SRGB>,
97 MortonCopy<true, PixelFormat::BC7U_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
103 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
105 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
106 MortonCopy<true, PixelFormat::Z32F>,
107 MortonCopy<true, PixelFormat::Z16>,
108 MortonCopy<true, PixelFormat::Z24S8>,
109 MortonCopy<true, PixelFormat::S8Z24>,
110 MortonCopy<true, PixelFormat::Z32FS8>,
111 // clang-format on
112};
113
114static constexpr ConversionArray linear_to_morton_fns = {
115 // clang-format off
116 MortonCopy<false, PixelFormat::ABGR8U>,
117 MortonCopy<false, PixelFormat::ABGR8S>,
118 MortonCopy<false, PixelFormat::ABGR8UI>,
119 MortonCopy<false, PixelFormat::B5G6R5U>,
120 MortonCopy<false, PixelFormat::A2B10G10R10U>,
121 MortonCopy<false, PixelFormat::A1B5G5R5U>,
122 MortonCopy<false, PixelFormat::R8U>,
123 MortonCopy<false, PixelFormat::R8UI>,
124 MortonCopy<false, PixelFormat::RGBA16F>,
125 MortonCopy<false, PixelFormat::RGBA16U>,
126 MortonCopy<false, PixelFormat::RGBA16UI>,
127 MortonCopy<false, PixelFormat::R11FG11FB10F>,
128 MortonCopy<false, PixelFormat::RGBA32UI>,
129 MortonCopy<false, PixelFormat::DXT1>,
130 MortonCopy<false, PixelFormat::DXT23>,
131 MortonCopy<false, PixelFormat::DXT45>,
132 MortonCopy<false, PixelFormat::DXN1>,
133 MortonCopy<false, PixelFormat::DXN2UNORM>,
134 MortonCopy<false, PixelFormat::DXN2SNORM>,
135 MortonCopy<false, PixelFormat::BC7U>,
136 MortonCopy<false, PixelFormat::BC6H_UF16>,
137 MortonCopy<false, PixelFormat::BC6H_SF16>,
138 // TODO(Subv): Swizzling ASTC formats are not supported
139 nullptr,
140 MortonCopy<false, PixelFormat::G8R8U>,
141 MortonCopy<false, PixelFormat::G8R8S>,
142 MortonCopy<false, PixelFormat::BGRA8>,
143 MortonCopy<false, PixelFormat::RGBA32F>,
144 MortonCopy<false, PixelFormat::RG32F>,
145 MortonCopy<false, PixelFormat::R32F>,
146 MortonCopy<false, PixelFormat::R16F>,
147 MortonCopy<false, PixelFormat::R16U>,
148 MortonCopy<false, PixelFormat::R16S>,
149 MortonCopy<false, PixelFormat::R16UI>,
150 MortonCopy<false, PixelFormat::R16I>,
151 MortonCopy<false, PixelFormat::RG16>,
152 MortonCopy<false, PixelFormat::RG16F>,
153 MortonCopy<false, PixelFormat::RG16UI>,
154 MortonCopy<false, PixelFormat::RG16I>,
155 MortonCopy<false, PixelFormat::RG16S>,
156 MortonCopy<false, PixelFormat::RGB32F>,
157 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
158 MortonCopy<false, PixelFormat::RG8U>,
159 MortonCopy<false, PixelFormat::RG8S>,
160 MortonCopy<false, PixelFormat::RG32UI>,
161 MortonCopy<false, PixelFormat::R32UI>,
162 nullptr,
163 nullptr,
164 nullptr,
165 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
166 MortonCopy<false, PixelFormat::DXT1_SRGB>,
167 MortonCopy<false, PixelFormat::DXT23_SRGB>,
168 MortonCopy<false, PixelFormat::DXT45_SRGB>,
169 MortonCopy<false, PixelFormat::BC7U_SRGB>,
170 nullptr,
171 nullptr,
172 nullptr,
173 nullptr,
174 nullptr,
175 nullptr,
176 nullptr,
177 nullptr,
178 MortonCopy<false, PixelFormat::Z32F>,
179 MortonCopy<false, PixelFormat::Z16>,
180 MortonCopy<false, PixelFormat::Z24S8>,
181 MortonCopy<false, PixelFormat::S8Z24>,
182 MortonCopy<false, PixelFormat::Z32FS8>,
183 // clang-format on
184};
185
186constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
187 switch (mode) {
188 case MortonSwizzleMode::MortonToLinear:
189 return morton_to_linear_fns[static_cast<std::size_t>(format)];
190 case MortonSwizzleMode::LinearToMorton:
191 return linear_to_morton_fns[static_cast<std::size_t>(format)];
192 }
193}
194
195/// 8x8 Z-Order coordinate from 2D coordinates
196static u32 MortonInterleave(u32 x, u32 y) {
197 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
198 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
199 return xlut[x % 8] + ylut[y % 8];
200}
201
202/// Calculates the offset of the position of the pixel in Morton order
203static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
204 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
205 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
206 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
207 // texels are laid out in a 2x2 subtile like this:
208 // 2 3
209 // 0 1
210 //
211 // The full 8x8 tile has the texels arranged like this:
212 //
213 // 42 43 46 47 58 59 62 63
214 // 40 41 44 45 56 57 60 61
215 // 34 35 38 39 50 51 54 55
216 // 32 33 36 37 48 49 52 53
217 // 10 11 14 15 26 27 30 31
218 // 08 09 12 13 24 25 28 29
219 // 02 03 06 07 18 19 22 23
220 // 00 01 04 05 16 17 20 21
221 //
222 // This pattern is what's called Z-order curve, or Morton order.
223
224 const unsigned int block_height = 8;
225 const unsigned int coarse_x = x & ~7;
226
227 u32 i = MortonInterleave(x, y);
228
229 const unsigned int offset = coarse_x * block_height;
230
231 return (i + offset) * bytes_per_pixel;
232}
233
234static u32 MortonInterleave128(u32 x, u32 y) {
235 // 128x128 Z-Order coordinate from 2D coordinates
236 static constexpr u32 xlut[] = {
237 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
238 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
239 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
240 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
241 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
242 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
243 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
244 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
245 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
246 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
247 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
248 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
249 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
250 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
251 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
252 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
253 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
254 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
255 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
256 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
257 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
258 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
259 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
260 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
261 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
262 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
263 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
264 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
265 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
266 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
267 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
268 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
269 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
270 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
271 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
272 };
273 static constexpr u32 ylut[] = {
274 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
275 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
276 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
277 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
278 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
279 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
280 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
281 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
282 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
283 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
284 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
285 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
286 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
287 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
288 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
289 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
290 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
291 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
292 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
293 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
294 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
295 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
296 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
297 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
298 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
299 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
300 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
301 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
302 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
303 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
304 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
305 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
306 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
307 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
308 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
309 };
310 return xlut[x % 128] + ylut[y % 128];
311}
312
313static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
314 // Calculates the offset of the position of the pixel in Morton order
315 // Framebuffer images are split into 128x128 tiles.
316
317 constexpr u32 block_height = 128;
318 const u32 coarse_x = x & ~127;
319
320 const u32 i = MortonInterleave128(x, y);
321
322 const u32 offset = coarse_x * block_height;
323
324 return (i + offset) * bytes_per_pixel;
325}
326
327void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
328 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
329 std::size_t buffer_size, VAddr addr) {
330
331 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer,
332 buffer_size, addr);
333}
334
335void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
336 u8* morton_data, u8* linear_data, bool morton_to_linear) {
337 u8* data_ptrs[2];
338 for (u32 y = 0; y < height; ++y) {
339 for (u32 x = 0; x < width; ++x) {
340 const u32 coarse_y = y & ~127;
341 const u32 morton_offset =
342 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
343 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
344
345 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
346 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
347
348 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
349 }
350 }
351}
352
353} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
new file mode 100644
index 000000000..b9b9eca86
--- /dev/null
+++ b/src/video_core/morton.h
@@ -0,0 +1,21 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
/// Direction of the conversion performed by MortonSwizzle: from the Morton (tiled) layout to a
/// linear layout, or from a linear layout back to the Morton layout.
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
/// Selects the swizzle routine for the given mode and pixel format and runs it with the remaining
/// arguments passed through unchanged.
///
/// NOTE(review): presumably `buffer`/`buffer_size` describe the linear-side data and `addr` is the
/// guest address of the tiled data; confirm against the routines this dispatches to.
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
16 std::size_t buffer_size, VAddr addr);
17
/// Copies a width x height image pixel by pixel between a Morton-ordered buffer made of 128x128
/// tiles (`morton_data`) and a linear row-major buffer (`linear_data`).
/// `morton_to_linear` selects the direction: true copies Morton -> linear, false the reverse.
/// `bytes_per_pixel` bytes are copied per pixel; `linear_bytes_per_pixel` is the pixel stride
/// used when indexing the linear buffer.
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear);
20
21} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 4f434fc31..d458f77e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
15#include "core/memory.h" 15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/morton.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 21#include "video_core/renderer_opengl/gl_state.h"
@@ -22,10 +23,11 @@
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25#include "video_core/utils.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
28 28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
29using VideoCore::Surface::ComponentTypeFromDepthFormat; 31using VideoCore::Surface::ComponentTypeFromDepthFormat;
30using VideoCore::Surface::ComponentTypeFromRenderTarget; 32using VideoCore::Surface::ComponentTypeFromRenderTarget;
31using VideoCore::Surface::ComponentTypeFromTexture; 33using VideoCore::Surface::ComponentTypeFromTexture;
@@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
370 return {0, actual_height, MipWidth(mip_level), 0}; 372 return {0, actual_height, MipWidth(mip_level), 0};
371} 373}
372 374
373template <bool morton_to_gl, PixelFormat format> 375void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
374void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
375 std::size_t gl_buffer_size, VAddr addr) {
376 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
377
378 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
379 // pixel values.
380 const u32 tile_size_x{GetDefaultBlockWidth(format)};
381 const u32 tile_size_y{GetDefaultBlockHeight(format)};
382
383 if (morton_to_gl) {
384 Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
385 stride, height, depth, block_height, block_depth);
386 } else {
387 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
388 (height + tile_size_y - 1) / tile_size_y, depth,
389 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
390 gl_buffer, false, block_height, block_depth);
391 }
392}
393
394using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
395 VideoCore::Surface::MaxPixelFormat>;
396
397static constexpr GLConversionArray morton_to_gl_fns = {
398 // clang-format off
399 MortonCopy<true, PixelFormat::ABGR8U>,
400 MortonCopy<true, PixelFormat::ABGR8S>,
401 MortonCopy<true, PixelFormat::ABGR8UI>,
402 MortonCopy<true, PixelFormat::B5G6R5U>,
403 MortonCopy<true, PixelFormat::A2B10G10R10U>,
404 MortonCopy<true, PixelFormat::A1B5G5R5U>,
405 MortonCopy<true, PixelFormat::R8U>,
406 MortonCopy<true, PixelFormat::R8UI>,
407 MortonCopy<true, PixelFormat::RGBA16F>,
408 MortonCopy<true, PixelFormat::RGBA16U>,
409 MortonCopy<true, PixelFormat::RGBA16UI>,
410 MortonCopy<true, PixelFormat::R11FG11FB10F>,
411 MortonCopy<true, PixelFormat::RGBA32UI>,
412 MortonCopy<true, PixelFormat::DXT1>,
413 MortonCopy<true, PixelFormat::DXT23>,
414 MortonCopy<true, PixelFormat::DXT45>,
415 MortonCopy<true, PixelFormat::DXN1>,
416 MortonCopy<true, PixelFormat::DXN2UNORM>,
417 MortonCopy<true, PixelFormat::DXN2SNORM>,
418 MortonCopy<true, PixelFormat::BC7U>,
419 MortonCopy<true, PixelFormat::BC6H_UF16>,
420 MortonCopy<true, PixelFormat::BC6H_SF16>,
421 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
422 MortonCopy<true, PixelFormat::G8R8U>,
423 MortonCopy<true, PixelFormat::G8R8S>,
424 MortonCopy<true, PixelFormat::BGRA8>,
425 MortonCopy<true, PixelFormat::RGBA32F>,
426 MortonCopy<true, PixelFormat::RG32F>,
427 MortonCopy<true, PixelFormat::R32F>,
428 MortonCopy<true, PixelFormat::R16F>,
429 MortonCopy<true, PixelFormat::R16U>,
430 MortonCopy<true, PixelFormat::R16S>,
431 MortonCopy<true, PixelFormat::R16UI>,
432 MortonCopy<true, PixelFormat::R16I>,
433 MortonCopy<true, PixelFormat::RG16>,
434 MortonCopy<true, PixelFormat::RG16F>,
435 MortonCopy<true, PixelFormat::RG16UI>,
436 MortonCopy<true, PixelFormat::RG16I>,
437 MortonCopy<true, PixelFormat::RG16S>,
438 MortonCopy<true, PixelFormat::RGB32F>,
439 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
440 MortonCopy<true, PixelFormat::RG8U>,
441 MortonCopy<true, PixelFormat::RG8S>,
442 MortonCopy<true, PixelFormat::RG32UI>,
443 MortonCopy<true, PixelFormat::R32UI>,
444 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
445 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
446 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
447 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
448 MortonCopy<true, PixelFormat::DXT1_SRGB>,
449 MortonCopy<true, PixelFormat::DXT23_SRGB>,
450 MortonCopy<true, PixelFormat::DXT45_SRGB>,
451 MortonCopy<true, PixelFormat::BC7U_SRGB>,
452 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
453 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
454 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
455 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
456 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
457 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
458 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
459 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
460 MortonCopy<true, PixelFormat::Z32F>,
461 MortonCopy<true, PixelFormat::Z16>,
462 MortonCopy<true, PixelFormat::Z24S8>,
463 MortonCopy<true, PixelFormat::S8Z24>,
464 MortonCopy<true, PixelFormat::Z32FS8>,
465 // clang-format on
466};
467
468static constexpr GLConversionArray gl_to_morton_fns = {
469 // clang-format off
470 MortonCopy<false, PixelFormat::ABGR8U>,
471 MortonCopy<false, PixelFormat::ABGR8S>,
472 MortonCopy<false, PixelFormat::ABGR8UI>,
473 MortonCopy<false, PixelFormat::B5G6R5U>,
474 MortonCopy<false, PixelFormat::A2B10G10R10U>,
475 MortonCopy<false, PixelFormat::A1B5G5R5U>,
476 MortonCopy<false, PixelFormat::R8U>,
477 MortonCopy<false, PixelFormat::R8UI>,
478 MortonCopy<false, PixelFormat::RGBA16F>,
479 MortonCopy<false, PixelFormat::RGBA16U>,
480 MortonCopy<false, PixelFormat::RGBA16UI>,
481 MortonCopy<false, PixelFormat::R11FG11FB10F>,
482 MortonCopy<false, PixelFormat::RGBA32UI>,
483 MortonCopy<false, PixelFormat::DXT1>,
484 MortonCopy<false, PixelFormat::DXT23>,
485 MortonCopy<false, PixelFormat::DXT45>,
486 MortonCopy<false, PixelFormat::DXN1>,
487 MortonCopy<false, PixelFormat::DXN2UNORM>,
488 MortonCopy<false, PixelFormat::DXN2SNORM>,
489 MortonCopy<false, PixelFormat::BC7U>,
490 MortonCopy<false, PixelFormat::BC6H_UF16>,
491 MortonCopy<false, PixelFormat::BC6H_SF16>,
492 // TODO(Subv): Swizzling ASTC formats are not supported
493 nullptr,
494 MortonCopy<false, PixelFormat::G8R8U>,
495 MortonCopy<false, PixelFormat::G8R8S>,
496 MortonCopy<false, PixelFormat::BGRA8>,
497 MortonCopy<false, PixelFormat::RGBA32F>,
498 MortonCopy<false, PixelFormat::RG32F>,
499 MortonCopy<false, PixelFormat::R32F>,
500 MortonCopy<false, PixelFormat::R16F>,
501 MortonCopy<false, PixelFormat::R16U>,
502 MortonCopy<false, PixelFormat::R16S>,
503 MortonCopy<false, PixelFormat::R16UI>,
504 MortonCopy<false, PixelFormat::R16I>,
505 MortonCopy<false, PixelFormat::RG16>,
506 MortonCopy<false, PixelFormat::RG16F>,
507 MortonCopy<false, PixelFormat::RG16UI>,
508 MortonCopy<false, PixelFormat::RG16I>,
509 MortonCopy<false, PixelFormat::RG16S>,
510 MortonCopy<false, PixelFormat::RGB32F>,
511 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
512 MortonCopy<false, PixelFormat::RG8U>,
513 MortonCopy<false, PixelFormat::RG8S>,
514 MortonCopy<false, PixelFormat::RG32UI>,
515 MortonCopy<false, PixelFormat::R32UI>,
516 nullptr,
517 nullptr,
518 nullptr,
519 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
520 MortonCopy<false, PixelFormat::DXT1_SRGB>,
521 MortonCopy<false, PixelFormat::DXT23_SRGB>,
522 MortonCopy<false, PixelFormat::DXT45_SRGB>,
523 MortonCopy<false, PixelFormat::BC7U_SRGB>,
524 nullptr,
525 nullptr,
526 nullptr,
527 nullptr,
528 nullptr,
529 nullptr,
530 nullptr,
531 nullptr,
532 MortonCopy<false, PixelFormat::Z32F>,
533 MortonCopy<false, PixelFormat::Z16>,
534 MortonCopy<false, PixelFormat::Z24S8>,
535 MortonCopy<false, PixelFormat::S8Z24>,
536 MortonCopy<false, PixelFormat::Z32FS8>,
537 // clang-format on
538};
539
540void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
541 std::vector<u8>& gl_buffer, u32 mip_level) { 376 std::vector<u8>& gl_buffer, u32 mip_level) {
542 u32 depth = params.MipDepth(mip_level); 377 u32 depth = params.MipDepth(mip_level);
543 if (params.target == SurfaceTarget::Texture2D) { 378 if (params.target == SurfaceTarget::Texture2D) {
@@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
550 const u64 layer_size = params.LayerMemorySize(); 385 const u64 layer_size = params.LayerMemorySize();
551 const u64 gl_size = params.LayerSizeGL(mip_level); 386 const u64 gl_size = params.LayerSizeGL(mip_level);
552 for (u32 i = 0; i < params.depth; i++) { 387 for (u32 i = 0; i < params.depth; i++) {
553 functions[static_cast<std::size_t>(params.pixel_format)]( 388 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
554 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 389 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
555 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, 390 params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
556 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 391 params.addr + offset);
557 offset += layer_size; 392 offset += layer_size;
558 offset_gl += gl_size; 393 offset_gl += gl_size;
559 } 394 }
560 } else { 395 } else {
561 const u64 offset = params.GetMipmapLevelOffset(mip_level); 396 const u64 offset = params.GetMipmapLevelOffset(mip_level);
562 functions[static_cast<std::size_t>(params.pixel_format)]( 397 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
563 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 398 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
564 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), 399 params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
565 gl_buffer.size(), params.addr + offset); 400 params.addr + offset);
566 } 401 }
567} 402}
568 403
@@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() {
996 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
997 params.block_width, static_cast<u32>(params.target)); 832 params.block_width, static_cast<u32>(params.target));
998 for (u32 i = 0; i < params.max_mip_level; i++) 833 for (u32 i = 0; i < params.max_mip_level; i++)
999 SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); 834 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
1000 } else { 835 } else {
1001 const auto texture_src_data{Memory::GetPointer(params.addr)}; 836 const auto texture_src_data{Memory::GetPointer(params.addr)};
1002 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 837 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() {
1035 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 870 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
1036 params.block_width, static_cast<u32>(params.target)); 871 params.block_width, static_cast<u32>(params.target));
1037 872
1038 SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); 873 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
1039 } else { 874 } else {
1040 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 875 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
1041 } 876 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a9d29501f..3d4d0f23e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -500,27 +500,42 @@ public:
500 const Register& buf_reg) { 500 const Register& buf_reg) {
501 const std::string dest = GetOutputAttribute(attribute); 501 const std::string dest = GetOutputAttribute(attribute);
502 const std::string src = GetRegisterAsFloat(val_reg); 502 const std::string src = GetRegisterAsFloat(val_reg);
503 if (dest.empty())
504 return;
503 505
504 if (!dest.empty()) { 506 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
505 // Can happen with unknown/unimplemented output attributes, in which case we ignore the 507 // instruction for now.
506 // instruction for now. 508 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
507 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { 509 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
508 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry 510 // shader. These instructions use a dirty register as buffer index, to avoid some
509 // shader. These instructions use a dirty register as buffer index, to avoid some 511 // drivers from complaining about out of boundary writes, guard them.
510 // drivers from complaining about out of boundary writes, guard them. 512 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
511 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + 513 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
512 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; 514 shader.AddLine("amem[" + buf_index + "][" +
513 shader.AddLine("amem[" + buf_index + "][" + 515 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
514 std::to_string(static_cast<u32>(attribute)) + ']' + 516 " = " + src + ';');
515 GetSwizzle(elem) + " = " + src + ';'); 517 return;
516 } else { 518 }
517 if (attribute == Attribute::Index::PointSize) { 519
518 fixed_pipeline_output_attributes_used.insert(attribute); 520 switch (attribute) {
519 shader.AddLine(dest + " = " + src + ';'); 521 case Attribute::Index::ClipDistances0123:
520 } else { 522 case Attribute::Index::ClipDistances4567: {
521 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); 523 const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem;
522 } 524 UNIMPLEMENTED_IF_MSG(
523 } 525 ((header.vtg.clip_distances >> index) & 1) == 0,
526 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
527
528 fixed_pipeline_output_attributes_used.insert(attribute);
529 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
530 break;
531 }
532 case Attribute::Index::PointSize:
533 fixed_pipeline_output_attributes_used.insert(attribute);
534 shader.AddLine(dest + " = " + src + ';');
535 break;
536 default:
537 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
538 break;
524 } 539 }
525 } 540 }
526 541
@@ -740,12 +755,19 @@ private:
740 void GenerateVertex() { 755 void GenerateVertex() {
741 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) 756 if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
742 return; 757 return;
758 bool clip_distances_declared = false;
759
743 declarations.AddLine("out gl_PerVertex {"); 760 declarations.AddLine("out gl_PerVertex {");
744 ++declarations.scope; 761 ++declarations.scope;
745 declarations.AddLine("vec4 gl_Position;"); 762 declarations.AddLine("vec4 gl_Position;");
746 for (auto& o : fixed_pipeline_output_attributes_used) { 763 for (auto& o : fixed_pipeline_output_attributes_used) {
747 if (o == Attribute::Index::PointSize) 764 if (o == Attribute::Index::PointSize)
748 declarations.AddLine("float gl_PointSize;"); 765 declarations.AddLine("float gl_PointSize;");
766 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
767 o == Attribute::Index::ClipDistances4567)) {
768 declarations.AddLine("float gl_ClipDistance[];");
769 clip_distances_declared = true;
770 }
749 } 771 }
750 --declarations.scope; 772 --declarations.scope;
751 declarations.AddLine("};"); 773 declarations.AddLine("};");
@@ -917,6 +939,10 @@ private:
917 return "gl_PointSize"; 939 return "gl_PointSize";
918 case Attribute::Index::Position: 940 case Attribute::Index::Position:
919 return "position"; 941 return "position";
942 case Attribute::Index::ClipDistances0123:
943 case Attribute::Index::ClipDistances4567: {
944 return "gl_ClipDistance";
945 }
920 default: 946 default:
921 const u32 index{static_cast<u32>(attribute) - 947 const u32 index{static_cast<u32>(attribute) -
922 static_cast<u32>(Attribute::Index::Attribute_0)}; 948 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -1267,7 +1293,15 @@ private:
1267 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); 1293 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
1268 } 1294 }
1269 1295
1270 void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { 1296 void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
1297 const std::string& texture) {
1298 // Add an extra scope and declare the texture coords inside to prevent
1299 // overwriting them in case they are used as outputs of the texs instruction.
1300 shader.AddLine('{');
1301 ++shader.scope;
1302 shader.AddLine(coord);
1303 shader.AddLine("vec4 texture_tmp = " + texture + ';');
1304
1271 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 1305 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
1272 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 1306 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
1273 1307
@@ -1279,17 +1313,19 @@ private:
1279 1313
1280 if (written_components < 2) { 1314 if (written_components < 2) {
1281 // Write the first two swizzle components to gpr0 and gpr0+1 1315 // Write the first two swizzle components to gpr0 and gpr0+1
1282 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, 1316 regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
1283 written_components % 2); 1317 written_components % 2);
1284 } else { 1318 } else {
1285 ASSERT(instr.texs.HasTwoDestinations()); 1319 ASSERT(instr.texs.HasTwoDestinations());
1286 // Write the rest of the swizzle components to gpr28 and gpr28+1 1320 // Write the rest of the swizzle components to gpr28 and gpr28+1
1287 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, 1321 regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
1288 written_components % 2); 1322 written_components % 2);
1289 } 1323 }
1290 1324
1291 ++written_components; 1325 ++written_components;
1292 } 1326 }
1327 --shader.scope;
1328 shader.AddLine('}');
1293 } 1329 }
1294 1330
1295 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { 1331 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -1686,6 +1722,26 @@ private:
1686 1722
1687 break; 1723 break;
1688 } 1724 }
1725 case OpCode::Type::Bfi: {
1726 UNIMPLEMENTED_IF(instr.generates_cc);
1727
1728 const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> {
1729 switch (opcode->get().GetId()) {
1730 case OpCode::Id::BFI_IMM_R:
1731 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
1732 std::to_string(instr.alu.GetSignedImm20_20())};
1733 default:
1734 UNREACHABLE();
1735 }
1736 }();
1737 const std::string offset = '(' + packed_shift + " & 0xff)";
1738 const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
1739 const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
1740 regs.SetRegisterToInteger(
1741 instr.gpr0, false, 0,
1742 "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1);
1743 break;
1744 }
1689 case OpCode::Type::Shift: { 1745 case OpCode::Type::Shift: {
1690 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); 1746 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
1691 std::string op_b; 1747 std::string op_b;
@@ -2511,61 +2567,83 @@ private:
2511 const bool depth_compare = 2567 const bool depth_compare =
2512 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2568 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2513 u32 num_coordinates = TextureCoordinates(texture_type); 2569 u32 num_coordinates = TextureCoordinates(texture_type);
2514 if (depth_compare) 2570 u32 start_index = 0;
2515 num_coordinates += 1; 2571 std::string array_elem;
2572 if (is_array) {
2573 array_elem = regs.GetRegisterAsInteger(instr.gpr8);
2574 start_index = 1;
2575 }
2576 const auto process_mode = instr.tex.GetTextureProcessMode();
2577 u32 start_index_b = 0;
2578 std::string lod_value;
2579 if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
2580 process_mode != Tegra::Shader::TextureProcessMode::None) {
2581 start_index_b = 1;
2582 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2583 }
2584
2585 std::string depth_value;
2586 if (depth_compare) {
2587 depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
2588 }
2589
2590 bool depth_compare_extra = false;
2516 2591
2517 switch (num_coordinates) { 2592 switch (num_coordinates) {
2518 case 1: { 2593 case 1: {
2594 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2519 if (is_array) { 2595 if (is_array) {
2520 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2596 if (depth_compare) {
2521 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2597 coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
2522 coord = "vec2 coords = vec2(" + x + ", " + index + ");"; 2598 array_elem + ");";
2599 } else {
2600 coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
2601 }
2523 } else { 2602 } else {
2524 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2603 if (depth_compare) {
2525 coord = "float coords = " + x + ';'; 2604 coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
2605 } else {
2606 coord = "float coords = " + x + ';';
2607 }
2526 } 2608 }
2527 break; 2609 break;
2528 } 2610 }
2529 case 2: { 2611 case 2: {
2612 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2613 const std::string y =
2614 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2530 if (is_array) { 2615 if (is_array) {
2531 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2616 if (depth_compare) {
2532 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2617 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
2533 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2618 ", " + array_elem + ");";
2534 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; 2619 } else {
2620 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
2621 }
2535 } else { 2622 } else {
2536 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2623 if (depth_compare) {
2537 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2624 coord =
2538 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2625 "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
2626 } else {
2627 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2628 }
2539 } 2629 }
2540 break; 2630 break;
2541 } 2631 }
2542 case 3: { 2632 case 3: {
2543 if (depth_compare) { 2633 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2544 if (is_array) { 2634 const std::string y =
2545 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2635 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2546 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2636 const std::string z =
2547 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2637 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
2548 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2638 if (is_array) {
2549 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + 2639 depth_compare_extra = depth_compare;
2550 ");"; 2640 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2551 } else { 2641 array_elem + ");";
2552 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2553 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2554 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2555 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2556 }
2557 } else { 2642 } else {
2558 if (is_array) { 2643 if (depth_compare) {
2559 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2644 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2560 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2645 depth_value + ");";
2561 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2562 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
2563 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2564 ");";
2565 } else { 2646 } else {
2566 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2567 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2568 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2569 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; 2647 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2570 } 2648 }
2571 } 2649 }
@@ -2581,82 +2659,85 @@ private:
2581 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2659 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2582 texture_type = Tegra::Shader::TextureType::Texture2D; 2660 texture_type = Tegra::Shader::TextureType::Texture2D;
2583 } 2661 }
2584 // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
2585 // or lod.
2586 2662
2587 const std::string sampler = 2663 const std::string sampler =
2588 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2664 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2589 // Add an extra scope and declare the texture coords inside to prevent 2665 // Add an extra scope and declare the texture coords inside to prevent
2590 // overwriting them in case they are used as outputs of the texs instruction. 2666 // overwriting them in case they are used as outputs of the texs instruction.
2591 2667
2592 shader.AddLine("{"); 2668 shader.AddLine('{');
2593 ++shader.scope; 2669 ++shader.scope;
2594 shader.AddLine(coord); 2670 shader.AddLine(coord);
2595 std::string texture; 2671 std::string texture;
2596 2672
2597 switch (instr.tex.GetTextureProcessMode()) { 2673 switch (instr.tex.GetTextureProcessMode()) {
2598 case Tegra::Shader::TextureProcessMode::None: { 2674 case Tegra::Shader::TextureProcessMode::None: {
2599 texture = "texture(" + sampler + ", coords)"; 2675 if (!depth_compare_extra) {
2676 texture = "texture(" + sampler + ", coords)";
2677 } else {
2678 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2679 }
2600 break; 2680 break;
2601 } 2681 }
2602 case Tegra::Shader::TextureProcessMode::LZ: { 2682 case Tegra::Shader::TextureProcessMode::LZ: {
2603 texture = "textureLod(" + sampler + ", coords, 0.0)"; 2683 if (!depth_compare_extra) {
2684 texture = "textureLod(" + sampler + ", coords, 0.0)";
2685 } else {
2686 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2687 }
2604 break; 2688 break;
2605 } 2689 }
2606 case Tegra::Shader::TextureProcessMode::LB: 2690 case Tegra::Shader::TextureProcessMode::LB:
2607 case Tegra::Shader::TextureProcessMode::LBA: { 2691 case Tegra::Shader::TextureProcessMode::LBA: {
2608 const std::string bias = [&]() {
2609 if (depth_compare) {
2610 if (is_array)
2611 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 2);
2612 else
2613 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2614 } else {
2615 return regs.GetRegisterAsFloat(instr.gpr20);
2616 }
2617 }();
2618 shader.AddLine("float bias = " + bias + ';');
2619
2620 // TODO: Figure if A suffix changes the equation at all. 2692 // TODO: Figure if A suffix changes the equation at all.
2621 texture = "texture(" + sampler + ", coords, bias)"; 2693 if (!depth_compare_extra) {
2694 texture = "texture(" + sampler + ", coords, " + lod_value + ')';
2695 } else {
2696 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2697 LOG_WARNING(HW_GPU,
2698 "OpenGL Limitation: can't set bias value along depth compare");
2699 }
2622 break; 2700 break;
2623 } 2701 }
2624 case Tegra::Shader::TextureProcessMode::LL: 2702 case Tegra::Shader::TextureProcessMode::LL:
2625 case Tegra::Shader::TextureProcessMode::LLA: { 2703 case Tegra::Shader::TextureProcessMode::LLA: {
2626 const std::string lod = [&]() {
2627 if (num_coordinates <= 2) {
2628 return regs.GetRegisterAsFloat(instr.gpr20);
2629 } else {
2630 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2631 }
2632 }();
2633 shader.AddLine("float lod = " + lod + ';');
2634
2635 // TODO: Figure if A suffix changes the equation at all. 2704 // TODO: Figure if A suffix changes the equation at all.
2636 texture = "textureLod(" + sampler + ", coords, lod)"; 2705 if (!depth_compare_extra) {
2706 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2707 } else {
2708 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2709 LOG_WARNING(HW_GPU,
2710 "OpenGL Limitation: can't set lod value along depth compare");
2711 }
2637 break; 2712 break;
2638 } 2713 }
2639 default: { 2714 default: {
2640 texture = "texture(" + sampler + ", coords)"; 2715 if (!depth_compare_extra) {
2716 texture = "texture(" + sampler + ", coords)";
2717 } else {
2718 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2719 }
2641 UNIMPLEMENTED_MSG("Unhandled texture process mode {}", 2720 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2642 static_cast<u32>(instr.tex.GetTextureProcessMode())); 2721 static_cast<u32>(instr.tex.GetTextureProcessMode()));
2643 } 2722 }
2644 } 2723 }
2645 if (!depth_compare) { 2724 if (!depth_compare) {
2725 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2646 std::size_t dest_elem{}; 2726 std::size_t dest_elem{};
2647 for (std::size_t elem = 0; elem < 4; ++elem) { 2727 for (std::size_t elem = 0; elem < 4; ++elem) {
2648 if (!instr.tex.IsComponentEnabled(elem)) { 2728 if (!instr.tex.IsComponentEnabled(elem)) {
2649 // Skip disabled components 2729 // Skip disabled components
2650 continue; 2730 continue;
2651 } 2731 }
2652 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2732 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2733 dest_elem);
2653 ++dest_elem; 2734 ++dest_elem;
2654 } 2735 }
2655 } else { 2736 } else {
2656 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); 2737 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
2657 } 2738 }
2658 --shader.scope; 2739 --shader.scope;
2659 shader.AddLine("}"); 2740 shader.AddLine('}');
2660 break; 2741 break;
2661 } 2742 }
2662 case OpCode::Id::TEXS: { 2743 case OpCode::Id::TEXS: {
@@ -2669,41 +2750,76 @@ private:
2669 const bool depth_compare = 2750 const bool depth_compare =
2670 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2751 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2671 u32 num_coordinates = TextureCoordinates(texture_type); 2752 u32 num_coordinates = TextureCoordinates(texture_type);
2672 if (depth_compare) 2753 const auto process_mode = instr.texs.GetTextureProcessMode();
2673 num_coordinates += 1; 2754 std::string lod_value;
2674 2755 std::string coord;
2675 // Scope to avoid variable name overlaps. 2756 u32 lod_offset = 0;
2676 shader.AddLine('{'); 2757 if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
2677 ++shader.scope; 2758 if (num_coordinates > 2) {
2759 lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2760 lod_offset = 2;
2761 } else {
2762 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2763 lod_offset = 1;
2764 }
2765 }
2678 2766
2679 switch (num_coordinates) { 2767 switch (num_coordinates) {
2768 case 1: {
2769 coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
2770 break;
2771 }
2680 case 2: { 2772 case 2: {
2681 if (is_array) { 2773 if (is_array) {
2682 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2774 if (depth_compare) {
2683 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2775 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2684 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2776 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2685 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"); 2777 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2778 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2779 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2780 ");";
2781 } else {
2782 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2783 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2784 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2785 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
2786 }
2686 } else { 2787 } else {
2687 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2788 if (lod_offset != 0) {
2688 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2789 if (depth_compare) {
2689 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2790 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2791 const std::string y =
2792 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2793 const std::string z =
2794 regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
2795 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2796 } else {
2797 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2798 const std::string y =
2799 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2800 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2801 }
2802 } else {
2803 if (depth_compare) {
2804 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2805 const std::string y =
2806 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2807 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2808 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2809 } else {
2810 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2811 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2812 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2813 }
2814 }
2690 } 2815 }
2691 break; 2816 break;
2692 } 2817 }
2693 case 3: { 2818 case 3: {
2694 if (is_array) { 2819 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2695 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2820 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2696 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2821 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2697 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2822 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2698 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2699 shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2700 index + ");");
2701 } else {
2702 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2703 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2704 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2705 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
2706 }
2707 break; 2823 break;
2708 } 2824 }
2709 default: 2825 default:
@@ -2713,14 +2829,14 @@ private:
2713 // Fallback to interpreting as a 2D texture for now 2829 // Fallback to interpreting as a 2D texture for now
2714 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2830 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2715 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2831 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2716 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2832 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2717 texture_type = Tegra::Shader::TextureType::Texture2D; 2833 texture_type = Tegra::Shader::TextureType::Texture2D;
2718 is_array = false; 2834 is_array = false;
2719 } 2835 }
2720 const std::string sampler = 2836 const std::string sampler =
2721 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2837 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2722 std::string texture; 2838 std::string texture;
2723 switch (instr.texs.GetTextureProcessMode()) { 2839 switch (process_mode) {
2724 case Tegra::Shader::TextureProcessMode::None: { 2840 case Tegra::Shader::TextureProcessMode::None: {
2725 texture = "texture(" + sampler + ", coords)"; 2841 texture = "texture(" + sampler + ", coords)";
2726 break; 2842 break;
@@ -2734,8 +2850,7 @@ private:
2734 break; 2850 break;
2735 } 2851 }
2736 case Tegra::Shader::TextureProcessMode::LL: { 2852 case Tegra::Shader::TextureProcessMode::LL: {
2737 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2853 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2738 texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
2739 break; 2854 break;
2740 } 2855 }
2741 default: { 2856 default: {
@@ -2745,13 +2860,11 @@ private:
2745 } 2860 }
2746 } 2861 }
2747 if (!depth_compare) { 2862 if (!depth_compare) {
2748 WriteTexsInstruction(instr, texture); 2863 WriteTexsInstruction(instr, coord, texture);
2749 } else { 2864 } else {
2750 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 2865 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
2751 } 2866 }
2752 2867
2753 shader.AddLine('}');
2754 --shader.scope;
2755 break; 2868 break;
2756 } 2869 }
2757 case OpCode::Id::TLDS: { 2870 case OpCode::Id::TLDS: {
@@ -2773,11 +2886,12 @@ private:
2773 // Scope to avoid variable name overlaps. 2886 // Scope to avoid variable name overlaps.
2774 shader.AddLine('{'); 2887 shader.AddLine('{');
2775 ++shader.scope; 2888 ++shader.scope;
2889 std::string coords;
2776 2890
2777 switch (texture_type) { 2891 switch (texture_type) {
2778 case Tegra::Shader::TextureType::Texture1D: { 2892 case Tegra::Shader::TextureType::Texture1D: {
2779 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2893 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2780 shader.AddLine("int coords = " + x + ';'); 2894 coords = "float coords = " + x + ';';
2781 break; 2895 break;
2782 } 2896 }
2783 case Tegra::Shader::TextureType::Texture2D: { 2897 case Tegra::Shader::TextureType::Texture2D: {
@@ -2785,7 +2899,8 @@ private:
2785 2899
2786 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2900 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2787 const std::string y = regs.GetRegisterAsInteger(instr.gpr20); 2901 const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
2788 shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); 2902 // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
2903 coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
2789 extra_op_offset = 1; 2904 extra_op_offset = 1;
2790 break; 2905 break;
2791 } 2906 }
@@ -2813,7 +2928,7 @@ private:
2813 static_cast<u32>(instr.tlds.GetTextureProcessMode())); 2928 static_cast<u32>(instr.tlds.GetTextureProcessMode()));
2814 } 2929 }
2815 } 2930 }
2816 WriteTexsInstruction(instr, texture); 2931 WriteTexsInstruction(instr, coords, texture);
2817 2932
2818 --shader.scope; 2933 --shader.scope;
2819 shader.AddLine('}'); 2934 shader.AddLine('}');
@@ -2872,14 +2987,17 @@ private:
2872 2987
2873 const std::string texture = "textureGather(" + sampler + ", coords, " + 2988 const std::string texture = "textureGather(" + sampler + ", coords, " +
2874 std::to_string(instr.tld4.component) + ')'; 2989 std::to_string(instr.tld4.component) + ')';
2990
2875 if (!depth_compare) { 2991 if (!depth_compare) {
2992 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2876 std::size_t dest_elem{}; 2993 std::size_t dest_elem{};
2877 for (std::size_t elem = 0; elem < 4; ++elem) { 2994 for (std::size_t elem = 0; elem < 4; ++elem) {
2878 if (!instr.tex.IsComponentEnabled(elem)) { 2995 if (!instr.tex.IsComponentEnabled(elem)) {
2879 // Skip disabled components 2996 // Skip disabled components
2880 continue; 2997 continue;
2881 } 2998 }
2882 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2999 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
3000 dest_elem);
2883 ++dest_elem; 3001 ++dest_elem;
2884 } 3002 }
2885 } else { 3003 } else {
@@ -2900,6 +3018,7 @@ private:
2900 // Scope to avoid variable name overlaps. 3018 // Scope to avoid variable name overlaps.
2901 shader.AddLine('{'); 3019 shader.AddLine('{');
2902 ++shader.scope; 3020 ++shader.scope;
3021 std::string coords;
2903 3022
2904 const bool depth_compare = 3023 const bool depth_compare =
2905 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 3024 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -2909,20 +3028,19 @@ private:
2909 const std::string sampler = GetSampler( 3028 const std::string sampler = GetSampler(
2910 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); 3029 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
2911 if (!depth_compare) { 3030 if (!depth_compare) {
2912 shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); 3031 coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
2913 } else { 3032 } else {
2914 // Note: TLD4S coordinate encoding works just like TEXS's 3033 // Note: TLD4S coordinate encoding works just like TEXS's
2915 shader.AddLine( 3034 const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2916 "float op_y = " + regs.GetRegisterAsFloat(instr.gpr8.Value() + 1) + ';'); 3035 coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
2917 shader.AddLine("vec3 coords = vec3(" + op_a + ", op_y, " + op_b + ");");
2918 } 3036 }
2919 const std::string texture = "textureGather(" + sampler + ", coords, " + 3037 const std::string texture = "textureGather(" + sampler + ", coords, " +
2920 std::to_string(instr.tld4s.component) + ')'; 3038 std::to_string(instr.tld4s.component) + ')';
2921 3039
2922 if (!depth_compare) { 3040 if (!depth_compare) {
2923 WriteTexsInstruction(instr, texture); 3041 WriteTexsInstruction(instr, coords, texture);
2924 } else { 3042 } else {
2925 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 3043 WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
2926 } 3044 }
2927 3045
2928 --shader.scope; 3046 --shader.scope;
@@ -3218,6 +3336,34 @@ private:
3218 } 3336 }
3219 break; 3337 break;
3220 } 3338 }
3339 case OpCode::Type::RegisterSetPredicate: {
3340 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3341
3342 const std::string apply_mask = [&]() {
3343 switch (opcode->get().GetId()) {
3344 case OpCode::Id::R2P_IMM:
3345 return std::to_string(instr.r2p.immediate_mask);
3346 default:
3347 UNREACHABLE();
3348 }
3349 }();
3350 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3351 " >> " + std::to_string(instr.r2p.byte) + ')';
3352
3353 constexpr u64 programmable_preds = 7;
3354 for (u64 pred = 0; pred < programmable_preds; ++pred) {
3355 const auto shift = std::to_string(1 << pred);
3356
3357 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
3358 ++shader.scope;
3359
3360 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
3361
3362 --shader.scope;
3363 shader.AddLine('}');
3364 }
3365 break;
3366 }
3221 case OpCode::Type::FloatSet: { 3367 case OpCode::Type::FloatSet: {
3222 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), 3368 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8),
3223 instr.fset.abs_a != 0, instr.fset.neg_a != 0); 3369 instr.fset.abs_a != 0, instr.fset.neg_a != 0);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1492e063a..4fd0d66c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -19,9 +19,9 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "core/telemetry_session.h" 20#include "core/telemetry_session.h"
21#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
22#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
24#include "video_core/utils.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27 27
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
deleted file mode 100644
index e0a14d48f..000000000
--- a/src/video_core/utils.h
+++ /dev/null
@@ -1,164 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore {
10
11// 8x8 Z-Order coordinate from 2D coordinates
12static inline u32 MortonInterleave(u32 x, u32 y) {
13 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
14 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
15 return xlut[x % 8] + ylut[y % 8];
16}
17
18/**
19 * Calculates the offset of the position of the pixel in Morton order
20 */
21static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
22 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
23 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
24 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
25 // texels are laid out in a 2x2 subtile like this:
26 // 2 3
27 // 0 1
28 //
29 // The full 8x8 tile has the texels arranged like this:
30 //
31 // 42 43 46 47 58 59 62 63
32 // 40 41 44 45 56 57 60 61
33 // 34 35 38 39 50 51 54 55
34 // 32 33 36 37 48 49 52 53
35 // 10 11 14 15 26 27 30 31
36 // 08 09 12 13 24 25 28 29
37 // 02 03 06 07 18 19 22 23
38 // 00 01 04 05 16 17 20 21
39 //
40 // This pattern is what's called Z-order curve, or Morton order.
41
42 const unsigned int block_height = 8;
43 const unsigned int coarse_x = x & ~7;
44
45 u32 i = VideoCore::MortonInterleave(x, y);
46
47 const unsigned int offset = coarse_x * block_height;
48
49 return (i + offset) * bytes_per_pixel;
50}
51
52static inline u32 MortonInterleave128(u32 x, u32 y) {
53 // 128x128 Z-Order coordinate from 2D coordinates
54 static constexpr u32 xlut[] = {
55 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
56 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
57 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
58 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
59 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
60 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
61 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
62 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
63 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
64 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
65 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
66 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
67 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
68 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
69 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
70 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
71 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
72 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
73 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
74 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
75 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
76 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
77 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
78 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
79 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
80 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
81 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
82 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
83 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
84 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
85 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
86 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
87 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
88 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
89 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
90 };
91 static constexpr u32 ylut[] = {
92 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
93 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
94 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
95 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
96 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
97 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
98 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
99 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
100 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
101 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
102 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
103 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
104 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
105 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
106 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
107 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
108 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
109 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
110 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
111 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
112 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
113 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
114 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
115 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
116 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
117 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
118 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
119 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
120 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
121 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
122 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
123 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
124 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
125 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
126 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
127 };
128 return xlut[x % 128] + ylut[y % 128];
129}
130
131static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
132 // Calculates the offset of the position of the pixel in Morton order
133 // Framebuffer images are split into 128x128 tiles.
134
135 const unsigned int block_height = 128;
136 const unsigned int coarse_x = x & ~127;
137
138 u32 i = MortonInterleave128(x, y);
139
140 const unsigned int offset = coarse_x * block_height;
141
142 return (i + offset) * bytes_per_pixel;
143}
144
145static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
146 u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
147 bool morton_to_gl) {
148 u8* data_ptrs[2];
149 for (unsigned y = 0; y < height; ++y) {
150 for (unsigned x = 0; x < width; ++x) {
151 const u32 coarse_y = y & ~127;
152 u32 morton_offset =
153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
154 u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
155
156 data_ptrs[morton_to_gl] = morton_data + morton_offset;
157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
158
159 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
160 }
161 }
162}
163
164} // namespace VideoCore