summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/kernel/svc.cpp3
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp18
-rw-r--r--src/core/hle/service/nvdrv/interface.h2
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/engines/shader_bytecode.h16
-rw-r--r--src/video_core/engines/shader_header.h11
-rw-r--r--src/video_core/memory_manager.cpp7
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/morton.cpp353
-rw-r--r--src/video_core/morton.h21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp193
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp398
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/utils.h164
14 files changed, 708 insertions, 486 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index b8b6b4d49..f287f7c97 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -671,7 +671,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
671 break; 671 break;
672 } 672 }
673 default: 673 default:
674 UNIMPLEMENTED(); 674 LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
675 return ERR_INVALID_ENUM_VALUE;
675 } 676 }
676 677
677 return RESULT_SUCCESS; 678 return RESULT_SUCCESS;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index ac3859353..602086eed 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -88,6 +88,20 @@ void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) {
88 rb.Push(RESULT_SUCCESS); 88 rb.Push(RESULT_SUCCESS);
89} 89}
90 90
91void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
92 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
93 IPC::ResponseBuilder rb{ctx, 2};
94 rb.Push(RESULT_SUCCESS);
95}
96
97void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
98 // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
99 // retail hardware.
100 LOG_DEBUG(Service_NVDRV, "called");
101 IPC::ResponseBuilder rb{ctx, 2};
102 rb.Push(RESULT_SUCCESS);
103}
104
91NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) 105NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
92 : ServiceFramework(name), nvdrv(std::move(nvdrv)) { 106 : ServiceFramework(name), nvdrv(std::move(nvdrv)) {
93 static const FunctionInfo functions[] = { 107 static const FunctionInfo functions[] = {
@@ -97,10 +111,10 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
97 {3, &NVDRV::Initialize, "Initialize"}, 111 {3, &NVDRV::Initialize, "Initialize"},
98 {4, &NVDRV::QueryEvent, "QueryEvent"}, 112 {4, &NVDRV::QueryEvent, "QueryEvent"},
99 {5, nullptr, "MapSharedMem"}, 113 {5, nullptr, "MapSharedMem"},
100 {6, nullptr, "GetStatus"}, 114 {6, &NVDRV::GetStatus, "GetStatus"},
101 {7, nullptr, "ForceSetClientPID"}, 115 {7, nullptr, "ForceSetClientPID"},
102 {8, &NVDRV::SetClientPID, "SetClientPID"}, 116 {8, &NVDRV::SetClientPID, "SetClientPID"},
103 {9, nullptr, "DumpGraphicsMemoryInfo"}, 117 {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
104 {10, nullptr, "InitializeDevtools"}, 118 {10, nullptr, "InitializeDevtools"},
105 {11, &NVDRV::Ioctl, "Ioctl2"}, 119 {11, &NVDRV::Ioctl, "Ioctl2"},
106 {12, nullptr, "Ioctl3"}, 120 {12, nullptr, "Ioctl3"},
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index d340893c2..5a1e4baa7 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -24,6 +24,8 @@ private:
24 void QueryEvent(Kernel::HLERequestContext& ctx); 24 void QueryEvent(Kernel::HLERequestContext& ctx);
25 void SetClientPID(Kernel::HLERequestContext& ctx); 25 void SetClientPID(Kernel::HLERequestContext& ctx);
26 void FinishInitialize(Kernel::HLERequestContext& ctx); 26 void FinishInitialize(Kernel::HLERequestContext& ctx);
27 void GetStatus(Kernel::HLERequestContext& ctx);
28 void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx);
27 29
28 std::shared_ptr<Module> nvdrv; 30 std::shared_ptr<Module> nvdrv;
29 31
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a780215c1..3f906a517 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(video_core STATIC
21 macro_interpreter.h 21 macro_interpreter.h
22 memory_manager.cpp 22 memory_manager.cpp
23 memory_manager.h 23 memory_manager.h
24 morton.cpp
25 morton.h
24 rasterizer_cache.cpp 26 rasterizer_cache.cpp
25 rasterizer_cache.h 27 rasterizer_cache.h
26 rasterizer_interface.h 28 rasterizer_interface.h
@@ -62,7 +64,6 @@ add_library(video_core STATIC
62 textures/decoders.cpp 64 textures/decoders.cpp
63 textures/decoders.h 65 textures/decoders.h
64 textures/texture.h 66 textures/texture.h
65 utils.h
66 video_core.cpp 67 video_core.cpp
67 video_core.h 68 video_core.h
68) 69)
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7e8449bc4..52d03aee8 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -82,6 +82,8 @@ union Attribute {
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
85 ClipDistances0123 = 44,
86 ClipDistances4567 = 45,
85 PointCoord = 46, 87 PointCoord = 46,
86 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex 88 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
87 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 89 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
@@ -366,6 +368,11 @@ enum class HalfPrecision : u64 {
366 FMZ = 2, 368 FMZ = 2,
367}; 369};
368 370
// Value type of the 1-bit `mode` field of the R2P instruction encoding (Instruction::r2p).
// NOTE(review): Pr / Cc presumably select predicate registers vs. condition codes as the
// destination - confirm against the ISA documentation.
371enum class R2pMode : u64 {
372 Pr = 0,
373 Cc = 1,
374};
375
369enum class IpaInterpMode : u64 { 376enum class IpaInterpMode : u64 {
370 Linear = 0, 377 Linear = 0,
371 Perspective = 1, 378 Perspective = 1,
@@ -855,6 +862,12 @@ union Instruction {
855 } hsetp2; 862 } hsetp2;
856 863
857 union { 864 union {
865 BitField<40, 1, R2pMode> mode;
866 BitField<41, 2, u64> byte;
867 BitField<20, 7, u64> immediate_mask;
868 } r2p;
869
870 union {
858 BitField<39, 3, u64> pred39; 871 BitField<39, 3, u64> pred39;
859 BitField<42, 1, u64> neg_pred; 872 BitField<42, 1, u64> neg_pred;
860 BitField<43, 1, u64> neg_a; 873 BitField<43, 1, u64> neg_a;
@@ -1381,6 +1394,7 @@ public:
1381 PSETP, 1394 PSETP,
1382 PSET, 1395 PSET,
1383 CSETP, 1396 CSETP,
1397 R2P_IMM,
1384 XMAD_IMM, 1398 XMAD_IMM,
1385 XMAD_CR, 1399 XMAD_CR,
1386 XMAD_RC, 1400 XMAD_RC,
@@ -1410,6 +1424,7 @@ public:
1410 HalfSetPredicate, 1424 HalfSetPredicate,
1411 PredicateSetPredicate, 1425 PredicateSetPredicate,
1412 PredicateSetRegister, 1426 PredicateSetRegister,
1427 RegisterSetPredicate,
1413 Conversion, 1428 Conversion,
1414 Xmad, 1429 Xmad,
1415 Unknown, 1430 Unknown,
@@ -1647,6 +1662,7 @@ private:
1647 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), 1662 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
1648 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 1663 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
1649 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), 1664 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
1665 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
1650 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), 1666 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
1651 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), 1667 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
1652 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), 1668 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a0e015c4b..99c34649f 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -62,7 +62,16 @@ struct Header {
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32] 63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor 64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 65 union {
66 BitField<0, 8, u16> clip_distances;
67 BitField<8, 1, u16> point_sprite_s;
68 BitField<9, 1, u16> point_sprite_t;
69 BitField<10, 1, u16> fog_coordinate;
70 BitField<12, 1, u16> tessellation_eval_point_u;
71 BitField<13, 1, u16> tessellation_eval_point_v;
72 BitField<14, 1, u16> instance_id;
73 BitField<15, 1, u16> vertex_id;
74 };
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] 75 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved 76 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA 77 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 77a20bb84..47247f097 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,6 +9,13 @@
9 9
10namespace Tegra { 10namespace Tegra {
11 11
12MemoryManager::MemoryManager() {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
15 // Undertale using 0 for a render target.
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
17}
18
12GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
13 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
14 21
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 4eb338aa2..fb03497ca 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -18,7 +18,7 @@ using GPUVAddr = u64;
18 18
19class MemoryManager final { 19class MemoryManager final {
20public: 20public:
21 MemoryManager() = default; 21 MemoryManager();
22 22
23 GPUVAddr AllocateSpace(u64 size, u64 align); 23 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
@@ -37,6 +37,7 @@ private:
37 enum class PageStatus : u64 { 37 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 38 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 39 Allocated = 0xFFFFFFFFFFFFFFFEULL,
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
40 }; 41 };
41 42
42 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
new file mode 100644
index 000000000..f14abba7d
--- /dev/null
+++ b/src/video_core/morton.cpp
@@ -0,0 +1,353 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace VideoCore {
15
16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat;
18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21
22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u8* buffer, std::size_t buffer_size, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
28 // pixel values.
29 const u32 tile_size_x{GetDefaultBlockWidth(format)};
30 const u32 tile_size_y{GetDefaultBlockHeight(format)};
31
32 if constexpr (morton_to_linear) {
33 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
34 stride, height, depth, block_height, block_depth);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
39 buffer, false, block_height, block_depth);
40 }
41}
42
// Table of MortonCopy<true, ...> instantiations, one slot per pixel format. GetSwizzleFunction
// indexes it with static_cast<std::size_t>(format) for MortonSwizzleMode::MortonToLinear.
// NOTE(review): the entries presumably have to follow the declaration order of
// Surface::PixelFormat exactly - confirm whenever a format is added or reordered.
43static constexpr ConversionArray morton_to_linear_fns = {
44 // clang-format off
45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::G8R8U>,
69 MortonCopy<true, PixelFormat::G8R8S>,
70 MortonCopy<true, PixelFormat::BGRA8>,
71 MortonCopy<true, PixelFormat::RGBA32F>,
72 MortonCopy<true, PixelFormat::RG32F>,
73 MortonCopy<true, PixelFormat::R32F>,
74 MortonCopy<true, PixelFormat::R16F>,
75 MortonCopy<true, PixelFormat::R16U>,
76 MortonCopy<true, PixelFormat::R16S>,
77 MortonCopy<true, PixelFormat::R16UI>,
78 MortonCopy<true, PixelFormat::R16I>,
79 MortonCopy<true, PixelFormat::RG16>,
80 MortonCopy<true, PixelFormat::RG16F>,
81 MortonCopy<true, PixelFormat::RG16UI>,
82 MortonCopy<true, PixelFormat::RG16I>,
83 MortonCopy<true, PixelFormat::RG16S>,
84 MortonCopy<true, PixelFormat::RGB32F>,
85 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
86 MortonCopy<true, PixelFormat::RG8U>,
87 MortonCopy<true, PixelFormat::RG8S>,
88 MortonCopy<true, PixelFormat::RG32UI>,
89 MortonCopy<true, PixelFormat::R32UI>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
93 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
94 MortonCopy<true, PixelFormat::DXT1_SRGB>,
95 MortonCopy<true, PixelFormat::DXT23_SRGB>,
96 MortonCopy<true, PixelFormat::DXT45_SRGB>,
97 MortonCopy<true, PixelFormat::BC7U_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
103 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
105 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
106 MortonCopy<true, PixelFormat::Z32F>,
107 MortonCopy<true, PixelFormat::Z16>,
108 MortonCopy<true, PixelFormat::Z24S8>,
109 MortonCopy<true, PixelFormat::S8Z24>,
110 MortonCopy<true, PixelFormat::Z32FS8>,
111 // clang-format on
112};
113
// Table of MortonCopy<false, ...> instantiations, one slot per pixel format. GetSwizzleFunction
// indexes it with static_cast<std::size_t>(format) for MortonSwizzleMode::LinearToMorton.
// Slots that hold an ASTC format in morton_to_linear_fns are nullptr here, so a lookup for those
// formats yields a null function pointer that must not be called.
// NOTE(review): the entries presumably have to follow the declaration order of
// Surface::PixelFormat exactly - confirm whenever a format is added or reordered.
114static constexpr ConversionArray linear_to_morton_fns = {
115 // clang-format off
116 MortonCopy<false, PixelFormat::ABGR8U>,
117 MortonCopy<false, PixelFormat::ABGR8S>,
118 MortonCopy<false, PixelFormat::ABGR8UI>,
119 MortonCopy<false, PixelFormat::B5G6R5U>,
120 MortonCopy<false, PixelFormat::A2B10G10R10U>,
121 MortonCopy<false, PixelFormat::A1B5G5R5U>,
122 MortonCopy<false, PixelFormat::R8U>,
123 MortonCopy<false, PixelFormat::R8UI>,
124 MortonCopy<false, PixelFormat::RGBA16F>,
125 MortonCopy<false, PixelFormat::RGBA16U>,
126 MortonCopy<false, PixelFormat::RGBA16UI>,
127 MortonCopy<false, PixelFormat::R11FG11FB10F>,
128 MortonCopy<false, PixelFormat::RGBA32UI>,
129 MortonCopy<false, PixelFormat::DXT1>,
130 MortonCopy<false, PixelFormat::DXT23>,
131 MortonCopy<false, PixelFormat::DXT45>,
132 MortonCopy<false, PixelFormat::DXN1>,
133 MortonCopy<false, PixelFormat::DXN2UNORM>,
134 MortonCopy<false, PixelFormat::DXN2SNORM>,
135 MortonCopy<false, PixelFormat::BC7U>,
136 MortonCopy<false, PixelFormat::BC6H_UF16>,
137 MortonCopy<false, PixelFormat::BC6H_SF16>,
138 // TODO(Subv): Swizzling ASTC formats are not supported
139 nullptr,
140 MortonCopy<false, PixelFormat::G8R8U>,
141 MortonCopy<false, PixelFormat::G8R8S>,
142 MortonCopy<false, PixelFormat::BGRA8>,
143 MortonCopy<false, PixelFormat::RGBA32F>,
144 MortonCopy<false, PixelFormat::RG32F>,
145 MortonCopy<false, PixelFormat::R32F>,
146 MortonCopy<false, PixelFormat::R16F>,
147 MortonCopy<false, PixelFormat::R16U>,
148 MortonCopy<false, PixelFormat::R16S>,
149 MortonCopy<false, PixelFormat::R16UI>,
150 MortonCopy<false, PixelFormat::R16I>,
151 MortonCopy<false, PixelFormat::RG16>,
152 MortonCopy<false, PixelFormat::RG16F>,
153 MortonCopy<false, PixelFormat::RG16UI>,
154 MortonCopy<false, PixelFormat::RG16I>,
155 MortonCopy<false, PixelFormat::RG16S>,
156 MortonCopy<false, PixelFormat::RGB32F>,
157 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
158 MortonCopy<false, PixelFormat::RG8U>,
159 MortonCopy<false, PixelFormat::RG8S>,
160 MortonCopy<false, PixelFormat::RG32UI>,
161 MortonCopy<false, PixelFormat::R32UI>,
162 nullptr,
163 nullptr,
164 nullptr,
165 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
166 MortonCopy<false, PixelFormat::DXT1_SRGB>,
167 MortonCopy<false, PixelFormat::DXT23_SRGB>,
168 MortonCopy<false, PixelFormat::DXT45_SRGB>,
169 MortonCopy<false, PixelFormat::BC7U_SRGB>,
170 nullptr,
171 nullptr,
172 nullptr,
173 nullptr,
174 nullptr,
175 nullptr,
176 nullptr,
177 nullptr,
178 MortonCopy<false, PixelFormat::Z32F>,
179 MortonCopy<false, PixelFormat::Z16>,
180 MortonCopy<false, PixelFormat::Z24S8>,
181 MortonCopy<false, PixelFormat::S8Z24>,
182 MortonCopy<false, PixelFormat::Z32FS8>,
183 // clang-format on
184};
185
186constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
187 switch (mode) {
188 case MortonSwizzleMode::MortonToLinear:
189 return morton_to_linear_fns[static_cast<std::size_t>(format)];
190 case MortonSwizzleMode::LinearToMorton:
191 return linear_to_morton_fns[static_cast<std::size_t>(format)];
192 }
193}
194
/// 8x8 Z-Order coordinate from 2D coordinates.
/// Only the low three bits of each coordinate take part: the bits of x land on the even bit
/// positions (0, 2, 4) of the result and the bits of y on the odd ones (1, 3, 5).
static u32 MortonInterleave(u32 x, u32 y) {
    // Moves bits 0..2 of a coordinate to bit positions 0, 2 and 4; higher bits are dropped.
    const auto spread = [](u32 coord) -> u32 {
        return (coord & 1u) | ((coord & 2u) << 1) | ((coord & 4u) << 2);
    };
    return spread(x) + (spread(y) << 1);
}
201
202/// Calculates the offset of the position of the pixel in Morton order
203static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
204 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
205 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
206 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
207 // texels are laid out in a 2x2 subtile like this:
208 // 2 3
209 // 0 1
210 //
211 // The full 8x8 tile has the texels arranged like this:
212 //
213 // 42 43 46 47 58 59 62 63
214 // 40 41 44 45 56 57 60 61
215 // 34 35 38 39 50 51 54 55
216 // 32 33 36 37 48 49 52 53
217 // 10 11 14 15 26 27 30 31
218 // 08 09 12 13 24 25 28 29
219 // 02 03 06 07 18 19 22 23
220 // 00 01 04 05 16 17 20 21
221 //
222 // This pattern is what's called Z-order curve, or Morton order.
223
224 const unsigned int block_height = 8;
225 const unsigned int coarse_x = x & ~7;
226
227 u32 i = MortonInterleave(x, y);
228
229 const unsigned int offset = coarse_x * block_height;
230
231 return (i + offset) * bytes_per_pixel;
232}
233
/// 128x128 Z-Order coordinate from 2D coordinates.
/// Only the low seven bits of each coordinate are used (x % 128, y % 128). The result places
/// the bits of x at positions {0, 1, 3, 6, 11, 12, 13} and the bits of y at positions
/// {2, 4, 5, 7, 8, 9, 10}, which reproduces the values of the former 128-entry lookup tables
/// without carrying their unreachable duplicated tail entries.
static u32 MortonInterleave128(u32 x, u32 y) {
    const u32 xi = x % 128;
    const u32 yi = y % 128;

    // x: bits 0-1 stay in place, bit 2 -> 3, bit 3 -> 6, bits 4-6 -> 11-13.
    const u32 x_bits =
        (xi & 0x03u) | ((xi & 0x04u) << 1) | ((xi & 0x08u) << 3) | ((xi & 0x70u) << 7);

    // y: bit 0 -> 2, bits 1-2 -> 4-5, bits 3-6 -> 7-10.
    const u32 y_bits = ((yi & 0x01u) << 2) | ((yi & 0x06u) << 3) | ((yi & 0x78u) << 4);

    // The two bit sets are disjoint, so the sum equals the bitwise OR.
    return x_bits + y_bits;
}
312
313static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
314 // Calculates the offset of the position of the pixel in Morton order
315 // Framebuffer images are split into 128x128 tiles.
316
317 constexpr u32 block_height = 128;
318 const u32 coarse_x = x & ~127;
319
320 const u32 i = MortonInterleave128(x, y);
321
322 const u32 offset = coarse_x * block_height;
323
324 return (i + offset) * bytes_per_pixel;
325}
326
327void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
328 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
329 std::size_t buffer_size, VAddr addr) {
330
331 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer,
332 buffer_size, addr);
333}
334
335void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
336 u8* morton_data, u8* linear_data, bool morton_to_linear) {
337 u8* data_ptrs[2];
338 for (u32 y = 0; y < height; ++y) {
339 for (u32 x = 0; x < width; ++x) {
340 const u32 coarse_y = y & ~127;
341 const u32 morton_offset =
342 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
343 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
344
345 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
346 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
347
348 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
349 }
350 }
351}
352
353} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
new file mode 100644
index 000000000..b9b9eca86
--- /dev/null
+++ b/src/video_core/morton.h
@@ -0,0 +1,21 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
// Direction of a swizzle copy; selects which conversion table MortonSwizzle looks the copy
// routine up in.
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
// Looks up the copy routine for `format` in the table selected by `mode` and invokes it with
// the remaining arguments passed through unchanged.
// NOTE(review): the LinearToMorton table in morton.cpp has nullptr entries for the ASTC
// formats - confirm that callers never request those combinations.
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
16 std::size_t buffer_size, VAddr addr);
17
// Copies a width x height image one pixel at a time between `morton_data` (addressed in
// 128x128 tiled Morton order) and `linear_data` (addressed row by row, with
// `linear_bytes_per_pixel` bytes between consecutive pixels). `bytes_per_pixel` bytes are copied
// per pixel. With morton_to_linear == true the copy goes from morton_data to linear_data,
// otherwise in the opposite direction.
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear);
20
21} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 4f434fc31..d458f77e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
15#include "core/memory.h" 15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/morton.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 21#include "video_core/renderer_opengl/gl_state.h"
@@ -22,10 +23,11 @@
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25#include "video_core/utils.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
28 28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
29using VideoCore::Surface::ComponentTypeFromDepthFormat; 31using VideoCore::Surface::ComponentTypeFromDepthFormat;
30using VideoCore::Surface::ComponentTypeFromRenderTarget; 32using VideoCore::Surface::ComponentTypeFromRenderTarget;
31using VideoCore::Surface::ComponentTypeFromTexture; 33using VideoCore::Surface::ComponentTypeFromTexture;
@@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
370 return {0, actual_height, MipWidth(mip_level), 0}; 372 return {0, actual_height, MipWidth(mip_level), 0};
371} 373}
372 374
373template <bool morton_to_gl, PixelFormat format> 375void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
374void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
375 std::size_t gl_buffer_size, VAddr addr) {
376 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
377
378 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
379 // pixel values.
380 const u32 tile_size_x{GetDefaultBlockWidth(format)};
381 const u32 tile_size_y{GetDefaultBlockHeight(format)};
382
383 if (morton_to_gl) {
384 Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
385 stride, height, depth, block_height, block_depth);
386 } else {
387 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
388 (height + tile_size_y - 1) / tile_size_y, depth,
389 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
390 gl_buffer, false, block_height, block_depth);
391 }
392}
393
394using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
395 VideoCore::Surface::MaxPixelFormat>;
396
397static constexpr GLConversionArray morton_to_gl_fns = {
398 // clang-format off
399 MortonCopy<true, PixelFormat::ABGR8U>,
400 MortonCopy<true, PixelFormat::ABGR8S>,
401 MortonCopy<true, PixelFormat::ABGR8UI>,
402 MortonCopy<true, PixelFormat::B5G6R5U>,
403 MortonCopy<true, PixelFormat::A2B10G10R10U>,
404 MortonCopy<true, PixelFormat::A1B5G5R5U>,
405 MortonCopy<true, PixelFormat::R8U>,
406 MortonCopy<true, PixelFormat::R8UI>,
407 MortonCopy<true, PixelFormat::RGBA16F>,
408 MortonCopy<true, PixelFormat::RGBA16U>,
409 MortonCopy<true, PixelFormat::RGBA16UI>,
410 MortonCopy<true, PixelFormat::R11FG11FB10F>,
411 MortonCopy<true, PixelFormat::RGBA32UI>,
412 MortonCopy<true, PixelFormat::DXT1>,
413 MortonCopy<true, PixelFormat::DXT23>,
414 MortonCopy<true, PixelFormat::DXT45>,
415 MortonCopy<true, PixelFormat::DXN1>,
416 MortonCopy<true, PixelFormat::DXN2UNORM>,
417 MortonCopy<true, PixelFormat::DXN2SNORM>,
418 MortonCopy<true, PixelFormat::BC7U>,
419 MortonCopy<true, PixelFormat::BC6H_UF16>,
420 MortonCopy<true, PixelFormat::BC6H_SF16>,
421 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
422 MortonCopy<true, PixelFormat::G8R8U>,
423 MortonCopy<true, PixelFormat::G8R8S>,
424 MortonCopy<true, PixelFormat::BGRA8>,
425 MortonCopy<true, PixelFormat::RGBA32F>,
426 MortonCopy<true, PixelFormat::RG32F>,
427 MortonCopy<true, PixelFormat::R32F>,
428 MortonCopy<true, PixelFormat::R16F>,
429 MortonCopy<true, PixelFormat::R16U>,
430 MortonCopy<true, PixelFormat::R16S>,
431 MortonCopy<true, PixelFormat::R16UI>,
432 MortonCopy<true, PixelFormat::R16I>,
433 MortonCopy<true, PixelFormat::RG16>,
434 MortonCopy<true, PixelFormat::RG16F>,
435 MortonCopy<true, PixelFormat::RG16UI>,
436 MortonCopy<true, PixelFormat::RG16I>,
437 MortonCopy<true, PixelFormat::RG16S>,
438 MortonCopy<true, PixelFormat::RGB32F>,
439 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
440 MortonCopy<true, PixelFormat::RG8U>,
441 MortonCopy<true, PixelFormat::RG8S>,
442 MortonCopy<true, PixelFormat::RG32UI>,
443 MortonCopy<true, PixelFormat::R32UI>,
444 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
445 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
446 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
447 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
448 MortonCopy<true, PixelFormat::DXT1_SRGB>,
449 MortonCopy<true, PixelFormat::DXT23_SRGB>,
450 MortonCopy<true, PixelFormat::DXT45_SRGB>,
451 MortonCopy<true, PixelFormat::BC7U_SRGB>,
452 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
453 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
454 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
455 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
456 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
457 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
458 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
459 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
460 MortonCopy<true, PixelFormat::Z32F>,
461 MortonCopy<true, PixelFormat::Z16>,
462 MortonCopy<true, PixelFormat::Z24S8>,
463 MortonCopy<true, PixelFormat::S8Z24>,
464 MortonCopy<true, PixelFormat::Z32FS8>,
465 // clang-format on
466};
467
468static constexpr GLConversionArray gl_to_morton_fns = {
469 // clang-format off
470 MortonCopy<false, PixelFormat::ABGR8U>,
471 MortonCopy<false, PixelFormat::ABGR8S>,
472 MortonCopy<false, PixelFormat::ABGR8UI>,
473 MortonCopy<false, PixelFormat::B5G6R5U>,
474 MortonCopy<false, PixelFormat::A2B10G10R10U>,
475 MortonCopy<false, PixelFormat::A1B5G5R5U>,
476 MortonCopy<false, PixelFormat::R8U>,
477 MortonCopy<false, PixelFormat::R8UI>,
478 MortonCopy<false, PixelFormat::RGBA16F>,
479 MortonCopy<false, PixelFormat::RGBA16U>,
480 MortonCopy<false, PixelFormat::RGBA16UI>,
481 MortonCopy<false, PixelFormat::R11FG11FB10F>,
482 MortonCopy<false, PixelFormat::RGBA32UI>,
483 MortonCopy<false, PixelFormat::DXT1>,
484 MortonCopy<false, PixelFormat::DXT23>,
485 MortonCopy<false, PixelFormat::DXT45>,
486 MortonCopy<false, PixelFormat::DXN1>,
487 MortonCopy<false, PixelFormat::DXN2UNORM>,
488 MortonCopy<false, PixelFormat::DXN2SNORM>,
489 MortonCopy<false, PixelFormat::BC7U>,
490 MortonCopy<false, PixelFormat::BC6H_UF16>,
491 MortonCopy<false, PixelFormat::BC6H_SF16>,
492 // TODO(Subv): Swizzling ASTC formats are not supported
493 nullptr,
494 MortonCopy<false, PixelFormat::G8R8U>,
495 MortonCopy<false, PixelFormat::G8R8S>,
496 MortonCopy<false, PixelFormat::BGRA8>,
497 MortonCopy<false, PixelFormat::RGBA32F>,
498 MortonCopy<false, PixelFormat::RG32F>,
499 MortonCopy<false, PixelFormat::R32F>,
500 MortonCopy<false, PixelFormat::R16F>,
501 MortonCopy<false, PixelFormat::R16U>,
502 MortonCopy<false, PixelFormat::R16S>,
503 MortonCopy<false, PixelFormat::R16UI>,
504 MortonCopy<false, PixelFormat::R16I>,
505 MortonCopy<false, PixelFormat::RG16>,
506 MortonCopy<false, PixelFormat::RG16F>,
507 MortonCopy<false, PixelFormat::RG16UI>,
508 MortonCopy<false, PixelFormat::RG16I>,
509 MortonCopy<false, PixelFormat::RG16S>,
510 MortonCopy<false, PixelFormat::RGB32F>,
511 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
512 MortonCopy<false, PixelFormat::RG8U>,
513 MortonCopy<false, PixelFormat::RG8S>,
514 MortonCopy<false, PixelFormat::RG32UI>,
515 MortonCopy<false, PixelFormat::R32UI>,
516 nullptr,
517 nullptr,
518 nullptr,
519 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
520 MortonCopy<false, PixelFormat::DXT1_SRGB>,
521 MortonCopy<false, PixelFormat::DXT23_SRGB>,
522 MortonCopy<false, PixelFormat::DXT45_SRGB>,
523 MortonCopy<false, PixelFormat::BC7U_SRGB>,
524 nullptr,
525 nullptr,
526 nullptr,
527 nullptr,
528 nullptr,
529 nullptr,
530 nullptr,
531 nullptr,
532 MortonCopy<false, PixelFormat::Z32F>,
533 MortonCopy<false, PixelFormat::Z16>,
534 MortonCopy<false, PixelFormat::Z24S8>,
535 MortonCopy<false, PixelFormat::S8Z24>,
536 MortonCopy<false, PixelFormat::Z32FS8>,
537 // clang-format on
538};
539
540void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
541 std::vector<u8>& gl_buffer, u32 mip_level) { 376 std::vector<u8>& gl_buffer, u32 mip_level) {
542 u32 depth = params.MipDepth(mip_level); 377 u32 depth = params.MipDepth(mip_level);
543 if (params.target == SurfaceTarget::Texture2D) { 378 if (params.target == SurfaceTarget::Texture2D) {
@@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
550 const u64 layer_size = params.LayerMemorySize(); 385 const u64 layer_size = params.LayerMemorySize();
551 const u64 gl_size = params.LayerSizeGL(mip_level); 386 const u64 gl_size = params.LayerSizeGL(mip_level);
552 for (u32 i = 0; i < params.depth; i++) { 387 for (u32 i = 0; i < params.depth; i++) {
553 functions[static_cast<std::size_t>(params.pixel_format)]( 388 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
554 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 389 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
555 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, 390 params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
556 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 391 params.addr + offset);
557 offset += layer_size; 392 offset += layer_size;
558 offset_gl += gl_size; 393 offset_gl += gl_size;
559 } 394 }
560 } else { 395 } else {
561 const u64 offset = params.GetMipmapLevelOffset(mip_level); 396 const u64 offset = params.GetMipmapLevelOffset(mip_level);
562 functions[static_cast<std::size_t>(params.pixel_format)]( 397 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
563 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 398 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
564 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), 399 params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
565 gl_buffer.size(), params.addr + offset); 400 params.addr + offset);
566 } 401 }
567} 402}
568 403
@@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() {
996 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
997 params.block_width, static_cast<u32>(params.target)); 832 params.block_width, static_cast<u32>(params.target));
998 for (u32 i = 0; i < params.max_mip_level; i++) 833 for (u32 i = 0; i < params.max_mip_level; i++)
999 SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); 834 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
1000 } else { 835 } else {
1001 const auto texture_src_data{Memory::GetPointer(params.addr)}; 836 const auto texture_src_data{Memory::GetPointer(params.addr)};
1002 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 837 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() {
1035 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 870 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
1036 params.block_width, static_cast<u32>(params.target)); 871 params.block_width, static_cast<u32>(params.target));
1037 872
1038 SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); 873 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
1039 } else { 874 } else {
1040 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 875 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
1041 } 876 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 97b9028c5..3a75f9d16 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -500,27 +500,42 @@ public:
500 const Register& buf_reg) { 500 const Register& buf_reg) {
501 const std::string dest = GetOutputAttribute(attribute); 501 const std::string dest = GetOutputAttribute(attribute);
502 const std::string src = GetRegisterAsFloat(val_reg); 502 const std::string src = GetRegisterAsFloat(val_reg);
503 if (dest.empty())
504 return;
503 505
504 if (!dest.empty()) { 506 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
505 // Can happen with unknown/unimplemented output attributes, in which case we ignore the 507 // instruction for now.
506 // instruction for now. 508 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
507 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { 509 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
508 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry 510 // shader. These instructions use a dirty register as buffer index, to avoid some
509 // shader. These instructions use a dirty register as buffer index, to avoid some 511 // drivers from complaining about out of boundary writes, guard them.
510 // drivers from complaining about out of boundary writes, guard them. 512 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
511 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + 513 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
512 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; 514 shader.AddLine("amem[" + buf_index + "][" +
513 shader.AddLine("amem[" + buf_index + "][" + 515 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
514 std::to_string(static_cast<u32>(attribute)) + ']' + 516 " = " + src + ';');
515 GetSwizzle(elem) + " = " + src + ';'); 517 return;
516 } else { 518 }
517 if (attribute == Attribute::Index::PointSize) { 519
518 fixed_pipeline_output_attributes_used.insert(attribute); 520 switch (attribute) {
519 shader.AddLine(dest + " = " + src + ';'); 521 case Attribute::Index::ClipDistances0123:
520 } else { 522 case Attribute::Index::ClipDistances4567: {
521 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); 523 const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem;
522 } 524 UNIMPLEMENTED_IF_MSG(
523 } 525 ((header.vtg.clip_distances >> index) & 1) == 0,
526 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
527
528 fixed_pipeline_output_attributes_used.insert(attribute);
529 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
530 break;
531 }
532 case Attribute::Index::PointSize:
533 fixed_pipeline_output_attributes_used.insert(attribute);
534 shader.AddLine(dest + " = " + src + ';');
535 break;
536 default:
537 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
538 break;
524 } 539 }
525 } 540 }
526 541
@@ -740,12 +755,19 @@ private:
740 void GenerateVertex() { 755 void GenerateVertex() {
741 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) 756 if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
742 return; 757 return;
758 bool clip_distances_declared = false;
759
743 declarations.AddLine("out gl_PerVertex {"); 760 declarations.AddLine("out gl_PerVertex {");
744 ++declarations.scope; 761 ++declarations.scope;
745 declarations.AddLine("vec4 gl_Position;"); 762 declarations.AddLine("vec4 gl_Position;");
746 for (auto& o : fixed_pipeline_output_attributes_used) { 763 for (auto& o : fixed_pipeline_output_attributes_used) {
747 if (o == Attribute::Index::PointSize) 764 if (o == Attribute::Index::PointSize)
748 declarations.AddLine("float gl_PointSize;"); 765 declarations.AddLine("float gl_PointSize;");
766 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
767 o == Attribute::Index::ClipDistances4567)) {
768 declarations.AddLine("float gl_ClipDistance[];");
769 clip_distances_declared = true;
770 }
749 } 771 }
750 --declarations.scope; 772 --declarations.scope;
751 declarations.AddLine("};"); 773 declarations.AddLine("};");
@@ -916,6 +938,10 @@ private:
916 return "gl_PointSize"; 938 return "gl_PointSize";
917 case Attribute::Index::Position: 939 case Attribute::Index::Position:
918 return "position"; 940 return "position";
941 case Attribute::Index::ClipDistances0123:
942 case Attribute::Index::ClipDistances4567: {
943 return "gl_ClipDistance";
944 }
919 default: 945 default:
920 const u32 index{static_cast<u32>(attribute) - 946 const u32 index{static_cast<u32>(attribute) -
921 static_cast<u32>(Attribute::Index::Attribute_0)}; 947 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -1266,7 +1292,15 @@ private:
1266 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); 1292 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
1267 } 1293 }
1268 1294
1269 void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { 1295 void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
1296 const std::string& texture) {
1297 // Add an extra scope and declare the texture coords inside to prevent
1298 // overwriting them in case they are used as outputs of the texs instruction.
1299 shader.AddLine('{');
1300 ++shader.scope;
1301 shader.AddLine(coord);
1302 shader.AddLine("vec4 texture_tmp = " + texture + ';');
1303
1270 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 1304 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
1271 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 1305 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
1272 1306
@@ -1278,17 +1312,19 @@ private:
1278 1312
1279 if (written_components < 2) { 1313 if (written_components < 2) {
1280 // Write the first two swizzle components to gpr0 and gpr0+1 1314 // Write the first two swizzle components to gpr0 and gpr0+1
1281 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, 1315 regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
1282 written_components % 2); 1316 written_components % 2);
1283 } else { 1317 } else {
1284 ASSERT(instr.texs.HasTwoDestinations()); 1318 ASSERT(instr.texs.HasTwoDestinations());
1285 // Write the rest of the swizzle components to gpr28 and gpr28+1 1319 // Write the rest of the swizzle components to gpr28 and gpr28+1
1286 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, 1320 regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
1287 written_components % 2); 1321 written_components % 2);
1288 } 1322 }
1289 1323
1290 ++written_components; 1324 ++written_components;
1291 } 1325 }
1326 --shader.scope;
1327 shader.AddLine('}');
1292 } 1328 }
1293 1329
1294 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { 1330 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -2510,61 +2546,83 @@ private:
2510 const bool depth_compare = 2546 const bool depth_compare =
2511 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2547 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2512 u32 num_coordinates = TextureCoordinates(texture_type); 2548 u32 num_coordinates = TextureCoordinates(texture_type);
2513 if (depth_compare) 2549 u32 start_index = 0;
2514 num_coordinates += 1; 2550 std::string array_elem;
2551 if (is_array) {
2552 array_elem = regs.GetRegisterAsInteger(instr.gpr8);
2553 start_index = 1;
2554 }
2555 const auto process_mode = instr.tex.GetTextureProcessMode();
2556 u32 start_index_b = 0;
2557 std::string lod_value;
2558 if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
2559 process_mode != Tegra::Shader::TextureProcessMode::None) {
2560 start_index_b = 1;
2561 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2562 }
2563
2564 std::string depth_value;
2565 if (depth_compare) {
2566 depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
2567 }
2568
2569 bool depth_compare_extra = false;
2515 2570
2516 switch (num_coordinates) { 2571 switch (num_coordinates) {
2517 case 1: { 2572 case 1: {
2573 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2518 if (is_array) { 2574 if (is_array) {
2519 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2575 if (depth_compare) {
2520 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2576 coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
2521 coord = "vec2 coords = vec2(" + x + ", " + index + ");"; 2577 array_elem + ");";
2578 } else {
2579 coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
2580 }
2522 } else { 2581 } else {
2523 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2582 if (depth_compare) {
2524 coord = "float coords = " + x + ';'; 2583 coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
2584 } else {
2585 coord = "float coords = " + x + ';';
2586 }
2525 } 2587 }
2526 break; 2588 break;
2527 } 2589 }
2528 case 2: { 2590 case 2: {
2591 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2592 const std::string y =
2593 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2529 if (is_array) { 2594 if (is_array) {
2530 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2595 if (depth_compare) {
2531 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2596 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
2532 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2597 ", " + array_elem + ");";
2533 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; 2598 } else {
2599 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
2600 }
2534 } else { 2601 } else {
2535 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2602 if (depth_compare) {
2536 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2603 coord =
2537 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2604 "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
2605 } else {
2606 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2607 }
2538 } 2608 }
2539 break; 2609 break;
2540 } 2610 }
2541 case 3: { 2611 case 3: {
2542 if (depth_compare) { 2612 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2543 if (is_array) { 2613 const std::string y =
2544 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2614 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2545 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2615 const std::string z =
2546 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2616 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
2547 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2617 if (is_array) {
2548 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + 2618 depth_compare_extra = depth_compare;
2549 ");"; 2619 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2550 } else { 2620 array_elem + ");";
2551 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2552 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2553 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2554 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2555 }
2556 } else { 2621 } else {
2557 if (is_array) { 2622 if (depth_compare) {
2558 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2623 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2559 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2624 depth_value + ");";
2560 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2561 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
2562 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2563 ");";
2564 } else { 2625 } else {
2565 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2566 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2567 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2568 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; 2626 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2569 } 2627 }
2570 } 2628 }
@@ -2580,82 +2638,85 @@ private:
2580 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2638 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2581 texture_type = Tegra::Shader::TextureType::Texture2D; 2639 texture_type = Tegra::Shader::TextureType::Texture2D;
2582 } 2640 }
2583 // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
2584 // or lod.
2585 2641
2586 const std::string sampler = 2642 const std::string sampler =
2587 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2643 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2588 // Add an extra scope and declare the texture coords inside to prevent 2644 // Add an extra scope and declare the texture coords inside to prevent
2589 // overwriting them in case they are used as outputs of the texs instruction. 2645 // overwriting them in case they are used as outputs of the texs instruction.
2590 2646
2591 shader.AddLine("{"); 2647 shader.AddLine('{');
2592 ++shader.scope; 2648 ++shader.scope;
2593 shader.AddLine(coord); 2649 shader.AddLine(coord);
2594 std::string texture; 2650 std::string texture;
2595 2651
2596 switch (instr.tex.GetTextureProcessMode()) { 2652 switch (instr.tex.GetTextureProcessMode()) {
2597 case Tegra::Shader::TextureProcessMode::None: { 2653 case Tegra::Shader::TextureProcessMode::None: {
2598 texture = "texture(" + sampler + ", coords)"; 2654 if (!depth_compare_extra) {
2655 texture = "texture(" + sampler + ", coords)";
2656 } else {
2657 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2658 }
2599 break; 2659 break;
2600 } 2660 }
2601 case Tegra::Shader::TextureProcessMode::LZ: { 2661 case Tegra::Shader::TextureProcessMode::LZ: {
2602 texture = "textureLod(" + sampler + ", coords, 0.0)"; 2662 if (!depth_compare_extra) {
2663 texture = "textureLod(" + sampler + ", coords, 0.0)";
2664 } else {
2665 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2666 }
2603 break; 2667 break;
2604 } 2668 }
2605 case Tegra::Shader::TextureProcessMode::LB: 2669 case Tegra::Shader::TextureProcessMode::LB:
2606 case Tegra::Shader::TextureProcessMode::LBA: { 2670 case Tegra::Shader::TextureProcessMode::LBA: {
2607 const std::string bias = [&]() {
2608 if (depth_compare) {
2609 if (is_array)
2610 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 2);
2611 else
2612 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2613 } else {
2614 return regs.GetRegisterAsFloat(instr.gpr20);
2615 }
2616 }();
2617 shader.AddLine("float bias = " + bias + ';');
2618
2619 // TODO: Figure if A suffix changes the equation at all. 2671 // TODO: Figure if A suffix changes the equation at all.
2620 texture = "texture(" + sampler + ", coords, bias)"; 2672 if (!depth_compare_extra) {
2673 texture = "texture(" + sampler + ", coords, " + lod_value + ')';
2674 } else {
2675 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2676 LOG_WARNING(HW_GPU,
2677 "OpenGL Limitation: can't set bias value along depth compare");
2678 }
2621 break; 2679 break;
2622 } 2680 }
2623 case Tegra::Shader::TextureProcessMode::LL: 2681 case Tegra::Shader::TextureProcessMode::LL:
2624 case Tegra::Shader::TextureProcessMode::LLA: { 2682 case Tegra::Shader::TextureProcessMode::LLA: {
2625 const std::string lod = [&]() {
2626 if (num_coordinates <= 2) {
2627 return regs.GetRegisterAsFloat(instr.gpr20);
2628 } else {
2629 return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2630 }
2631 }();
2632 shader.AddLine("float lod = " + lod + ';');
2633
2634 // TODO: Figure if A suffix changes the equation at all. 2683 // TODO: Figure if A suffix changes the equation at all.
2635 texture = "textureLod(" + sampler + ", coords, lod)"; 2684 if (!depth_compare_extra) {
2685 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2686 } else {
2687 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2688 LOG_WARNING(HW_GPU,
2689 "OpenGL Limitation: can't set lod value along depth compare");
2690 }
2636 break; 2691 break;
2637 } 2692 }
2638 default: { 2693 default: {
2639 texture = "texture(" + sampler + ", coords)"; 2694 if (!depth_compare_extra) {
2695 texture = "texture(" + sampler + ", coords)";
2696 } else {
2697 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2698 }
2640 UNIMPLEMENTED_MSG("Unhandled texture process mode {}", 2699 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2641 static_cast<u32>(instr.tex.GetTextureProcessMode())); 2700 static_cast<u32>(instr.tex.GetTextureProcessMode()));
2642 } 2701 }
2643 } 2702 }
2644 if (!depth_compare) { 2703 if (!depth_compare) {
2704 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2645 std::size_t dest_elem{}; 2705 std::size_t dest_elem{};
2646 for (std::size_t elem = 0; elem < 4; ++elem) { 2706 for (std::size_t elem = 0; elem < 4; ++elem) {
2647 if (!instr.tex.IsComponentEnabled(elem)) { 2707 if (!instr.tex.IsComponentEnabled(elem)) {
2648 // Skip disabled components 2708 // Skip disabled components
2649 continue; 2709 continue;
2650 } 2710 }
2651 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2711 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2712 dest_elem);
2652 ++dest_elem; 2713 ++dest_elem;
2653 } 2714 }
2654 } else { 2715 } else {
2655 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); 2716 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
2656 } 2717 }
2657 --shader.scope; 2718 --shader.scope;
2658 shader.AddLine("}"); 2719 shader.AddLine('}');
2659 break; 2720 break;
2660 } 2721 }
2661 case OpCode::Id::TEXS: { 2722 case OpCode::Id::TEXS: {
@@ -2668,41 +2729,76 @@ private:
2668 const bool depth_compare = 2729 const bool depth_compare =
2669 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2730 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2670 u32 num_coordinates = TextureCoordinates(texture_type); 2731 u32 num_coordinates = TextureCoordinates(texture_type);
2671 if (depth_compare) 2732 const auto process_mode = instr.texs.GetTextureProcessMode();
2672 num_coordinates += 1; 2733 std::string lod_value;
2673 2734 std::string coord;
2674 // Scope to avoid variable name overlaps. 2735 u32 lod_offset = 0;
2675 shader.AddLine('{'); 2736 if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
2676 ++shader.scope; 2737 if (num_coordinates > 2) {
2738 lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2739 lod_offset = 2;
2740 } else {
2741 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2742 lod_offset = 1;
2743 }
2744 }
2677 2745
2678 switch (num_coordinates) { 2746 switch (num_coordinates) {
2747 case 1: {
2748 coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
2749 break;
2750 }
2679 case 2: { 2751 case 2: {
2680 if (is_array) { 2752 if (is_array) {
2681 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2753 if (depth_compare) {
2682 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2754 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2683 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2755 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2684 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"); 2756 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2757 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2758 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2759 ");";
2760 } else {
2761 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2762 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2763 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2764 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
2765 }
2685 } else { 2766 } else {
2686 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2767 if (lod_offset != 0) {
2687 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2768 if (depth_compare) {
2688 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2769 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2770 const std::string y =
2771 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2772 const std::string z =
2773 regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
2774 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2775 } else {
2776 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2777 const std::string y =
2778 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2779 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2780 }
2781 } else {
2782 if (depth_compare) {
2783 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2784 const std::string y =
2785 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2786 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2787 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2788 } else {
2789 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2790 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2791 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2792 }
2793 }
2689 } 2794 }
2690 break; 2795 break;
2691 } 2796 }
2692 case 3: { 2797 case 3: {
2693 if (is_array) { 2798 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2694 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2799 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2695 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2800 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2696 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2801 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2697 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2698 shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2699 index + ");");
2700 } else {
2701 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2702 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2703 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2704 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
2705 }
2706 break; 2802 break;
2707 } 2803 }
2708 default: 2804 default:
@@ -2712,14 +2808,14 @@ private:
2712 // Fallback to interpreting as a 2D texture for now 2808 // Fallback to interpreting as a 2D texture for now
2713 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2809 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2714 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2810 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2715 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); 2811 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2716 texture_type = Tegra::Shader::TextureType::Texture2D; 2812 texture_type = Tegra::Shader::TextureType::Texture2D;
2717 is_array = false; 2813 is_array = false;
2718 } 2814 }
2719 const std::string sampler = 2815 const std::string sampler =
2720 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2816 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2721 std::string texture; 2817 std::string texture;
2722 switch (instr.texs.GetTextureProcessMode()) { 2818 switch (process_mode) {
2723 case Tegra::Shader::TextureProcessMode::None: { 2819 case Tegra::Shader::TextureProcessMode::None: {
2724 texture = "texture(" + sampler + ", coords)"; 2820 texture = "texture(" + sampler + ", coords)";
2725 break; 2821 break;
@@ -2733,8 +2829,7 @@ private:
2733 break; 2829 break;
2734 } 2830 }
2735 case Tegra::Shader::TextureProcessMode::LL: { 2831 case Tegra::Shader::TextureProcessMode::LL: {
2736 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2832 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2737 texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
2738 break; 2833 break;
2739 } 2834 }
2740 default: { 2835 default: {
@@ -2744,13 +2839,11 @@ private:
2744 } 2839 }
2745 } 2840 }
2746 if (!depth_compare) { 2841 if (!depth_compare) {
2747 WriteTexsInstruction(instr, texture); 2842 WriteTexsInstruction(instr, coord, texture);
2748 } else { 2843 } else {
2749 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 2844 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
2750 } 2845 }
2751 2846
2752 shader.AddLine('}');
2753 --shader.scope;
2754 break; 2847 break;
2755 } 2848 }
2756 case OpCode::Id::TLDS: { 2849 case OpCode::Id::TLDS: {
@@ -2772,11 +2865,12 @@ private:
2772 // Scope to avoid variable name overlaps. 2865 // Scope to avoid variable name overlaps.
2773 shader.AddLine('{'); 2866 shader.AddLine('{');
2774 ++shader.scope; 2867 ++shader.scope;
2868 std::string coords;
2775 2869
2776 switch (texture_type) { 2870 switch (texture_type) {
2777 case Tegra::Shader::TextureType::Texture1D: { 2871 case Tegra::Shader::TextureType::Texture1D: {
2778 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2872 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2779 shader.AddLine("int coords = " + x + ';'); 2873 coords = "float coords = " + x + ';';
2780 break; 2874 break;
2781 } 2875 }
2782 case Tegra::Shader::TextureType::Texture2D: { 2876 case Tegra::Shader::TextureType::Texture2D: {
@@ -2784,7 +2878,8 @@ private:
2784 2878
2785 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2879 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2786 const std::string y = regs.GetRegisterAsInteger(instr.gpr20); 2880 const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
2787 shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); 2881 // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
2882 coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
2788 extra_op_offset = 1; 2883 extra_op_offset = 1;
2789 break; 2884 break;
2790 } 2885 }
@@ -2812,7 +2907,7 @@ private:
2812 static_cast<u32>(instr.tlds.GetTextureProcessMode())); 2907 static_cast<u32>(instr.tlds.GetTextureProcessMode()));
2813 } 2908 }
2814 } 2909 }
2815 WriteTexsInstruction(instr, texture); 2910 WriteTexsInstruction(instr, coords, texture);
2816 2911
2817 --shader.scope; 2912 --shader.scope;
2818 shader.AddLine('}'); 2913 shader.AddLine('}');
@@ -2871,14 +2966,17 @@ private:
2871 2966
2872 const std::string texture = "textureGather(" + sampler + ", coords, " + 2967 const std::string texture = "textureGather(" + sampler + ", coords, " +
2873 std::to_string(instr.tld4.component) + ')'; 2968 std::to_string(instr.tld4.component) + ')';
2969
2874 if (!depth_compare) { 2970 if (!depth_compare) {
2971 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2875 std::size_t dest_elem{}; 2972 std::size_t dest_elem{};
2876 for (std::size_t elem = 0; elem < 4; ++elem) { 2973 for (std::size_t elem = 0; elem < 4; ++elem) {
2877 if (!instr.tex.IsComponentEnabled(elem)) { 2974 if (!instr.tex.IsComponentEnabled(elem)) {
2878 // Skip disabled components 2975 // Skip disabled components
2879 continue; 2976 continue;
2880 } 2977 }
2881 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2978 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2979 dest_elem);
2882 ++dest_elem; 2980 ++dest_elem;
2883 } 2981 }
2884 } else { 2982 } else {
@@ -2899,6 +2997,7 @@ private:
2899 // Scope to avoid variable name overlaps. 2997 // Scope to avoid variable name overlaps.
2900 shader.AddLine('{'); 2998 shader.AddLine('{');
2901 ++shader.scope; 2999 ++shader.scope;
3000 std::string coords;
2902 3001
2903 const bool depth_compare = 3002 const bool depth_compare =
2904 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 3003 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -2908,20 +3007,19 @@ private:
2908 const std::string sampler = GetSampler( 3007 const std::string sampler = GetSampler(
2909 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); 3008 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
2910 if (!depth_compare) { 3009 if (!depth_compare) {
2911 shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); 3010 coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
2912 } else { 3011 } else {
2913 // Note: TLD4S coordinate encoding works just like TEXS's 3012 // Note: TLD4S coordinate encoding works just like TEXS's
2914 shader.AddLine( 3013 const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2915 "float op_y = " + regs.GetRegisterAsFloat(instr.gpr8.Value() + 1) + ';'); 3014 coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
2916 shader.AddLine("vec3 coords = vec3(" + op_a + ", op_y, " + op_b + ");");
2917 } 3015 }
2918 const std::string texture = "textureGather(" + sampler + ", coords, " + 3016 const std::string texture = "textureGather(" + sampler + ", coords, " +
2919 std::to_string(instr.tld4s.component) + ')'; 3017 std::to_string(instr.tld4s.component) + ')';
2920 3018
2921 if (!depth_compare) { 3019 if (!depth_compare) {
2922 WriteTexsInstruction(instr, texture); 3020 WriteTexsInstruction(instr, coords, texture);
2923 } else { 3021 } else {
2924 WriteTexsInstruction(instr, "vec4(" + texture + ')'); 3022 WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
2925 } 3023 }
2926 3024
2927 --shader.scope; 3025 --shader.scope;
@@ -3217,6 +3315,34 @@ private:
3217 } 3315 }
3218 break; 3316 break;
3219 } 3317 }
3318 case OpCode::Type::RegisterSetPredicate: {
3319 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3320
3321 const std::string apply_mask = [&]() {
3322 switch (opcode->get().GetId()) {
3323 case OpCode::Id::R2P_IMM:
3324 return std::to_string(instr.r2p.immediate_mask);
3325 default:
3326 UNREACHABLE();
3327 }
3328 }();
3329 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3330 " >> " + std::to_string(instr.r2p.byte) + ')';
3331
3332 constexpr u64 programmable_preds = 7;
3333 for (u64 pred = 0; pred < programmable_preds; ++pred) {
3334 const auto shift = std::to_string(1 << pred);
3335
3336 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
3337 ++shader.scope;
3338
3339 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
3340
3341 --shader.scope;
3342 shader.AddLine('}');
3343 }
3344 break;
3345 }
3220 case OpCode::Type::FloatSet: { 3346 case OpCode::Type::FloatSet: {
3221 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), 3347 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8),
3222 instr.fset.abs_a != 0, instr.fset.neg_a != 0); 3348 instr.fset.abs_a != 0, instr.fset.neg_a != 0);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1492e063a..4fd0d66c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -19,9 +19,9 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "core/telemetry_session.h" 20#include "core/telemetry_session.h"
21#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
22#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
24#include "video_core/utils.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27 27
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
deleted file mode 100644
index e0a14d48f..000000000
--- a/src/video_core/utils.h
+++ /dev/null
@@ -1,164 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <cstring>

#include "common/common_types.h"
8
9namespace VideoCore {
10
11// 8x8 Z-Order coordinate from 2D coordinates
/// Interleaves the low three bits of x and y into a 6-bit Z-order (Morton) index inside an
/// 8x8 tile. Bits of x land on positions 0, 2 and 4; bits of y land on positions 1, 3 and 5.
/// Only x % 8 and y % 8 contribute to the result.
static inline u32 MortonInterleave(u32 x, u32 y) {
    // Moves bits 0..2 of v onto bit positions 0, 2 and 4 respectively.
    const auto spread = [](u32 v) -> u32 {
        return (v & 1) | ((v & 2) << 1) | ((v & 4) << 2);
    };
    const u32 even_bits = spread(x % 8);
    const u32 odd_bits = spread(y % 8) << 1;
    // The two bit sets never overlap, so adding them is the same as OR-ing them.
    return even_bits + odd_bits;
}
17
18/**
19 * Calculates the offset of the position of the pixel in Morton order
20 */
21static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
22 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
23 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
24 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
25 // texels are laid out in a 2x2 subtile like this:
26 // 2 3
27 // 0 1
28 //
29 // The full 8x8 tile has the texels arranged like this:
30 //
31 // 42 43 46 47 58 59 62 63
32 // 40 41 44 45 56 57 60 61
33 // 34 35 38 39 50 51 54 55
34 // 32 33 36 37 48 49 52 53
35 // 10 11 14 15 26 27 30 31
36 // 08 09 12 13 24 25 28 29
37 // 02 03 06 07 18 19 22 23
38 // 00 01 04 05 16 17 20 21
39 //
40 // This pattern is what's called Z-order curve, or Morton order.
41
42 const unsigned int block_height = 8;
43 const unsigned int coarse_x = x & ~7;
44
45 u32 i = VideoCore::MortonInterleave(x, y);
46
47 const unsigned int offset = coarse_x * block_height;
48
49 return (i + offset) * bytes_per_pixel;
50}
51
/// 128x128 Z-Order coordinate from 2D coordinates.
///
/// Only x % 128 and y % 128 contribute. The seven low bits of each coordinate are scattered
/// into a 14-bit index as follows (source bit -> destination bit):
///   x: 0->0, 1->1, 2->3, 3->6, 4->11, 5->12, 6->13
///   y: 0->2, 1->4, 2->5, 3->7, 4->8,  5->9,  6->10
///
/// This yields the same values as the previous lookup tables, which stored 384 entries each
/// (the same 128 values three times over) although only the first 128 were reachable.
static inline u32 MortonInterleave128(u32 x, u32 y) {
    const u32 lx = x % 128;
    const u32 ly = y % 128;

    const u32 x_bits = (lx & 0x03)           // bits 0-1 stay in place
                       | ((lx & 0x04) << 1)  // bit 2 -> bit 3
                       | ((lx & 0x08) << 3)  // bit 3 -> bit 6
                       | ((lx & 0x70) << 7); // bits 4-6 -> bits 11-13

    const u32 y_bits = ((ly & 0x01) << 2)    // bit 0 -> bit 2
                       | ((ly & 0x06) << 3)  // bits 1-2 -> bits 4-5
                       | ((ly & 0x78) << 4); // bits 3-6 -> bits 7-10

    // The x and y destination bits are disjoint, so the sum equals the bitwise OR.
    return x_bits + y_bits;
}
130
131static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
132 // Calculates the offset of the position of the pixel in Morton order
133 // Framebuffer images are split into 128x128 tiles.
134
135 const unsigned int block_height = 128;
136 const unsigned int coarse_x = x & ~127;
137
138 u32 i = MortonInterleave128(x, y);
139
140 const unsigned int offset = coarse_x * block_height;
141
142 return (i + offset) * bytes_per_pixel;
143}
144
145static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
146 u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
147 bool morton_to_gl) {
148 u8* data_ptrs[2];
149 for (unsigned y = 0; y < height; ++y) {
150 for (unsigned x = 0; x < width; ++x) {
151 const u32 coarse_y = y & ~127;
152 u32 morton_offset =
153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
154 u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
155
156 data_ptrs[morton_to_gl] = morton_data + morton_offset;
157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
158
159 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
160 }
161 }
162}
163
164} // namespace VideoCore