diff options
31 files changed, 940 insertions, 519 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index b8b6b4d49..f287f7c97 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -671,7 +671,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) | |||
| 671 | break; | 671 | break; |
| 672 | } | 672 | } |
| 673 | default: | 673 | default: |
| 674 | UNIMPLEMENTED(); | 674 | LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id); |
| 675 | return ERR_INVALID_ENUM_VALUE; | ||
| 675 | } | 676 | } |
| 676 | 677 | ||
| 677 | return RESULT_SUCCESS; | 678 | return RESULT_SUCCESS; |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index ac3859353..602086eed 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -88,6 +88,20 @@ void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) { | |||
| 88 | rb.Push(RESULT_SUCCESS); | 88 | rb.Push(RESULT_SUCCESS); |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) { | ||
| 92 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); | ||
| 93 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 94 | rb.Push(RESULT_SUCCESS); | ||
| 95 | } | ||
| 96 | |||
| 97 | void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) { | ||
| 98 | // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on | ||
| 99 | // retail hardware. | ||
| 100 | LOG_DEBUG(Service_NVDRV, "called"); | ||
| 101 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 102 | rb.Push(RESULT_SUCCESS); | ||
| 103 | } | ||
| 104 | |||
| 91 | NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) | 105 | NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) |
| 92 | : ServiceFramework(name), nvdrv(std::move(nvdrv)) { | 106 | : ServiceFramework(name), nvdrv(std::move(nvdrv)) { |
| 93 | static const FunctionInfo functions[] = { | 107 | static const FunctionInfo functions[] = { |
| @@ -97,10 +111,10 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) | |||
| 97 | {3, &NVDRV::Initialize, "Initialize"}, | 111 | {3, &NVDRV::Initialize, "Initialize"}, |
| 98 | {4, &NVDRV::QueryEvent, "QueryEvent"}, | 112 | {4, &NVDRV::QueryEvent, "QueryEvent"}, |
| 99 | {5, nullptr, "MapSharedMem"}, | 113 | {5, nullptr, "MapSharedMem"}, |
| 100 | {6, nullptr, "GetStatus"}, | 114 | {6, &NVDRV::GetStatus, "GetStatus"}, |
| 101 | {7, nullptr, "ForceSetClientPID"}, | 115 | {7, nullptr, "ForceSetClientPID"}, |
| 102 | {8, &NVDRV::SetClientPID, "SetClientPID"}, | 116 | {8, &NVDRV::SetClientPID, "SetClientPID"}, |
| 103 | {9, nullptr, "DumpGraphicsMemoryInfo"}, | 117 | {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"}, |
| 104 | {10, nullptr, "InitializeDevtools"}, | 118 | {10, nullptr, "InitializeDevtools"}, |
| 105 | {11, &NVDRV::Ioctl, "Ioctl2"}, | 119 | {11, &NVDRV::Ioctl, "Ioctl2"}, |
| 106 | {12, nullptr, "Ioctl3"}, | 120 | {12, nullptr, "Ioctl3"}, |
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h index d340893c2..5a1e4baa7 100644 --- a/src/core/hle/service/nvdrv/interface.h +++ b/src/core/hle/service/nvdrv/interface.h | |||
| @@ -24,6 +24,8 @@ private: | |||
| 24 | void QueryEvent(Kernel::HLERequestContext& ctx); | 24 | void QueryEvent(Kernel::HLERequestContext& ctx); |
| 25 | void SetClientPID(Kernel::HLERequestContext& ctx); | 25 | void SetClientPID(Kernel::HLERequestContext& ctx); |
| 26 | void FinishInitialize(Kernel::HLERequestContext& ctx); | 26 | void FinishInitialize(Kernel::HLERequestContext& ctx); |
| 27 | void GetStatus(Kernel::HLERequestContext& ctx); | ||
| 28 | void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx); | ||
| 27 | 29 | ||
| 28 | std::shared_ptr<Module> nvdrv; | 30 | std::shared_ptr<Module> nvdrv; |
| 29 | 31 | ||
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index c1b2f33b9..9ca8483a5 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp | |||
| @@ -63,7 +63,7 @@ ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> ServiceManager::RegisterService | |||
| 63 | return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)); | 63 | return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | ResultCode ServiceManager::UnregisterService(std::string name) { | 66 | ResultCode ServiceManager::UnregisterService(const std::string& name) { |
| 67 | CASCADE_CODE(ValidateServiceName(name)); | 67 | CASCADE_CODE(ValidateServiceName(name)); |
| 68 | 68 | ||
| 69 | const auto iter = registered_services.find(name); | 69 | const auto iter = registered_services.find(name); |
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h index c4714b3e3..bef25433e 100644 --- a/src/core/hle/service/sm/sm.h +++ b/src/core/hle/service/sm/sm.h | |||
| @@ -50,7 +50,7 @@ public: | |||
| 50 | 50 | ||
| 51 | ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name, | 51 | ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name, |
| 52 | unsigned int max_sessions); | 52 | unsigned int max_sessions); |
| 53 | ResultCode UnregisterService(std::string name); | 53 | ResultCode UnregisterService(const std::string& name); |
| 54 | ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name); | 54 | ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name); |
| 55 | ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name); | 55 | ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name); |
| 56 | 56 | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a780215c1..3f906a517 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -21,6 +21,8 @@ add_library(video_core STATIC | |||
| 21 | macro_interpreter.h | 21 | macro_interpreter.h |
| 22 | memory_manager.cpp | 22 | memory_manager.cpp |
| 23 | memory_manager.h | 23 | memory_manager.h |
| 24 | morton.cpp | ||
| 25 | morton.h | ||
| 24 | rasterizer_cache.cpp | 26 | rasterizer_cache.cpp |
| 25 | rasterizer_cache.h | 27 | rasterizer_cache.h |
| 26 | rasterizer_interface.h | 28 | rasterizer_interface.h |
| @@ -62,7 +64,6 @@ add_library(video_core STATIC | |||
| 62 | textures/decoders.cpp | 64 | textures/decoders.cpp |
| 63 | textures/decoders.h | 65 | textures/decoders.h |
| 64 | textures/texture.h | 66 | textures/texture.h |
| 65 | utils.h | ||
| 66 | video_core.cpp | 67 | video_core.cpp |
| 67 | video_core.h | 68 | video_core.h |
| 68 | ) | 69 | ) |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 28e8c13aa..8b9c548cc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB | |||
| 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { | 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { |
| 35 | MICROPROFILE_SCOPE(ProcessCommandLists); | 35 | MICROPROFILE_SCOPE(ProcessCommandLists); |
| 36 | 36 | ||
| 37 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 38 | maxwell_3d->dirty_flags.OnMemoryWrite(); | ||
| 39 | |||
| 37 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { | 40 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { |
| 38 | LOG_TRACE(HW_GPU, | 41 | LOG_TRACE(HW_GPU, |
| 39 | "Processing method {:08X} on subchannel {} value " | 42 | "Processing method {:08X} on subchannel {} value " |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 74e44c7fe..8d0700d13 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| 9 | 11 | ||
| @@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 47 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); | 49 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); |
| 48 | 50 | ||
| 49 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { | 51 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { |
| 52 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 53 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 54 | |||
| 50 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); | 55 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); |
| 51 | // We have to invalidate the destination region to evict any outdated surfaces from the | 56 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 52 | // cache. We do this before actually writing the new data because the destination address | 57 | // cache. We do this before actually writing the new data because the destination address |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 585290d9f..2adbc9eaf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/core.h" | ||
| 6 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 8 | #include "video_core/engines/kepler_memory.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 9 | 11 | ||
| 10 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| @@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 47 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); | 49 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); |
| 48 | 50 | ||
| 49 | Memory::Write32(dest_address, data); | 51 | Memory::Write32(dest_address, data); |
| 52 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 50 | 53 | ||
| 51 | state.write_offset++; | 54 | state.write_offset++; |
| 52 | } | 55 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2bc534be3..f0a5470b9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 135 | 135 | ||
| 136 | if (regs.reg_array[method] != value) { | 136 | if (regs.reg_array[method] != value) { |
| 137 | regs.reg_array[method] = value; | 137 | regs.reg_array[method] = value; |
| 138 | // Vertex format | ||
| 138 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | 139 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && |
| 139 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | 140 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { |
| 140 | dirty_flags.vertex_attrib_format = true; | 141 | dirty_flags.vertex_attrib_format = true; |
| 141 | } | 142 | } |
| 143 | |||
| 144 | // Vertex buffer | ||
| 145 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 146 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { | ||
| 147 | dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 148 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 149 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { | ||
| 150 | dirty_flags.vertex_array |= | ||
| 151 | 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 152 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 153 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { | ||
| 154 | dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 155 | } | ||
| 142 | } | 156 | } |
| 143 | 157 | ||
| 144 | switch (method) { | 158 | switch (method) { |
| @@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 270 | query_result.timestamp = CoreTiming::GetTicks(); | 284 | query_result.timestamp = CoreTiming::GetTicks(); |
| 271 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 285 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |
| 272 | } | 286 | } |
| 287 | dirty_flags.OnMemoryWrite(); | ||
| 273 | break; | 288 | break; |
| 274 | } | 289 | } |
| 275 | default: | 290 | default: |
| @@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 346 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 361 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |
| 347 | 362 | ||
| 348 | Memory::Write32(*address, value); | 363 | Memory::Write32(*address, value); |
| 364 | dirty_flags.OnMemoryWrite(); | ||
| 349 | 365 | ||
| 350 | // Increment the current buffer position. | 366 | // Increment the current buffer position. |
| 351 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 367 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index c5dcdeb31..9324d9710 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -590,10 +590,18 @@ public: | |||
| 590 | 590 | ||
| 591 | float clear_color[4]; | 591 | float clear_color[4]; |
| 592 | float clear_depth; | 592 | float clear_depth; |
| 593 | |||
| 593 | INSERT_PADDING_WORDS(0x3); | 594 | INSERT_PADDING_WORDS(0x3); |
| 595 | |||
| 594 | s32 clear_stencil; | 596 | s32 clear_stencil; |
| 595 | 597 | ||
| 596 | INSERT_PADDING_WORDS(0x17); | 598 | INSERT_PADDING_WORDS(0x7); |
| 599 | |||
| 600 | u32 polygon_offset_point_enable; | ||
| 601 | u32 polygon_offset_line_enable; | ||
| 602 | u32 polygon_offset_fill_enable; | ||
| 603 | |||
| 604 | INSERT_PADDING_WORDS(0xD); | ||
| 597 | 605 | ||
| 598 | std::array<ScissorTest, NumViewports> scissor_test; | 606 | std::array<ScissorTest, NumViewports> scissor_test; |
| 599 | 607 | ||
| @@ -728,6 +736,7 @@ public: | |||
| 728 | u32 frag_color_clamp; | 736 | u32 frag_color_clamp; |
| 729 | 737 | ||
| 730 | union { | 738 | union { |
| 739 | BitField<0, 1, u32> y_negate; | ||
| 731 | BitField<4, 1, u32> triangle_rast_flip; | 740 | BitField<4, 1, u32> triangle_rast_flip; |
| 732 | } screen_y_control; | 741 | } screen_y_control; |
| 733 | 742 | ||
| @@ -761,7 +770,11 @@ public: | |||
| 761 | } | 770 | } |
| 762 | } tsc; | 771 | } tsc; |
| 763 | 772 | ||
| 764 | INSERT_PADDING_WORDS(0x3); | 773 | INSERT_PADDING_WORDS(0x1); |
| 774 | |||
| 775 | float polygon_offset_factor; | ||
| 776 | |||
| 777 | INSERT_PADDING_WORDS(0x1); | ||
| 765 | 778 | ||
| 766 | struct { | 779 | struct { |
| 767 | u32 tic_address_high; | 780 | u32 tic_address_high; |
| @@ -786,7 +799,9 @@ public: | |||
| 786 | 799 | ||
| 787 | u32 framebuffer_srgb; | 800 | u32 framebuffer_srgb; |
| 788 | 801 | ||
| 789 | INSERT_PADDING_WORDS(0x12); | 802 | float polygon_offset_units; |
| 803 | |||
| 804 | INSERT_PADDING_WORDS(0x11); | ||
| 790 | 805 | ||
| 791 | union { | 806 | union { |
| 792 | BitField<2, 1, u32> coord_origin; | 807 | BitField<2, 1, u32> coord_origin; |
| @@ -863,7 +878,9 @@ public: | |||
| 863 | 878 | ||
| 864 | INSERT_PADDING_WORDS(0x7); | 879 | INSERT_PADDING_WORDS(0x7); |
| 865 | 880 | ||
| 866 | INSERT_PADDING_WORDS(0x20); | 881 | INSERT_PADDING_WORDS(0x1F); |
| 882 | |||
| 883 | float polygon_offset_clamp; | ||
| 867 | 884 | ||
| 868 | struct { | 885 | struct { |
| 869 | u32 is_instanced[NumVertexArrays]; | 886 | u32 is_instanced[NumVertexArrays]; |
| @@ -1050,6 +1067,11 @@ public: | |||
| 1050 | 1067 | ||
| 1051 | struct DirtyFlags { | 1068 | struct DirtyFlags { |
| 1052 | bool vertex_attrib_format = true; | 1069 | bool vertex_attrib_format = true; |
| 1070 | u32 vertex_array = 0xFFFFFFFF; | ||
| 1071 | |||
| 1072 | void OnMemoryWrite() { | ||
| 1073 | vertex_array = 0xFFFFFFFF; | ||
| 1074 | } | ||
| 1053 | }; | 1075 | }; |
| 1054 | 1076 | ||
| 1055 | DirtyFlags dirty_flags; | 1077 | DirtyFlags dirty_flags; |
| @@ -1142,6 +1164,9 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D); | |||
| 1142 | ASSERT_REG_POSITION(clear_color[0], 0x360); | 1164 | ASSERT_REG_POSITION(clear_color[0], 0x360); |
| 1143 | ASSERT_REG_POSITION(clear_depth, 0x364); | 1165 | ASSERT_REG_POSITION(clear_depth, 0x364); |
| 1144 | ASSERT_REG_POSITION(clear_stencil, 0x368); | 1166 | ASSERT_REG_POSITION(clear_stencil, 0x368); |
| 1167 | ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); | ||
| 1168 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); | ||
| 1169 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); | ||
| 1145 | ASSERT_REG_POSITION(scissor_test, 0x380); | 1170 | ASSERT_REG_POSITION(scissor_test, 0x380); |
| 1146 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); | 1171 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); |
| 1147 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); | 1172 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); |
| @@ -1180,6 +1205,7 @@ ASSERT_REG_POSITION(point_size, 0x546); | |||
| 1180 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1205 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1181 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1206 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1182 | ASSERT_REG_POSITION(tsc, 0x557); | 1207 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1208 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | ||
| 1183 | ASSERT_REG_POSITION(tic, 0x55D); | 1209 | ASSERT_REG_POSITION(tic, 0x55D); |
| 1184 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); | 1210 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); |
| 1185 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); | 1211 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); |
| @@ -1187,11 +1213,13 @@ ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567); | |||
| 1187 | ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); | 1213 | ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); |
| 1188 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); | 1214 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); |
| 1189 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); | 1215 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); |
| 1216 | ASSERT_REG_POSITION(polygon_offset_units, 0x56F); | ||
| 1190 | ASSERT_REG_POSITION(point_coord_replace, 0x581); | 1217 | ASSERT_REG_POSITION(point_coord_replace, 0x581); |
| 1191 | ASSERT_REG_POSITION(code_address, 0x582); | 1218 | ASSERT_REG_POSITION(code_address, 0x582); |
| 1192 | ASSERT_REG_POSITION(draw, 0x585); | 1219 | ASSERT_REG_POSITION(draw, 0x585); |
| 1193 | ASSERT_REG_POSITION(primitive_restart, 0x591); | 1220 | ASSERT_REG_POSITION(primitive_restart, 0x591); |
| 1194 | ASSERT_REG_POSITION(index_array, 0x5F2); | 1221 | ASSERT_REG_POSITION(index_array, 0x5F2); |
| 1222 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | ||
| 1195 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1223 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
| 1196 | ASSERT_REG_POSITION(cull, 0x646); | 1224 | ASSERT_REG_POSITION(cull, 0x646); |
| 1197 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); | 1225 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index b8a78cf82..a34e884fe 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -2,7 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 6 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| @@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 54 | return; | 56 | return; |
| 55 | } | 57 | } |
| 56 | 58 | ||
| 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 60 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 61 | |||
| 57 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 58 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| 59 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 64 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7e8449bc4..b9faaf8e0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -82,6 +82,8 @@ union Attribute { | |||
| 82 | Position = 7, | 82 | Position = 7, |
| 83 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 84 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| 85 | ClipDistances0123 = 44, | ||
| 86 | ClipDistances4567 = 45, | ||
| 85 | PointCoord = 46, | 87 | PointCoord = 46, |
| 86 | // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex | 88 | // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex |
| 87 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval | 89 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval |
| @@ -366,6 +368,11 @@ enum class HalfPrecision : u64 { | |||
| 366 | FMZ = 2, | 368 | FMZ = 2, |
| 367 | }; | 369 | }; |
| 368 | 370 | ||
| 371 | enum class R2pMode : u64 { | ||
| 372 | Pr = 0, | ||
| 373 | Cc = 1, | ||
| 374 | }; | ||
| 375 | |||
| 369 | enum class IpaInterpMode : u64 { | 376 | enum class IpaInterpMode : u64 { |
| 370 | Linear = 0, | 377 | Linear = 0, |
| 371 | Perspective = 1, | 378 | Perspective = 1, |
| @@ -855,6 +862,12 @@ union Instruction { | |||
| 855 | } hsetp2; | 862 | } hsetp2; |
| 856 | 863 | ||
| 857 | union { | 864 | union { |
| 865 | BitField<40, 1, R2pMode> mode; | ||
| 866 | BitField<41, 2, u64> byte; | ||
| 867 | BitField<20, 7, u64> immediate_mask; | ||
| 868 | } r2p; | ||
| 869 | |||
| 870 | union { | ||
| 858 | BitField<39, 3, u64> pred39; | 871 | BitField<39, 3, u64> pred39; |
| 859 | BitField<42, 1, u64> neg_pred; | 872 | BitField<42, 1, u64> neg_pred; |
| 860 | BitField<43, 1, u64> neg_a; | 873 | BitField<43, 1, u64> neg_a; |
| @@ -1256,6 +1269,7 @@ public: | |||
| 1256 | BFE_C, | 1269 | BFE_C, |
| 1257 | BFE_R, | 1270 | BFE_R, |
| 1258 | BFE_IMM, | 1271 | BFE_IMM, |
| 1272 | BFI_IMM_R, | ||
| 1259 | BRA, | 1273 | BRA, |
| 1260 | PBK, | 1274 | PBK, |
| 1261 | LD_A, | 1275 | LD_A, |
| @@ -1381,6 +1395,7 @@ public: | |||
| 1381 | PSETP, | 1395 | PSETP, |
| 1382 | PSET, | 1396 | PSET, |
| 1383 | CSETP, | 1397 | CSETP, |
| 1398 | R2P_IMM, | ||
| 1384 | XMAD_IMM, | 1399 | XMAD_IMM, |
| 1385 | XMAD_CR, | 1400 | XMAD_CR, |
| 1386 | XMAD_RC, | 1401 | XMAD_RC, |
| @@ -1396,6 +1411,7 @@ public: | |||
| 1396 | ArithmeticHalf, | 1411 | ArithmeticHalf, |
| 1397 | ArithmeticHalfImmediate, | 1412 | ArithmeticHalfImmediate, |
| 1398 | Bfe, | 1413 | Bfe, |
| 1414 | Bfi, | ||
| 1399 | Shift, | 1415 | Shift, |
| 1400 | Ffma, | 1416 | Ffma, |
| 1401 | Hfma2, | 1417 | Hfma2, |
| @@ -1410,6 +1426,7 @@ public: | |||
| 1410 | HalfSetPredicate, | 1426 | HalfSetPredicate, |
| 1411 | PredicateSetPredicate, | 1427 | PredicateSetPredicate, |
| 1412 | PredicateSetRegister, | 1428 | PredicateSetRegister, |
| 1429 | RegisterSetPredicate, | ||
| 1413 | Conversion, | 1430 | Conversion, |
| 1414 | Xmad, | 1431 | Xmad, |
| 1415 | Unknown, | 1432 | Unknown, |
| @@ -1613,6 +1630,7 @@ private: | |||
| 1613 | INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), | 1630 | INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), |
| 1614 | INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), | 1631 | INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), |
| 1615 | INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), | 1632 | INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), |
| 1633 | INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), | ||
| 1616 | INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), | 1634 | INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), |
| 1617 | INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), | 1635 | INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), |
| 1618 | INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), | 1636 | INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), |
| @@ -1647,6 +1665,7 @@ private: | |||
| 1647 | INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), | 1665 | INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), |
| 1648 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), | 1666 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), |
| 1649 | INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), | 1667 | INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), |
| 1668 | INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), | ||
| 1650 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), | 1669 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), |
| 1651 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), | 1670 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), |
| 1652 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), | 1671 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index a0e015c4b..99c34649f 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -62,7 +62,16 @@ struct Header { | |||
| 62 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | 62 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB |
| 63 | INSERT_PADDING_BYTES(16); // ImapGenericVector[32] | 63 | INSERT_PADDING_BYTES(16); // ImapGenericVector[32] |
| 64 | INSERT_PADDING_BYTES(2); // ImapColor | 64 | INSERT_PADDING_BYTES(2); // ImapColor |
| 65 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | 65 | union { |
| 66 | BitField<0, 8, u16> clip_distances; | ||
| 67 | BitField<8, 1, u16> point_sprite_s; | ||
| 68 | BitField<9, 1, u16> point_sprite_t; | ||
| 69 | BitField<10, 1, u16> fog_coordinate; | ||
| 70 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 71 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 72 | BitField<14, 1, u16> instance_id; | ||
| 73 | BitField<15, 1, u16> vertex_id; | ||
| 74 | }; | ||
| 66 | INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] | 75 | INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] |
| 67 | INSERT_PADDING_BYTES(1); // ImapReserved | 76 | INSERT_PADDING_BYTES(1); // ImapReserved |
| 68 | INSERT_PADDING_BYTES(3); // OmapSystemValuesA | 77 | INSERT_PADDING_BYTES(3); // OmapSystemValuesA |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 77a20bb84..47247f097 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -9,6 +9,13 @@ | |||
| 9 | 9 | ||
| 10 | namespace Tegra { | 10 | namespace Tegra { |
| 11 | 11 | ||
| 12 | MemoryManager::MemoryManager() { | ||
| 13 | // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might | ||
| 14 | // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with | ||
| 15 | // Undertale using 0 for a render target. | ||
| 16 | PageSlot(0) = static_cast<u64>(PageStatus::Reserved); | ||
| 17 | } | ||
| 18 | |||
| 12 | GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { | 19 | GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { |
| 13 | const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; | 20 | const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; |
| 14 | 21 | ||
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 4eb338aa2..fb03497ca 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -18,7 +18,7 @@ using GPUVAddr = u64; | |||
| 18 | 18 | ||
| 19 | class MemoryManager final { | 19 | class MemoryManager final { |
| 20 | public: | 20 | public: |
| 21 | MemoryManager() = default; | 21 | MemoryManager(); |
| 22 | 22 | ||
| 23 | GPUVAddr AllocateSpace(u64 size, u64 align); | 23 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| 24 | GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); | 24 | GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); |
| @@ -37,6 +37,7 @@ private: | |||
| 37 | enum class PageStatus : u64 { | 37 | enum class PageStatus : u64 { |
| 38 | Unmapped = 0xFFFFFFFFFFFFFFFFULL, | 38 | Unmapped = 0xFFFFFFFFFFFFFFFFULL, |
| 39 | Allocated = 0xFFFFFFFFFFFFFFFEULL, | 39 | Allocated = 0xFFFFFFFFFFFFFFFEULL, |
| 40 | Reserved = 0xFFFFFFFFFFFFFFFDULL, | ||
| 40 | }; | 41 | }; |
| 41 | 42 | ||
| 42 | std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, | 43 | std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, |
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp new file mode 100644 index 000000000..f14abba7d --- /dev/null +++ b/src/video_core/morton.cpp | |||
| @@ -0,0 +1,353 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cstring> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/morton.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/textures/decoders.h" | ||
| 13 | |||
| 14 | namespace VideoCore { | ||
| 15 | |||
| 16 | using Surface::GetBytesPerPixel; | ||
| 17 | using Surface::PixelFormat; | ||
| 18 | |||
| 19 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); | ||
| 20 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | ||
| 21 | |||
| 22 | template <bool morton_to_linear, PixelFormat format> | ||
| 23 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | ||
| 24 | u8* buffer, std::size_t buffer_size, VAddr addr) { | ||
| 25 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 26 | |||
| 27 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 28 | // pixel values. | ||
| 29 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 30 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 31 | |||
| 32 | if constexpr (morton_to_linear) { | ||
| 33 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 34 | stride, height, depth, block_height, block_depth); | ||
| 35 | } else { | ||
| 36 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 37 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 38 | bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), | ||
| 39 | buffer, false, block_height, block_depth); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
// Copy routines used for MortonSwizzleMode::MortonToLinear. GetSwizzleFunction indexes this
// table with static_cast<std::size_t>(PixelFormat), so the position of every entry matters.
// NOTE(review): the entry order presumably has to mirror the declaration order of
// Surface::PixelFormat - confirm against video_core/surface.h when adding a format.
static constexpr ConversionArray morton_to_linear_fns = {
    // clang-format off
    MortonCopy<true, PixelFormat::ABGR8U>,
    MortonCopy<true, PixelFormat::ABGR8S>,
    MortonCopy<true, PixelFormat::ABGR8UI>,
    MortonCopy<true, PixelFormat::B5G6R5U>,
    MortonCopy<true, PixelFormat::A2B10G10R10U>,
    MortonCopy<true, PixelFormat::A1B5G5R5U>,
    MortonCopy<true, PixelFormat::R8U>,
    MortonCopy<true, PixelFormat::R8UI>,
    MortonCopy<true, PixelFormat::RGBA16F>,
    MortonCopy<true, PixelFormat::RGBA16U>,
    MortonCopy<true, PixelFormat::RGBA16UI>,
    MortonCopy<true, PixelFormat::R11FG11FB10F>,
    MortonCopy<true, PixelFormat::RGBA32UI>,
    MortonCopy<true, PixelFormat::DXT1>,
    MortonCopy<true, PixelFormat::DXT23>,
    MortonCopy<true, PixelFormat::DXT45>,
    MortonCopy<true, PixelFormat::DXN1>,
    MortonCopy<true, PixelFormat::DXN2UNORM>,
    MortonCopy<true, PixelFormat::DXN2SNORM>,
    MortonCopy<true, PixelFormat::BC7U>,
    MortonCopy<true, PixelFormat::BC6H_UF16>,
    MortonCopy<true, PixelFormat::BC6H_SF16>,
    MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
    MortonCopy<true, PixelFormat::G8R8U>,
    MortonCopy<true, PixelFormat::G8R8S>,
    MortonCopy<true, PixelFormat::BGRA8>,
    MortonCopy<true, PixelFormat::RGBA32F>,
    MortonCopy<true, PixelFormat::RG32F>,
    MortonCopy<true, PixelFormat::R32F>,
    MortonCopy<true, PixelFormat::R16F>,
    MortonCopy<true, PixelFormat::R16U>,
    MortonCopy<true, PixelFormat::R16S>,
    MortonCopy<true, PixelFormat::R16UI>,
    MortonCopy<true, PixelFormat::R16I>,
    MortonCopy<true, PixelFormat::RG16>,
    MortonCopy<true, PixelFormat::RG16F>,
    MortonCopy<true, PixelFormat::RG16UI>,
    MortonCopy<true, PixelFormat::RG16I>,
    MortonCopy<true, PixelFormat::RG16S>,
    MortonCopy<true, PixelFormat::RGB32F>,
    MortonCopy<true, PixelFormat::RGBA8_SRGB>,
    MortonCopy<true, PixelFormat::RG8U>,
    MortonCopy<true, PixelFormat::RG8S>,
    MortonCopy<true, PixelFormat::RG32UI>,
    MortonCopy<true, PixelFormat::R32UI>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
    MortonCopy<true, PixelFormat::BGRA8_SRGB>,
    MortonCopy<true, PixelFormat::DXT1_SRGB>,
    MortonCopy<true, PixelFormat::DXT23_SRGB>,
    MortonCopy<true, PixelFormat::DXT45_SRGB>,
    MortonCopy<true, PixelFormat::BC7U_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
    MortonCopy<true, PixelFormat::Z32F>,
    MortonCopy<true, PixelFormat::Z16>,
    MortonCopy<true, PixelFormat::Z24S8>,
    MortonCopy<true, PixelFormat::S8Z24>,
    MortonCopy<true, PixelFormat::Z32FS8>,
    // clang-format on
};
| 113 | |||
// Copy routines used for MortonSwizzleMode::LinearToMorton. GetSwizzleFunction indexes this
// table with static_cast<std::size_t>(PixelFormat), so it must stay position-for-position in
// step with morton_to_linear_fns. Slots holding nullptr have no swizzling routine; the format
// named next to each one is the format found at the same position in morton_to_linear_fns.
// Callers must not invoke a nullptr entry.
static constexpr ConversionArray linear_to_morton_fns = {
    // clang-format off
    MortonCopy<false, PixelFormat::ABGR8U>,
    MortonCopy<false, PixelFormat::ABGR8S>,
    MortonCopy<false, PixelFormat::ABGR8UI>,
    MortonCopy<false, PixelFormat::B5G6R5U>,
    MortonCopy<false, PixelFormat::A2B10G10R10U>,
    MortonCopy<false, PixelFormat::A1B5G5R5U>,
    MortonCopy<false, PixelFormat::R8U>,
    MortonCopy<false, PixelFormat::R8UI>,
    MortonCopy<false, PixelFormat::RGBA16F>,
    MortonCopy<false, PixelFormat::RGBA16U>,
    MortonCopy<false, PixelFormat::RGBA16UI>,
    MortonCopy<false, PixelFormat::R11FG11FB10F>,
    MortonCopy<false, PixelFormat::RGBA32UI>,
    MortonCopy<false, PixelFormat::DXT1>,
    MortonCopy<false, PixelFormat::DXT23>,
    MortonCopy<false, PixelFormat::DXT45>,
    MortonCopy<false, PixelFormat::DXN1>,
    MortonCopy<false, PixelFormat::DXN2UNORM>,
    MortonCopy<false, PixelFormat::DXN2SNORM>,
    MortonCopy<false, PixelFormat::BC7U>,
    MortonCopy<false, PixelFormat::BC6H_UF16>,
    MortonCopy<false, PixelFormat::BC6H_SF16>,
    // TODO(Subv): Swizzling ASTC formats are not supported
    nullptr, // ASTC_2D_4X4
    MortonCopy<false, PixelFormat::G8R8U>,
    MortonCopy<false, PixelFormat::G8R8S>,
    MortonCopy<false, PixelFormat::BGRA8>,
    MortonCopy<false, PixelFormat::RGBA32F>,
    MortonCopy<false, PixelFormat::RG32F>,
    MortonCopy<false, PixelFormat::R32F>,
    MortonCopy<false, PixelFormat::R16F>,
    MortonCopy<false, PixelFormat::R16U>,
    MortonCopy<false, PixelFormat::R16S>,
    MortonCopy<false, PixelFormat::R16UI>,
    MortonCopy<false, PixelFormat::R16I>,
    MortonCopy<false, PixelFormat::RG16>,
    MortonCopy<false, PixelFormat::RG16F>,
    MortonCopy<false, PixelFormat::RG16UI>,
    MortonCopy<false, PixelFormat::RG16I>,
    MortonCopy<false, PixelFormat::RG16S>,
    MortonCopy<false, PixelFormat::RGB32F>,
    MortonCopy<false, PixelFormat::RGBA8_SRGB>,
    MortonCopy<false, PixelFormat::RG8U>,
    MortonCopy<false, PixelFormat::RG8S>,
    MortonCopy<false, PixelFormat::RG32UI>,
    MortonCopy<false, PixelFormat::R32UI>,
    nullptr, // ASTC_2D_8X8
    nullptr, // ASTC_2D_8X5
    nullptr, // ASTC_2D_5X4
    MortonCopy<false, PixelFormat::BGRA8_SRGB>,
    MortonCopy<false, PixelFormat::DXT1_SRGB>,
    MortonCopy<false, PixelFormat::DXT23_SRGB>,
    MortonCopy<false, PixelFormat::DXT45_SRGB>,
    MortonCopy<false, PixelFormat::BC7U_SRGB>,
    nullptr, // ASTC_2D_4X4_SRGB
    nullptr, // ASTC_2D_8X8_SRGB
    nullptr, // ASTC_2D_8X5_SRGB
    nullptr, // ASTC_2D_5X4_SRGB
    nullptr, // ASTC_2D_5X5
    nullptr, // ASTC_2D_5X5_SRGB
    nullptr, // ASTC_2D_10X8
    nullptr, // ASTC_2D_10X8_SRGB
    MortonCopy<false, PixelFormat::Z32F>,
    MortonCopy<false, PixelFormat::Z16>,
    MortonCopy<false, PixelFormat::Z24S8>,
    MortonCopy<false, PixelFormat::S8Z24>,
    MortonCopy<false, PixelFormat::Z32FS8>,
    // clang-format on
};
| 185 | |||
| 186 | constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { | ||
| 187 | switch (mode) { | ||
| 188 | case MortonSwizzleMode::MortonToLinear: | ||
| 189 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 190 | case MortonSwizzleMode::LinearToMorton: | ||
| 191 | return linear_to_morton_fns[static_cast<std::size_t>(format)]; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
/// Z-order (Morton) index of the texel at (x, y) inside its 8x8 tile.
/// Only the low three bits of each coordinate take part in the result.
static u32 MortonInterleave(u32 x, u32 y) {
    // Each table spreads a 3-bit value over every other result bit: x fills bits 0, 2 and 4,
    // y fills bits 1, 3 and 5.
    static constexpr std::array<u32, 8> x_spread{0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
    static constexpr std::array<u32, 8> y_spread{0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};

    const u32 x_bits = x_spread[x & 7];
    const u32 y_bits = y_spread[y & 7];
    return x_bits + y_bits;
}
| 201 | |||
| 202 | /// Calculates the offset of the position of the pixel in Morton order | ||
| 203 | static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 204 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 205 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 206 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 207 | // texels are laid out in a 2x2 subtile like this: | ||
| 208 | // 2 3 | ||
| 209 | // 0 1 | ||
| 210 | // | ||
| 211 | // The full 8x8 tile has the texels arranged like this: | ||
| 212 | // | ||
| 213 | // 42 43 46 47 58 59 62 63 | ||
| 214 | // 40 41 44 45 56 57 60 61 | ||
| 215 | // 34 35 38 39 50 51 54 55 | ||
| 216 | // 32 33 36 37 48 49 52 53 | ||
| 217 | // 10 11 14 15 26 27 30 31 | ||
| 218 | // 08 09 12 13 24 25 28 29 | ||
| 219 | // 02 03 06 07 18 19 22 23 | ||
| 220 | // 00 01 04 05 16 17 20 21 | ||
| 221 | // | ||
| 222 | // This pattern is what's called Z-order curve, or Morton order. | ||
| 223 | |||
| 224 | const unsigned int block_height = 8; | ||
| 225 | const unsigned int coarse_x = x & ~7; | ||
| 226 | |||
| 227 | u32 i = MortonInterleave(x, y); | ||
| 228 | |||
| 229 | const unsigned int offset = coarse_x * block_height; | ||
| 230 | |||
| 231 | return (i + offset) * bytes_per_pixel; | ||
| 232 | } | ||
| 233 | |||
/// Builds a 128-entry lookup table in which entry i is the sum of bit_weights[b] over every set
/// bit b (0..6) of i.
static constexpr std::array<u32, 128> MakeInterleaveLut128(const std::array<u32, 7>& bit_weights) {
    std::array<u32, 128> lut{};
    for (u32 index = 0; index < 128; ++index) {
        u32 value = 0;
        for (u32 bit = 0; bit < 7; ++bit) {
            if ((index >> bit) & 1) {
                value += bit_weights[bit];
            }
        }
        lut[index] = value;
    }
    return lut;
}

/// 128x128 Z-Order coordinate from 2D coordinates.
/// Only the low seven bits of x and of y take part in the result.
static u32 MortonInterleave128(u32 x, u32 y) {
    // The previous hand-written tables had 384 entries each, but they are indexed modulo 128
    // and entries 128..383 merely repeated the first 128. The tables are now generated at
    // compile time from the contribution of each coordinate bit, which yields the same values
    // for every reachable index.
    static constexpr std::array<u32, 128> xlut = MakeInterleaveLut128(
        std::array<u32, 7>{0x0001, 0x0002, 0x0008, 0x0040, 0x0800, 0x1000, 0x2000});
    static constexpr std::array<u32, 128> ylut = MakeInterleaveLut128(
        std::array<u32, 7>{0x0004, 0x0010, 0x0020, 0x0080, 0x0100, 0x0200, 0x0400});

    return xlut[x % 128] + ylut[y % 128];
}
| 312 | |||
| 313 | static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 314 | // Calculates the offset of the position of the pixel in Morton order | ||
| 315 | // Framebuffer images are split into 128x128 tiles. | ||
| 316 | |||
| 317 | constexpr u32 block_height = 128; | ||
| 318 | const u32 coarse_x = x & ~127; | ||
| 319 | |||
| 320 | const u32 i = MortonInterleave128(x, y); | ||
| 321 | |||
| 322 | const u32 offset = coarse_x * block_height; | ||
| 323 | |||
| 324 | return (i + offset) * bytes_per_pixel; | ||
| 325 | } | ||
| 326 | |||
| 327 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | ||
| 328 | u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, | ||
| 329 | std::size_t buffer_size, VAddr addr) { | ||
| 330 | |||
| 331 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer, | ||
| 332 | buffer_size, addr); | ||
| 333 | } | ||
| 334 | |||
| 335 | void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, | ||
| 336 | u8* morton_data, u8* linear_data, bool morton_to_linear) { | ||
| 337 | u8* data_ptrs[2]; | ||
| 338 | for (u32 y = 0; y < height; ++y) { | ||
| 339 | for (u32 x = 0; x < width; ++x) { | ||
| 340 | const u32 coarse_y = y & ~127; | ||
| 341 | const u32 morton_offset = | ||
| 342 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 343 | const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel; | ||
| 344 | |||
| 345 | data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset; | ||
| 346 | data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index]; | ||
| 347 | |||
| 348 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | } // namespace VideoCore \ No newline at end of file | ||
diff --git a/src/video_core/morton.h b/src/video_core/morton.h new file mode 100644 index 000000000..b9b9eca86 --- /dev/null +++ b/src/video_core/morton.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
namespace VideoCore {

/// Direction of a swizzle copy; selects which per-format conversion table MortonSwizzle uses.
/// NOTE(review): judging by the names, MortonToLinear reads swizzled guest memory into the
/// buffer and LinearToMorton writes the buffer back swizzled - confirm against the texture
/// decoder helpers.
enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };

/// Looks up the copy routine for the given mode and pixel format and calls it with the
/// remaining arguments.
/// @param mode         Direction of the copy.
/// @param format       Pixel format of the surface.
/// @param stride       Forwarded to the per-format routine.
/// @param block_height Forwarded to the per-format routine.
/// @param height       Forwarded to the per-format routine.
/// @param block_depth  Forwarded to the per-format routine.
/// @param depth        Forwarded to the per-format routine.
/// @param buffer       Host-side buffer.
/// @param buffer_size  Forwarded to the per-format routine.
/// @param addr         Guest virtual address of the surface.
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
                   std::size_t buffer_size, VAddr addr);

/// Copies a width x height image pixel by pixel between a Morton-ordered buffer (128x128 tiles)
/// and a linear buffer.
/// @param bytes_per_pixel        Bytes copied for each pixel.
/// @param linear_bytes_per_pixel Spacing in bytes between consecutive pixels in linear_data.
/// @param morton_to_linear       true copies morton_data into linear_data, false the reverse.
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
                         u8* morton_data, u8* linear_data, bool morton_to_linear);

} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 075192c3f..46a6c0308 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s | |||
| 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); | 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void OGLBufferCache::Map(std::size_t max_size) { | 79 | bool OGLBufferCache::Map(std::size_t max_size) { |
| 80 | bool invalidate; | 80 | bool invalidate; |
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = |
| 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); |
| @@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) { | |||
| 85 | if (invalidate) { | 85 | if (invalidate) { |
| 86 | InvalidateAll(); | 86 | InvalidateAll(); |
| 87 | } | 87 | } |
| 88 | return invalidate; | ||
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | void OGLBufferCache::Unmap() { | 91 | void OGLBufferCache::Unmap() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 91fca3f6c..c11acfb79 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -50,7 +50,7 @@ public: | |||
| 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. |
| 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); | 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); |
| 52 | 52 | ||
| 53 | void Map(std::size_t max_size); | 53 | bool Map(std::size_t max_size); |
| 54 | void Unmap(); | 54 | void Unmap(); |
| 55 | 55 | ||
| 56 | GLuint GetHandle() const; | 56 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 99004c9ad..98fb5a9aa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 176 | } | 176 | } |
| 177 | state.draw.vertex_array = VAO.handle; | 177 | state.draw.vertex_array = VAO.handle; |
| 178 | state.ApplyVertexBufferState(); | 178 | state.ApplyVertexBufferState(); |
| 179 | |||
| 180 | // Rebinding the VAO invalidates the vertex buffer bindings. | ||
| 181 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 179 | } | 182 | } |
| 180 | 183 | ||
| 181 | void RasterizerOpenGL::SetupVertexBuffer() { | 184 | void RasterizerOpenGL::SetupVertexBuffer() { |
| 182 | MICROPROFILE_SCOPE(OpenGL_VB); | 185 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 183 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 184 | const auto& regs = gpu.regs; | 186 | const auto& regs = gpu.regs; |
| 185 | 187 | ||
| 188 | if (!gpu.dirty_flags.vertex_array) | ||
| 189 | return; | ||
| 190 | |||
| 191 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 192 | |||
| 186 | // Upload all guest vertex arrays sequentially to our buffer | 193 | // Upload all guest vertex arrays sequentially to our buffer |
| 187 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 194 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 195 | if (~gpu.dirty_flags.vertex_array & (1u << index)) | ||
| 196 | continue; | ||
| 197 | |||
| 188 | const auto& vertex_array = regs.vertex_array[index]; | 198 | const auto& vertex_array = regs.vertex_array[index]; |
| 189 | if (!vertex_array.IsEnabled()) | 199 | if (!vertex_array.IsEnabled()) |
| 190 | continue; | 200 | continue; |
| @@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 211 | 221 | ||
| 212 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... | 222 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... |
| 213 | state.draw.vertex_buffer = buffer_cache.GetHandle(); | 223 | state.draw.vertex_buffer = buffer_cache.GetHandle(); |
| 224 | |||
| 225 | gpu.dirty_flags.vertex_array = 0; | ||
| 214 | } | 226 | } |
| 215 | 227 | ||
| 216 | DrawParameters RasterizerOpenGL::SetupDraw() { | 228 | DrawParameters RasterizerOpenGL::SetupDraw() { |
| @@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 600 | return; | 612 | return; |
| 601 | 613 | ||
| 602 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 614 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 603 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 615 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 604 | const auto& regs = gpu.regs; | 616 | const auto& regs = gpu.regs; |
| 605 | 617 | ||
| 606 | ScopeAcquireGLContext acquire_context{emu_window}; | 618 | ScopeAcquireGLContext acquire_context{emu_window}; |
| @@ -620,7 +632,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 620 | SyncTransformFeedback(); | 632 | SyncTransformFeedback(); |
| 621 | SyncPointState(); | 633 | SyncPointState(); |
| 622 | CheckAlphaTests(); | 634 | CheckAlphaTests(); |
| 623 | 635 | SyncPolygonOffset(); | |
| 624 | // TODO(bunnei): Sync framebuffer_scale uniform here | 636 | // TODO(bunnei): Sync framebuffer_scale uniform here |
| 625 | // TODO(bunnei): Sync scissorbox uniform(s) here | 637 | // TODO(bunnei): Sync scissorbox uniform(s) here |
| 626 | 638 | ||
| @@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 653 | // Add space for at least 18 constant buffers | 665 | // Add space for at least 18 constant buffers |
| 654 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 666 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); |
| 655 | 667 | ||
| 656 | buffer_cache.Map(buffer_size); | 668 | bool invalidate = buffer_cache.Map(buffer_size); |
| 669 | if (invalidate) { | ||
| 670 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 671 | gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF; | ||
| 672 | } | ||
| 657 | 673 | ||
| 658 | SetupVertexFormat(); | 674 | SetupVertexFormat(); |
| 659 | SetupVertexBuffer(); | 675 | SetupVertexBuffer(); |
| @@ -1195,6 +1211,16 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1195 | state.point.size = regs.point_size; | 1211 | state.point.size = regs.point_size; |
| 1196 | } | 1212 | } |
| 1197 | 1213 | ||
| 1214 | void RasterizerOpenGL::SyncPolygonOffset() { | ||
| 1215 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | ||
| 1216 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | ||
| 1217 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | ||
| 1218 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | ||
| 1219 | state.polygon_offset.units = regs.polygon_offset_units; | ||
| 1220 | state.polygon_offset.factor = regs.polygon_offset_factor; | ||
| 1221 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | ||
| 1222 | } | ||
| 1223 | |||
| 1198 | void RasterizerOpenGL::CheckAlphaTests() { | 1224 | void RasterizerOpenGL::CheckAlphaTests() { |
| 1199 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1225 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1200 | 1226 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f4354289c..dfb4616f2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -183,6 +183,9 @@ private: | |||
| 183 | /// Syncs Color Mask | 183 | /// Syncs Color Mask |
| 184 | void SyncColorMask(); | 184 | void SyncColorMask(); |
| 185 | 185 | ||
| 186 | /// Syncs the polygon offsets | ||
| 187 | void SyncPolygonOffset(); | ||
| 188 | |||
| 186 | /// Check asserts for alpha testing. | 189 | /// Check asserts for alpha testing. |
| 187 | void CheckAlphaTests(); | 190 | void CheckAlphaTests(); |
| 188 | 191 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 4f434fc31..d458f77e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 16 | #include "core/settings.h" | 16 | #include "core/settings.h" |
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/morton.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 20 | #include "video_core/renderer_opengl/gl_state.h" | 21 | #include "video_core/renderer_opengl/gl_state.h" |
| @@ -22,10 +23,11 @@ | |||
| 22 | #include "video_core/surface.h" | 23 | #include "video_core/surface.h" |
| 23 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/astc.h" |
| 24 | #include "video_core/textures/decoders.h" | 25 | #include "video_core/textures/decoders.h" |
| 25 | #include "video_core/utils.h" | ||
| 26 | 26 | ||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| 28 | 28 | ||
| 29 | using VideoCore::MortonSwizzle; | ||
| 30 | using VideoCore::MortonSwizzleMode; | ||
| 29 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | 31 | using VideoCore::Surface::ComponentTypeFromDepthFormat; |
| 30 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | 32 | using VideoCore::Surface::ComponentTypeFromRenderTarget; |
| 31 | using VideoCore::Surface::ComponentTypeFromTexture; | 33 | using VideoCore::Surface::ComponentTypeFromTexture; |
| @@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | |||
| 370 | return {0, actual_height, MipWidth(mip_level), 0}; | 372 | return {0, actual_height, MipWidth(mip_level), 0}; |
| 371 | } | 373 | } |
| 372 | 374 | ||
| 373 | template <bool morton_to_gl, PixelFormat format> | 375 | void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, |
| 374 | void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer, | ||
| 375 | std::size_t gl_buffer_size, VAddr addr) { | ||
| 376 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 377 | |||
| 378 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 379 | // pixel values. | ||
| 380 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 381 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 382 | |||
| 383 | if (morton_to_gl) { | ||
| 384 | Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 385 | stride, height, depth, block_height, block_depth); | ||
| 386 | } else { | ||
| 387 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 388 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 389 | bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), | ||
| 390 | gl_buffer, false, block_height, block_depth); | ||
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 394 | using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), | ||
| 395 | VideoCore::Surface::MaxPixelFormat>; | ||
| 396 | |||
| 397 | static constexpr GLConversionArray morton_to_gl_fns = { | ||
| 398 | // clang-format off | ||
| 399 | MortonCopy<true, PixelFormat::ABGR8U>, | ||
| 400 | MortonCopy<true, PixelFormat::ABGR8S>, | ||
| 401 | MortonCopy<true, PixelFormat::ABGR8UI>, | ||
| 402 | MortonCopy<true, PixelFormat::B5G6R5U>, | ||
| 403 | MortonCopy<true, PixelFormat::A2B10G10R10U>, | ||
| 404 | MortonCopy<true, PixelFormat::A1B5G5R5U>, | ||
| 405 | MortonCopy<true, PixelFormat::R8U>, | ||
| 406 | MortonCopy<true, PixelFormat::R8UI>, | ||
| 407 | MortonCopy<true, PixelFormat::RGBA16F>, | ||
| 408 | MortonCopy<true, PixelFormat::RGBA16U>, | ||
| 409 | MortonCopy<true, PixelFormat::RGBA16UI>, | ||
| 410 | MortonCopy<true, PixelFormat::R11FG11FB10F>, | ||
| 411 | MortonCopy<true, PixelFormat::RGBA32UI>, | ||
| 412 | MortonCopy<true, PixelFormat::DXT1>, | ||
| 413 | MortonCopy<true, PixelFormat::DXT23>, | ||
| 414 | MortonCopy<true, PixelFormat::DXT45>, | ||
| 415 | MortonCopy<true, PixelFormat::DXN1>, | ||
| 416 | MortonCopy<true, PixelFormat::DXN2UNORM>, | ||
| 417 | MortonCopy<true, PixelFormat::DXN2SNORM>, | ||
| 418 | MortonCopy<true, PixelFormat::BC7U>, | ||
| 419 | MortonCopy<true, PixelFormat::BC6H_UF16>, | ||
| 420 | MortonCopy<true, PixelFormat::BC6H_SF16>, | ||
| 421 | MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | ||
| 422 | MortonCopy<true, PixelFormat::G8R8U>, | ||
| 423 | MortonCopy<true, PixelFormat::G8R8S>, | ||
| 424 | MortonCopy<true, PixelFormat::BGRA8>, | ||
| 425 | MortonCopy<true, PixelFormat::RGBA32F>, | ||
| 426 | MortonCopy<true, PixelFormat::RG32F>, | ||
| 427 | MortonCopy<true, PixelFormat::R32F>, | ||
| 428 | MortonCopy<true, PixelFormat::R16F>, | ||
| 429 | MortonCopy<true, PixelFormat::R16U>, | ||
| 430 | MortonCopy<true, PixelFormat::R16S>, | ||
| 431 | MortonCopy<true, PixelFormat::R16UI>, | ||
| 432 | MortonCopy<true, PixelFormat::R16I>, | ||
| 433 | MortonCopy<true, PixelFormat::RG16>, | ||
| 434 | MortonCopy<true, PixelFormat::RG16F>, | ||
| 435 | MortonCopy<true, PixelFormat::RG16UI>, | ||
| 436 | MortonCopy<true, PixelFormat::RG16I>, | ||
| 437 | MortonCopy<true, PixelFormat::RG16S>, | ||
| 438 | MortonCopy<true, PixelFormat::RGB32F>, | ||
| 439 | MortonCopy<true, PixelFormat::RGBA8_SRGB>, | ||
| 440 | MortonCopy<true, PixelFormat::RG8U>, | ||
| 441 | MortonCopy<true, PixelFormat::RG8S>, | ||
| 442 | MortonCopy<true, PixelFormat::RG32UI>, | ||
| 443 | MortonCopy<true, PixelFormat::R32UI>, | ||
| 444 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | ||
| 445 | MortonCopy<true, PixelFormat::ASTC_2D_8X5>, | ||
| 446 | MortonCopy<true, PixelFormat::ASTC_2D_5X4>, | ||
| 447 | MortonCopy<true, PixelFormat::BGRA8_SRGB>, | ||
| 448 | MortonCopy<true, PixelFormat::DXT1_SRGB>, | ||
| 449 | MortonCopy<true, PixelFormat::DXT23_SRGB>, | ||
| 450 | MortonCopy<true, PixelFormat::DXT45_SRGB>, | ||
| 451 | MortonCopy<true, PixelFormat::BC7U_SRGB>, | ||
| 452 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | ||
| 453 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | ||
| 454 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | ||
| 455 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, | ||
| 456 | MortonCopy<true, PixelFormat::ASTC_2D_5X5>, | ||
| 457 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | ||
| 458 | MortonCopy<true, PixelFormat::ASTC_2D_10X8>, | ||
| 459 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | ||
| 460 | MortonCopy<true, PixelFormat::Z32F>, | ||
| 461 | MortonCopy<true, PixelFormat::Z16>, | ||
| 462 | MortonCopy<true, PixelFormat::Z24S8>, | ||
| 463 | MortonCopy<true, PixelFormat::S8Z24>, | ||
| 464 | MortonCopy<true, PixelFormat::Z32FS8>, | ||
| 465 | // clang-format on | ||
| 466 | }; | ||
| 467 | |||
| 468 | static constexpr GLConversionArray gl_to_morton_fns = { | ||
| 469 | // clang-format off | ||
| 470 | MortonCopy<false, PixelFormat::ABGR8U>, | ||
| 471 | MortonCopy<false, PixelFormat::ABGR8S>, | ||
| 472 | MortonCopy<false, PixelFormat::ABGR8UI>, | ||
| 473 | MortonCopy<false, PixelFormat::B5G6R5U>, | ||
| 474 | MortonCopy<false, PixelFormat::A2B10G10R10U>, | ||
| 475 | MortonCopy<false, PixelFormat::A1B5G5R5U>, | ||
| 476 | MortonCopy<false, PixelFormat::R8U>, | ||
| 477 | MortonCopy<false, PixelFormat::R8UI>, | ||
| 478 | MortonCopy<false, PixelFormat::RGBA16F>, | ||
| 479 | MortonCopy<false, PixelFormat::RGBA16U>, | ||
| 480 | MortonCopy<false, PixelFormat::RGBA16UI>, | ||
| 481 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | ||
| 482 | MortonCopy<false, PixelFormat::RGBA32UI>, | ||
| 483 | MortonCopy<false, PixelFormat::DXT1>, | ||
| 484 | MortonCopy<false, PixelFormat::DXT23>, | ||
| 485 | MortonCopy<false, PixelFormat::DXT45>, | ||
| 486 | MortonCopy<false, PixelFormat::DXN1>, | ||
| 487 | MortonCopy<false, PixelFormat::DXN2UNORM>, | ||
| 488 | MortonCopy<false, PixelFormat::DXN2SNORM>, | ||
| 489 | MortonCopy<false, PixelFormat::BC7U>, | ||
| 490 | MortonCopy<false, PixelFormat::BC6H_UF16>, | ||
| 491 | MortonCopy<false, PixelFormat::BC6H_SF16>, | ||
| 492 | // TODO(Subv): Swizzling ASTC formats are not supported | ||
| 493 | nullptr, | ||
| 494 | MortonCopy<false, PixelFormat::G8R8U>, | ||
| 495 | MortonCopy<false, PixelFormat::G8R8S>, | ||
| 496 | MortonCopy<false, PixelFormat::BGRA8>, | ||
| 497 | MortonCopy<false, PixelFormat::RGBA32F>, | ||
| 498 | MortonCopy<false, PixelFormat::RG32F>, | ||
| 499 | MortonCopy<false, PixelFormat::R32F>, | ||
| 500 | MortonCopy<false, PixelFormat::R16F>, | ||
| 501 | MortonCopy<false, PixelFormat::R16U>, | ||
| 502 | MortonCopy<false, PixelFormat::R16S>, | ||
| 503 | MortonCopy<false, PixelFormat::R16UI>, | ||
| 504 | MortonCopy<false, PixelFormat::R16I>, | ||
| 505 | MortonCopy<false, PixelFormat::RG16>, | ||
| 506 | MortonCopy<false, PixelFormat::RG16F>, | ||
| 507 | MortonCopy<false, PixelFormat::RG16UI>, | ||
| 508 | MortonCopy<false, PixelFormat::RG16I>, | ||
| 509 | MortonCopy<false, PixelFormat::RG16S>, | ||
| 510 | MortonCopy<false, PixelFormat::RGB32F>, | ||
| 511 | MortonCopy<false, PixelFormat::RGBA8_SRGB>, | ||
| 512 | MortonCopy<false, PixelFormat::RG8U>, | ||
| 513 | MortonCopy<false, PixelFormat::RG8S>, | ||
| 514 | MortonCopy<false, PixelFormat::RG32UI>, | ||
| 515 | MortonCopy<false, PixelFormat::R32UI>, | ||
| 516 | nullptr, | ||
| 517 | nullptr, | ||
| 518 | nullptr, | ||
| 519 | MortonCopy<false, PixelFormat::BGRA8_SRGB>, | ||
| 520 | MortonCopy<false, PixelFormat::DXT1_SRGB>, | ||
| 521 | MortonCopy<false, PixelFormat::DXT23_SRGB>, | ||
| 522 | MortonCopy<false, PixelFormat::DXT45_SRGB>, | ||
| 523 | MortonCopy<false, PixelFormat::BC7U_SRGB>, | ||
| 524 | nullptr, | ||
| 525 | nullptr, | ||
| 526 | nullptr, | ||
| 527 | nullptr, | ||
| 528 | nullptr, | ||
| 529 | nullptr, | ||
| 530 | nullptr, | ||
| 531 | nullptr, | ||
| 532 | MortonCopy<false, PixelFormat::Z32F>, | ||
| 533 | MortonCopy<false, PixelFormat::Z16>, | ||
| 534 | MortonCopy<false, PixelFormat::Z24S8>, | ||
| 535 | MortonCopy<false, PixelFormat::S8Z24>, | ||
| 536 | MortonCopy<false, PixelFormat::Z32FS8>, | ||
| 537 | // clang-format on | ||
| 538 | }; | ||
| 539 | |||
| 540 | void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params, | ||
| 541 | std::vector<u8>& gl_buffer, u32 mip_level) { | 376 | std::vector<u8>& gl_buffer, u32 mip_level) { |
| 542 | u32 depth = params.MipDepth(mip_level); | 377 | u32 depth = params.MipDepth(mip_level); |
| 543 | if (params.target == SurfaceTarget::Texture2D) { | 378 | if (params.target == SurfaceTarget::Texture2D) { |
| @@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params | |||
| 550 | const u64 layer_size = params.LayerMemorySize(); | 385 | const u64 layer_size = params.LayerMemorySize(); |
| 551 | const u64 gl_size = params.LayerSizeGL(mip_level); | 386 | const u64 gl_size = params.LayerSizeGL(mip_level); |
| 552 | for (u32 i = 0; i < params.depth; i++) { | 387 | for (u32 i = 0; i < params.depth; i++) { |
| 553 | functions[static_cast<std::size_t>(params.pixel_format)]( | 388 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 554 | params.MipWidth(mip_level), params.MipBlockHeight(mip_level), | 389 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 555 | params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, | 390 | params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size, |
| 556 | gl_buffer.data() + offset_gl, gl_size, params.addr + offset); | 391 | params.addr + offset); |
| 557 | offset += layer_size; | 392 | offset += layer_size; |
| 558 | offset_gl += gl_size; | 393 | offset_gl += gl_size; |
| 559 | } | 394 | } |
| 560 | } else { | 395 | } else { |
| 561 | const u64 offset = params.GetMipmapLevelOffset(mip_level); | 396 | const u64 offset = params.GetMipmapLevelOffset(mip_level); |
| 562 | functions[static_cast<std::size_t>(params.pixel_format)]( | 397 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 563 | params.MipWidth(mip_level), params.MipBlockHeight(mip_level), | 398 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 564 | params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), | 399 | params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(), |
| 565 | gl_buffer.size(), params.addr + offset); | 400 | params.addr + offset); |
| 566 | } | 401 | } |
| 567 | } | 402 | } |
| 568 | 403 | ||
| @@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() { | |||
| 996 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | 831 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
| 997 | params.block_width, static_cast<u32>(params.target)); | 832 | params.block_width, static_cast<u32>(params.target)); |
| 998 | for (u32 i = 0; i < params.max_mip_level; i++) | 833 | for (u32 i = 0; i < params.max_mip_level; i++) |
| 999 | SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); | 834 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); |
| 1000 | } else { | 835 | } else { |
| 1001 | const auto texture_src_data{Memory::GetPointer(params.addr)}; | 836 | const auto texture_src_data{Memory::GetPointer(params.addr)}; |
| 1002 | const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; | 837 | const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; |
| @@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() { | |||
| 1035 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | 870 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
| 1036 | params.block_width, static_cast<u32>(params.target)); | 871 | params.block_width, static_cast<u32>(params.target)); |
| 1037 | 872 | ||
| 1038 | SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); | 873 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); |
| 1039 | } else { | 874 | } else { |
| 1040 | std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); | 875 | std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); |
| 1041 | } | 876 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 97b9028c5..7c0935a4e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -500,27 +500,42 @@ public: | |||
| 500 | const Register& buf_reg) { | 500 | const Register& buf_reg) { |
| 501 | const std::string dest = GetOutputAttribute(attribute); | 501 | const std::string dest = GetOutputAttribute(attribute); |
| 502 | const std::string src = GetRegisterAsFloat(val_reg); | 502 | const std::string src = GetRegisterAsFloat(val_reg); |
| 503 | if (dest.empty()) | ||
| 504 | return; | ||
| 503 | 505 | ||
| 504 | if (!dest.empty()) { | 506 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the |
| 505 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the | 507 | // instruction for now. |
| 506 | // instruction for now. | 508 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { |
| 507 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | 509 | // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry |
| 508 | // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry | 510 | // shader. These instructions use a dirty register as buffer index, to avoid some |
| 509 | // shader. These instructions use a dirty register as buffer index, to avoid some | 511 | // drivers from complaining about out of boundary writes, guard them. |
| 510 | // drivers from complaining about out of boundary writes, guard them. | 512 | const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + |
| 511 | const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + | 513 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; |
| 512 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; | 514 | shader.AddLine("amem[" + buf_index + "][" + |
| 513 | shader.AddLine("amem[" + buf_index + "][" + | 515 | std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + |
| 514 | std::to_string(static_cast<u32>(attribute)) + ']' + | 516 | " = " + src + ';'); |
| 515 | GetSwizzle(elem) + " = " + src + ';'); | 517 | return; |
| 516 | } else { | 518 | } |
| 517 | if (attribute == Attribute::Index::PointSize) { | 519 | |
| 518 | fixed_pipeline_output_attributes_used.insert(attribute); | 520 | switch (attribute) { |
| 519 | shader.AddLine(dest + " = " + src + ';'); | 521 | case Attribute::Index::ClipDistances0123: |
| 520 | } else { | 522 | case Attribute::Index::ClipDistances4567: { |
| 521 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); | 523 | const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem; |
| 522 | } | 524 | UNIMPLEMENTED_IF_MSG( |
| 523 | } | 525 | ((header.vtg.clip_distances >> index) & 1) == 0, |
| 526 | "Shader is setting gl_ClipDistance{} without enabling it in the header", index); | ||
| 527 | |||
| 528 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 529 | shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); | ||
| 530 | break; | ||
| 531 | } | ||
| 532 | case Attribute::Index::PointSize: | ||
| 533 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 534 | shader.AddLine(dest + " = " + src + ';'); | ||
| 535 | break; | ||
| 536 | default: | ||
| 537 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); | ||
| 538 | break; | ||
| 524 | } | 539 | } |
| 525 | } | 540 | } |
| 526 | 541 | ||
| @@ -740,12 +755,19 @@ private: | |||
| 740 | void GenerateVertex() { | 755 | void GenerateVertex() { |
| 741 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) | 756 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) |
| 742 | return; | 757 | return; |
| 758 | bool clip_distances_declared = false; | ||
| 759 | |||
| 743 | declarations.AddLine("out gl_PerVertex {"); | 760 | declarations.AddLine("out gl_PerVertex {"); |
| 744 | ++declarations.scope; | 761 | ++declarations.scope; |
| 745 | declarations.AddLine("vec4 gl_Position;"); | 762 | declarations.AddLine("vec4 gl_Position;"); |
| 746 | for (auto& o : fixed_pipeline_output_attributes_used) { | 763 | for (auto& o : fixed_pipeline_output_attributes_used) { |
| 747 | if (o == Attribute::Index::PointSize) | 764 | if (o == Attribute::Index::PointSize) |
| 748 | declarations.AddLine("float gl_PointSize;"); | 765 | declarations.AddLine("float gl_PointSize;"); |
| 766 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 767 | o == Attribute::Index::ClipDistances4567)) { | ||
| 768 | declarations.AddLine("float gl_ClipDistance[];"); | ||
| 769 | clip_distances_declared = true; | ||
| 770 | } | ||
| 749 | } | 771 | } |
| 750 | --declarations.scope; | 772 | --declarations.scope; |
| 751 | declarations.AddLine("};"); | 773 | declarations.AddLine("};"); |
| @@ -845,7 +867,8 @@ private: | |||
| 845 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 867 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 846 | // shader. | 868 | // shader. |
| 847 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | 869 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); |
| 848 | return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; | 870 | // Config pack's first value is instance_id. |
| 871 | return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; | ||
| 849 | case Attribute::Index::FrontFacing: | 872 | case Attribute::Index::FrontFacing: |
| 850 | // TODO(Subv): Find out what the values are for the other elements. | 873 | // TODO(Subv): Find out what the values are for the other elements. |
| 851 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | 874 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); |
| @@ -916,6 +939,10 @@ private: | |||
| 916 | return "gl_PointSize"; | 939 | return "gl_PointSize"; |
| 917 | case Attribute::Index::Position: | 940 | case Attribute::Index::Position: |
| 918 | return "position"; | 941 | return "position"; |
| 942 | case Attribute::Index::ClipDistances0123: | ||
| 943 | case Attribute::Index::ClipDistances4567: { | ||
| 944 | return "gl_ClipDistance"; | ||
| 945 | } | ||
| 919 | default: | 946 | default: |
| 920 | const u32 index{static_cast<u32>(attribute) - | 947 | const u32 index{static_cast<u32>(attribute) - |
| 921 | static_cast<u32>(Attribute::Index::Attribute_0)}; | 948 | static_cast<u32>(Attribute::Index::Attribute_0)}; |
| @@ -1266,7 +1293,15 @@ private: | |||
| 1266 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); | 1293 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); |
| 1267 | } | 1294 | } |
| 1268 | 1295 | ||
| 1269 | void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { | 1296 | void WriteTexsInstruction(const Instruction& instr, const std::string& coord, |
| 1297 | const std::string& texture) { | ||
| 1298 | // Add an extra scope and declare the texture coords inside to prevent | ||
| 1299 | // overwriting them in case they are used as outputs of the texs instruction. | ||
| 1300 | shader.AddLine('{'); | ||
| 1301 | ++shader.scope; | ||
| 1302 | shader.AddLine(coord); | ||
| 1303 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 1304 | |||
| 1270 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 1305 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle |
| 1271 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 1306 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 |
| 1272 | 1307 | ||
| @@ -1278,17 +1313,19 @@ private: | |||
| 1278 | 1313 | ||
| 1279 | if (written_components < 2) { | 1314 | if (written_components < 2) { |
| 1280 | // Write the first two swizzle components to gpr0 and gpr0+1 | 1315 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 1281 | regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, | 1316 | regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false, |
| 1282 | written_components % 2); | 1317 | written_components % 2); |
| 1283 | } else { | 1318 | } else { |
| 1284 | ASSERT(instr.texs.HasTwoDestinations()); | 1319 | ASSERT(instr.texs.HasTwoDestinations()); |
| 1285 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 1320 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 1286 | regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, | 1321 | regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false, |
| 1287 | written_components % 2); | 1322 | written_components % 2); |
| 1288 | } | 1323 | } |
| 1289 | 1324 | ||
| 1290 | ++written_components; | 1325 | ++written_components; |
| 1291 | } | 1326 | } |
| 1327 | --shader.scope; | ||
| 1328 | shader.AddLine('}'); | ||
| 1292 | } | 1329 | } |
| 1293 | 1330 | ||
| 1294 | static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { | 1331 | static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { |
| @@ -1685,6 +1722,26 @@ private: | |||
| 1685 | 1722 | ||
| 1686 | break; | 1723 | break; |
| 1687 | } | 1724 | } |
| 1725 | case OpCode::Type::Bfi: { | ||
| 1726 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 1727 | |||
| 1728 | const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { | ||
| 1729 | switch (opcode->get().GetId()) { | ||
| 1730 | case OpCode::Id::BFI_IMM_R: | ||
| 1731 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 1732 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 1733 | default: | ||
| 1734 | UNREACHABLE(); | ||
| 1735 | } | ||
| 1736 | }(); | ||
| 1737 | const std::string offset = '(' + packed_shift + " & 0xff)"; | ||
| 1738 | const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; | ||
| 1739 | const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); | ||
| 1740 | regs.SetRegisterToInteger( | ||
| 1741 | instr.gpr0, false, 0, | ||
| 1742 | "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1); | ||
| 1743 | break; | ||
| 1744 | } | ||
| 1688 | case OpCode::Type::Shift: { | 1745 | case OpCode::Type::Shift: { |
| 1689 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); | 1746 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); |
| 1690 | std::string op_b; | 1747 | std::string op_b; |
| @@ -2510,61 +2567,83 @@ private: | |||
| 2510 | const bool depth_compare = | 2567 | const bool depth_compare = |
| 2511 | instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | 2568 | instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); |
| 2512 | u32 num_coordinates = TextureCoordinates(texture_type); | 2569 | u32 num_coordinates = TextureCoordinates(texture_type); |
| 2513 | if (depth_compare) | 2570 | u32 start_index = 0; |
| 2514 | num_coordinates += 1; | 2571 | std::string array_elem; |
| 2572 | if (is_array) { | ||
| 2573 | array_elem = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2574 | start_index = 1; | ||
| 2575 | } | ||
| 2576 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 2577 | u32 start_index_b = 0; | ||
| 2578 | std::string lod_value; | ||
| 2579 | if (process_mode != Tegra::Shader::TextureProcessMode::LZ && | ||
| 2580 | process_mode != Tegra::Shader::TextureProcessMode::None) { | ||
| 2581 | start_index_b = 1; | ||
| 2582 | lod_value = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2583 | } | ||
| 2584 | |||
| 2585 | std::string depth_value; | ||
| 2586 | if (depth_compare) { | ||
| 2587 | depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b); | ||
| 2588 | } | ||
| 2589 | |||
| 2590 | bool depth_compare_extra = false; | ||
| 2515 | 2591 | ||
| 2516 | switch (num_coordinates) { | 2592 | switch (num_coordinates) { |
| 2517 | case 1: { | 2593 | case 1: { |
| 2594 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); | ||
| 2518 | if (is_array) { | 2595 | if (is_array) { |
| 2519 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2596 | if (depth_compare) { |
| 2520 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2597 | coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " + |
| 2521 | coord = "vec2 coords = vec2(" + x + ", " + index + ");"; | 2598 | array_elem + ");"; |
| 2599 | } else { | ||
| 2600 | coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");"; | ||
| 2601 | } | ||
| 2522 | } else { | 2602 | } else { |
| 2523 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 2603 | if (depth_compare) { |
| 2524 | coord = "float coords = " + x + ';'; | 2604 | coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");"; |
| 2605 | } else { | ||
| 2606 | coord = "float coords = " + x + ';'; | ||
| 2607 | } | ||
| 2525 | } | 2608 | } |
| 2526 | break; | 2609 | break; |
| 2527 | } | 2610 | } |
| 2528 | case 2: { | 2611 | case 2: { |
| 2612 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); | ||
| 2613 | const std::string y = | ||
| 2614 | regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); | ||
| 2529 | if (is_array) { | 2615 | if (is_array) { |
| 2530 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2616 | if (depth_compare) { |
| 2531 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2617 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value + |
| 2532 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); | 2618 | ", " + array_elem + ");"; |
| 2533 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; | 2619 | } else { |
| 2620 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");"; | ||
| 2621 | } | ||
| 2534 | } else { | 2622 | } else { |
| 2535 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 2623 | if (depth_compare) { |
| 2536 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2624 | coord = |
| 2537 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; | 2625 | "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");"; |
| 2626 | } else { | ||
| 2627 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; | ||
| 2628 | } | ||
| 2538 | } | 2629 | } |
| 2539 | break; | 2630 | break; |
| 2540 | } | 2631 | } |
| 2541 | case 3: { | 2632 | case 3: { |
| 2542 | if (depth_compare) { | 2633 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); |
| 2543 | if (is_array) { | 2634 | const std::string y = |
| 2544 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2635 | regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); |
| 2545 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2636 | const std::string z = |
| 2546 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | 2637 | regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2); |
| 2547 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | 2638 | if (is_array) { |
| 2548 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + | 2639 | depth_compare_extra = depth_compare; |
| 2549 | ");"; | 2640 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + |
| 2550 | } else { | 2641 | array_elem + ");"; |
| 2551 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2552 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2553 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2554 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; | ||
| 2555 | } | ||
| 2556 | } else { | 2642 | } else { |
| 2557 | if (is_array) { | 2643 | if (depth_compare) { |
| 2558 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2644 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + |
| 2559 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2645 | depth_value + ");"; |
| 2560 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); | ||
| 2561 | const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3); | ||
| 2562 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + | ||
| 2563 | ");"; | ||
| 2564 | } else { | 2646 | } else { |
| 2565 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2566 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2567 | const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); | ||
| 2568 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; | 2647 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; |
| 2569 | } | 2648 | } |
| 2570 | } | 2649 | } |
| @@ -2580,82 +2659,85 @@ private: | |||
| 2580 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; | 2659 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; |
| 2581 | texture_type = Tegra::Shader::TextureType::Texture2D; | 2660 | texture_type = Tegra::Shader::TextureType::Texture2D; |
| 2582 | } | 2661 | } |
| 2583 | // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias | ||
| 2584 | // or lod. | ||
| 2585 | 2662 | ||
| 2586 | const std::string sampler = | 2663 | const std::string sampler = |
| 2587 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 2664 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 2588 | // Add an extra scope and declare the texture coords inside to prevent | 2665 | // Add an extra scope and declare the texture coords inside to prevent |
| 2589 | // overwriting them in case they are used as outputs of the texs instruction. | 2666 | // overwriting them in case they are used as outputs of the texs instruction. |
| 2590 | 2667 | ||
| 2591 | shader.AddLine("{"); | 2668 | shader.AddLine('{'); |
| 2592 | ++shader.scope; | 2669 | ++shader.scope; |
| 2593 | shader.AddLine(coord); | 2670 | shader.AddLine(coord); |
| 2594 | std::string texture; | 2671 | std::string texture; |
| 2595 | 2672 | ||
| 2596 | switch (instr.tex.GetTextureProcessMode()) { | 2673 | switch (instr.tex.GetTextureProcessMode()) { |
| 2597 | case Tegra::Shader::TextureProcessMode::None: { | 2674 | case Tegra::Shader::TextureProcessMode::None: { |
| 2598 | texture = "texture(" + sampler + ", coords)"; | 2675 | if (!depth_compare_extra) { |
| 2676 | texture = "texture(" + sampler + ", coords)"; | ||
| 2677 | } else { | ||
| 2678 | texture = "texture(" + sampler + ", coords, " + depth_value + ')'; | ||
| 2679 | } | ||
| 2599 | break; | 2680 | break; |
| 2600 | } | 2681 | } |
| 2601 | case Tegra::Shader::TextureProcessMode::LZ: { | 2682 | case Tegra::Shader::TextureProcessMode::LZ: { |
| 2602 | texture = "textureLod(" + sampler + ", coords, 0.0)"; | 2683 | if (!depth_compare_extra) { |
| 2684 | texture = "textureLod(" + sampler + ", coords, 0.0)"; | ||
| 2685 | } else { | ||
| 2686 | texture = "texture(" + sampler + ", coords, " + depth_value + ')'; | ||
| 2687 | } | ||
| 2603 | break; | 2688 | break; |
| 2604 | } | 2689 | } |
| 2605 | case Tegra::Shader::TextureProcessMode::LB: | 2690 | case Tegra::Shader::TextureProcessMode::LB: |
| 2606 | case Tegra::Shader::TextureProcessMode::LBA: { | 2691 | case Tegra::Shader::TextureProcessMode::LBA: { |
| 2607 | const std::string bias = [&]() { | ||
| 2608 | if (depth_compare) { | ||
| 2609 | if (is_array) | ||
| 2610 | return regs.GetRegisterAsFloat(instr.gpr20.Value() + 2); | ||
| 2611 | else | ||
| 2612 | return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | ||
| 2613 | } else { | ||
| 2614 | return regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2615 | } | ||
| 2616 | }(); | ||
| 2617 | shader.AddLine("float bias = " + bias + ';'); | ||
| 2618 | |||
| 2619 | // TODO: Figure if A suffix changes the equation at all. | 2692 | // TODO: Figure if A suffix changes the equation at all. |
| 2620 | texture = "texture(" + sampler + ", coords, bias)"; | 2693 | if (!depth_compare_extra) { |
| 2694 | texture = "texture(" + sampler + ", coords, " + lod_value + ')'; | ||
| 2695 | } else { | ||
| 2696 | texture = "texture(" + sampler + ", coords, " + depth_value + ')'; | ||
| 2697 | LOG_WARNING(HW_GPU, | ||
| 2698 | "OpenGL Limitation: can't set bias value along depth compare"); | ||
| 2699 | } | ||
| 2621 | break; | 2700 | break; |
| 2622 | } | 2701 | } |
| 2623 | case Tegra::Shader::TextureProcessMode::LL: | 2702 | case Tegra::Shader::TextureProcessMode::LL: |
| 2624 | case Tegra::Shader::TextureProcessMode::LLA: { | 2703 | case Tegra::Shader::TextureProcessMode::LLA: { |
| 2625 | const std::string lod = [&]() { | ||
| 2626 | if (num_coordinates <= 2) { | ||
| 2627 | return regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2628 | } else { | ||
| 2629 | return regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | ||
| 2630 | } | ||
| 2631 | }(); | ||
| 2632 | shader.AddLine("float lod = " + lod + ';'); | ||
| 2633 | |||
| 2634 | // TODO: Figure if A suffix changes the equation at all. | 2704 | // TODO: Figure if A suffix changes the equation at all. |
| 2635 | texture = "textureLod(" + sampler + ", coords, lod)"; | 2705 | if (!depth_compare_extra) { |
| 2706 | texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; | ||
| 2707 | } else { | ||
| 2708 | texture = "texture(" + sampler + ", coords, " + depth_value + ')'; | ||
| 2709 | LOG_WARNING(HW_GPU, | ||
| 2710 | "OpenGL Limitation: can't set lod value along depth compare"); | ||
| 2711 | } | ||
| 2636 | break; | 2712 | break; |
| 2637 | } | 2713 | } |
| 2638 | default: { | 2714 | default: { |
| 2639 | texture = "texture(" + sampler + ", coords)"; | 2715 | if (!depth_compare_extra) { |
| 2716 | texture = "texture(" + sampler + ", coords)"; | ||
| 2717 | } else { | ||
| 2718 | texture = "texture(" + sampler + ", coords, " + depth_value + ')'; | ||
| 2719 | } | ||
| 2640 | UNIMPLEMENTED_MSG("Unhandled texture process mode {}", | 2720 | UNIMPLEMENTED_MSG("Unhandled texture process mode {}", |
| 2641 | static_cast<u32>(instr.tex.GetTextureProcessMode())); | 2721 | static_cast<u32>(instr.tex.GetTextureProcessMode())); |
| 2642 | } | 2722 | } |
| 2643 | } | 2723 | } |
| 2644 | if (!depth_compare) { | 2724 | if (!depth_compare) { |
| 2725 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2645 | std::size_t dest_elem{}; | 2726 | std::size_t dest_elem{}; |
| 2646 | for (std::size_t elem = 0; elem < 4; ++elem) { | 2727 | for (std::size_t elem = 0; elem < 4; ++elem) { |
| 2647 | if (!instr.tex.IsComponentEnabled(elem)) { | 2728 | if (!instr.tex.IsComponentEnabled(elem)) { |
| 2648 | // Skip disabled components | 2729 | // Skip disabled components |
| 2649 | continue; | 2730 | continue; |
| 2650 | } | 2731 | } |
| 2651 | regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); | 2732 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, |
| 2733 | dest_elem); | ||
| 2652 | ++dest_elem; | 2734 | ++dest_elem; |
| 2653 | } | 2735 | } |
| 2654 | } else { | 2736 | } else { |
| 2655 | regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); | 2737 | regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); |
| 2656 | } | 2738 | } |
| 2657 | --shader.scope; | 2739 | --shader.scope; |
| 2658 | shader.AddLine("}"); | 2740 | shader.AddLine('}'); |
| 2659 | break; | 2741 | break; |
| 2660 | } | 2742 | } |
| 2661 | case OpCode::Id::TEXS: { | 2743 | case OpCode::Id::TEXS: { |
| @@ -2668,41 +2750,76 @@ private: | |||
| 2668 | const bool depth_compare = | 2750 | const bool depth_compare = |
| 2669 | instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | 2751 | instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); |
| 2670 | u32 num_coordinates = TextureCoordinates(texture_type); | 2752 | u32 num_coordinates = TextureCoordinates(texture_type); |
| 2671 | if (depth_compare) | 2753 | const auto process_mode = instr.texs.GetTextureProcessMode(); |
| 2672 | num_coordinates += 1; | 2754 | std::string lod_value; |
| 2673 | 2755 | std::string coord; | |
| 2674 | // Scope to avoid variable name overlaps. | 2756 | u32 lod_offset = 0; |
| 2675 | shader.AddLine('{'); | 2757 | if (process_mode == Tegra::Shader::TextureProcessMode::LL) { |
| 2676 | ++shader.scope; | 2758 | if (num_coordinates > 2) { |
| 2759 | lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | ||
| 2760 | lod_offset = 2; | ||
| 2761 | } else { | ||
| 2762 | lod_value = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2763 | lod_offset = 1; | ||
| 2764 | } | ||
| 2765 | } | ||
| 2677 | 2766 | ||
| 2678 | switch (num_coordinates) { | 2767 | switch (num_coordinates) { |
| 2768 | case 1: { | ||
| 2769 | coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';'; | ||
| 2770 | break; | ||
| 2771 | } | ||
| 2679 | case 2: { | 2772 | case 2: { |
| 2680 | if (is_array) { | 2773 | if (is_array) { |
| 2681 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2774 | if (depth_compare) { |
| 2682 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2775 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); |
| 2683 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | 2776 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); |
| 2684 | shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"); | 2777 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); |
| 2778 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | ||
| 2779 | coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + | ||
| 2780 | ");"; | ||
| 2781 | } else { | ||
| 2782 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2783 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2784 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2785 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; | ||
| 2786 | } | ||
| 2685 | } else { | 2787 | } else { |
| 2686 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 2788 | if (lod_offset != 0) { |
| 2687 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | 2789 | if (depth_compare) { |
| 2688 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | 2790 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); |
| 2791 | const std::string y = | ||
| 2792 | regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2793 | const std::string z = | ||
| 2794 | regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset); | ||
| 2795 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; | ||
| 2796 | } else { | ||
| 2797 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2798 | const std::string y = | ||
| 2799 | regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2800 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; | ||
| 2801 | } | ||
| 2802 | } else { | ||
| 2803 | if (depth_compare) { | ||
| 2804 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2805 | const std::string y = | ||
| 2806 | regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2807 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2808 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; | ||
| 2809 | } else { | ||
| 2810 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2811 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2812 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; | ||
| 2813 | } | ||
| 2814 | } | ||
| 2689 | } | 2815 | } |
| 2690 | break; | 2816 | break; |
| 2691 | } | 2817 | } |
| 2692 | case 3: { | 2818 | case 3: { |
| 2693 | if (is_array) { | 2819 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); |
| 2694 | const std::string index = regs.GetRegisterAsInteger(instr.gpr8); | 2820 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); |
| 2695 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2821 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20); |
| 2696 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); | 2822 | coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; |
| 2697 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2698 | shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + | ||
| 2699 | index + ");"); | ||
| 2700 | } else { | ||
| 2701 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2702 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 2703 | const std::string z = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2704 | shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"); | ||
| 2705 | } | ||
| 2706 | break; | 2823 | break; |
| 2707 | } | 2824 | } |
| 2708 | default: | 2825 | default: |
| @@ -2712,14 +2829,14 @@ private: | |||
| 2712 | // Fallback to interpreting as a 2D texture for now | 2829 | // Fallback to interpreting as a 2D texture for now |
| 2713 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 2830 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); |
| 2714 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); | 2831 | const std::string y = regs.GetRegisterAsFloat(instr.gpr20); |
| 2715 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | 2832 | coord = "vec2 coords = vec2(" + x + ", " + y + ");"; |
| 2716 | texture_type = Tegra::Shader::TextureType::Texture2D; | 2833 | texture_type = Tegra::Shader::TextureType::Texture2D; |
| 2717 | is_array = false; | 2834 | is_array = false; |
| 2718 | } | 2835 | } |
| 2719 | const std::string sampler = | 2836 | const std::string sampler = |
| 2720 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 2837 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 2721 | std::string texture; | 2838 | std::string texture; |
| 2722 | switch (instr.texs.GetTextureProcessMode()) { | 2839 | switch (process_mode) { |
| 2723 | case Tegra::Shader::TextureProcessMode::None: { | 2840 | case Tegra::Shader::TextureProcessMode::None: { |
| 2724 | texture = "texture(" + sampler + ", coords)"; | 2841 | texture = "texture(" + sampler + ", coords)"; |
| 2725 | break; | 2842 | break; |
| @@ -2733,8 +2850,7 @@ private: | |||
| 2733 | break; | 2850 | break; |
| 2734 | } | 2851 | } |
| 2735 | case Tegra::Shader::TextureProcessMode::LL: { | 2852 | case Tegra::Shader::TextureProcessMode::LL: { |
| 2736 | const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); | 2853 | texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; |
| 2737 | texture = "textureLod(" + sampler + ", coords, " + op_c + ')'; | ||
| 2738 | break; | 2854 | break; |
| 2739 | } | 2855 | } |
| 2740 | default: { | 2856 | default: { |
| @@ -2744,13 +2860,11 @@ private: | |||
| 2744 | } | 2860 | } |
| 2745 | } | 2861 | } |
| 2746 | if (!depth_compare) { | 2862 | if (!depth_compare) { |
| 2747 | WriteTexsInstruction(instr, texture); | 2863 | WriteTexsInstruction(instr, coord, texture); |
| 2748 | } else { | 2864 | } else { |
| 2749 | WriteTexsInstruction(instr, "vec4(" + texture + ')'); | 2865 | WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); |
| 2750 | } | 2866 | } |
| 2751 | 2867 | ||
| 2752 | shader.AddLine('}'); | ||
| 2753 | --shader.scope; | ||
| 2754 | break; | 2868 | break; |
| 2755 | } | 2869 | } |
| 2756 | case OpCode::Id::TLDS: { | 2870 | case OpCode::Id::TLDS: { |
| @@ -2772,11 +2886,12 @@ private: | |||
| 2772 | // Scope to avoid variable name overlaps. | 2886 | // Scope to avoid variable name overlaps. |
| 2773 | shader.AddLine('{'); | 2887 | shader.AddLine('{'); |
| 2774 | ++shader.scope; | 2888 | ++shader.scope; |
| 2889 | std::string coords; | ||
| 2775 | 2890 | ||
| 2776 | switch (texture_type) { | 2891 | switch (texture_type) { |
| 2777 | case Tegra::Shader::TextureType::Texture1D: { | 2892 | case Tegra::Shader::TextureType::Texture1D: { |
| 2778 | const std::string x = regs.GetRegisterAsInteger(instr.gpr8); | 2893 | const std::string x = regs.GetRegisterAsInteger(instr.gpr8); |
| 2779 | shader.AddLine("int coords = " + x + ';'); | 2894 | coords = "float coords = " + x + ';'; |
| 2780 | break; | 2895 | break; |
| 2781 | } | 2896 | } |
| 2782 | case Tegra::Shader::TextureType::Texture2D: { | 2897 | case Tegra::Shader::TextureType::Texture2D: { |
| @@ -2784,7 +2899,8 @@ private: | |||
| 2784 | 2899 | ||
| 2785 | const std::string x = regs.GetRegisterAsInteger(instr.gpr8); | 2900 | const std::string x = regs.GetRegisterAsInteger(instr.gpr8); |
| 2786 | const std::string y = regs.GetRegisterAsInteger(instr.gpr20); | 2901 | const std::string y = regs.GetRegisterAsInteger(instr.gpr20); |
| 2787 | shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); | 2902 | // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); |
| 2903 | coords = "ivec2 coords = ivec2(" + x + ", " + y + ");"; | ||
| 2788 | extra_op_offset = 1; | 2904 | extra_op_offset = 1; |
| 2789 | break; | 2905 | break; |
| 2790 | } | 2906 | } |
| @@ -2812,7 +2928,7 @@ private: | |||
| 2812 | static_cast<u32>(instr.tlds.GetTextureProcessMode())); | 2928 | static_cast<u32>(instr.tlds.GetTextureProcessMode())); |
| 2813 | } | 2929 | } |
| 2814 | } | 2930 | } |
| 2815 | WriteTexsInstruction(instr, texture); | 2931 | WriteTexsInstruction(instr, coords, texture); |
| 2816 | 2932 | ||
| 2817 | --shader.scope; | 2933 | --shader.scope; |
| 2818 | shader.AddLine('}'); | 2934 | shader.AddLine('}'); |
| @@ -2871,14 +2987,17 @@ private: | |||
| 2871 | 2987 | ||
| 2872 | const std::string texture = "textureGather(" + sampler + ", coords, " + | 2988 | const std::string texture = "textureGather(" + sampler + ", coords, " + |
| 2873 | std::to_string(instr.tld4.component) + ')'; | 2989 | std::to_string(instr.tld4.component) + ')'; |
| 2990 | |||
| 2874 | if (!depth_compare) { | 2991 | if (!depth_compare) { |
| 2992 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2875 | std::size_t dest_elem{}; | 2993 | std::size_t dest_elem{}; |
| 2876 | for (std::size_t elem = 0; elem < 4; ++elem) { | 2994 | for (std::size_t elem = 0; elem < 4; ++elem) { |
| 2877 | if (!instr.tex.IsComponentEnabled(elem)) { | 2995 | if (!instr.tex.IsComponentEnabled(elem)) { |
| 2878 | // Skip disabled components | 2996 | // Skip disabled components |
| 2879 | continue; | 2997 | continue; |
| 2880 | } | 2998 | } |
| 2881 | regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); | 2999 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, |
| 3000 | dest_elem); | ||
| 2882 | ++dest_elem; | 3001 | ++dest_elem; |
| 2883 | } | 3002 | } |
| 2884 | } else { | 3003 | } else { |
| @@ -2899,6 +3018,7 @@ private: | |||
| 2899 | // Scope to avoid variable name overlaps. | 3018 | // Scope to avoid variable name overlaps. |
| 2900 | shader.AddLine('{'); | 3019 | shader.AddLine('{'); |
| 2901 | ++shader.scope; | 3020 | ++shader.scope; |
| 3021 | std::string coords; | ||
| 2902 | 3022 | ||
| 2903 | const bool depth_compare = | 3023 | const bool depth_compare = |
| 2904 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | 3024 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); |
| @@ -2908,20 +3028,19 @@ private: | |||
| 2908 | const std::string sampler = GetSampler( | 3028 | const std::string sampler = GetSampler( |
| 2909 | instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); | 3029 | instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); |
| 2910 | if (!depth_compare) { | 3030 | if (!depth_compare) { |
| 2911 | shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); | 3031 | coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; |
| 2912 | } else { | 3032 | } else { |
| 2913 | // Note: TLD4S coordinate encoding works just like TEXS's | 3033 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 2914 | shader.AddLine( | 3034 | const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); |
| 2915 | "float op_y = " + regs.GetRegisterAsFloat(instr.gpr8.Value() + 1) + ';'); | 3035 | coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"; |
| 2916 | shader.AddLine("vec3 coords = vec3(" + op_a + ", op_y, " + op_b + ");"); | ||
| 2917 | } | 3036 | } |
| 2918 | const std::string texture = "textureGather(" + sampler + ", coords, " + | 3037 | const std::string texture = "textureGather(" + sampler + ", coords, " + |
| 2919 | std::to_string(instr.tld4s.component) + ')'; | 3038 | std::to_string(instr.tld4s.component) + ')'; |
| 2920 | 3039 | ||
| 2921 | if (!depth_compare) { | 3040 | if (!depth_compare) { |
| 2922 | WriteTexsInstruction(instr, texture); | 3041 | WriteTexsInstruction(instr, coords, texture); |
| 2923 | } else { | 3042 | } else { |
| 2924 | WriteTexsInstruction(instr, "vec4(" + texture + ')'); | 3043 | WriteTexsInstruction(instr, coords, "vec4(" + texture + ')'); |
| 2925 | } | 3044 | } |
| 2926 | 3045 | ||
| 2927 | --shader.scope; | 3046 | --shader.scope; |
| @@ -3217,6 +3336,34 @@ private: | |||
| 3217 | } | 3336 | } |
| 3218 | break; | 3337 | break; |
| 3219 | } | 3338 | } |
| 3339 | case OpCode::Type::RegisterSetPredicate: { | ||
| 3340 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 3341 | |||
| 3342 | const std::string apply_mask = [&]() { | ||
| 3343 | switch (opcode->get().GetId()) { | ||
| 3344 | case OpCode::Id::R2P_IMM: | ||
| 3345 | return std::to_string(instr.r2p.immediate_mask); | ||
| 3346 | default: | ||
| 3347 | UNREACHABLE(); | ||
| 3348 | } | ||
| 3349 | }(); | ||
| 3350 | const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 3351 | " >> " + std::to_string(instr.r2p.byte) + ')'; | ||
| 3352 | |||
| 3353 | constexpr u64 programmable_preds = 7; | ||
| 3354 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | ||
| 3355 | const auto shift = std::to_string(1 << pred); | ||
| 3356 | |||
| 3357 | shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {"); | ||
| 3358 | ++shader.scope; | ||
| 3359 | |||
| 3360 | SetPredicate(pred, '(' + mask + " & " + shift + ") != 0"); | ||
| 3361 | |||
| 3362 | --shader.scope; | ||
| 3363 | shader.AddLine('}'); | ||
| 3364 | } | ||
| 3365 | break; | ||
| 3366 | } | ||
| 3220 | case OpCode::Type::FloatSet: { | 3367 | case OpCode::Type::FloatSet: { |
| 3221 | const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), | 3368 | const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), |
| 3222 | instr.fset.abs_a != 0, instr.fset.neg_a != 0); | 3369 | instr.fset.abs_a != 0, instr.fset.neg_a != 0); |
| @@ -3254,6 +3401,10 @@ private: | |||
| 3254 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | 3401 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, |
| 3255 | 1); | 3402 | 1); |
| 3256 | } | 3403 | } |
| 3404 | if (instr.generates_cc.Value() != 0) { | ||
| 3405 | regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate); | ||
| 3406 | LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete"); | ||
| 3407 | } | ||
| 3257 | break; | 3408 | break; |
| 3258 | } | 3409 | } |
| 3259 | case OpCode::Type::IntegerSet: { | 3410 | case OpCode::Type::IntegerSet: { |
| @@ -3507,6 +3658,11 @@ private: | |||
| 3507 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); | 3658 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); |
| 3508 | break; | 3659 | break; |
| 3509 | } | 3660 | } |
| 3661 | case Tegra::Shader::SystemVariable::Ydirection: { | ||
| 3662 | // Config pack's third value is Y_NEGATE's state. | ||
| 3663 | regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1); | ||
| 3664 | break; | ||
| 3665 | } | ||
| 3510 | default: { | 3666 | default: { |
| 3511 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | 3667 | UNIMPLEMENTED_MSG("Unhandled system move: {}", |
| 3512 | static_cast<u32>(instr.sys20.Value())); | 3668 | static_cast<u32>(instr.sys20.Value())); |
| @@ -3530,11 +3686,17 @@ private: | |||
| 3530 | "BRA with constant buffers are not implemented"); | 3686 | "BRA with constant buffers are not implemented"); |
| 3531 | 3687 | ||
| 3532 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 3688 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 3533 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3534 | "BRA condition code used: {}", static_cast<u32>(cc)); | ||
| 3535 | |||
| 3536 | const u32 target = offset + instr.bra.GetBranchTarget(); | 3689 | const u32 target = offset + instr.bra.GetBranchTarget(); |
| 3537 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | 3690 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 3691 | const std::string condition_code = regs.GetConditionCode(cc); | ||
| 3692 | shader.AddLine("if (" + condition_code + "){"); | ||
| 3693 | shader.scope++; | ||
| 3694 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3695 | shader.scope--; | ||
| 3696 | shader.AddLine('}'); | ||
| 3697 | } else { | ||
| 3698 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3699 | } | ||
| 3538 | break; | 3700 | break; |
| 3539 | } | 3701 | } |
| 3540 | case OpCode::Id::IPA: { | 3702 | case OpCode::Id::IPA: { |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index eea090e52..23ed91e27 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -24,8 +24,7 @@ layout (location = 0) out vec4 position; | |||
| 24 | 24 | ||
| 25 | layout(std140) uniform vs_config { | 25 | layout(std140) uniform vs_config { |
| 26 | vec4 viewport_flip; | 26 | vec4 viewport_flip; |
| 27 | uvec4 instance_id; | 27 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 28 | uvec4 flip_stage; | ||
| 29 | uvec4 alpha_test; | 28 | uvec4 alpha_test; |
| 30 | }; | 29 | }; |
| 31 | )"; | 30 | )"; |
| @@ -63,7 +62,8 @@ void main() { | |||
| 63 | out += R"( | 62 | out += R"( |
| 64 | 63 | ||
| 65 | // Check if the flip stage is VertexB | 64 | // Check if the flip stage is VertexB |
| 66 | if (flip_stage[0] == 1) { | 65 | // Config pack's second value is flip_stage |
| 66 | if (config_pack[1] == 1) { | ||
| 67 | // Viewport can be flipped, which is unsupported by glViewport | 67 | // Viewport can be flipped, which is unsupported by glViewport |
| 68 | position.xy *= viewport_flip.xy; | 68 | position.xy *= viewport_flip.xy; |
| 69 | } | 69 | } |
| @@ -71,7 +71,7 @@ void main() { | |||
| 71 | 71 | ||
| 72 | // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 | 72 | // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 |
| 73 | // For now, this is here to bring order in lieu of proper emulation | 73 | // For now, this is here to bring order in lieu of proper emulation |
| 74 | if (flip_stage[0] == 1) { | 74 | if (config_pack[1] == 1) { |
| 75 | position.w = 1.0; | 75 | position.w = 1.0; |
| 76 | } | 76 | } |
| 77 | } | 77 | } |
| @@ -101,8 +101,7 @@ layout (location = 0) out vec4 position; | |||
| 101 | 101 | ||
| 102 | layout (std140) uniform gs_config { | 102 | layout (std140) uniform gs_config { |
| 103 | vec4 viewport_flip; | 103 | vec4 viewport_flip; |
| 104 | uvec4 instance_id; | 104 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 105 | uvec4 flip_stage; | ||
| 106 | uvec4 alpha_test; | 105 | uvec4 alpha_test; |
| 107 | }; | 106 | }; |
| 108 | 107 | ||
| @@ -139,8 +138,7 @@ layout (location = 0) in vec4 position; | |||
| 139 | 138 | ||
| 140 | layout (std140) uniform fs_config { | 139 | layout (std140) uniform fs_config { |
| 141 | vec4 viewport_flip; | 140 | vec4 viewport_flip; |
| 142 | uvec4 instance_id; | 141 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 143 | uvec4 flip_stage; | ||
| 144 | uvec4 alpha_test; | 142 | uvec4 alpha_test; |
| 145 | }; | 143 | }; |
| 146 | 144 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8b8869ecb..6a30c28d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -27,16 +27,18 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh | |||
| 27 | alpha_test.func = func; | 27 | alpha_test.func = func; |
| 28 | alpha_test.ref = regs.alpha_test_ref; | 28 | alpha_test.ref = regs.alpha_test_ref; |
| 29 | 29 | ||
| 30 | // We only assign the instance to the first component of the vector, the rest is just padding. | 30 | instance_id = state.current_instance; |
| 31 | instance_id[0] = state.current_instance; | ||
| 32 | 31 | ||
| 33 | // Assign in which stage the position has to be flipped | 32 | // Assign in which stage the position has to be flipped |
| 34 | // (the last stage before the fragment shader). | 33 | // (the last stage before the fragment shader). |
| 35 | if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { | 34 | if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { |
| 36 | flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); | 35 | flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); |
| 37 | } else { | 36 | } else { |
| 38 | flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); | 37 | flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); |
| 39 | } | 38 | } |
| 39 | |||
| 40 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. | ||
| 41 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f; | ||
| 40 | } | 42 | } |
| 41 | 43 | ||
| 42 | } // namespace OpenGL::GLShader | 44 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 9a5d7e289..b757f5f44 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -21,8 +21,11 @@ using Tegra::Engines::Maxwell3D; | |||
| 21 | struct MaxwellUniformData { | 21 | struct MaxwellUniformData { |
| 22 | void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); | 22 | void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); |
| 23 | alignas(16) GLvec4 viewport_flip; | 23 | alignas(16) GLvec4 viewport_flip; |
| 24 | alignas(16) GLuvec4 instance_id; | 24 | struct alignas(16) { |
| 25 | alignas(16) GLuvec4 flip_stage; | 25 | GLuint instance_id; |
| 26 | GLuint flip_stage; | ||
| 27 | GLfloat y_direction; | ||
| 28 | }; | ||
| 26 | struct alignas(16) { | 29 | struct alignas(16) { |
| 27 | GLuint enabled; | 30 | GLuint enabled; |
| 28 | GLuint func; | 31 | GLuint func; |
| @@ -30,7 +33,7 @@ struct MaxwellUniformData { | |||
| 30 | GLuint padding; | 33 | GLuint padding; |
| 31 | } alpha_test; | 34 | } alpha_test; |
| 32 | }; | 35 | }; |
| 33 | static_assert(sizeof(MaxwellUniformData) == 64, "MaxwellUniformData structure size is incorrect"); | 36 | static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect"); |
| 34 | static_assert(sizeof(MaxwellUniformData) < 16384, | 37 | static_assert(sizeof(MaxwellUniformData) < 16384, |
| 35 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | 38 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); |
| 36 | 39 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 934f4db78..b3bfad6a0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -92,6 +92,13 @@ OpenGLState::OpenGLState() { | |||
| 92 | 92 | ||
| 93 | point.size = 1; | 93 | point.size = 1; |
| 94 | fragment_color_clamp.enabled = false; | 94 | fragment_color_clamp.enabled = false; |
| 95 | |||
| 96 | polygon_offset.fill_enable = false; | ||
| 97 | polygon_offset.line_enable = false; | ||
| 98 | polygon_offset.point_enable = false; | ||
| 99 | polygon_offset.factor = 0.0f; | ||
| 100 | polygon_offset.units = 0.0f; | ||
| 101 | polygon_offset.clamp = 0.0f; | ||
| 95 | } | 102 | } |
| 96 | 103 | ||
| 97 | void OpenGLState::ApplyDefaultState() { | 104 | void OpenGLState::ApplyDefaultState() { |
| @@ -406,6 +413,55 @@ void OpenGLState::ApplyLogicOp() const { | |||
| 406 | } | 413 | } |
| 407 | } | 414 | } |
| 408 | 415 | ||
| 416 | void OpenGLState::ApplyPolygonOffset() const { | ||
| 417 | |||
| 418 | const bool fill_enable_changed = | ||
| 419 | polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; | ||
| 420 | const bool line_enable_changed = | ||
| 421 | polygon_offset.line_enable != cur_state.polygon_offset.line_enable; | ||
| 422 | const bool point_enable_changed = | ||
| 423 | polygon_offset.point_enable != cur_state.polygon_offset.point_enable; | ||
| 424 | const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; | ||
| 425 | const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; | ||
| 426 | const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; | ||
| 427 | |||
| 428 | if (fill_enable_changed) { | ||
| 429 | if (polygon_offset.fill_enable) { | ||
| 430 | glEnable(GL_POLYGON_OFFSET_FILL); | ||
| 431 | } else { | ||
| 432 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 433 | } | ||
| 434 | } | ||
| 435 | |||
| 436 | if (line_enable_changed) { | ||
| 437 | if (polygon_offset.line_enable) { | ||
| 438 | glEnable(GL_POLYGON_OFFSET_LINE); | ||
| 439 | } else { | ||
| 440 | glDisable(GL_POLYGON_OFFSET_LINE); | ||
| 441 | } | ||
| 442 | } | ||
| 443 | |||
| 444 | if (point_enable_changed) { | ||
| 445 | if (polygon_offset.point_enable) { | ||
| 446 | glEnable(GL_POLYGON_OFFSET_POINT); | ||
| 447 | } else { | ||
| 448 | glDisable(GL_POLYGON_OFFSET_POINT); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | |||
| 452 | if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && | ||
| 453 | (factor_changed || units_changed || clamp_changed)) { | ||
| 454 | |||
| 455 | if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { | ||
| 456 | glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); | ||
| 457 | } else { | ||
| 458 | glPolygonOffset(polygon_offset.factor, polygon_offset.units); | ||
| 459 | UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, | ||
| 460 | "Unimplemented Depth polygon offset clamp."); | ||
| 461 | } | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 409 | void OpenGLState::ApplyTextures() const { | 465 | void OpenGLState::ApplyTextures() const { |
| 410 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { | 466 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { |
| 411 | const auto& texture_unit = texture_units[i]; | 467 | const auto& texture_unit = texture_units[i]; |
| @@ -532,6 +588,7 @@ void OpenGLState::Apply() const { | |||
| 532 | ApplyLogicOp(); | 588 | ApplyLogicOp(); |
| 533 | ApplyTextures(); | 589 | ApplyTextures(); |
| 534 | ApplySamplers(); | 590 | ApplySamplers(); |
| 591 | ApplyPolygonOffset(); | ||
| 535 | cur_state = *this; | 592 | cur_state = *this; |
| 536 | } | 593 | } |
| 537 | 594 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 032fc43f0..0bf19ed07 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -176,6 +176,15 @@ public: | |||
| 176 | float size; // GL_POINT_SIZE | 176 | float size; // GL_POINT_SIZE |
| 177 | } point; | 177 | } point; |
| 178 | 178 | ||
| 179 | struct { | ||
| 180 | bool point_enable; | ||
| 181 | bool line_enable; | ||
| 182 | bool fill_enable; | ||
| 183 | GLfloat units; | ||
| 184 | GLfloat factor; | ||
| 185 | GLfloat clamp; | ||
| 186 | } polygon_offset; | ||
| 187 | |||
| 179 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE | 188 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE |
| 180 | 189 | ||
| 181 | OpenGLState(); | 190 | OpenGLState(); |
| @@ -226,6 +235,7 @@ private: | |||
| 226 | void ApplyLogicOp() const; | 235 | void ApplyLogicOp() const; |
| 227 | void ApplyTextures() const; | 236 | void ApplyTextures() const; |
| 228 | void ApplySamplers() const; | 237 | void ApplySamplers() const; |
| 238 | void ApplyPolygonOffset() const; | ||
| 229 | }; | 239 | }; |
| 230 | 240 | ||
| 231 | } // namespace OpenGL | 241 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1492e063a..4fd0d66c5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -19,9 +19,9 @@ | |||
| 19 | #include "core/settings.h" | 19 | #include "core/settings.h" |
| 20 | #include "core/telemetry_session.h" | 20 | #include "core/telemetry_session.h" |
| 21 | #include "core/tracer/recorder.h" | 21 | #include "core/tracer/recorder.h" |
| 22 | #include "video_core/morton.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 23 | #include "video_core/renderer_opengl/renderer_opengl.h" | 24 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 24 | #include "video_core/utils.h" | ||
| 25 | 25 | ||
| 26 | namespace OpenGL { | 26 | namespace OpenGL { |
| 27 | 27 | ||
diff --git a/src/video_core/utils.h b/src/video_core/utils.h deleted file mode 100644 index e0a14d48f..000000000 --- a/src/video_core/utils.h +++ /dev/null | |||
| @@ -1,164 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCore { | ||
| 10 | |||
| 11 | // 8x8 Z-Order coordinate from 2D coordinates | ||
| 12 | static inline u32 MortonInterleave(u32 x, u32 y) { | ||
| 13 | static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||
| 14 | static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||
| 15 | return xlut[x % 8] + ylut[y % 8]; | ||
| 16 | } | ||
| 17 | |||
| 18 | /** | ||
| 19 | * Calculates the offset of the position of the pixel in Morton order | ||
| 20 | */ | ||
| 21 | static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 22 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 23 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 24 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 25 | // texels are laid out in a 2x2 subtile like this: | ||
| 26 | // 2 3 | ||
| 27 | // 0 1 | ||
| 28 | // | ||
| 29 | // The full 8x8 tile has the texels arranged like this: | ||
| 30 | // | ||
| 31 | // 42 43 46 47 58 59 62 63 | ||
| 32 | // 40 41 44 45 56 57 60 61 | ||
| 33 | // 34 35 38 39 50 51 54 55 | ||
| 34 | // 32 33 36 37 48 49 52 53 | ||
| 35 | // 10 11 14 15 26 27 30 31 | ||
| 36 | // 08 09 12 13 24 25 28 29 | ||
| 37 | // 02 03 06 07 18 19 22 23 | ||
| 38 | // 00 01 04 05 16 17 20 21 | ||
| 39 | // | ||
| 40 | // This pattern is what's called Z-order curve, or Morton order. | ||
| 41 | |||
| 42 | const unsigned int block_height = 8; | ||
| 43 | const unsigned int coarse_x = x & ~7; | ||
| 44 | |||
| 45 | u32 i = VideoCore::MortonInterleave(x, y); | ||
| 46 | |||
| 47 | const unsigned int offset = coarse_x * block_height; | ||
| 48 | |||
| 49 | return (i + offset) * bytes_per_pixel; | ||
| 50 | } | ||
| 51 | |||
| 52 | static inline u32 MortonInterleave128(u32 x, u32 y) { | ||
| 53 | // 128x128 Z-Order coordinate from 2D coordinates | ||
| 54 | static constexpr u32 xlut[] = { | ||
| 55 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, | ||
| 56 | 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, | ||
| 57 | 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, | ||
| 58 | 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, | ||
| 59 | 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, | ||
| 60 | 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, | ||
| 61 | 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, | ||
| 62 | 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, | ||
| 63 | 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, | ||
| 64 | 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, | ||
| 65 | 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, | ||
| 66 | 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, | ||
| 67 | 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, | ||
| 68 | 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, | ||
| 69 | 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, | ||
| 70 | 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, | ||
| 71 | 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, | ||
| 72 | 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, | ||
| 73 | 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, | ||
| 74 | 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, | ||
| 75 | 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, | ||
| 76 | 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, | ||
| 77 | 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, | ||
| 78 | 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, | ||
| 79 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, | ||
| 80 | 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, | ||
| 81 | 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, | ||
| 82 | 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, | ||
| 83 | 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, | ||
| 84 | 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, | ||
| 85 | 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, | ||
| 86 | 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, | ||
| 87 | 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, | ||
| 88 | 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, | ||
| 89 | 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, | ||
| 90 | }; | ||
| 91 | static constexpr u32 ylut[] = { | ||
| 92 | 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, | ||
| 93 | 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, | ||
| 94 | 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, | ||
| 95 | 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, | ||
| 96 | 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, | ||
| 97 | 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, | ||
| 98 | 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, | ||
| 99 | 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, | ||
| 100 | 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, | ||
| 101 | 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, | ||
| 102 | 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, | ||
| 103 | 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, | ||
| 104 | 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, | ||
| 105 | 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, | ||
| 106 | 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, | ||
| 107 | 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, | ||
| 108 | 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, | ||
| 109 | 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, | ||
| 110 | 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, | ||
| 111 | 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, | ||
| 112 | 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, | ||
| 113 | 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, | ||
| 114 | 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, | ||
| 115 | 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, | ||
| 116 | 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, | ||
| 117 | 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, | ||
| 118 | 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, | ||
| 119 | 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, | ||
| 120 | 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, | ||
| 121 | 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, | ||
| 122 | 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, | ||
| 123 | 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, | ||
| 124 | 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, | ||
| 125 | 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, | ||
| 126 | 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, | ||
| 127 | }; | ||
| 128 | return xlut[x % 128] + ylut[y % 128]; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 132 | // Calculates the offset of the position of the pixel in Morton order | ||
| 133 | // Framebuffer images are split into 128x128 tiles. | ||
| 134 | |||
| 135 | const unsigned int block_height = 128; | ||
| 136 | const unsigned int coarse_x = x & ~127; | ||
| 137 | |||
| 138 | u32 i = MortonInterleave128(x, y); | ||
| 139 | |||
| 140 | const unsigned int offset = coarse_x * block_height; | ||
| 141 | |||
| 142 | return (i + offset) * bytes_per_pixel; | ||
| 143 | } | ||
| 144 | |||
| 145 | static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, | ||
| 146 | u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, | ||
| 147 | bool morton_to_gl) { | ||
| 148 | u8* data_ptrs[2]; | ||
| 149 | for (unsigned y = 0; y < height; ++y) { | ||
| 150 | for (unsigned x = 0; x < width; ++x) { | ||
| 151 | const u32 coarse_y = y & ~127; | ||
| 152 | u32 morton_offset = | ||
| 153 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 154 | u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; | ||
| 155 | |||
| 156 | data_ptrs[morton_to_gl] = morton_data + morton_offset; | ||
| 157 | data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | ||
| 158 | |||
| 159 | memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | } // namespace VideoCore | ||