diff options
38 files changed, 778 insertions, 365 deletions
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml index 9975f5c49..6cd209dbf 100644 --- a/.ci/templates/build-standard.yml +++ b/.ci/templates/build-standard.yml | |||
| @@ -3,7 +3,7 @@ jobs: | |||
| 3 | displayName: 'standard' | 3 | displayName: 'standard' |
| 4 | pool: | 4 | pool: |
| 5 | vmImage: ubuntu-latest | 5 | vmImage: ubuntu-latest |
| 6 | strategy: | 6 | strategy: |
| 7 | maxParallel: 10 | 7 | maxParallel: 10 |
| 8 | matrix: | 8 | matrix: |
| 9 | windows: | 9 | windows: |
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml index 101e52996..278efb6f5 100644 --- a/.ci/templates/build-testing.yml +++ b/.ci/templates/build-testing.yml | |||
| @@ -3,7 +3,7 @@ jobs: | |||
| 3 | displayName: 'testing' | 3 | displayName: 'testing' |
| 4 | pool: | 4 | pool: |
| 5 | vmImage: ubuntu-latest | 5 | vmImage: ubuntu-latest |
| 6 | strategy: | 6 | strategy: |
| 7 | maxParallel: 10 | 7 | maxParallel: 10 |
| 8 | matrix: | 8 | matrix: |
| 9 | windows: | 9 | windows: |
diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml deleted file mode 100644 index 60bebd2aa..000000000 --- a/.ci/templates/release.yml +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | steps: | ||
| 2 | - task: DownloadPipelineArtifact@2 | ||
| 3 | displayName: 'Download Windows Release' | ||
| 4 | inputs: | ||
| 5 | artifactName: 'yuzu-$(BuildName)-windows-mingw' | ||
| 6 | buildType: 'current' | ||
| 7 | targetPath: '$(Build.ArtifactStagingDirectory)' | ||
| 8 | - task: DownloadPipelineArtifact@2 | ||
| 9 | displayName: 'Download Linux Release' | ||
| 10 | inputs: | ||
| 11 | artifactName: 'yuzu-$(BuildName)-linux' | ||
| 12 | buildType: 'current' | ||
| 13 | targetPath: '$(Build.ArtifactStagingDirectory)' | ||
| 14 | - task: DownloadPipelineArtifact@2 | ||
| 15 | displayName: 'Download Release Point' | ||
| 16 | inputs: | ||
| 17 | artifactName: 'yuzu-$(BuildName)-release-point' | ||
| 18 | buildType: 'current' | ||
| 19 | targetPath: '$(Build.ArtifactStagingDirectory)' | ||
| 20 | - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha | ||
| 21 | displayName: 'Calculate Release Point' | ||
| 22 | - task: GitHubRelease@0 | ||
| 23 | inputs: | ||
| 24 | gitHubConnection: $(GitHubReleaseConnectionName) | ||
| 25 | repositoryName: '$(GitHubReleaseRepoName)' | ||
| 26 | action: 'create' | ||
| 27 | target: $(variables.tagcommit) | ||
| 28 | title: 'yuzu $(BuildName) #$(Build.BuildId)' | ||
| 29 | assets: '$(Build.ArtifactStagingDirectory)/*' | ||
| @@ -1,7 +1,8 @@ | |||
| 1 | yuzu emulator | 1 | yuzu emulator |
| 2 | ============= | 2 | ============= |
| 3 | [](https://travis-ci.org/yuzu-emu/yuzu) | 3 | [](https://travis-ci.org/yuzu-emu/yuzu) |
| 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) | 4 | [](https://ci.appveyor.com/project/bunnei/yuzu) |
| 5 | [](https://dev.azure.com/yuzu-emu/yuzu/) | ||
| 5 | 6 | ||
| 6 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). | 7 | yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). |
| 7 | 8 | ||
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index b0ee7821a..97d5c2a8a 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp | |||
| @@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_ | |||
| 50 | 50 | ||
| 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, | 51 | static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, |
| 52 | void* user_data) { | 52 | void* user_data) { |
| 53 | auto* const system = static_cast<System*>(user_data); | ||
| 54 | |||
| 53 | ARM_Interface::ThreadContext ctx{}; | 55 | ARM_Interface::ThreadContext ctx{}; |
| 54 | Core::CurrentArmInterface().SaveContext(ctx); | 56 | system->CurrentArmInterface().SaveContext(ctx); |
| 55 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, | 57 | ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, |
| 56 | ctx.pc, ctx.cpu_registers[30]); | 58 | ctx.pc, ctx.cpu_registers[30]); |
| 57 | return {}; | 59 | |
| 60 | return false; | ||
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | 63 | ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { |
| @@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { | |||
| 65 | 68 | ||
| 66 | uc_hook hook{}; | 69 | uc_hook hook{}; |
| 67 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); | 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); |
| 68 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); | 71 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1)); |
| 69 | if (GDBStub::IsServerEnabled()) { | 72 | if (GDBStub::IsServerEnabled()) { |
| 70 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); | 73 | CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); |
| 71 | last_bkpt_hit = false; | 74 | last_bkpt_hit = false; |
diff --git a/src/core/core.h b/src/core/core.h index 11e73278e..8ebb385ac 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -327,10 +327,6 @@ private: | |||
| 327 | static System s_instance; | 327 | static System s_instance; |
| 328 | }; | 328 | }; |
| 329 | 329 | ||
| 330 | inline ARM_Interface& CurrentArmInterface() { | ||
| 331 | return System::GetInstance().CurrentArmInterface(); | ||
| 332 | } | ||
| 333 | |||
| 334 | inline Kernel::Process* CurrentProcess() { | 330 | inline Kernel::Process* CurrentProcess() { |
| 335 | return System::GetInstance().CurrentProcess(); | 331 | return System::GetInstance().CurrentProcess(); |
| 336 | } | 332 | } |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index db3ab14ce..92169a97b 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 187 | // The kernel always ensures that the given stack size is page aligned. | 187 | AllocateMainThreadStack(stack_size); |
| 188 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 188 | tls_region_address = CreateTLSRegion(); |
| 189 | |||
| 190 | // Allocate and map the main thread stack | ||
| 191 | // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part | ||
| 192 | // of the user address space. | ||
| 193 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 194 | vm_manager | ||
| 195 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 196 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 197 | .Unwrap(); | ||
| 198 | 189 | ||
| 199 | vm_manager.LogLayout(); | 190 | vm_manager.LogLayout(); |
| 191 | |||
| 200 | ChangeStatus(ProcessStatus::Running); | 192 | ChangeStatus(ProcessStatus::Running); |
| 201 | 193 | ||
| 202 | SetupMainThread(*this, kernel, main_thread_priority); | 194 | SetupMainThread(*this, kernel, main_thread_priority); |
| @@ -226,6 +218,9 @@ void Process::PrepareForTermination() { | |||
| 226 | stop_threads(system.Scheduler(2).GetThreadList()); | 218 | stop_threads(system.Scheduler(2).GetThreadList()); |
| 227 | stop_threads(system.Scheduler(3).GetThreadList()); | 219 | stop_threads(system.Scheduler(3).GetThreadList()); |
| 228 | 220 | ||
| 221 | FreeTLSRegion(tls_region_address); | ||
| 222 | tls_region_address = 0; | ||
| 223 | |||
| 229 | ChangeStatus(ProcessStatus::Exited); | 224 | ChangeStatus(ProcessStatus::Exited); |
| 230 | } | 225 | } |
| 231 | 226 | ||
| @@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) { | |||
| 325 | WakeupAllWaitingThreads(); | 320 | WakeupAllWaitingThreads(); |
| 326 | } | 321 | } |
| 327 | 322 | ||
| 323 | void Process::AllocateMainThreadStack(u64 stack_size) { | ||
| 324 | // The kernel always ensures that the given stack size is page aligned. | ||
| 325 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | ||
| 326 | |||
| 327 | // Allocate and map the main thread stack | ||
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 329 | vm_manager | ||
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 332 | .Unwrap(); | ||
| 333 | } | ||
| 334 | |||
| 328 | } // namespace Kernel | 335 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 3196014da..c2df451f3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -135,6 +135,11 @@ public: | |||
| 135 | return mutex; | 135 | return mutex; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// Gets the address to the process' dedicated TLS region. | ||
| 139 | VAddr GetTLSRegionAddress() const { | ||
| 140 | return tls_region_address; | ||
| 141 | } | ||
| 142 | |||
| 138 | /// Gets the current status of the process | 143 | /// Gets the current status of the process |
| 139 | ProcessStatus GetStatus() const { | 144 | ProcessStatus GetStatus() const { |
| 140 | return status; | 145 | return status; |
| @@ -296,6 +301,9 @@ private: | |||
| 296 | /// a process signal. | 301 | /// a process signal. |
| 297 | void ChangeStatus(ProcessStatus new_status); | 302 | void ChangeStatus(ProcessStatus new_status); |
| 298 | 303 | ||
| 304 | /// Allocates the main thread stack for the process, given the stack size in bytes. | ||
| 305 | void AllocateMainThreadStack(u64 stack_size); | ||
| 306 | |||
| 299 | /// Memory manager for this process. | 307 | /// Memory manager for this process. |
| 300 | Kernel::VMManager vm_manager; | 308 | Kernel::VMManager vm_manager; |
| 301 | 309 | ||
| @@ -358,6 +366,9 @@ private: | |||
| 358 | /// variable related facilities. | 366 | /// variable related facilities. |
| 359 | Mutex mutex; | 367 | Mutex mutex; |
| 360 | 368 | ||
| 369 | /// Address indicating the location of the process' dedicated TLS region. | ||
| 370 | VAddr tls_region_address = 0; | ||
| 371 | |||
| 361 | /// Random values for svcGetInfo RandomEntropy | 372 | /// Random values for svcGetInfo RandomEntropy |
| 362 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; | 373 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; |
| 363 | 374 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index a46eed3da..1fd1a732a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 843 | return RESULT_SUCCESS; | 843 | return RESULT_SUCCESS; |
| 844 | 844 | ||
| 845 | case GetInfoType::UserExceptionContextAddr: | 845 | case GetInfoType::UserExceptionContextAddr: |
| 846 | LOG_WARNING(Kernel_SVC, | 846 | *result = process->GetTLSRegionAddress(); |
| 847 | "(STUBBED) Attempted to query user exception context address, returned 0"); | ||
| 848 | *result = 0; | ||
| 849 | return RESULT_SUCCESS; | 847 | return RESULT_SUCCESS; |
| 850 | 848 | ||
| 851 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: | 849 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
| @@ -1739,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var | |||
| 1739 | // Wait for an address (via Address Arbiter) | 1737 | // Wait for an address (via Address Arbiter) |
| 1740 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1738 | static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1741 | s64 timeout) { | 1739 | s64 timeout) { |
| 1742 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", | 1740 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, |
| 1743 | address, type, value, timeout); | 1741 | type, value, timeout); |
| 1744 | 1742 | ||
| 1745 | // If the passed address is a kernel virtual address, return invalid memory state. | 1743 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1746 | if (Memory::IsKernelVirtualAddress(address)) { | 1744 | if (Memory::IsKernelVirtualAddress(address)) { |
| @@ -1762,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, | |||
| 1762 | // Signals to an address (via Address Arbiter) | 1760 | // Signals to an address (via Address Arbiter) |
| 1763 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, | 1761 | static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, |
| 1764 | s32 num_to_wake) { | 1762 | s32 num_to_wake) { |
| 1765 | LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", | 1763 | LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", |
| 1766 | address, type, value, num_to_wake); | 1764 | address, type, value, num_to_wake); |
| 1767 | 1765 | ||
| 1768 | // If the passed address is a kernel virtual address, return invalid memory state. | 1766 | // If the passed address is a kernel virtual address, return invalid memory state. |
| 1769 | if (Memory::IsKernelVirtualAddress(address)) { | 1767 | if (Memory::IsKernelVirtualAddress(address)) { |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 089465a71..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..fe9fc0278 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 266 | ||
| 109 | const u32 method = method_call.method; | 267 | const u32 method = method_call.method; |
| 110 | 268 | ||
| 269 | if (method == cb_data_state.current) { | ||
| 270 | regs.reg_array[method] = method_call.argument; | ||
| 271 | ProcessCBData(method_call.argument); | ||
| 272 | return; | ||
| 273 | } else if (cb_data_state.current != null_cb_data) { | ||
| 274 | FinishCBData(); | ||
| 275 | } | ||
| 276 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 277 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 278 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 279 | if (executing_macro != 0) { |
| @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 309 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 310 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 311 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 312 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 313 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 314 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 315 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 316 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 317 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 318 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 319 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 320 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 321 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 322 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 323 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 324 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 325 | } |
| 190 | } | 326 | } |
| 191 | 327 | ||
| @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 350 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 351 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 352 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 353 | StartCBData(method); |
| 218 | break; | 354 | break; |
| 219 | } | 355 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 356 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 397 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 398 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 399 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 400 | dirty.OnMemoryWrite(); |
| 265 | } | 401 | } |
| 266 | break; | 402 | break; |
| 267 | } | 403 | } |
| @@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 469 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 470 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 471 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 472 | break; |
| 338 | } | 473 | } |
| 339 | default: | 474 | default: |
| @@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 540 | } |
| 406 | 541 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 542 | void Maxwell3D::ProcessCBData(u32 value) { |
| 543 | const u32 id = cb_data_state.id; | ||
| 544 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 545 | // Increment the current buffer position. | ||
| 546 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 547 | cb_data_state.counter++; | ||
| 548 | } | ||
| 549 | |||
| 550 | void Maxwell3D::StartCBData(u32 method) { | ||
| 551 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 552 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 553 | cb_data_state.id = method - first_cb_data; | ||
| 554 | cb_data_state.current = method; | ||
| 555 | cb_data_state.counter = 0; | ||
| 556 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 557 | } | ||
| 558 | |||
| 559 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 560 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 561 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 562 | ASSERT(buffer_address != 0); |
| 411 | 563 | ||
| 412 | // Don't allow writing past the end of the buffer. | 564 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 565 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | 566 | ||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | 567 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 568 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; | ||
| 416 | 569 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 570 | const u32 id = cb_data_state.id; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 571 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 419 | memory_manager.Write<u32>(address, value); | 572 | dirty.OnMemoryWrite(); |
| 420 | 573 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 574 | cb_data_state.id = null_cb_data; |
| 422 | 575 | cb_data_state.current = null_cb_data; | |
| 423 | // Increment the current buffer position. | ||
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 425 | } | 576 | } |
| 426 | 577 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 578 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..ac300bf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1124,23 +1124,77 @@ public: | |||
| 1124 | 1124 | ||
| 1125 | State state{}; | 1125 | State state{}; |
| 1126 | 1126 | ||
| 1127 | struct DirtyFlags { | 1127 | struct DirtyRegs { |
| 1128 | std::bitset<8> color_buffer{0xFF}; | 1128 | static constexpr std::size_t NUM_REGS = 256; |
| 1129 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1129 | union { |
| 1130 | struct { | ||
| 1131 | bool null_dirty; | ||
| 1132 | |||
| 1133 | // Vertex Attributes | ||
| 1134 | bool vertex_attrib_format; | ||
| 1135 | |||
| 1136 | // Vertex Arrays | ||
| 1137 | std::array<bool, 32> vertex_array; | ||
| 1138 | |||
| 1139 | bool vertex_array_buffers; | ||
| 1140 | |||
| 1141 | // Vertex Instances | ||
| 1142 | std::array<bool, 32> vertex_instance; | ||
| 1143 | |||
| 1144 | bool vertex_instances; | ||
| 1145 | |||
| 1146 | // Render Targets | ||
| 1147 | std::array<bool, 8> render_target; | ||
| 1148 | bool depth_buffer; | ||
| 1149 | |||
| 1150 | bool render_settings; | ||
| 1151 | |||
| 1152 | // Shaders | ||
| 1153 | bool shaders; | ||
| 1154 | |||
| 1155 | // Rasterizer State | ||
| 1156 | bool viewport; | ||
| 1157 | bool clip_coefficient; | ||
| 1158 | bool cull_mode; | ||
| 1159 | bool primitive_restart; | ||
| 1160 | bool depth_test; | ||
| 1161 | bool stencil_test; | ||
| 1162 | bool blend_state; | ||
| 1163 | bool scissor_test; | ||
| 1164 | bool transform_feedback; | ||
| 1165 | bool color_mask; | ||
| 1166 | bool polygon_offset; | ||
| 1130 | 1167 | ||
| 1131 | bool vertex_attrib_format = true; | 1168 | // Complementary |
| 1132 | bool zeta_buffer = true; | 1169 | bool viewport_transform; |
| 1133 | bool shaders = true; | 1170 | bool screen_y_control; |
| 1171 | |||
| 1172 | bool memory_general; | ||
| 1173 | }; | ||
| 1174 | std::array<bool, NUM_REGS> regs; | ||
| 1175 | }; | ||
| 1176 | |||
| 1177 | void ResetVertexArrays() { | ||
| 1178 | vertex_array.fill(true); | ||
| 1179 | vertex_array_buffers = true; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | void ResetRenderTargets() { | ||
| 1183 | depth_buffer = true; | ||
| 1184 | render_target.fill(true); | ||
| 1185 | render_settings = true; | ||
| 1186 | } | ||
| 1134 | 1187 | ||
| 1135 | void OnMemoryWrite() { | 1188 | void OnMemoryWrite() { |
| 1136 | zeta_buffer = true; | ||
| 1137 | shaders = true; | 1189 | shaders = true; |
| 1138 | color_buffer.set(); | 1190 | memory_general = true; |
| 1139 | vertex_array.set(); | 1191 | ResetRenderTargets(); |
| 1192 | ResetVertexArrays(); | ||
| 1140 | } | 1193 | } |
| 1141 | }; | ||
| 1142 | 1194 | ||
| 1143 | DirtyFlags dirty_flags; | 1195 | } dirty{}; |
| 1196 | |||
| 1197 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1144 | 1198 | ||
| 1145 | /// Reads a register value located at the input method address | 1199 | /// Reads a register value located at the input method address |
| 1146 | u32 GetRegisterValue(u32 method) const; | 1200 | u32 GetRegisterValue(u32 method) const; |
| @@ -1192,6 +1246,15 @@ private: | |||
| 1192 | /// Interpreter for the macro codes uploaded to the GPU. | 1246 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1193 | MacroInterpreter macro_interpreter; | 1247 | MacroInterpreter macro_interpreter; |
| 1194 | 1248 | ||
| 1249 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1250 | struct { | ||
| 1251 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1252 | u32 current{null_cb_data}; | ||
| 1253 | u32 id{null_cb_data}; | ||
| 1254 | u32 start_pos{}; | ||
| 1255 | u32 counter{}; | ||
| 1256 | } cb_data_state; | ||
| 1257 | |||
| 1195 | Upload::State upload_state; | 1258 | Upload::State upload_state; |
| 1196 | 1259 | ||
| 1197 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1260 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| @@ -1200,6 +1263,8 @@ private: | |||
| 1200 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1263 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1201 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1264 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1202 | 1265 | ||
| 1266 | void InitDirtySettings(); | ||
| 1267 | |||
| 1203 | /** | 1268 | /** |
| 1204 | * Call a macro on this engine. | 1269 | * Call a macro on this engine. |
| 1205 | * @param method Method to call | 1270 | * @param method Method to call |
| @@ -1223,7 +1288,9 @@ private: | |||
| 1223 | void ProcessSyncPoint(); | 1288 | void ProcessSyncPoint(); |
| 1224 | 1289 | ||
| 1225 | /// Handles a write to the CB_DATA[i] register. | 1290 | /// Handles a write to the CB_DATA[i] register. |
| 1291 | void StartCBData(u32 method); | ||
| 1226 | void ProcessCBData(u32 value); | 1292 | void ProcessCBData(u32 value); |
| 1293 | void FinishCBData(); | ||
| 1227 | 1294 | ||
| 1228 | /// Handles a write to the CB_BIND register. | 1295 | /// Handles a write to the CB_BIND register. |
| 1229 | void ProcessCBBind(Regs::ShaderStage stage); | 1296 | void ProcessCBBind(Regs::ShaderStage stage); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..b5f57e534 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 79d469b88..8520a0143 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -931,8 +931,6 @@ union Instruction { | |||
| 931 | } csetp; | 931 | } csetp; |
| 932 | 932 | ||
| 933 | union { | 933 | union { |
| 934 | BitField<35, 4, PredCondition> cond; | ||
| 935 | BitField<49, 1, u64> h_and; | ||
| 936 | BitField<6, 1, u64> ftz; | 934 | BitField<6, 1, u64> ftz; |
| 937 | BitField<45, 2, PredOperation> op; | 935 | BitField<45, 2, PredOperation> op; |
| 938 | BitField<3, 3, u64> pred3; | 936 | BitField<3, 3, u64> pred3; |
| @@ -940,9 +938,21 @@ union Instruction { | |||
| 940 | BitField<43, 1, u64> negate_a; | 938 | BitField<43, 1, u64> negate_a; |
| 941 | BitField<44, 1, u64> abs_a; | 939 | BitField<44, 1, u64> abs_a; |
| 942 | BitField<47, 2, HalfType> type_a; | 940 | BitField<47, 2, HalfType> type_a; |
| 943 | BitField<31, 1, u64> negate_b; | 941 | union { |
| 944 | BitField<30, 1, u64> abs_b; | 942 | BitField<35, 4, PredCondition> cond; |
| 945 | BitField<28, 2, HalfType> type_b; | 943 | BitField<49, 1, u64> h_and; |
| 944 | BitField<31, 1, u64> negate_b; | ||
| 945 | BitField<30, 1, u64> abs_b; | ||
| 946 | BitField<28, 2, HalfType> type_b; | ||
| 947 | } reg; | ||
| 948 | union { | ||
| 949 | BitField<56, 1, u64> negate_b; | ||
| 950 | BitField<54, 1, u64> abs_b; | ||
| 951 | } cbuf; | ||
| 952 | union { | ||
| 953 | BitField<49, 4, PredCondition> cond; | ||
| 954 | BitField<53, 1, u64> h_and; | ||
| 955 | } cbuf_and_imm; | ||
| 946 | BitField<42, 1, u64> neg_pred; | 956 | BitField<42, 1, u64> neg_pred; |
| 947 | BitField<39, 3, u64> pred39; | 957 | BitField<39, 3, u64> pred39; |
| 948 | } hsetp2; | 958 | } hsetp2; |
| @@ -1548,7 +1558,9 @@ public: | |||
| 1548 | HFMA2_RC, | 1558 | HFMA2_RC, |
| 1549 | HFMA2_RR, | 1559 | HFMA2_RR, |
| 1550 | HFMA2_IMM_R, | 1560 | HFMA2_IMM_R, |
| 1561 | HSETP2_C, | ||
| 1551 | HSETP2_R, | 1562 | HSETP2_R, |
| 1563 | HSETP2_IMM, | ||
| 1552 | HSET2_R, | 1564 | HSET2_R, |
| 1553 | POPC_C, | 1565 | POPC_C, |
| 1554 | POPC_R, | 1566 | POPC_R, |
| @@ -1831,7 +1843,9 @@ private: | |||
| 1831 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | 1843 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), |
| 1832 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | 1844 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), |
| 1833 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | 1845 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), |
| 1834 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | 1846 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 1847 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 1848 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 1835 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 1849 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 1836 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1850 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1837 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1851 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9cd4cf7b8..c59e687b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -107,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 108 | state.draw.shader_program = 0; | 108 | state.draw.shader_program = 0; |
| 109 | state.Apply(); | 109 | state.Apply(); |
| 110 | clear_framebuffer.Create(); | ||
| 110 | 111 | ||
| 111 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 112 | CheckExtensions(); | 113 | CheckExtensions(); |
| @@ -126,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 126 | auto& gpu = system.GPU().Maxwell3D(); | 127 | auto& gpu = system.GPU().Maxwell3D(); |
| 127 | const auto& regs = gpu.regs; | 128 | const auto& regs = gpu.regs; |
| 128 | 129 | ||
| 129 | if (!gpu.dirty_flags.vertex_attrib_format) { | 130 | if (!gpu.dirty.vertex_attrib_format) { |
| 130 | return state.draw.vertex_array; | 131 | return state.draw.vertex_array; |
| 131 | } | 132 | } |
| 132 | gpu.dirty_flags.vertex_attrib_format = false; | 133 | gpu.dirty.vertex_attrib_format = false; |
| 133 | 134 | ||
| 134 | MICROPROFILE_SCOPE(OpenGL_VAO); | 135 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 135 | 136 | ||
| @@ -183,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 183 | } | 184 | } |
| 184 | 185 | ||
| 185 | // Rebinding the VAO invalidates the vertex buffer bindings. | 186 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 186 | gpu.dirty_flags.vertex_array.set(); | 187 | gpu.dirty.ResetVertexArrays(); |
| 187 | 188 | ||
| 188 | state.draw.vertex_array = vao_entry.handle; | 189 | state.draw.vertex_array = vao_entry.handle; |
| 189 | return vao_entry.handle; | 190 | return vao_entry.handle; |
| @@ -191,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 191 | 192 | ||
| 192 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 193 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 193 | auto& gpu = system.GPU().Maxwell3D(); | 194 | auto& gpu = system.GPU().Maxwell3D(); |
| 194 | const auto& regs = gpu.regs; | 195 | if (!gpu.dirty.vertex_array_buffers) |
| 195 | |||
| 196 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 197 | return; | 196 | return; |
| 197 | gpu.dirty.vertex_array_buffers = false; | ||
| 198 | |||
| 199 | const auto& regs = gpu.regs; | ||
| 198 | 200 | ||
| 199 | MICROPROFILE_SCOPE(OpenGL_VB); | 201 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 200 | 202 | ||
| 201 | // Upload all guest vertex arrays sequentially to our buffer | 203 | // Upload all guest vertex arrays sequentially to our buffer |
| 202 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 204 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 203 | if (!gpu.dirty_flags.vertex_array[index]) | 205 | if (!gpu.dirty.vertex_array[index]) |
| 204 | continue; | 206 | continue; |
| 207 | gpu.dirty.vertex_array[index] = false; | ||
| 208 | gpu.dirty.vertex_instance[index] = false; | ||
| 205 | 209 | ||
| 206 | const auto& vertex_array = regs.vertex_array[index]; | 210 | const auto& vertex_array = regs.vertex_array[index]; |
| 207 | if (!vertex_array.IsEnabled()) | 211 | if (!vertex_array.IsEnabled()) |
| @@ -226,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 226 | glVertexArrayBindingDivisor(vao, index, 0); | 230 | glVertexArrayBindingDivisor(vao, index, 0); |
| 227 | } | 231 | } |
| 228 | } | 232 | } |
| 233 | } | ||
| 234 | |||
| 235 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | ||
| 236 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 237 | |||
| 238 | if (!gpu.dirty.vertex_instances) | ||
| 239 | return; | ||
| 240 | gpu.dirty.vertex_instances = false; | ||
| 229 | 241 | ||
| 230 | gpu.dirty_flags.vertex_array.reset(); | 242 | const auto& regs = gpu.regs; |
| 243 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 244 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 245 | if (!gpu.dirty.vertex_instance[index]) | ||
| 246 | continue; | ||
| 247 | |||
| 248 | gpu.dirty.vertex_instance[index] = false; | ||
| 249 | |||
| 250 | if (regs.instanced_arrays.IsInstancingEnabled(index) && | ||
| 251 | regs.vertex_array[index].divisor != 0) { | ||
| 252 | // Enable vertex buffer instancing with the specified divisor. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 254 | } else { | ||
| 255 | // Disable the vertex buffer instancing. | ||
| 256 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 257 | } | ||
| 258 | } | ||
| 231 | } | 259 | } |
| 232 | 260 | ||
| 233 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 261 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| @@ -343,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 343 | 371 | ||
| 344 | SyncClipEnabled(clip_distances); | 372 | SyncClipEnabled(clip_distances); |
| 345 | 373 | ||
| 346 | gpu.dirty_flags.shaders = false; | 374 | gpu.dirty.shaders = false; |
| 347 | } | 375 | } |
| 348 | 376 | ||
| 349 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 377 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -426,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 426 | 454 | ||
| 427 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 455 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 428 | single_color_target}; | 456 | single_color_target}; |
| 429 | if (fb_config_state == current_framebuffer_config_state && | 457 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 430 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 431 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 458 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 432 | // single color targets). This is done because the guest registers may not change but the | 459 | // single color targets). This is done because the guest registers may not change but the |
| 433 | // host framebuffer may contain different attachments | 460 | // host framebuffer may contain different attachments |
| 434 | return current_depth_stencil_usage; | 461 | return current_depth_stencil_usage; |
| 435 | } | 462 | } |
| 463 | gpu.dirty.render_settings = false; | ||
| 436 | current_framebuffer_config_state = fb_config_state; | 464 | current_framebuffer_config_state = fb_config_state; |
| 437 | 465 | ||
| 438 | texture_cache.GuardRenderTargets(true); | 466 | texture_cache.GuardRenderTargets(true); |
| @@ -521,13 +549,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 521 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 549 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 522 | } | 550 | } |
| 523 | 551 | ||
| 552 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 553 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 554 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 555 | const auto& regs = gpu.regs; | ||
| 556 | |||
| 557 | texture_cache.GuardRenderTargets(true); | ||
| 558 | View color_surface{}; | ||
| 559 | if (using_color_fb) { | ||
| 560 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 561 | } | ||
| 562 | View depth_surface{}; | ||
| 563 | if (using_depth_fb || using_stencil_fb) { | ||
| 564 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 565 | } | ||
| 566 | texture_cache.GuardRenderTargets(false); | ||
| 567 | |||
| 568 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 569 | current_state.ApplyFramebufferState(); | ||
| 570 | |||
| 571 | if (color_surface) { | ||
| 572 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 573 | } else { | ||
| 574 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 575 | } | ||
| 576 | |||
| 577 | if (depth_surface) { | ||
| 578 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 579 | switch (params.type) { | ||
| 580 | case VideoCore::Surface::SurfaceType::Depth: { | ||
| 581 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 582 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 583 | break; | ||
| 584 | } | ||
| 585 | case VideoCore::Surface::SurfaceType::DepthStencil: { | ||
| 586 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | default: { UNIMPLEMENTED(); } | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 593 | 0); | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 524 | void RasterizerOpenGL::Clear() { | 597 | void RasterizerOpenGL::Clear() { |
| 525 | const auto& regs = system.GPU().Maxwell3D().regs; | 598 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 526 | bool use_color{}; | 599 | bool use_color{}; |
| 527 | bool use_depth{}; | 600 | bool use_depth{}; |
| 528 | bool use_stencil{}; | 601 | bool use_stencil{}; |
| 529 | 602 | ||
| 530 | OpenGLState clear_state; | 603 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 604 | SCOPE_EXIT({ | ||
| 605 | prev_state.AllDirty(); | ||
| 606 | prev_state.Apply(); | ||
| 607 | }); | ||
| 608 | |||
| 609 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 610 | clear_state.SetDefaultViewports(); | ||
| 531 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 611 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 532 | regs.clear_buffers.A) { | 612 | regs.clear_buffers.A) { |
| 533 | use_color = true; | 613 | use_color = true; |
| @@ -547,6 +627,7 @@ void RasterizerOpenGL::Clear() { | |||
| 547 | // true. | 627 | // true. |
| 548 | clear_state.depth.test_enabled = true; | 628 | clear_state.depth.test_enabled = true; |
| 549 | clear_state.depth.test_func = GL_ALWAYS; | 629 | clear_state.depth.test_func = GL_ALWAYS; |
| 630 | clear_state.depth.write_mask = GL_TRUE; | ||
| 550 | } | 631 | } |
| 551 | if (regs.clear_buffers.S) { | 632 | if (regs.clear_buffers.S) { |
| 552 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 633 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| @@ -583,8 +664,9 @@ void RasterizerOpenGL::Clear() { | |||
| 583 | return; | 664 | return; |
| 584 | } | 665 | } |
| 585 | 666 | ||
| 586 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 667 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 587 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 668 | |
| 669 | SyncViewport(clear_state); | ||
| 588 | if (regs.clear_flags.scissor) { | 670 | if (regs.clear_flags.scissor) { |
| 589 | SyncScissorTest(clear_state); | 671 | SyncScissorTest(clear_state); |
| 590 | } | 672 | } |
| @@ -593,21 +675,18 @@ void RasterizerOpenGL::Clear() { | |||
| 593 | clear_state.EmulateViewportWithScissor(); | 675 | clear_state.EmulateViewportWithScissor(); |
| 594 | } | 676 | } |
| 595 | 677 | ||
| 596 | clear_state.ApplyColorMask(); | 678 | clear_state.AllDirty(); |
| 597 | clear_state.ApplyDepth(); | 679 | clear_state.Apply(); |
| 598 | clear_state.ApplyStencilTest(); | ||
| 599 | clear_state.ApplyViewport(); | ||
| 600 | clear_state.ApplyFramebufferState(); | ||
| 601 | 680 | ||
| 602 | if (use_color) { | 681 | if (use_color) { |
| 603 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 682 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 604 | } | 683 | } |
| 605 | 684 | ||
| 606 | if (clear_depth && clear_stencil) { | 685 | if (use_depth && use_stencil) { |
| 607 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 686 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 608 | } else if (clear_depth) { | 687 | } else if (use_depth) { |
| 609 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); | 688 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); |
| 610 | } else if (clear_stencil) { | 689 | } else if (use_stencil) { |
| 611 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 690 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 612 | } | 691 | } |
| 613 | } | 692 | } |
| @@ -663,6 +742,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 663 | 742 | ||
| 664 | // Upload vertex and index data. | 743 | // Upload vertex and index data. |
| 665 | SetupVertexBuffer(vao); | 744 | SetupVertexBuffer(vao); |
| 745 | SetupVertexInstances(vao); | ||
| 666 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | 746 | const GLintptr index_buffer_offset = SetupIndexBuffer(); |
| 667 | 747 | ||
| 668 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | 748 | // Setup draw parameters. It will automatically choose what glDraw* method to use. |
| @@ -689,7 +769,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 689 | 769 | ||
| 690 | if (invalidate) { | 770 | if (invalidate) { |
| 691 | // As all cached buffers are invalidated, we need to recheck their state. | 771 | // As all cached buffers are invalidated, we need to recheck their state. |
| 692 | gpu.dirty_flags.vertex_array.set(); | 772 | gpu.dirty.ResetVertexArrays(); |
| 693 | } | 773 | } |
| 694 | 774 | ||
| 695 | shader_program_manager->ApplyTo(state); | 775 | shader_program_manager->ApplyTo(state); |
| @@ -702,6 +782,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 702 | params.DispatchDraw(); | 782 | params.DispatchDraw(); |
| 703 | 783 | ||
| 704 | accelerate_draw = AccelDraw::Disabled; | 784 | accelerate_draw = AccelDraw::Disabled; |
| 785 | gpu.dirty.memory_general = false; | ||
| 705 | } | 786 | } |
| 706 | 787 | ||
| 707 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 788 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -976,10 +1057,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 976 | } | 1057 | } |
| 977 | 1058 | ||
| 978 | void RasterizerOpenGL::SyncCullMode() { | 1059 | void RasterizerOpenGL::SyncCullMode() { |
| 979 | const auto& regs = system.GPU().Maxwell3D().regs; | 1060 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 980 | 1061 | ||
| 981 | state.cull.enabled = regs.cull.enabled != 0; | 1062 | const auto& regs = maxwell3d.regs; |
| 982 | 1063 | ||
| 1064 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 983 | if (state.cull.enabled) { | 1065 | if (state.cull.enabled) { |
| 984 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1066 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 985 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 1067 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -1012,16 +1094,21 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 1012 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1094 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 1013 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1095 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 1014 | 1096 | ||
| 1015 | if (!state.depth.test_enabled) | 1097 | if (!state.depth.test_enabled) { |
| 1016 | return; | 1098 | return; |
| 1099 | } | ||
| 1017 | 1100 | ||
| 1018 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1101 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 1019 | } | 1102 | } |
| 1020 | 1103 | ||
| 1021 | void RasterizerOpenGL::SyncStencilTestState() { | 1104 | void RasterizerOpenGL::SyncStencilTestState() { |
| 1022 | const auto& regs = system.GPU().Maxwell3D().regs; | 1105 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1023 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1106 | if (!maxwell3d.dirty.stencil_test) { |
| 1107 | return; | ||
| 1108 | } | ||
| 1109 | const auto& regs = maxwell3d.regs; | ||
| 1024 | 1110 | ||
| 1111 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 1025 | if (!regs.stencil_enable) { | 1112 | if (!regs.stencil_enable) { |
| 1026 | return; | 1113 | return; |
| 1027 | } | 1114 | } |
| @@ -1050,10 +1137,17 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 1050 | state.stencil.back.action_depth_fail = GL_KEEP; | 1137 | state.stencil.back.action_depth_fail = GL_KEEP; |
| 1051 | state.stencil.back.action_depth_pass = GL_KEEP; | 1138 | state.stencil.back.action_depth_pass = GL_KEEP; |
| 1052 | } | 1139 | } |
| 1140 | state.MarkDirtyStencilState(); | ||
| 1141 | maxwell3d.dirty.stencil_test = false; | ||
| 1053 | } | 1142 | } |
| 1054 | 1143 | ||
| 1055 | void RasterizerOpenGL::SyncColorMask() { | 1144 | void RasterizerOpenGL::SyncColorMask() { |
| 1056 | const auto& regs = system.GPU().Maxwell3D().regs; | 1145 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1146 | if (!maxwell3d.dirty.color_mask) { | ||
| 1147 | return; | ||
| 1148 | } | ||
| 1149 | const auto& regs = maxwell3d.regs; | ||
| 1150 | |||
| 1057 | const std::size_t count = | 1151 | const std::size_t count = |
| 1058 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1152 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 1059 | for (std::size_t i = 0; i < count; i++) { | 1153 | for (std::size_t i = 0; i < count; i++) { |
| @@ -1064,6 +1158,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 1064 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1158 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 1065 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1159 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 1066 | } | 1160 | } |
| 1161 | |||
| 1162 | state.MarkDirtyColorMask(); | ||
| 1163 | maxwell3d.dirty.color_mask = false; | ||
| 1067 | } | 1164 | } |
| 1068 | 1165 | ||
| 1069 | void RasterizerOpenGL::SyncMultiSampleState() { | 1166 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -1078,7 +1175,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 1078 | } | 1175 | } |
| 1079 | 1176 | ||
| 1080 | void RasterizerOpenGL::SyncBlendState() { | 1177 | void RasterizerOpenGL::SyncBlendState() { |
| 1081 | const auto& regs = system.GPU().Maxwell3D().regs; | 1178 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1179 | if (!maxwell3d.dirty.blend_state) { | ||
| 1180 | return; | ||
| 1181 | } | ||
| 1182 | const auto& regs = maxwell3d.regs; | ||
| 1082 | 1183 | ||
| 1083 | state.blend_color.red = regs.blend_color.r; | 1184 | state.blend_color.red = regs.blend_color.r; |
| 1084 | state.blend_color.green = regs.blend_color.g; | 1185 | state.blend_color.green = regs.blend_color.g; |
| @@ -1101,6 +1202,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1101 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1202 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1102 | state.blend[i].enabled = false; | 1203 | state.blend[i].enabled = false; |
| 1103 | } | 1204 | } |
| 1205 | maxwell3d.dirty.blend_state = false; | ||
| 1206 | state.MarkDirtyBlendState(); | ||
| 1104 | return; | 1207 | return; |
| 1105 | } | 1208 | } |
| 1106 | 1209 | ||
| @@ -1117,6 +1220,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1117 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1220 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1118 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1221 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1119 | } | 1222 | } |
| 1223 | |||
| 1224 | state.MarkDirtyBlendState(); | ||
| 1225 | maxwell3d.dirty.blend_state = false; | ||
| 1120 | } | 1226 | } |
| 1121 | 1227 | ||
| 1122 | void RasterizerOpenGL::SyncLogicOpState() { | 1228 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1168,13 +1274,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1168 | } | 1274 | } |
| 1169 | 1275 | ||
| 1170 | void RasterizerOpenGL::SyncPolygonOffset() { | 1276 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1171 | const auto& regs = system.GPU().Maxwell3D().regs; | 1277 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1278 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1279 | return; | ||
| 1280 | } | ||
| 1281 | const auto& regs = maxwell3d.regs; | ||
| 1282 | |||
| 1172 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1283 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1173 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1284 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1174 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1285 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1175 | state.polygon_offset.units = regs.polygon_offset_units; | 1286 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1176 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1287 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1177 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1288 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1289 | |||
| 1290 | state.MarkDirtyPolygonOffset(); | ||
| 1291 | maxwell3d.dirty.polygon_offset = false; | ||
| 1178 | } | 1292 | } |
| 1179 | 1293 | ||
| 1180 | void RasterizerOpenGL::SyncAlphaTest() { | 1294 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b2b671230..8b123c48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -109,6 +109,9 @@ private: | |||
| 109 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, | 109 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 110 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 110 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 111 | 111 | ||
| 112 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 113 | bool using_depth_fb, bool using_stencil_fb); | ||
| 114 | |||
| 112 | /// Configures the current constbuffers to use for the draw command. | 115 | /// Configures the current constbuffers to use for the draw command. |
| 113 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 116 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 114 | const Shader& shader); | 117 | const Shader& shader); |
| @@ -227,6 +230,7 @@ private: | |||
| 227 | GLuint SetupVertexFormat(); | 230 | GLuint SetupVertexFormat(); |
| 228 | 231 | ||
| 229 | void SetupVertexBuffer(GLuint vao); | 232 | void SetupVertexBuffer(GLuint vao); |
| 233 | void SetupVertexInstances(GLuint vao); | ||
| 230 | 234 | ||
| 231 | GLintptr SetupIndexBuffer(); | 235 | GLintptr SetupIndexBuffer(); |
| 232 | 236 | ||
| @@ -237,6 +241,8 @@ private: | |||
| 237 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 241 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 238 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 242 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 239 | 243 | ||
| 244 | OGLFramebuffer clear_framebuffer; | ||
| 245 | |||
| 240 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 246 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 241 | CachedPageMap cached_pages; | 247 | CachedPageMap cached_pages; |
| 242 | }; | 248 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 865c191bd..1c90facc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -628,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 628 | } | 628 | } |
| 629 | 629 | ||
| 630 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 630 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 631 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 631 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 632 | return last_shaders[static_cast<std::size_t>(program)]; | 632 | return last_shaders[static_cast<std::size_t>(program)]; |
| 633 | } | 633 | } |
| 634 | 634 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 50b616be4..ffe26b241 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -257,10 +257,6 @@ public: | |||
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | private: | 259 | private: |
| 260 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); | ||
| 261 | using OperationDecompilersArray = | ||
| 262 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 263 | |||
| 264 | void DeclareVertex() { | 260 | void DeclareVertex() { |
| 265 | if (!IsVertexShader(stage)) | 261 | if (!IsVertexShader(stage)) |
| 266 | return; | 262 | return; |
| @@ -1414,14 +1410,10 @@ private: | |||
| 1414 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); | 1410 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); |
| 1415 | } | 1411 | } |
| 1416 | 1412 | ||
| 1417 | std::string LogicalAll2(Operation operation) { | 1413 | std::string LogicalAnd2(Operation operation) { |
| 1418 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | 1414 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1419 | } | 1415 | } |
| 1420 | 1416 | ||
| 1421 | std::string LogicalAny2(Operation operation) { | ||
| 1422 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | template <bool with_nan> | 1417 | template <bool with_nan> |
| 1426 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { | 1418 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { |
| 1427 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | 1419 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, |
| @@ -1728,7 +1720,7 @@ private: | |||
| 1728 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1720 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; |
| 1729 | } | 1721 | } |
| 1730 | 1722 | ||
| 1731 | static constexpr OperationDecompilersArray operation_decompilers = { | 1723 | static constexpr std::array operation_decompilers = { |
| 1732 | &GLSLDecompiler::Assign, | 1724 | &GLSLDecompiler::Assign, |
| 1733 | 1725 | ||
| 1734 | &GLSLDecompiler::Select, | 1726 | &GLSLDecompiler::Select, |
| @@ -1812,8 +1804,7 @@ private: | |||
| 1812 | &GLSLDecompiler::LogicalXor, | 1804 | &GLSLDecompiler::LogicalXor, |
| 1813 | &GLSLDecompiler::LogicalNegate, | 1805 | &GLSLDecompiler::LogicalNegate, |
| 1814 | &GLSLDecompiler::LogicalPick2, | 1806 | &GLSLDecompiler::LogicalPick2, |
| 1815 | &GLSLDecompiler::LogicalAll2, | 1807 | &GLSLDecompiler::LogicalAnd2, |
| 1816 | &GLSLDecompiler::LogicalAny2, | ||
| 1817 | 1808 | ||
| 1818 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | 1809 | &GLSLDecompiler::LogicalLessThan<Type::Float>, |
| 1819 | &GLSLDecompiler::LogicalEqual<Type::Float>, | 1810 | &GLSLDecompiler::LogicalEqual<Type::Float>, |
| @@ -1877,6 +1868,7 @@ private: | |||
| 1877 | &GLSLDecompiler::WorkGroupId<1>, | 1868 | &GLSLDecompiler::WorkGroupId<1>, |
| 1878 | &GLSLDecompiler::WorkGroupId<2>, | 1869 | &GLSLDecompiler::WorkGroupId<2>, |
| 1879 | }; | 1870 | }; |
| 1871 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1880 | 1872 | ||
| 1881 | std::string GetRegister(u32 index) const { | 1873 | std::string GetRegister(u32 index) const { |
| 1882 | return GetDeclarationWithSuffix(index, "gpr"); | 1874 | return GetDeclarationWithSuffix(index, "gpr"); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0eae98afe..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -165,6 +165,25 @@ OpenGLState::OpenGLState() { | |||
| 165 | alpha_test.ref = 0.0f; | 165 | alpha_test.ref = 0.0f; |
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | void OpenGLState::SetDefaultViewports() { | ||
| 169 | for (auto& item : viewports) { | ||
| 170 | item.x = 0; | ||
| 171 | item.y = 0; | ||
| 172 | item.width = 0; | ||
| 173 | item.height = 0; | ||
| 174 | item.depth_range_near = 0.0f; | ||
| 175 | item.depth_range_far = 1.0f; | ||
| 176 | item.scissor.enabled = false; | ||
| 177 | item.scissor.x = 0; | ||
| 178 | item.scissor.y = 0; | ||
| 179 | item.scissor.width = 0; | ||
| 180 | item.scissor.height = 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | depth_clamp.far_plane = false; | ||
| 184 | depth_clamp.near_plane = false; | ||
| 185 | } | ||
| 186 | |||
| 168 | void OpenGLState::ApplyDefaultState() { | 187 | void OpenGLState::ApplyDefaultState() { |
| 169 | glEnable(GL_BLEND); | 188 | glEnable(GL_BLEND); |
| 170 | glDisable(GL_FRAMEBUFFER_SRGB); | 189 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 526 | } | 545 | } |
| 527 | } | 546 | } |
| 528 | 547 | ||
| 529 | void OpenGLState::Apply() const { | 548 | void OpenGLState::Apply() { |
| 530 | MICROPROFILE_SCOPE(OpenGL_State); | 549 | MICROPROFILE_SCOPE(OpenGL_State); |
| 531 | ApplyFramebufferState(); | 550 | ApplyFramebufferState(); |
| 532 | ApplyVertexArrayState(); | 551 | ApplyVertexArrayState(); |
| @@ -536,19 +555,31 @@ void OpenGLState::Apply() const { | |||
| 536 | ApplyPointSize(); | 555 | ApplyPointSize(); |
| 537 | ApplyFragmentColorClamp(); | 556 | ApplyFragmentColorClamp(); |
| 538 | ApplyMultisample(); | 557 | ApplyMultisample(); |
| 558 | if (dirty.color_mask) { | ||
| 559 | ApplyColorMask(); | ||
| 560 | dirty.color_mask = false; | ||
| 561 | } | ||
| 539 | ApplyDepthClamp(); | 562 | ApplyDepthClamp(); |
| 540 | ApplyColorMask(); | ||
| 541 | ApplyViewport(); | 563 | ApplyViewport(); |
| 542 | ApplyStencilTest(); | 564 | if (dirty.stencil_state) { |
| 565 | ApplyStencilTest(); | ||
| 566 | dirty.stencil_state = false; | ||
| 567 | } | ||
| 543 | ApplySRgb(); | 568 | ApplySRgb(); |
| 544 | ApplyCulling(); | 569 | ApplyCulling(); |
| 545 | ApplyDepth(); | 570 | ApplyDepth(); |
| 546 | ApplyPrimitiveRestart(); | 571 | ApplyPrimitiveRestart(); |
| 547 | ApplyBlending(); | 572 | if (dirty.blend_state) { |
| 573 | ApplyBlending(); | ||
| 574 | dirty.blend_state = false; | ||
| 575 | } | ||
| 548 | ApplyLogicOp(); | 576 | ApplyLogicOp(); |
| 549 | ApplyTextures(); | 577 | ApplyTextures(); |
| 550 | ApplySamplers(); | 578 | ApplySamplers(); |
| 551 | ApplyPolygonOffset(); | 579 | if (dirty.polygon_offset) { |
| 580 | ApplyPolygonOffset(); | ||
| 581 | dirty.polygon_offset = false; | ||
| 582 | } | ||
| 552 | ApplyAlphaTest(); | 583 | ApplyAlphaTest(); |
| 553 | } | 584 | } |
| 554 | 585 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -195,8 +195,9 @@ public: | |||
| 195 | s_rgb_used = false; | 195 | s_rgb_used = false; |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 199 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 200 | void Apply(); |
| 200 | 201 | ||
| 201 | void ApplyFramebufferState() const; | 202 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 203 | void ApplyVertexArrayState() const; |
| @@ -237,11 +238,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 238 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 239 | void EmulateViewportWithScissor(); |
| 239 | 240 | ||
| 241 | void MarkDirtyBlendState() { | ||
| 242 | dirty.blend_state = true; | ||
| 243 | } | ||
| 244 | |||
| 245 | void MarkDirtyStencilState() { | ||
| 246 | dirty.stencil_state = true; | ||
| 247 | } | ||
| 248 | |||
| 249 | void MarkDirtyPolygonOffset() { | ||
| 250 | dirty.polygon_offset = true; | ||
| 251 | } | ||
| 252 | |||
| 253 | void MarkDirtyColorMask() { | ||
| 254 | dirty.color_mask = true; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AllDirty() { | ||
| 258 | dirty.blend_state = true; | ||
| 259 | dirty.stencil_state = true; | ||
| 260 | dirty.polygon_offset = true; | ||
| 261 | dirty.color_mask = true; | ||
| 262 | } | ||
| 263 | |||
| 240 | private: | 264 | private: |
| 241 | static OpenGLState cur_state; | 265 | static OpenGLState cur_state; |
| 242 | 266 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 267 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 268 | static bool s_rgb_used; |
| 269 | struct { | ||
| 270 | bool blend_state; | ||
| 271 | bool stencil_state; | ||
| 272 | bool viewport_state; | ||
| 273 | bool polygon_offset; | ||
| 274 | bool color_mask; | ||
| 275 | } dirty{}; | ||
| 245 | }; | 276 | }; |
| 246 | 277 | ||
| 247 | } // namespace OpenGL | 278 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b1f6bc7c2..8fcd39a69 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 485 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 485 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 486 | 486 | ||
| 487 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 487 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 488 | SCOPE_EXIT({ prev_state.Apply(); }); | 488 | SCOPE_EXIT({ |
| 489 | prev_state.AllDirty(); | ||
| 490 | prev_state.Apply(); | ||
| 491 | }); | ||
| 489 | 492 | ||
| 490 | OpenGLState state; | 493 | OpenGLState state; |
| 491 | state.draw.read_framebuffer = src_framebuffer.handle; | 494 | state.draw.read_framebuffer = src_framebuffer.handle; |
| 492 | state.draw.draw_framebuffer = dst_framebuffer.handle; | 495 | state.draw.draw_framebuffer = dst_framebuffer.handle; |
| 496 | state.AllDirty(); | ||
| 493 | state.Apply(); | 497 | state.Apply(); |
| 494 | 498 | ||
| 495 | u32 buffers{}; | 499 | u32 buffers{}; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9ecdddb0d..a05cef3b9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 108 | 108 | ||
| 109 | // Maintain the rasterizer's state as a priority | 109 | // Maintain the rasterizer's state as a priority |
| 110 | OpenGLState prev_state = OpenGLState::GetCurState(); | 110 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 111 | state.AllDirty(); | ||
| 111 | state.Apply(); | 112 | state.Apply(); |
| 112 | 113 | ||
| 113 | if (framebuffer) { | 114 | if (framebuffer) { |
| @@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 140 | system.GetPerfStats().BeginSystemFrame(); | 141 | system.GetPerfStats().BeginSystemFrame(); |
| 141 | 142 | ||
| 142 | // Restore the rasterizer state | 143 | // Restore the rasterizer state |
| 144 | prev_state.AllDirty(); | ||
| 143 | prev_state.Apply(); | 145 | prev_state.Apply(); |
| 144 | } | 146 | } |
| 145 | 147 | ||
| @@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 206 | // Link shaders and get variable locations | 208 | // Link shaders and get variable locations |
| 207 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 209 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 208 | state.draw.shader_program = shader.handle; | 210 | state.draw.shader_program = shader.handle; |
| 211 | state.AllDirty(); | ||
| 209 | state.Apply(); | 212 | state.Apply(); |
| 210 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 213 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 211 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 214 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 338 | // Workaround brigthness problems in SMO by enabling sRGB in the final output | 341 | // Workaround brigthness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 | 342 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 343 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 344 | state.AllDirty(); | ||
| 341 | state.Apply(); | 345 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 346 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 347 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 348 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 349 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 350 | state.texture_units[0].texture = 0; |
| 351 | state.AllDirty(); | ||
| 347 | state.Apply(); | 352 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 353 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 354 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 393 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 394 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 395 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 396 | state.AllDirty(); | ||
| 391 | state.Apply(); | 397 | state.Apply(); |
| 392 | 398 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 399 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 413 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 414 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 415 | state.draw.draw_framebuffer = old_draw_fb; |
| 416 | state.AllDirty(); | ||
| 410 | state.Apply(); | 417 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 418 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 419 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 9b2d8e987..d267712c9 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -205,10 +205,6 @@ public: | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | private: | 207 | private: |
| 208 | using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); | ||
| 209 | using OperationDecompilersArray = | ||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 211 | |||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 208 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | 209 | ||
| 214 | void AllocateBindings() { | 210 | void AllocateBindings() { |
| @@ -804,12 +800,7 @@ private: | |||
| 804 | return {}; | 800 | return {}; |
| 805 | } | 801 | } |
| 806 | 802 | ||
| 807 | Id LogicalAll2(Operation operation) { | 803 | Id LogicalAnd2(Operation operation) { |
| 808 | UNIMPLEMENTED(); | ||
| 809 | return {}; | ||
| 810 | } | ||
| 811 | |||
| 812 | Id LogicalAny2(Operation operation) { | ||
| 813 | UNIMPLEMENTED(); | 804 | UNIMPLEMENTED(); |
| 814 | return {}; | 805 | return {}; |
| 815 | } | 806 | } |
| @@ -1206,7 +1197,7 @@ private: | |||
| 1206 | return {}; | 1197 | return {}; |
| 1207 | } | 1198 | } |
| 1208 | 1199 | ||
| 1209 | static constexpr OperationDecompilersArray operation_decompilers = { | 1200 | static constexpr std::array operation_decompilers = { |
| 1210 | &SPIRVDecompiler::Assign, | 1201 | &SPIRVDecompiler::Assign, |
| 1211 | 1202 | ||
| 1212 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | 1203 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, |
| @@ -1291,8 +1282,7 @@ private: | |||
| 1291 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | 1282 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, |
| 1292 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | 1283 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, |
| 1293 | &SPIRVDecompiler::LogicalPick2, | 1284 | &SPIRVDecompiler::LogicalPick2, |
| 1294 | &SPIRVDecompiler::LogicalAll2, | 1285 | &SPIRVDecompiler::LogicalAnd2, |
| 1295 | &SPIRVDecompiler::LogicalAny2, | ||
| 1296 | 1286 | ||
| 1297 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | 1287 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, |
| 1298 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | 1288 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, |
| @@ -1357,6 +1347,7 @@ private: | |||
| 1357 | &SPIRVDecompiler::WorkGroupId<1>, | 1347 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1358 | &SPIRVDecompiler::WorkGroupId<2>, | 1348 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1359 | }; | 1349 | }; |
| 1350 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1360 | 1351 | ||
| 1361 | const VKDevice& device; | 1352 | const VKDevice& device; |
| 1362 | const ShaderIR& ir; | 1353 | const ShaderIR& ir; |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 29c8895c5..afffd157f 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -46,12 +46,12 @@ void ShaderIR::Decode() { | |||
| 46 | coverage_end = shader_info.end; | 46 | coverage_end = shader_info.end; |
| 47 | if (shader_info.decompilable) { | 47 | if (shader_info.decompilable) { |
| 48 | disable_flow_stack = true; | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = ([this](NodeBlock& nodes, u32 label) { | 49 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { |
| 50 | if (label == exit_branch) { | 50 | if (label == exit_branch) { |
| 51 | return; | 51 | return; |
| 52 | } | 52 | } |
| 53 | basic_blocks.insert({label, nodes}); | 53 | basic_blocks.insert({label, nodes}); |
| 54 | }); | 54 | }; |
| 55 | const auto& blocks = shader_info.blocks; | 55 | const auto& blocks = shader_info.blocks; |
| 56 | NodeBlock current_block; | 56 | NodeBlock current_block; |
| 57 | u32 current_label = exit_branch; | 57 | u32 current_label = exit_branch; |
| @@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | |||
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 106 | const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { | 106 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |
| 107 | Node result = n; | 107 | Node result = n; |
| 108 | if (cond.cc != ConditionCode::T) { | 108 | if (cond.cc != ConditionCode::T) { |
| 109 | result = Conditional(GetConditionCode(cond.cc), {result}); | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| @@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | |||
| 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 118 | } | 118 | } |
| 119 | return result; | 119 | return result; |
| 120 | }); | 120 | }; |
| 121 | if (block.branch.address < 0) { | 121 | if (block.branch.address < 0) { |
| 122 | if (block.branch.kills) { | 122 | if (block.branch.kills) { |
| 123 | Node n = Operation(OperationCode::Discard); | 123 | Node n = Operation(OperationCode::Discard); |
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..ad180d6df 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 25 | 25 | ||
| 26 | Node op_b = [&]() { | 26 | Tegra::Shader::PredCondition cond{}; |
| 27 | switch (opcode->get().GetId()) { | 27 | bool h_and{}; |
| 28 | case OpCode::Id::HSETP2_R: | 28 | Node op_b{}; |
| 29 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 29 | switch (opcode->get().GetId()) { |
| 30 | instr.hsetp2.negate_b); | 30 | case OpCode::Id::HSETP2_C: |
| 31 | default: | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | UNREACHABLE(); | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | return Immediate(0); | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), |
| 34 | } | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | }(); | 35 | break; |
| 36 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | 36 | case OpCode::Id::HSETP2_IMM: |
| 37 | 37 | cond = instr.hsetp2.cbuf_and_imm.cond; | |
| 38 | // We can't use the constant predicate as destination. | 38 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 39 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 39 | op_b = UnpackHalfImmediate(instr, true); |
| 40 | 40 | break; | |
| 41 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 41 | case OpCode::Id::HSETP2_R: |
| 42 | cond = instr.hsetp2.reg.cond; | ||
| 43 | h_and = instr.hsetp2.reg.h_and; | ||
| 44 | op_b = | ||
| 45 | UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, | ||
| 46 | instr.hsetp2.reg.negate_b), | ||
| 47 | instr.hsetp2.reg.type_b); | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | UNREACHABLE(); | ||
| 51 | op_b = Immediate(0); | ||
| 52 | } | ||
| 42 | 53 | ||
| 43 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | 54 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |
| 44 | const OperationCode pair_combiner = | 55 | const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); |
| 45 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 46 | |||
| 47 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); | ||
| 48 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 49 | 56 | ||
| 50 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 57 | const auto Write = [&](u64 dest, Node src) { |
| 51 | const Node value = Operation(combiner, first_pred, second_pred); | 58 | SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); |
| 52 | SetPredicate(bb, instr.hsetp2.pred3, value); | 59 | }; |
| 53 | 60 | ||
| 54 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 61 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |
| 55 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | 62 | const u64 first = instr.hsetp2.pred0; |
| 56 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | 63 | const u64 second = instr.hsetp2.pred3; |
| 57 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | 64 | if (h_and) { |
| 65 | const Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 66 | Write(first, joined); | ||
| 67 | Write(second, Operation(OperationCode::LogicalNegate, joined)); | ||
| 68 | } else { | ||
| 69 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); | ||
| 70 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); | ||
| 58 | } | 71 | } |
| 59 | 72 | ||
| 60 | return pc; | 73 | return pc; |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 24f022cc0..77151a24b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image | |||
| 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, | 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, |
| 96 | Tegra::Shader::ImageType type) { | 96 | Tegra::Shader::ImageType type) { |
| 97 | const Node image_register{GetRegister(reg)}; | 97 | const Node image_register{GetRegister(reg)}; |
| 98 | const Node base_image{ | 98 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 100 | const auto cbuf{std::get_if<CbufNode>(&*base_image)}; | ||
| 101 | const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())}; | ||
| 102 | const auto cbuf_offset{cbuf_offset_imm->GetValue()}; | ||
| 103 | const auto cbuf_index{cbuf->GetIndex()}; | ||
| 104 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 100 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 105 | 101 | ||
| 106 | // If this image has already been used, return the existing mapping. | 102 | // If this image has already been used, return the existing mapping. |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..ed108bea8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 95 | const Node op_b = | 95 | const Node op_b = |
| 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |
| 97 | 97 | ||
| 98 | SetTemporal(bb, 0, op_a); | 98 | SetTemporary(bb, 0, op_a); |
| 99 | SetTemporal(bb, 1, op_b); | 99 | SetTemporary(bb, 1, op_b); |
| 100 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 100 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |
| 102 | break; | 102 | break; |
| 103 | } | 103 | } |
| 104 | default: | 104 | default: |
| @@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 136 | } | 136 | } |
| 137 | }(); | 137 | }(); |
| 138 | for (u32 i = 0; i < count; ++i) | 138 | for (u32 i = 0; i < count; ++i) |
| 139 | SetTemporal(bb, i, GetLmem(i * 4)); | 139 | SetTemporary(bb, i, GetLmem(i * 4)); |
| 140 | for (u32 i = 0; i < count; ++i) | 140 | for (u32 i = 0; i < count; ++i) |
| 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 142 | break; | 142 | break; |
| 143 | } | 143 | } |
| 144 | default: | 144 | default: |
| @@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 174 | 174 | ||
| 175 | SetTemporal(bb, i, gmem); | 175 | SetTemporary(bb, i, gmem); |
| 176 | } | 176 | } |
| 177 | for (u32 i = 0; i < count; ++i) { | 177 | for (u32 i = 0; i < count; ++i) { |
| 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 179 | } | 179 | } |
| 180 | break; | 180 | break; |
| 181 | } | 181 | } |
| @@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 253 | TrackAndGetGlobalMemory(bb, instr, true); | 253 | TrackAndGetGlobalMemory(bb, instr, true); |
| 254 | 254 | ||
| 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} |
| 256 | SetTemporal(bb, 0, real_address_base); | 256 | SetTemporary(bb, 0, real_address_base); |
| 257 | 257 | ||
| 258 | const u32 count = GetUniformTypeElementsCount(type); | 258 | const u32 count = GetUniformTypeElementsCount(type); |
| 259 | for (u32 i = 0; i < count; ++i) { | 259 | for (u32 i = 0; i < count; ++i) { |
| 260 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | 260 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); |
| 261 | } | 261 | } |
| 262 | for (u32 i = 0; i < count; ++i) { | 262 | for (u32 i = 0; i < count; ++i) { |
| 263 | const Node it_offset = Immediate(i * 4); | 263 | const Node it_offset = Immediate(i * 4); |
| @@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 267 | 267 | ||
| 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); |
| 269 | } | 269 | } |
| 270 | break; | 270 | break; |
| 271 | } | 271 | } |
| @@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB | |||
| 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 299 | 299 | ||
| 300 | const Node base_address{ | 300 | const auto [base_address, index, offset] = |
| 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 302 | const auto cbuf = std::get_if<CbufNode>(&*base_address); | 302 | ASSERT(base_address != nullptr); |
| 303 | ASSERT(cbuf != nullptr); | ||
| 304 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 305 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 306 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 307 | 303 | ||
| 308 | bb.push_back( | 304 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 309 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 310 | 305 | ||
| 311 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | 306 | const GlobalMemoryBase descriptor{index, offset}; |
| 312 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 307 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 313 | auto& usage = entry->second; | 308 | auto& usage = entry->second; |
| 314 | if (is_write) { | 309 | if (is_write) { |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 42e3de02f..c0f64d7a0 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -102,7 +102,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 102 | PRECISE, op_a, Immediate(3)); | 102 | PRECISE, op_a, Immediate(3)); |
| 103 | const Node operand = | 103 | const Node operand = |
| 104 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | 104 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); |
| 105 | branch = Operation(OperationCode::BranchIndirect, convert); | 105 | branch = Operation(OperationCode::BranchIndirect, operand); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 108 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 323be3f14..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 181 | const Node value = | 181 | const Node value = |
| 182 | Operation(OperationCode::TextureQueryDimensions, meta, | 182 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| 184 | SetTemporal(bb, indexer++, value); | 184 | SetTemporary(bb, indexer++, value); |
| 185 | } | 185 | } |
| 186 | for (u32 i = 0; i < indexer; ++i) { | 186 | for (u32 i = 0; i < indexer; ++i) { |
| 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 188 | } | 188 | } |
| 189 | break; | 189 | break; |
| 190 | } | 190 | } |
| @@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 238 | auto params = coords; | 238 | auto params = coords; |
| 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 241 | SetTemporal(bb, indexer++, value); | 241 | SetTemporary(bb, indexer++, value); |
| 242 | } | 242 | } |
| 243 | for (u32 i = 0; i < indexer; ++i) { | 243 | for (u32 i = 0; i < indexer; ++i) { |
| 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| @@ -308,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, |
| 309 | bool is_array, bool is_shadow) { | 309 | bool is_array, bool is_shadow) { |
| 310 | const Node sampler_register = GetRegister(reg); | 310 | const Node sampler_register = GetRegister(reg); |
| 311 | const Node base_sampler = | 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 313 | const auto cbuf = std::get_if<CbufNode>(&*base_sampler); | 313 | ASSERT(base_sampler != nullptr); |
| 314 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 315 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 316 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 317 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 318 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 319 | 315 | ||
| 320 | // If this sampler has already been used, return the existing mapping. | 316 | // If this sampler has already been used, return the existing mapping. |
| @@ -340,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 340 | // Skip disabled components | 336 | // Skip disabled components |
| 341 | continue; | 337 | continue; |
| 342 | } | 338 | } |
| 343 | SetTemporal(bb, dest_elem++, components[elem]); | 339 | SetTemporary(bb, dest_elem++, components[elem]); |
| 344 | } | 340 | } |
| 345 | // After writing values in temporals, move them to the real registers | 341 | // After writing values in temporals, move them to the real registers |
| 346 | for (u32 i = 0; i < dest_elem; ++i) { | 342 | for (u32 i = 0; i < dest_elem; ++i) { |
| 347 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 348 | } | 344 | } |
| 349 | } | 345 | } |
| 350 | 346 | ||
| @@ -357,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | |||
| 357 | for (u32 component = 0; component < 4; ++component) { | 353 | for (u32 component = 0; component < 4; ++component) { |
| 358 | if (!instr.texs.IsComponentEnabled(component)) | 354 | if (!instr.texs.IsComponentEnabled(component)) |
| 359 | continue; | 355 | continue; |
| 360 | SetTemporal(bb, dest_elem++, components[component]); | 356 | SetTemporary(bb, dest_elem++, components[component]); |
| 361 | } | 357 | } |
| 362 | 358 | ||
| 363 | for (u32 i = 0; i < dest_elem; ++i) { | 359 | for (u32 i = 0; i < dest_elem; ++i) { |
| 364 | if (i < 2) { | 360 | if (i < 2) { |
| 365 | // Write the first two swizzle components to gpr0 and gpr0+1 | 361 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 366 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |
| 367 | } else { | 363 | } else { |
| 368 | ASSERT(instr.texs.HasTwoDestinations()); | 364 | ASSERT(instr.texs.HasTwoDestinations()); |
| 369 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 370 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |
| 371 | } | 367 | } |
| 372 | } | 368 | } |
| 373 | } | 369 | } |
| @@ -395,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 395 | return; | 391 | return; |
| 396 | } | 392 | } |
| 397 | 393 | ||
| 398 | SetTemporal(bb, 0, first_value); | 394 | SetTemporary(bb, 0, first_value); |
| 399 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | 395 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |
| 400 | 396 | ||
| 401 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 397 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 402 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | 398 | SetRegister(bb, instr.gpr28, GetTemporary(1)); |
| 403 | } | 399 | } |
| 404 | 400 | ||
| 405 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 73 | if (is_psl) { | 73 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); |
| 75 | } | 75 | } |
| 76 | SetTemporal(bb, 0, product); | 76 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporal(0); | 77 | product = GetTemporary(0); |
| 78 | 78 | ||
| 79 | const Node original_c = op_c; | 79 | const Node original_c = op_c; |
| 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error |
| @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 98 | } | 98 | } |
| 99 | }(); | 99 | }(); |
| 100 | 100 | ||
| 101 | SetTemporal(bb, 1, op_c); | 101 | SetTemporary(bb, 1, op_c); |
| 102 | op_c = GetTemporal(1); | 102 | op_c = GetTemporary(1); |
| 103 | 103 | ||
104 | // TODO(Rodrigo): Use an appropriate sign for this operation | 104 | // TODO(Rodrigo): Use an appropriate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); |
| 106 | SetTemporal(bb, 2, sum); | 106 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporal(2); | 107 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 108 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 109 | const Node a = BitfieldExtract(sum, 0, 16); |
| 110 | const Node b = | 110 | const Node b = |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 7427ed896..715184d67 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -101,8 +101,7 @@ enum class OperationCode { | |||
| 101 | LogicalXor, /// (bool a, bool b) -> bool | 101 | LogicalXor, /// (bool a, bool b) -> bool |
| 102 | LogicalNegate, /// (bool a) -> bool | 102 | LogicalNegate, /// (bool a) -> bool |
| 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool | 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool |
| 104 | LogicalAll2, /// (bool2 a) -> bool | 104 | LogicalAnd2, /// (bool2 a) -> bool |
| 105 | LogicalAny2, /// (bool2 a) -> bool | ||
| 106 | 105 | ||
| 107 | LogicalFLessThan, /// (float a, float b) -> bool | 106 | LogicalFLessThan, /// (float a, float b) -> bool |
| 108 | LogicalFEqual, /// (float a, float b) -> bool | 107 | LogicalFEqual, /// (float a, float b) -> bool |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 6fccbbba3..b3dcd291c 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | 14 | Node Conditional(Node condition, std::vector<Node> code) { |
| 15 | return MakeNode<ConditionalNode>(condition, std::move(code)); | 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | Node Comment(std::string text) { | 18 | Node Comment(std::string text) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index caa409788..5e91fe129 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 64 | Node final_offset = [&] { |
| 65 | return MakeNode<CbufNode>(index, final_offset); | 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow |
| 66 | // tracking LDC calls. | ||
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 68 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 69 | return Immediate(offset); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 73 | }(); | ||
| 74 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |
| @@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 80 | 89 | ||
| 81 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 82 | used_input_attributes.emplace(index); | 91 | used_input_attributes.emplace(index); |
| 83 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 84 | } | 93 | } |
| 85 | 94 | ||
| 86 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |
| @@ -113,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 113 | } | 122 | } |
| 114 | used_output_attributes.insert(index); | 123 | used_output_attributes.insert(index); |
| 115 | 124 | ||
| 116 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 117 | } | 126 | } |
| 118 | 127 | ||
| 119 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { |
| @@ -125,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | |||
| 125 | } | 134 | } |
| 126 | 135 | ||
| 127 | Node ShaderIR::GetLocalMemory(Node address) { | 136 | Node ShaderIR::GetLocalMemory(Node address) { |
| 128 | return MakeNode<LmemNode>(address); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 129 | } | 138 | } |
| 130 | 139 | ||
| 131 | Node ShaderIR::GetTemporal(u32 id) { | 140 | Node ShaderIR::GetTemporary(u32 id) { |
| 132 | return GetRegister(Register::ZeroIndex + 1 + id); | 141 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 133 | } | 142 | } |
| 134 | 143 | ||
| 135 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 136 | if (absolute) { | 145 | if (absolute) { |
| 137 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |
| 138 | } | 147 | } |
| 139 | if (negate) { | 148 | if (negate) { |
| 140 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |
| 141 | } | 150 | } |
| 142 | return value; | 151 | return value; |
| 143 | } | 152 | } |
| @@ -146,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | |||
| 146 | if (!saturate) { | 155 | if (!saturate) { |
| 147 | return value; | 156 | return value; |
| 148 | } | 157 | } |
| 149 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 158 | |
| 150 | const Node positive_one = Immediate(1.0f); | 159 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 151 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | 160 | Node positive_one = Immediate(1.0f); |
| 161 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 162 | std::move(positive_one)); | ||
| 152 | } | 163 | } |
| 153 | 164 | ||
| 154 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | 165 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |
| 155 | switch (size) { | 166 | switch (size) { |
| 156 | case Register::Size::Byte: | 167 | case Register::Size::Byte: |
| 157 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 168 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 158 | Immediate(24)); | 169 | std::move(value), Immediate(24)); |
| 159 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 170 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 160 | Immediate(24)); | 171 | std::move(value), Immediate(24)); |
| 161 | return value; | 172 | return value; |
| 162 | case Register::Size::Short: | 173 | case Register::Size::Short: |
| 163 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 174 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 164 | Immediate(16)); | 175 | std::move(value), Immediate(16)); |
| 165 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 176 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 166 | Immediate(16)); | 177 | std::move(value), Immediate(16)); |
| 167 | case Register::Size::Word: | 178 | case Register::Size::Word: |
| 168 | // Default - do nothing | 179 | // Default - do nothing |
| 169 | return value; | 180 | return value; |
| @@ -179,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b | |||
| 179 | return value; | 190 | return value; |
| 180 | } | 191 | } |
| 181 | if (absolute) { | 192 | if (absolute) { |
| 182 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | 193 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |
| 183 | } | 194 | } |
| 184 | if (negate) { | 195 | if (negate) { |
| 185 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | 196 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |
| 186 | } | 197 | } |
| 187 | return value; | 198 | return value; |
| 188 | } | 199 | } |
| 189 | 200 | ||
| 190 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | 201 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |
| 191 | const Node value = Immediate(instr.half_imm.PackImmediates()); | 202 | Node value = Immediate(instr.half_imm.PackImmediates()); |
| 192 | if (!has_negation) { | 203 | if (!has_negation) { |
| 193 | return value; | 204 | return value; |
| 194 | } | 205 | } |
| 195 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 196 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 197 | 206 | ||
| 198 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); | 207 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 208 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 209 | |||
| 210 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 211 | std::move(second_negate)); | ||
| 199 | } | 212 | } |
| 200 | 213 | ||
| 201 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | 214 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |
| 202 | return Operation(OperationCode::HUnpack, type, value); | 215 | return Operation(OperationCode::HUnpack, type, std::move(value)); |
| 203 | } | 216 | } |
| 204 | 217 | ||
| 205 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 218 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -207,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 207 | case Tegra::Shader::HalfMerge::H0_H1: | 220 | case Tegra::Shader::HalfMerge::H0_H1: |
| 208 | return src; | 221 | return src; |
| 209 | case Tegra::Shader::HalfMerge::F32: | 222 | case Tegra::Shader::HalfMerge::F32: |
| 210 | return Operation(OperationCode::HMergeF32, src); | 223 | return Operation(OperationCode::HMergeF32, std::move(src)); |
| 211 | case Tegra::Shader::HalfMerge::Mrg_H0: | 224 | case Tegra::Shader::HalfMerge::Mrg_H0: |
| 212 | return Operation(OperationCode::HMergeH0, dest, src); | 225 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |
| 213 | case Tegra::Shader::HalfMerge::Mrg_H1: | 226 | case Tegra::Shader::HalfMerge::Mrg_H1: |
| 214 | return Operation(OperationCode::HMergeH1, dest, src); | 227 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |
| 215 | } | 228 | } |
| 216 | UNREACHABLE(); | 229 | UNREACHABLE(); |
| 217 | return src; | 230 | return src; |
| @@ -219,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 219 | 232 | ||
| 220 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 233 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 221 | if (absolute) { | 234 | if (absolute) { |
| 222 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); | 235 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |
| 223 | } | 236 | } |
| 224 | if (negate) { | 237 | if (negate) { |
| 225 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), | 238 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |
| 226 | GetPredicate(true)); | 239 | GetPredicate(true)); |
| 227 | } | 240 | } |
| 228 | return value; | 241 | return value; |
| @@ -232,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 232 | if (!saturate) { | 245 | if (!saturate) { |
| 233 | return value; | 246 | return value; |
| 234 | } | 247 | } |
| 235 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 248 | |
| 236 | const Node positive_one = Immediate(1.0f); | 249 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 237 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | 250 | Node positive_one = Immediate(1.0f); |
| 251 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 252 | std::move(positive_one)); | ||
| 238 | } | 253 | } |
| 239 | 254 | ||
| 240 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 255 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| @@ -262,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 262 | condition == PredCondition::LessEqualWithNan || | 277 | condition == PredCondition::LessEqualWithNan || |
| 263 | condition == PredCondition::GreaterThanWithNan || | 278 | condition == PredCondition::GreaterThanWithNan || |
| 264 | condition == PredCondition::GreaterEqualWithNan) { | 279 | condition == PredCondition::GreaterEqualWithNan) { |
| 265 | |||
| 266 | predicate = Operation(OperationCode::LogicalOr, predicate, | 280 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| 267 | Operation(OperationCode::LogicalFIsNan, op_a)); | 281 | Operation(OperationCode::LogicalFIsNan, op_a)); |
| 268 | predicate = Operation(OperationCode::LogicalOr, predicate, | 282 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| @@ -291,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 291 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 292 | "Unknown predicate comparison operation"); | 306 | "Unknown predicate comparison operation"); |
| 293 | 307 | ||
| 294 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | 308 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| 309 | std::move(op_b)); | ||
| 295 | 310 | ||
| 296 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | 311 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || |
| 297 | condition == PredCondition::NotEqualWithNan || | 312 | condition == PredCondition::NotEqualWithNan || |
| @@ -321,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition | |||
| 321 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 336 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 322 | "Unknown predicate comparison operation"); | 337 | "Unknown predicate comparison operation"); |
| 323 | 338 | ||
| 324 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 339 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 325 | |||
| 326 | return predicate; | ||
| 327 | } | 340 | } |
| 328 | 341 | ||
| 329 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 342 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| @@ -349,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | |||
| 349 | } | 362 | } |
| 350 | 363 | ||
| 351 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | 364 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |
| 352 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | 365 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |
| 353 | } | 366 | } |
| 354 | 367 | ||
| 355 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | 368 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |
| 356 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |
| 357 | } | 370 | } |
| 358 | 371 | ||
| 359 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | 372 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |
| 360 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | 373 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |
| 361 | } | 374 | } |
| 362 | 375 | ||
| 363 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | 376 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |
| 364 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 377 | bb.push_back( |
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 365 | } | 379 | } |
| 366 | 380 | ||
| 367 | void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { | 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 368 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 369 | } | 383 | } |
| 370 | 384 | ||
| 371 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | 385 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |
| 372 | if (!sets_cc) { | 386 | if (!sets_cc) { |
| 373 | return; | 387 | return; |
| 374 | } | 388 | } |
| 375 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | 389 | Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); |
| 376 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 390 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 377 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 391 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 378 | } | 392 | } |
| 379 | 393 | ||
| @@ -381,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ | |||
| 381 | if (!sets_cc) { | 395 | if (!sets_cc) { |
| 382 | return; | 396 | return; |
| 383 | } | 397 | } |
| 384 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | 398 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |
| 385 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 399 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 386 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 400 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 387 | } | 401 | } |
| 388 | 402 | ||
| 389 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 403 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 390 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 404 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |
| 391 | Immediate(bits)); | 405 | Immediate(offset), Immediate(bits)); |
| 392 | } | 406 | } |
| 393 | 407 | ||
| 394 | } // namespace VideoCommon::Shader | 408 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 03c888def..59a083d90 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -5,13 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 9 | #include <map> | 8 | #include <map> |
| 10 | #include <optional> | 9 | #include <optional> |
| 11 | #include <set> | 10 | #include <set> |
| 12 | #include <string> | ||
| 13 | #include <tuple> | 11 | #include <tuple> |
| 14 | #include <variant> | ||
| 15 | #include <vector> | 12 | #include <vector> |
| 16 | 13 | ||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -210,8 +207,8 @@ private: | |||
| 210 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 207 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 211 | /// Generates a node representing a local memory address | 208 | /// Generates a node representing a local memory address |
| 212 | Node GetLocalMemory(Node address); | 209 | Node GetLocalMemory(Node address); |
| 213 | /// Generates a temporal, internally it uses a post-RZ register | 210 | /// Generates a temporary, internally it uses a post-RZ register |
| 214 | Node GetTemporal(u32 id); | 211 | Node GetTemporary(u32 id); |
| 215 | 212 | ||
| 216 | /// Sets a register. src value must be a number-evaluated node. | 213 | /// Sets a register. src value must be a number-evaluated node. |
| 217 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | 214 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -221,8 +218,8 @@ private: | |||
| 221 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 218 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 222 | /// Sets a local memory address. address and value must be a number-evaluated node | 219 | /// Sets a local memory address. address and value must be a number-evaluated node |
| 223 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 220 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 224 | /// Sets a temporal. Internally it uses a post-RZ register | 221 | /// Sets a temporary. Internally it uses a post-RZ register |
| 225 | void SetTemporal(NodeBlock& bb, u32 id, Node value); | 222 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 226 | 223 | ||
| 227 | /// Sets internal flags from a float | 224 | /// Sets internal flags from a float |
| 228 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | 225 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |
| @@ -328,7 +325,7 @@ private: | |||
| 328 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 325 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 329 | Node op_c, Node imm_lut, bool sets_cc); | 326 | Node op_c, Node imm_lut, bool sets_cc); |
| 330 | 327 | ||
| 331 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 328 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 332 | 329 | ||
| 333 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 330 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 334 | 331 | ||
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index fc957d980..a53e02253 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -15,56 +15,63 @@ namespace { | |||
| 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 16 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 17 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 18 | const Node node = code.at(cursor); | 18 | Node node = code.at(cursor); |
| 19 | |||
| 19 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 20 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 20 | if (operation->GetCode() == operation_code) { | 21 | if (operation->GetCode() == operation_code) { |
| 21 | return {node, cursor}; | 22 | return {std::move(node), cursor}; |
| 22 | } | 23 | } |
| 23 | } | 24 | } |
| 25 | |||
| 24 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 26 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 25 | const auto& conditional_code = conditional->GetCode(); | 27 | const auto& conditional_code = conditional->GetCode(); |
| 26 | const auto [found, internal_cursor] = FindOperation( | 28 | auto [found, internal_cursor] = FindOperation( |
| 27 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | 29 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |
| 28 | if (found) { | 30 | if (found) { |
| 29 | return {found, cursor}; | 31 | return {std::move(found), cursor}; |
| 30 | } | 32 | } |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
| 33 | return {}; | 35 | return {}; |
| 34 | } | 36 | } |
| 35 | } // namespace | 37 | } // Anonymous namespace |
| 36 | 38 | ||
| 37 | Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { | 39 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 40 | s64 cursor) const { | ||
| 38 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 41 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 39 | // Cbuf found, but it has to be immediate | 42 | // Constant buffer found, test if it's an immediate |
| 40 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | 43 | const auto offset = cbuf->GetOffset(); |
| 44 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 45 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 46 | } | ||
| 47 | return {}; | ||
| 41 | } | 48 | } |
| 42 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | 49 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |
| 43 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 50 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 44 | return nullptr; | 51 | return {}; |
| 45 | } | 52 | } |
| 46 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | 53 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same |
| 47 | // register that it uses as operand | 54 | // register that it uses as operand |
| 48 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 55 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |
| 49 | if (!source) { | 56 | if (!source) { |
| 50 | return nullptr; | 57 | return {}; |
| 51 | } | 58 | } |
| 52 | return TrackCbuf(source, code, new_cursor); | 59 | return TrackCbuf(source, code, new_cursor); |
| 53 | } | 60 | } |
| 54 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 61 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 55 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | 62 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { |
| 56 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | 63 | if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) { |
| 57 | // Cbuf found in operand | 64 | // Cbuf found in operand. |
| 58 | return found; | 65 | return found; |
| 59 | } | 66 | } |
| 60 | } | 67 | } |
| 61 | return nullptr; | 68 | return {}; |
| 62 | } | 69 | } |
| 63 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | 70 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |
| 64 | const auto& conditional_code = conditional->GetCode(); | 71 | const auto& conditional_code = conditional->GetCode(); |
| 65 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | 72 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |
| 66 | } | 73 | } |
| 67 | return nullptr; | 74 | return {}; |
| 68 | } | 75 | } |
| 69 | 76 | ||
| 70 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | 77 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7f9623c62..a3a3770a7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -274,10 +274,11 @@ protected: | |||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | 274 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 275 | const u32 index = surface->GetRenderTarget(); | 275 | const u32 index = surface->GetRenderTarget(); |
| 276 | if (index == DEPTH_RT) { | 276 | if (index == DEPTH_RT) { |
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | 277 | maxwell3d.dirty.depth_buffer = true; |
| 278 | } else { | 278 | } else { |
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | 279 | maxwell3d.dirty.render_target[index] = true; |
| 280 | } | 280 | } |
| 281 | maxwell3d.dirty.render_settings = true; | ||
| 281 | } | 282 | } |
| 282 | 283 | ||
| 283 | void Register(TSurface surface) { | 284 | void Register(TSurface surface) { |