diff options
Diffstat (limited to 'src')
40 files changed, 1188 insertions, 432 deletions
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index db3ab14ce..92169a97b 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 186 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 187 | // The kernel always ensures that the given stack size is page aligned. | 187 | AllocateMainThreadStack(stack_size); |
| 188 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 188 | tls_region_address = CreateTLSRegion(); |
| 189 | |||
| 190 | // Allocate and map the main thread stack | ||
| 191 | // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part | ||
| 192 | // of the user address space. | ||
| 193 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 194 | vm_manager | ||
| 195 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 196 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 197 | .Unwrap(); | ||
| 198 | 189 | ||
| 199 | vm_manager.LogLayout(); | 190 | vm_manager.LogLayout(); |
| 191 | |||
| 200 | ChangeStatus(ProcessStatus::Running); | 192 | ChangeStatus(ProcessStatus::Running); |
| 201 | 193 | ||
| 202 | SetupMainThread(*this, kernel, main_thread_priority); | 194 | SetupMainThread(*this, kernel, main_thread_priority); |
| @@ -226,6 +218,9 @@ void Process::PrepareForTermination() { | |||
| 226 | stop_threads(system.Scheduler(2).GetThreadList()); | 218 | stop_threads(system.Scheduler(2).GetThreadList()); |
| 227 | stop_threads(system.Scheduler(3).GetThreadList()); | 219 | stop_threads(system.Scheduler(3).GetThreadList()); |
| 228 | 220 | ||
| 221 | FreeTLSRegion(tls_region_address); | ||
| 222 | tls_region_address = 0; | ||
| 223 | |||
| 229 | ChangeStatus(ProcessStatus::Exited); | 224 | ChangeStatus(ProcessStatus::Exited); |
| 230 | } | 225 | } |
| 231 | 226 | ||
| @@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) { | |||
| 325 | WakeupAllWaitingThreads(); | 320 | WakeupAllWaitingThreads(); |
| 326 | } | 321 | } |
| 327 | 322 | ||
| 323 | void Process::AllocateMainThreadStack(u64 stack_size) { | ||
| 324 | // The kernel always ensures that the given stack size is page aligned. | ||
| 325 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | ||
| 326 | |||
| 327 | // Allocate and map the main thread stack | ||
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | ||
| 329 | vm_manager | ||
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | ||
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | ||
| 332 | .Unwrap(); | ||
| 333 | } | ||
| 334 | |||
| 328 | } // namespace Kernel | 335 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 3196014da..c2df451f3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -135,6 +135,11 @@ public: | |||
| 135 | return mutex; | 135 | return mutex; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// Gets the address to the process' dedicated TLS region. | ||
| 139 | VAddr GetTLSRegionAddress() const { | ||
| 140 | return tls_region_address; | ||
| 141 | } | ||
| 142 | |||
| 138 | /// Gets the current status of the process | 143 | /// Gets the current status of the process |
| 139 | ProcessStatus GetStatus() const { | 144 | ProcessStatus GetStatus() const { |
| 140 | return status; | 145 | return status; |
| @@ -296,6 +301,9 @@ private: | |||
| 296 | /// a process signal. | 301 | /// a process signal. |
| 297 | void ChangeStatus(ProcessStatus new_status); | 302 | void ChangeStatus(ProcessStatus new_status); |
| 298 | 303 | ||
| 304 | /// Allocates the main thread stack for the process, given the stack size in bytes. | ||
| 305 | void AllocateMainThreadStack(u64 stack_size); | ||
| 306 | |||
| 299 | /// Memory manager for this process. | 307 | /// Memory manager for this process. |
| 300 | Kernel::VMManager vm_manager; | 308 | Kernel::VMManager vm_manager; |
| 301 | 309 | ||
| @@ -358,6 +366,9 @@ private: | |||
| 358 | /// variable related facilities. | 366 | /// variable related facilities. |
| 359 | Mutex mutex; | 367 | Mutex mutex; |
| 360 | 368 | ||
| 369 | /// Address indicating the location of the process' dedicated TLS region. | ||
| 370 | VAddr tls_region_address = 0; | ||
| 371 | |||
| 361 | /// Random values for svcGetInfo RandomEntropy | 372 | /// Random values for svcGetInfo RandomEntropy |
| 362 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; | 373 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{}; |
| 363 | 374 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 0687839ff..1fd1a732a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 843 | return RESULT_SUCCESS; | 843 | return RESULT_SUCCESS; |
| 844 | 844 | ||
| 845 | case GetInfoType::UserExceptionContextAddr: | 845 | case GetInfoType::UserExceptionContextAddr: |
| 846 | LOG_WARNING(Kernel_SVC, | 846 | *result = process->GetTLSRegionAddress(); |
| 847 | "(STUBBED) Attempted to query user exception context address, returned 0"); | ||
| 848 | *result = 0; | ||
| 849 | return RESULT_SUCCESS; | 847 | return RESULT_SUCCESS; |
| 850 | 848 | ||
| 851 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: | 849 | case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { | |||
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 24 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 25 | gpu.Maxwell3D().dirty.OnMemoryWrite(); |
| 26 | 26 | ||
| 27 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 28 | 28 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 37 | const bool is_last_call = method_call.IsLastCall(); | 37 | const bool is_last_call = method_call.IsLastCall(); |
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | 38 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 39 | if (is_last_call) { | 39 | if (is_last_call) { |
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 40 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 41 | } | 41 | } |
| 42 | break; | 42 | break; |
| 43 | } | 43 | } |
| @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 52 | void KeplerCompute::ProcessLaunch() { |
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 53 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 54 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 55 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 56 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 57 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 58 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 59 | |||
| 60 | rasterizer.DispatchCompute(code_addr); | ||
| 60 | } | 61 | } |
| 61 | 62 | ||
| 62 | } // namespace Tegra::Engines | 63 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | 35 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 36 | if (is_last_call) { | 36 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 37 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 38 | } | 38 | } |
| 39 | break; | 39 | break; |
| 40 | } | 40 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..74c46ec04 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste | |||
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitDirtySettings(); | ||
| 25 | InitializeRegisterDefaults(); | 26 | InitializeRegisterDefaults(); |
| 26 | } | 27 | } |
| 27 | 28 | ||
| @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 69 | regs.stencil_back_func_mask = 0xFFFFFFFF; | 70 | regs.stencil_back_func_mask = 0xFFFFFFFF; |
| 70 | regs.stencil_back_mask = 0xFFFFFFFF; | 71 | regs.stencil_back_mask = 0xFFFFFFFF; |
| 71 | 72 | ||
| 73 | regs.depth_test_func = Regs::ComparisonOp::Always; | ||
| 74 | regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; | ||
| 75 | regs.cull.cull_face = Regs::Cull::CullFace::Back; | ||
| 76 | |||
| 72 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a | 77 | // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a |
| 73 | // register carrying a default value. Assume it's OpenGL's default (1). | 78 | // register carrying a default value. Assume it's OpenGL's default (1). |
| 74 | regs.point_size = 1.0f; | 79 | regs.point_size = 1.0f; |
| @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 86 | regs.rt_separate_frag_data = 1; | 91 | regs.rt_separate_frag_data = 1; |
| 87 | } | 92 | } |
| 88 | 93 | ||
| 94 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | ||
| 95 | |||
| 96 | void Maxwell3D::InitDirtySettings() { | ||
| 97 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | ||
| 98 | const auto start_itr = dirty_pointers.begin() + start; | ||
| 99 | const auto end_itr = start_itr + range; | ||
| 100 | std::fill(start_itr, end_itr, position); | ||
| 101 | }; | ||
| 102 | dirty.regs.fill(true); | ||
| 103 | |||
| 104 | // Init Render Targets | ||
| 105 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | ||
| 106 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | ||
| 107 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | ||
| 108 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | ||
| 109 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | ||
| 110 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | ||
| 111 | rt_dirty_reg++; | ||
| 112 | } | ||
| 113 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | ||
| 114 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | ||
| 115 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; | ||
| 116 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; | ||
| 117 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | ||
| 118 | constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); | ||
| 119 | set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); | ||
| 120 | |||
| 121 | // Init Vertex Arrays | ||
| 122 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | ||
| 123 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | ||
| 124 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | ||
| 125 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 126 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 127 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | ||
| 128 | vertex_reg += vertex_array_size) { | ||
| 129 | set_block(vertex_reg, 3, va_reg); | ||
| 130 | // The divisor concerns vertex array instances | ||
| 131 | dirty_pointers[vertex_reg + 3] = vi_reg; | ||
| 132 | va_reg++; | ||
| 133 | vi_reg++; | ||
| 134 | } | ||
| 135 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | ||
| 136 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | ||
| 137 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | ||
| 138 | va_reg = DIRTY_REGS_POS(vertex_array); | ||
| 139 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | ||
| 140 | vertex_reg += vertex_limit_size) { | ||
| 141 | set_block(vertex_reg, vertex_limit_size, va_reg); | ||
| 142 | va_reg++; | ||
| 143 | } | ||
| 144 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | ||
| 145 | constexpr u32 vertex_instance_size = | ||
| 146 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | ||
| 147 | constexpr u32 vertex_instance_end = | ||
| 148 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | ||
| 149 | vi_reg = DIRTY_REGS_POS(vertex_instance); | ||
| 150 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | ||
| 151 | vertex_reg += vertex_instance_size) { | ||
| 152 | set_block(vertex_reg, vertex_instance_size, vi_reg); | ||
| 153 | vi_reg++; | ||
| 154 | } | ||
| 155 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | ||
| 156 | DIRTY_REGS_POS(vertex_attrib_format)); | ||
| 157 | |||
| 158 | // Init Shaders | ||
| 159 | constexpr u32 shader_registers_count = | ||
| 160 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 161 | set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, | ||
| 162 | DIRTY_REGS_POS(shaders)); | ||
| 163 | |||
| 164 | // State | ||
| 165 | |||
| 166 | // Viewport | ||
| 167 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | ||
| 168 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | ||
| 169 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | ||
| 170 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | ||
| 171 | constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); | ||
| 172 | constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); | ||
| 173 | set_block(view_volume_start, view_volume_size, viewport_dirty_reg); | ||
| 174 | |||
| 175 | // Viewport transformation | ||
| 176 | constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); | ||
| 177 | constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); | ||
| 178 | set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); | ||
| 179 | |||
| 180 | // Cullmode | ||
| 181 | constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); | ||
| 182 | constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); | ||
| 183 | set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); | ||
| 184 | |||
| 185 | // Screen y control | ||
| 186 | dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); | ||
| 187 | |||
| 188 | // Primitive Restart | ||
| 189 | constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); | ||
| 190 | constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); | ||
| 191 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | ||
| 192 | |||
| 193 | // Depth Test | ||
| 194 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | ||
| 195 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | ||
| 196 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | ||
| 197 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | ||
| 198 | |||
| 199 | // Stencil Test | ||
| 200 | constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); | ||
| 201 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; | ||
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; | ||
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; | ||
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; | ||
| 205 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; | ||
| 206 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; | ||
| 207 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; | ||
| 208 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; | ||
| 209 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; | ||
| 210 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; | ||
| 211 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; | ||
| 212 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; | ||
| 213 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; | ||
| 214 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; | ||
| 215 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; | ||
| 216 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | ||
| 217 | |||
| 218 | // Color Mask | ||
| 219 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | ||
| 220 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | ||
| 221 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | ||
| 222 | color_mask_dirty_reg); | ||
| 223 | // Blend State | ||
| 224 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | ||
| 225 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | ||
| 226 | blend_state_dirty_reg); | ||
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | ||
| 228 | set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); | ||
| 229 | set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), | ||
| 230 | blend_state_dirty_reg); | ||
| 231 | |||
| 232 | // Scissor State | ||
| 233 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | ||
| 234 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | ||
| 235 | scissor_test_dirty_reg); | ||
| 236 | |||
| 237 | // Polygon Offset | ||
| 238 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | ||
| 239 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | ||
| 240 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | ||
| 241 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | ||
| 242 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; | ||
| 243 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; | ||
| 244 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | ||
| 245 | } | ||
| 246 | |||
| 89 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 247 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 90 | // Reset the current macro. | 248 | // Reset the current macro. |
| 91 | executing_macro = 0; | 249 | executing_macro = 0; |
| @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 108 | 266 | ||
| 109 | const u32 method = method_call.method; | 267 | const u32 method = method_call.method; |
| 110 | 268 | ||
| 269 | if (method == cb_data_state.current) { | ||
| 270 | regs.reg_array[method] = method_call.argument; | ||
| 271 | ProcessCBData(method_call.argument); | ||
| 272 | return; | ||
| 273 | } else if (cb_data_state.current != null_cb_data) { | ||
| 274 | FinishCBData(); | ||
| 275 | } | ||
| 276 | |||
| 111 | // It is an error to write to a register other than the current macro's ARG register before it | 277 | // It is an error to write to a register other than the current macro's ARG register before it |
| 112 | // has finished execution. | 278 | // has finished execution. |
| 113 | if (executing_macro != 0) { | 279 | if (executing_macro != 0) { |
| @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 143 | 309 | ||
| 144 | if (regs.reg_array[method] != method_call.argument) { | 310 | if (regs.reg_array[method] != method_call.argument) { |
| 145 | regs.reg_array[method] = method_call.argument; | 311 | regs.reg_array[method] = method_call.argument; |
| 146 | // Color buffers | 312 | const std::size_t dirty_reg = dirty_pointers[method]; |
| 147 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 313 | if (dirty_reg) { |
| 148 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 314 | dirty.regs[dirty_reg] = true; |
| 149 | if (method >= first_rt_reg && | 315 | if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && |
| 150 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 316 | dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { |
| 151 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; | 317 | dirty.vertex_array_buffers = true; |
| 152 | dirty_flags.color_buffer.set(rt_index); | 318 | } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && |
| 153 | } | 319 | dirty_reg < DIRTY_REGS_POS(vertex_instances)) { |
| 154 | 320 | dirty.vertex_instances = true; | |
| 155 | // Zeta buffer | 321 | } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && |
| 156 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 322 | dirty_reg < DIRTY_REGS_POS(render_settings)) { |
| 157 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || | 323 | dirty.render_settings = true; |
| 158 | method == MAXWELL3D_REG_INDEX(zeta_width) || | 324 | } |
| 159 | method == MAXWELL3D_REG_INDEX(zeta_height) || | ||
| 160 | (method >= MAXWELL3D_REG_INDEX(zeta) && | ||
| 161 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | ||
| 162 | dirty_flags.zeta_buffer = true; | ||
| 163 | } | ||
| 164 | |||
| 165 | // Shader | ||
| 166 | constexpr u32 shader_registers_count = | ||
| 167 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | ||
| 168 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | ||
| 169 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | ||
| 170 | dirty_flags.shaders = true; | ||
| 171 | } | ||
| 172 | |||
| 173 | // Vertex format | ||
| 174 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | ||
| 175 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | ||
| 177 | } | ||
| 178 | |||
| 179 | // Vertex buffer | ||
| 180 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 181 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { | ||
| 182 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 183 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 184 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { | ||
| 185 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 186 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 187 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { | ||
| 188 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 189 | } | 325 | } |
| 190 | } | 326 | } |
| 191 | 327 | ||
| @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 214 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | 350 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): |
| 215 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | 351 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): |
| 216 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | 352 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { |
| 217 | ProcessCBData(method_call.argument); | 353 | StartCBData(method); |
| 218 | break; | 354 | break; |
| 219 | } | 355 | } |
| 220 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { | 356 | case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { |
| @@ -249,6 +385,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 249 | ProcessQueryGet(); | 385 | ProcessQueryGet(); |
| 250 | break; | 386 | break; |
| 251 | } | 387 | } |
| 388 | case MAXWELL3D_REG_INDEX(condition.mode): { | ||
| 389 | ProcessQueryCondition(); | ||
| 390 | break; | ||
| 391 | } | ||
| 252 | case MAXWELL3D_REG_INDEX(sync_info): { | 392 | case MAXWELL3D_REG_INDEX(sync_info): { |
| 253 | ProcessSyncPoint(); | 393 | ProcessSyncPoint(); |
| 254 | break; | 394 | break; |
| @@ -261,7 +401,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 261 | const bool is_last_call = method_call.IsLastCall(); | 401 | const bool is_last_call = method_call.IsLastCall(); |
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | 402 | upload_state.ProcessData(method_call.argument, is_last_call); |
| 263 | if (is_last_call) { | 403 | if (is_last_call) { |
| 264 | dirty_flags.OnMemoryWrite(); | 404 | dirty.OnMemoryWrite(); |
| 265 | } | 405 | } |
| 266 | break; | 406 | break; |
| 267 | } | 407 | } |
| @@ -302,6 +442,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 302 | result = regs.query.query_sequence; | 442 | result = regs.query.query_sequence; |
| 303 | break; | 443 | break; |
| 304 | default: | 444 | default: |
| 445 | result = 1; | ||
| 305 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 446 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 306 | static_cast<u32>(regs.query.query_get.select.Value())); | 447 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 307 | } | 448 | } |
| @@ -333,7 +474,6 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 333 | query_result.timestamp = system.CoreTiming().GetTicks(); | 474 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 334 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 475 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 335 | } | 476 | } |
| 336 | dirty_flags.OnMemoryWrite(); | ||
| 337 | break; | 477 | break; |
| 338 | } | 478 | } |
| 339 | default: | 479 | default: |
| @@ -342,6 +482,45 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 342 | } | 482 | } |
| 343 | } | 483 | } |
| 344 | 484 | ||
| 485 | void Maxwell3D::ProcessQueryCondition() { | ||
| 486 | const GPUVAddr condition_address{regs.condition.Address()}; | ||
| 487 | switch (regs.condition.mode) { | ||
| 488 | case Regs::ConditionMode::Always: { | ||
| 489 | execute_on = true; | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | case Regs::ConditionMode::Never: { | ||
| 493 | execute_on = false; | ||
| 494 | break; | ||
| 495 | } | ||
| 496 | case Regs::ConditionMode::ResNonZero: { | ||
| 497 | Regs::QueryCompare cmp; | ||
| 498 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 499 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; | ||
| 500 | break; | ||
| 501 | } | ||
| 502 | case Regs::ConditionMode::Equal: { | ||
| 503 | Regs::QueryCompare cmp; | ||
| 504 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 505 | execute_on = | ||
| 506 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; | ||
| 507 | break; | ||
| 508 | } | ||
| 509 | case Regs::ConditionMode::NotEqual: { | ||
| 510 | Regs::QueryCompare cmp; | ||
| 511 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | ||
| 512 | execute_on = | ||
| 513 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; | ||
| 514 | break; | ||
| 515 | } | ||
| 516 | default: { | ||
| 517 | UNIMPLEMENTED_MSG("Uninplemented Condition Mode!"); | ||
| 518 | execute_on = true; | ||
| 519 | break; | ||
| 520 | } | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 345 | void Maxwell3D::ProcessSyncPoint() { | 524 | void Maxwell3D::ProcessSyncPoint() { |
| 346 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 525 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 347 | const u32 increment = regs.sync_info.increment.Value(); | 526 | const u32 increment = regs.sync_info.increment.Value(); |
| @@ -405,23 +584,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 405 | } | 584 | } |
| 406 | 585 | ||
| 407 | void Maxwell3D::ProcessCBData(u32 value) { | 586 | void Maxwell3D::ProcessCBData(u32 value) { |
| 587 | const u32 id = cb_data_state.id; | ||
| 588 | cb_data_state.buffer[id][cb_data_state.counter] = value; | ||
| 589 | // Increment the current buffer position. | ||
| 590 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | ||
| 591 | cb_data_state.counter++; | ||
| 592 | } | ||
| 593 | |||
| 594 | void Maxwell3D::StartCBData(u32 method) { | ||
| 595 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 596 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 597 | cb_data_state.id = method - first_cb_data; | ||
| 598 | cb_data_state.current = method; | ||
| 599 | cb_data_state.counter = 0; | ||
| 600 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | ||
| 601 | } | ||
| 602 | |||
| 603 | void Maxwell3D::FinishCBData() { | ||
| 408 | // Write the input value to the current const buffer at the current position. | 604 | // Write the input value to the current const buffer at the current position. |
| 409 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 605 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
| 410 | ASSERT(buffer_address != 0); | 606 | ASSERT(buffer_address != 0); |
| 411 | 607 | ||
| 412 | // Don't allow writing past the end of the buffer. | 608 | // Don't allow writing past the end of the buffer. |
| 413 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 609 | ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); |
| 414 | |||
| 415 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; | ||
| 416 | 610 | ||
| 417 | u8* ptr{memory_manager.GetPointer(address)}; | 611 | const GPUVAddr address{buffer_address + cb_data_state.start_pos}; |
| 418 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 612 | const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; |
| 419 | memory_manager.Write<u32>(address, value); | ||
| 420 | 613 | ||
| 421 | dirty_flags.OnMemoryWrite(); | 614 | const u32 id = cb_data_state.id; |
| 615 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | ||
| 616 | dirty.OnMemoryWrite(); | ||
| 422 | 617 | ||
| 423 | // Increment the current buffer position. | 618 | cb_data_state.id = null_cb_data; |
| 424 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 619 | cb_data_state.current = null_cb_data; |
| 425 | } | 620 | } |
| 426 | 621 | ||
| 427 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 622 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..1ee982b76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -90,6 +90,20 @@ public: | |||
| 90 | 90 | ||
| 91 | enum class QuerySelect : u32 { | 91 | enum class QuerySelect : u32 { |
| 92 | Zero = 0, | 92 | Zero = 0, |
| 93 | TimeElapsed = 2, | ||
| 94 | TransformFeedbackPrimitivesGenerated = 11, | ||
| 95 | PrimitivesGenerated = 18, | ||
| 96 | SamplesPassed = 21, | ||
| 97 | TransformFeedbackUnknown = 26, | ||
| 98 | }; | ||
| 99 | |||
| 100 | struct QueryCompare { | ||
| 101 | u32 initial_sequence; | ||
| 102 | u32 initial_mode; | ||
| 103 | u32 unknown1; | ||
| 104 | u32 unknown2; | ||
| 105 | u32 current_sequence; | ||
| 106 | u32 current_mode; | ||
| 93 | }; | 107 | }; |
| 94 | 108 | ||
| 95 | enum class QuerySyncCondition : u32 { | 109 | enum class QuerySyncCondition : u32 { |
| @@ -97,6 +111,14 @@ public: | |||
| 97 | GreaterThan = 1, | 111 | GreaterThan = 1, |
| 98 | }; | 112 | }; |
| 99 | 113 | ||
| 114 | enum class ConditionMode : u32 { | ||
| 115 | Never = 0, | ||
| 116 | Always = 1, | ||
| 117 | ResNonZero = 2, | ||
| 118 | Equal = 3, | ||
| 119 | NotEqual = 4, | ||
| 120 | }; | ||
| 121 | |||
| 100 | enum class ShaderProgram : u32 { | 122 | enum class ShaderProgram : u32 { |
| 101 | VertexA = 0, | 123 | VertexA = 0, |
| 102 | VertexB = 1, | 124 | VertexB = 1, |
| @@ -815,7 +837,18 @@ public: | |||
| 815 | BitField<4, 1, u32> alpha_to_one; | 837 | BitField<4, 1, u32> alpha_to_one; |
| 816 | } multisample_control; | 838 | } multisample_control; |
| 817 | 839 | ||
| 818 | INSERT_PADDING_WORDS(0x7); | 840 | INSERT_PADDING_WORDS(0x4); |
| 841 | |||
| 842 | struct { | ||
| 843 | u32 address_high; | ||
| 844 | u32 address_low; | ||
| 845 | ConditionMode mode; | ||
| 846 | |||
| 847 | GPUVAddr Address() const { | ||
| 848 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 849 | address_low); | ||
| 850 | } | ||
| 851 | } condition; | ||
| 819 | 852 | ||
| 820 | struct { | 853 | struct { |
| 821 | u32 tsc_address_high; | 854 | u32 tsc_address_high; |
| @@ -1124,23 +1157,77 @@ public: | |||
| 1124 | 1157 | ||
| 1125 | State state{}; | 1158 | State state{}; |
| 1126 | 1159 | ||
| 1127 | struct DirtyFlags { | 1160 | struct DirtyRegs { |
| 1128 | std::bitset<8> color_buffer{0xFF}; | 1161 | static constexpr std::size_t NUM_REGS = 256; |
| 1129 | std::bitset<32> vertex_array{0xFFFFFFFF}; | 1162 | union { |
| 1163 | struct { | ||
| 1164 | bool null_dirty; | ||
| 1165 | |||
| 1166 | // Vertex Attributes | ||
| 1167 | bool vertex_attrib_format; | ||
| 1168 | |||
| 1169 | // Vertex Arrays | ||
| 1170 | std::array<bool, 32> vertex_array; | ||
| 1171 | |||
| 1172 | bool vertex_array_buffers; | ||
| 1173 | |||
| 1174 | // Vertex Instances | ||
| 1175 | std::array<bool, 32> vertex_instance; | ||
| 1130 | 1176 | ||
| 1131 | bool vertex_attrib_format = true; | 1177 | bool vertex_instances; |
| 1132 | bool zeta_buffer = true; | 1178 | |
| 1133 | bool shaders = true; | 1179 | // Render Targets |
| 1180 | std::array<bool, 8> render_target; | ||
| 1181 | bool depth_buffer; | ||
| 1182 | |||
| 1183 | bool render_settings; | ||
| 1184 | |||
| 1185 | // Shaders | ||
| 1186 | bool shaders; | ||
| 1187 | |||
| 1188 | // Rasterizer State | ||
| 1189 | bool viewport; | ||
| 1190 | bool clip_coefficient; | ||
| 1191 | bool cull_mode; | ||
| 1192 | bool primitive_restart; | ||
| 1193 | bool depth_test; | ||
| 1194 | bool stencil_test; | ||
| 1195 | bool blend_state; | ||
| 1196 | bool scissor_test; | ||
| 1197 | bool transform_feedback; | ||
| 1198 | bool color_mask; | ||
| 1199 | bool polygon_offset; | ||
| 1200 | |||
| 1201 | // Complementary | ||
| 1202 | bool viewport_transform; | ||
| 1203 | bool screen_y_control; | ||
| 1204 | |||
| 1205 | bool memory_general; | ||
| 1206 | }; | ||
| 1207 | std::array<bool, NUM_REGS> regs; | ||
| 1208 | }; | ||
| 1209 | |||
| 1210 | void ResetVertexArrays() { | ||
| 1211 | vertex_array.fill(true); | ||
| 1212 | vertex_array_buffers = true; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | void ResetRenderTargets() { | ||
| 1216 | depth_buffer = true; | ||
| 1217 | render_target.fill(true); | ||
| 1218 | render_settings = true; | ||
| 1219 | } | ||
| 1134 | 1220 | ||
| 1135 | void OnMemoryWrite() { | 1221 | void OnMemoryWrite() { |
| 1136 | zeta_buffer = true; | ||
| 1137 | shaders = true; | 1222 | shaders = true; |
| 1138 | color_buffer.set(); | 1223 | memory_general = true; |
| 1139 | vertex_array.set(); | 1224 | ResetRenderTargets(); |
| 1225 | ResetVertexArrays(); | ||
| 1140 | } | 1226 | } |
| 1141 | }; | ||
| 1142 | 1227 | ||
| 1143 | DirtyFlags dirty_flags; | 1228 | } dirty{}; |
| 1229 | |||
| 1230 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1144 | 1231 | ||
| 1145 | /// Reads a register value located at the input method address | 1232 | /// Reads a register value located at the input method address |
| 1146 | u32 GetRegisterValue(u32 method) const; | 1233 | u32 GetRegisterValue(u32 method) const; |
| @@ -1169,6 +1256,10 @@ public: | |||
| 1169 | return macro_memory; | 1256 | return macro_memory; |
| 1170 | } | 1257 | } |
| 1171 | 1258 | ||
| 1259 | bool ShouldExecute() const { | ||
| 1260 | return execute_on; | ||
| 1261 | } | ||
| 1262 | |||
| 1172 | private: | 1263 | private: |
| 1173 | void InitializeRegisterDefaults(); | 1264 | void InitializeRegisterDefaults(); |
| 1174 | 1265 | ||
| @@ -1192,14 +1283,27 @@ private: | |||
| 1192 | /// Interpreter for the macro codes uploaded to the GPU. | 1283 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1193 | MacroInterpreter macro_interpreter; | 1284 | MacroInterpreter macro_interpreter; |
| 1194 | 1285 | ||
| 1286 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||
| 1287 | struct { | ||
| 1288 | std::array<std::array<u32, 0x4000>, 16> buffer; | ||
| 1289 | u32 current{null_cb_data}; | ||
| 1290 | u32 id{null_cb_data}; | ||
| 1291 | u32 start_pos{}; | ||
| 1292 | u32 counter{}; | ||
| 1293 | } cb_data_state; | ||
| 1294 | |||
| 1195 | Upload::State upload_state; | 1295 | Upload::State upload_state; |
| 1196 | 1296 | ||
| 1297 | bool execute_on{true}; | ||
| 1298 | |||
| 1197 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1299 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1198 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1300 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1199 | 1301 | ||
| 1200 | /// Retrieves information about a specific TSC entry from the TSC buffer. | 1302 | /// Retrieves information about a specific TSC entry from the TSC buffer. |
| 1201 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; | 1303 | Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; |
| 1202 | 1304 | ||
| 1305 | void InitDirtySettings(); | ||
| 1306 | |||
| 1203 | /** | 1307 | /** |
| 1204 | * Call a macro on this engine. | 1308 | * Call a macro on this engine. |
| 1205 | * @param method Method to call | 1309 | * @param method Method to call |
| @@ -1219,11 +1323,16 @@ private: | |||
| 1219 | /// Handles a write to the QUERY_GET register. | 1323 | /// Handles a write to the QUERY_GET register. |
| 1220 | void ProcessQueryGet(); | 1324 | void ProcessQueryGet(); |
| 1221 | 1325 | ||
| 1326 | // Handles Conditional Rendering | ||
| 1327 | void ProcessQueryCondition(); | ||
| 1328 | |||
| 1222 | /// Handles writes to syncing register. | 1329 | /// Handles writes to syncing register. |
| 1223 | void ProcessSyncPoint(); | 1330 | void ProcessSyncPoint(); |
| 1224 | 1331 | ||
| 1225 | /// Handles a write to the CB_DATA[i] register. | 1332 | /// Handles a write to the CB_DATA[i] register. |
| 1333 | void StartCBData(u32 method); | ||
| 1226 | void ProcessCBData(u32 value); | 1334 | void ProcessCBData(u32 value); |
| 1335 | void FinishCBData(); | ||
| 1227 | 1336 | ||
| 1228 | /// Handles a write to the CB_BIND register. | 1337 | /// Handles a write to the CB_BIND register. |
| 1229 | void ProcessCBBind(Regs::ShaderStage stage); | 1338 | void ProcessCBBind(Regs::ShaderStage stage); |
| @@ -1290,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | |||
| 1290 | ASSERT_REG_POSITION(point_size, 0x546); | 1399 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1291 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1400 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1292 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1401 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1402 | ASSERT_REG_POSITION(condition, 0x554); | ||
| 1293 | ASSERT_REG_POSITION(tsc, 0x557); | 1403 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1294 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | 1404 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); |
| 1295 | ASSERT_REG_POSITION(tic, 0x55D); | 1405 | ASSERT_REG_POSITION(tic, 0x55D); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 758c154cb..a28c04473 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty.OnMemoryWrite(); |
| 62 | 62 | ||
| 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 79d469b88..8520a0143 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -931,8 +931,6 @@ union Instruction { | |||
| 931 | } csetp; | 931 | } csetp; |
| 932 | 932 | ||
| 933 | union { | 933 | union { |
| 934 | BitField<35, 4, PredCondition> cond; | ||
| 935 | BitField<49, 1, u64> h_and; | ||
| 936 | BitField<6, 1, u64> ftz; | 934 | BitField<6, 1, u64> ftz; |
| 937 | BitField<45, 2, PredOperation> op; | 935 | BitField<45, 2, PredOperation> op; |
| 938 | BitField<3, 3, u64> pred3; | 936 | BitField<3, 3, u64> pred3; |
| @@ -940,9 +938,21 @@ union Instruction { | |||
| 940 | BitField<43, 1, u64> negate_a; | 938 | BitField<43, 1, u64> negate_a; |
| 941 | BitField<44, 1, u64> abs_a; | 939 | BitField<44, 1, u64> abs_a; |
| 942 | BitField<47, 2, HalfType> type_a; | 940 | BitField<47, 2, HalfType> type_a; |
| 943 | BitField<31, 1, u64> negate_b; | 941 | union { |
| 944 | BitField<30, 1, u64> abs_b; | 942 | BitField<35, 4, PredCondition> cond; |
| 945 | BitField<28, 2, HalfType> type_b; | 943 | BitField<49, 1, u64> h_and; |
| 944 | BitField<31, 1, u64> negate_b; | ||
| 945 | BitField<30, 1, u64> abs_b; | ||
| 946 | BitField<28, 2, HalfType> type_b; | ||
| 947 | } reg; | ||
| 948 | union { | ||
| 949 | BitField<56, 1, u64> negate_b; | ||
| 950 | BitField<54, 1, u64> abs_b; | ||
| 951 | } cbuf; | ||
| 952 | union { | ||
| 953 | BitField<49, 4, PredCondition> cond; | ||
| 954 | BitField<53, 1, u64> h_and; | ||
| 955 | } cbuf_and_imm; | ||
| 946 | BitField<42, 1, u64> neg_pred; | 956 | BitField<42, 1, u64> neg_pred; |
| 947 | BitField<39, 3, u64> pred39; | 957 | BitField<39, 3, u64> pred39; |
| 948 | } hsetp2; | 958 | } hsetp2; |
| @@ -1548,7 +1558,9 @@ public: | |||
| 1548 | HFMA2_RC, | 1558 | HFMA2_RC, |
| 1549 | HFMA2_RR, | 1559 | HFMA2_RR, |
| 1550 | HFMA2_IMM_R, | 1560 | HFMA2_IMM_R, |
| 1561 | HSETP2_C, | ||
| 1551 | HSETP2_R, | 1562 | HSETP2_R, |
| 1563 | HSETP2_IMM, | ||
| 1552 | HSET2_R, | 1564 | HSET2_R, |
| 1553 | POPC_C, | 1565 | POPC_C, |
| 1554 | POPC_R, | 1566 | POPC_R, |
| @@ -1831,7 +1843,9 @@ private: | |||
| 1831 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | 1843 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), |
| 1832 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | 1844 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), |
| 1833 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | 1845 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), |
| 1834 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | 1846 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 1847 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 1848 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 1835 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 1849 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 1836 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1850 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1837 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1851 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6cb5fd4e1..21007d8b2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 50 | return *maxwell_3d; | 50 | return *maxwell_3d; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 54 | return *kepler_compute; | ||
| 55 | } | ||
| 56 | |||
| 57 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 58 | return *kepler_compute; | ||
| 59 | } | ||
| 60 | |||
| 53 | MemoryManager& GPU::MemoryManager() { | 61 | MemoryManager& GPU::MemoryManager() { |
| 54 | return *memory_manager; | 62 | return *memory_manager; |
| 55 | } | 63 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5a8b1c74a..0055e5326 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,6 +155,12 @@ public: | |||
| 155 | /// Returns a const reference to the Maxwell3D GPU engine. | 155 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 156 | const Engines::Maxwell3D& Maxwell3D() const; | 156 | const Engines::Maxwell3D& Maxwell3D() const; |
| 157 | 157 | ||
| 158 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 159 | Engines::KeplerCompute& KeplerCompute(); | ||
| 160 | |||
| 161 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 162 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 163 | |||
| 158 | /// Returns a reference to the GPU memory manager. | 164 | /// Returns a reference to the GPU memory manager. |
| 159 | Tegra::MemoryManager& MemoryManager(); | 165 | Tegra::MemoryManager& MemoryManager(); |
| 160 | 166 | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2b7367568..9881df0d5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -34,6 +34,9 @@ public: | |||
| 34 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 36 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 37 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 38 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 39 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0bb5c068c..c28ae795c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,6 +20,7 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -105,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 105 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 107 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 106 | state.draw.shader_program = 0; | 108 | state.draw.shader_program = 0; |
| 107 | state.Apply(); | 109 | state.Apply(); |
| 110 | clear_framebuffer.Create(); | ||
| 108 | 111 | ||
| 109 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 110 | CheckExtensions(); | 113 | CheckExtensions(); |
| @@ -124,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 124 | auto& gpu = system.GPU().Maxwell3D(); | 127 | auto& gpu = system.GPU().Maxwell3D(); |
| 125 | const auto& regs = gpu.regs; | 128 | const auto& regs = gpu.regs; |
| 126 | 129 | ||
| 127 | if (!gpu.dirty_flags.vertex_attrib_format) { | 130 | if (!gpu.dirty.vertex_attrib_format) { |
| 128 | return state.draw.vertex_array; | 131 | return state.draw.vertex_array; |
| 129 | } | 132 | } |
| 130 | gpu.dirty_flags.vertex_attrib_format = false; | 133 | gpu.dirty.vertex_attrib_format = false; |
| 131 | 134 | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_VAO); | 135 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 133 | 136 | ||
| @@ -181,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 181 | } | 184 | } |
| 182 | 185 | ||
| 183 | // Rebinding the VAO invalidates the vertex buffer bindings. | 186 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 184 | gpu.dirty_flags.vertex_array.set(); | 187 | gpu.dirty.ResetVertexArrays(); |
| 185 | 188 | ||
| 186 | state.draw.vertex_array = vao_entry.handle; | 189 | state.draw.vertex_array = vao_entry.handle; |
| 187 | return vao_entry.handle; | 190 | return vao_entry.handle; |
| @@ -189,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 189 | 192 | ||
| 190 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | 193 | void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { |
| 191 | auto& gpu = system.GPU().Maxwell3D(); | 194 | auto& gpu = system.GPU().Maxwell3D(); |
| 192 | const auto& regs = gpu.regs; | 195 | if (!gpu.dirty.vertex_array_buffers) |
| 193 | |||
| 194 | if (gpu.dirty_flags.vertex_array.none()) | ||
| 195 | return; | 196 | return; |
| 197 | gpu.dirty.vertex_array_buffers = false; | ||
| 198 | |||
| 199 | const auto& regs = gpu.regs; | ||
| 196 | 200 | ||
| 197 | MICROPROFILE_SCOPE(OpenGL_VB); | 201 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 198 | 202 | ||
| 199 | // Upload all guest vertex arrays sequentially to our buffer | 203 | // Upload all guest vertex arrays sequentially to our buffer |
| 200 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 204 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 201 | if (!gpu.dirty_flags.vertex_array[index]) | 205 | if (!gpu.dirty.vertex_array[index]) |
| 202 | continue; | 206 | continue; |
| 207 | gpu.dirty.vertex_array[index] = false; | ||
| 208 | gpu.dirty.vertex_instance[index] = false; | ||
| 203 | 209 | ||
| 204 | const auto& vertex_array = regs.vertex_array[index]; | 210 | const auto& vertex_array = regs.vertex_array[index]; |
| 205 | if (!vertex_array.IsEnabled()) | 211 | if (!vertex_array.IsEnabled()) |
| @@ -224,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 224 | glVertexArrayBindingDivisor(vao, index, 0); | 230 | glVertexArrayBindingDivisor(vao, index, 0); |
| 225 | } | 231 | } |
| 226 | } | 232 | } |
| 233 | } | ||
| 234 | |||
| 235 | void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { | ||
| 236 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 237 | |||
| 238 | if (!gpu.dirty.vertex_instances) | ||
| 239 | return; | ||
| 240 | gpu.dirty.vertex_instances = false; | ||
| 241 | |||
| 242 | const auto& regs = gpu.regs; | ||
| 243 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 244 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 245 | if (!gpu.dirty.vertex_instance[index]) | ||
| 246 | continue; | ||
| 247 | |||
| 248 | gpu.dirty.vertex_instance[index] = false; | ||
| 227 | 249 | ||
| 228 | gpu.dirty_flags.vertex_array.reset(); | 250 | if (regs.instanced_arrays.IsInstancingEnabled(index) && |
| 251 | regs.vertex_array[index].divisor != 0) { | ||
| 252 | // Enable vertex buffer instancing with the specified divisor. | ||
| 253 | glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); | ||
| 254 | } else { | ||
| 255 | // Disable the vertex buffer instancing. | ||
| 256 | glVertexArrayBindingDivisor(vao, index, 0); | ||
| 257 | } | ||
| 258 | } | ||
| 229 | } | 259 | } |
| 230 | 260 | ||
| 231 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 261 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| @@ -298,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 298 | 328 | ||
| 299 | Shader shader{shader_cache.GetStageProgram(program)}; | 329 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 300 | 330 | ||
| 301 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | 331 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 302 | SetupDrawConstBuffers(stage_enum, shader); | 332 | SetupDrawConstBuffers(stage_enum, shader); |
| 303 | SetupGlobalRegions(stage_enum, shader); | 333 | SetupDrawGlobalMemory(stage_enum, shader); |
| 304 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | 334 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; |
| 305 | 335 | ||
| 306 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | 336 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; |
| @@ -341,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 341 | 371 | ||
| 342 | SyncClipEnabled(clip_distances); | 372 | SyncClipEnabled(clip_distances); |
| 343 | 373 | ||
| 344 | gpu.dirty_flags.shaders = false; | 374 | gpu.dirty.shaders = false; |
| 345 | } | 375 | } |
| 346 | 376 | ||
| 347 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 377 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -424,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 424 | 454 | ||
| 425 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 455 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 426 | single_color_target}; | 456 | single_color_target}; |
| 427 | if (fb_config_state == current_framebuffer_config_state && | 457 | if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) { |
| 428 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { | ||
| 429 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 458 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 430 | // single color targets). This is done because the guest registers may not change but the | 459 | // single color targets). This is done because the guest registers may not change but the |
| 431 | // host framebuffer may contain different attachments | 460 | // host framebuffer may contain different attachments |
| 432 | return current_depth_stencil_usage; | 461 | return current_depth_stencil_usage; |
| 433 | } | 462 | } |
| 463 | gpu.dirty.render_settings = false; | ||
| 434 | current_framebuffer_config_state = fb_config_state; | 464 | current_framebuffer_config_state = fb_config_state; |
| 435 | 465 | ||
| 436 | texture_cache.GuardRenderTargets(true); | 466 | texture_cache.GuardRenderTargets(true); |
| @@ -519,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 519 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 549 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| 520 | } | 550 | } |
| 521 | 551 | ||
| 552 | void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 553 | bool using_depth_fb, bool using_stencil_fb) { | ||
| 554 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 555 | const auto& regs = gpu.regs; | ||
| 556 | |||
| 557 | texture_cache.GuardRenderTargets(true); | ||
| 558 | View color_surface{}; | ||
| 559 | if (using_color_fb) { | ||
| 560 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | ||
| 561 | } | ||
| 562 | View depth_surface{}; | ||
| 563 | if (using_depth_fb || using_stencil_fb) { | ||
| 564 | depth_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 565 | } | ||
| 566 | texture_cache.GuardRenderTargets(false); | ||
| 567 | |||
| 568 | current_state.draw.draw_framebuffer = clear_framebuffer.handle; | ||
| 569 | current_state.ApplyFramebufferState(); | ||
| 570 | |||
| 571 | if (color_surface) { | ||
| 572 | color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 573 | } else { | ||
| 574 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 575 | } | ||
| 576 | |||
| 577 | if (depth_surface) { | ||
| 578 | const auto& params = depth_surface->GetSurfaceParams(); | ||
| 579 | switch (params.type) { | ||
| 580 | case VideoCore::Surface::SurfaceType::Depth: { | ||
| 581 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 582 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 583 | break; | ||
| 584 | } | ||
| 585 | case VideoCore::Surface::SurfaceType::DepthStencil: { | ||
| 586 | depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | default: { UNIMPLEMENTED(); } | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 593 | 0); | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 522 | void RasterizerOpenGL::Clear() { | 597 | void RasterizerOpenGL::Clear() { |
| 523 | const auto& regs = system.GPU().Maxwell3D().regs; | 598 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 599 | |||
| 600 | if (!maxwell3d.ShouldExecute()) { | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | |||
| 604 | const auto& regs = maxwell3d.regs; | ||
| 524 | bool use_color{}; | 605 | bool use_color{}; |
| 525 | bool use_depth{}; | 606 | bool use_depth{}; |
| 526 | bool use_stencil{}; | 607 | bool use_stencil{}; |
| 527 | 608 | ||
| 528 | OpenGLState clear_state; | 609 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 610 | SCOPE_EXIT({ | ||
| 611 | prev_state.AllDirty(); | ||
| 612 | prev_state.Apply(); | ||
| 613 | }); | ||
| 614 | |||
| 615 | OpenGLState clear_state{OpenGLState::GetCurState()}; | ||
| 616 | clear_state.SetDefaultViewports(); | ||
| 529 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 617 | if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 530 | regs.clear_buffers.A) { | 618 | regs.clear_buffers.A) { |
| 531 | use_color = true; | 619 | use_color = true; |
| @@ -545,6 +633,7 @@ void RasterizerOpenGL::Clear() { | |||
| 545 | // true. | 633 | // true. |
| 546 | clear_state.depth.test_enabled = true; | 634 | clear_state.depth.test_enabled = true; |
| 547 | clear_state.depth.test_func = GL_ALWAYS; | 635 | clear_state.depth.test_func = GL_ALWAYS; |
| 636 | clear_state.depth.write_mask = GL_TRUE; | ||
| 548 | } | 637 | } |
| 549 | if (regs.clear_buffers.S) { | 638 | if (regs.clear_buffers.S) { |
| 550 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); | 639 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); |
| @@ -581,8 +670,9 @@ void RasterizerOpenGL::Clear() { | |||
| 581 | return; | 670 | return; |
| 582 | } | 671 | } |
| 583 | 672 | ||
| 584 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( | 673 | ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); |
| 585 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); | 674 | |
| 675 | SyncViewport(clear_state); | ||
| 586 | if (regs.clear_flags.scissor) { | 676 | if (regs.clear_flags.scissor) { |
| 587 | SyncScissorTest(clear_state); | 677 | SyncScissorTest(clear_state); |
| 588 | } | 678 | } |
| @@ -591,21 +681,18 @@ void RasterizerOpenGL::Clear() { | |||
| 591 | clear_state.EmulateViewportWithScissor(); | 681 | clear_state.EmulateViewportWithScissor(); |
| 592 | } | 682 | } |
| 593 | 683 | ||
| 594 | clear_state.ApplyColorMask(); | 684 | clear_state.AllDirty(); |
| 595 | clear_state.ApplyDepth(); | 685 | clear_state.Apply(); |
| 596 | clear_state.ApplyStencilTest(); | ||
| 597 | clear_state.ApplyViewport(); | ||
| 598 | clear_state.ApplyFramebufferState(); | ||
| 599 | 686 | ||
| 600 | if (use_color) { | 687 | if (use_color) { |
| 601 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 688 | glClearBufferfv(GL_COLOR, 0, regs.clear_color); |
| 602 | } | 689 | } |
| 603 | 690 | ||
| 604 | if (clear_depth && clear_stencil) { | 691 | if (use_depth && use_stencil) { |
| 605 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 692 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 606 | } else if (clear_depth) { | 693 | } else if (use_depth) { |
| 607 | glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); | 694 | glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); |
| 608 | } else if (clear_stencil) { | 695 | } else if (use_stencil) { |
| 609 | glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); | 696 | glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); |
| 610 | } | 697 | } |
| 611 | } | 698 | } |
| @@ -616,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 616 | 703 | ||
| 617 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 704 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 618 | auto& gpu = system.GPU().Maxwell3D(); | 705 | auto& gpu = system.GPU().Maxwell3D(); |
| 706 | |||
| 707 | if (!gpu.ShouldExecute()) { | ||
| 708 | return; | ||
| 709 | } | ||
| 710 | |||
| 619 | const auto& regs = gpu.regs; | 711 | const auto& regs = gpu.regs; |
| 620 | 712 | ||
| 621 | SyncColorMask(); | 713 | SyncColorMask(); |
| @@ -661,6 +753,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 661 | 753 | ||
| 662 | // Upload vertex and index data. | 754 | // Upload vertex and index data. |
| 663 | SetupVertexBuffer(vao); | 755 | SetupVertexBuffer(vao); |
| 756 | SetupVertexInstances(vao); | ||
| 664 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | 757 | const GLintptr index_buffer_offset = SetupIndexBuffer(); |
| 665 | 758 | ||
| 666 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | 759 | // Setup draw parameters. It will automatically choose what glDraw* method to use. |
| @@ -687,7 +780,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 687 | 780 | ||
| 688 | if (invalidate) { | 781 | if (invalidate) { |
| 689 | // As all cached buffers are invalidated, we need to recheck their state. | 782 | // As all cached buffers are invalidated, we need to recheck their state. |
| 690 | gpu.dirty_flags.vertex_array.set(); | 783 | gpu.dirty.ResetVertexArrays(); |
| 691 | } | 784 | } |
| 692 | 785 | ||
| 693 | shader_program_manager->ApplyTo(state); | 786 | shader_program_manager->ApplyTo(state); |
| @@ -700,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 700 | params.DispatchDraw(); | 793 | params.DispatchDraw(); |
| 701 | 794 | ||
| 702 | accelerate_draw = AccelDraw::Disabled; | 795 | accelerate_draw = AccelDraw::Disabled; |
| 796 | gpu.dirty.memory_general = false; | ||
| 797 | } | ||
| 798 | |||
| 799 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 800 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 801 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 802 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 803 | return; | ||
| 804 | } | ||
| 805 | |||
| 806 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 807 | const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||
| 808 | state.draw.shader_program = program; | ||
| 809 | state.draw.program_pipeline = 0; | ||
| 810 | |||
| 811 | const std::size_t buffer_size = | ||
| 812 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 813 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 814 | buffer_cache.Map(buffer_size); | ||
| 815 | |||
| 816 | bind_ubo_pushbuffer.Setup(0); | ||
| 817 | bind_ssbo_pushbuffer.Setup(0); | ||
| 818 | |||
| 819 | SetupComputeConstBuffers(kernel); | ||
| 820 | SetupComputeGlobalMemory(kernel); | ||
| 821 | |||
| 822 | // TODO(Rodrigo): Bind images and samplers | ||
| 823 | |||
| 824 | buffer_cache.Unmap(); | ||
| 825 | |||
| 826 | bind_ubo_pushbuffer.Bind(); | ||
| 827 | bind_ssbo_pushbuffer.Bind(); | ||
| 828 | |||
| 829 | state.ApplyShaderProgram(); | ||
| 830 | state.ApplyProgramPipeline(); | ||
| 831 | |||
| 832 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 833 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 834 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 835 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 703 | } | 836 | } |
| 704 | 837 | ||
| 705 | void RasterizerOpenGL::FlushAll() {} | 838 | void RasterizerOpenGL::FlushAll() {} |
| @@ -775,12 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 775 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 908 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 776 | const Shader& shader) { | 909 | const Shader& shader) { |
| 777 | MICROPROFILE_SCOPE(OpenGL_UBO); | 910 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 778 | const auto stage_index = static_cast<std::size_t>(stage); | 911 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 779 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 912 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 780 | |||
| 781 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | ||
| 782 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 913 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 783 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 914 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 915 | SetupConstBuffer(buffer, entry); | ||
| 916 | } | ||
| 917 | } | ||
| 918 | |||
| 919 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | ||
| 920 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 921 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 922 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | ||
| 923 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 924 | const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||
| 925 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 926 | buffer.address = config.Address(); | ||
| 927 | buffer.size = config.size; | ||
| 928 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 929 | SetupConstBuffer(buffer, entry); | ||
| 784 | } | 930 | } |
| 785 | } | 931 | } |
| 786 | 932 | ||
| @@ -801,24 +947,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 801 | bind_ubo_pushbuffer.Push(cbuf, offset, size); | 947 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 802 | } | 948 | } |
| 803 | 949 | ||
| 804 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 950 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 805 | const Shader& shader) { | 951 | const Shader& shader) { |
| 806 | auto& gpu{system.GPU()}; | 952 | auto& gpu{system.GPU()}; |
| 807 | auto& memory_manager{gpu.MemoryManager()}; | 953 | auto& memory_manager{gpu.MemoryManager()}; |
| 808 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | 954 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 809 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 810 | |||
| 811 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 955 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 812 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 956 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 813 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | 957 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 814 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 958 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 959 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 960 | } | ||
| 961 | } | ||
| 815 | 962 | ||
| 816 | const auto [ssbo, buffer_offset] = | 963 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { |
| 817 | buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | 964 | auto& gpu{system.GPU()}; |
| 818 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | 965 | auto& memory_manager{gpu.MemoryManager()}; |
| 966 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 967 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 968 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 969 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 970 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 971 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 819 | } | 972 | } |
| 820 | } | 973 | } |
| 821 | 974 | ||
| 975 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||
| 976 | GPUVAddr gpu_addr, std::size_t size) { | ||
| 977 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 978 | const auto [ssbo, buffer_offset] = | ||
| 979 | buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | ||
| 980 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 981 | } | ||
| 982 | |||
| 822 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 983 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 823 | BaseBindings base_bindings) { | 984 | BaseBindings base_bindings) { |
| 824 | MICROPROFILE_SCOPE(OpenGL_Texture); | 985 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| @@ -907,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 907 | } | 1068 | } |
| 908 | 1069 | ||
| 909 | void RasterizerOpenGL::SyncCullMode() { | 1070 | void RasterizerOpenGL::SyncCullMode() { |
| 910 | const auto& regs = system.GPU().Maxwell3D().regs; | 1071 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 911 | 1072 | ||
| 912 | state.cull.enabled = regs.cull.enabled != 0; | 1073 | const auto& regs = maxwell3d.regs; |
| 913 | 1074 | ||
| 1075 | state.cull.enabled = regs.cull.enabled != 0; | ||
| 914 | if (state.cull.enabled) { | 1076 | if (state.cull.enabled) { |
| 915 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 1077 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
| 916 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); | 1078 | state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); |
| @@ -943,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() { | |||
| 943 | state.depth.test_enabled = regs.depth_test_enable != 0; | 1105 | state.depth.test_enabled = regs.depth_test_enable != 0; |
| 944 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; | 1106 | state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; |
| 945 | 1107 | ||
| 946 | if (!state.depth.test_enabled) | 1108 | if (!state.depth.test_enabled) { |
| 947 | return; | 1109 | return; |
| 1110 | } | ||
| 948 | 1111 | ||
| 949 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); | 1112 | state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); |
| 950 | } | 1113 | } |
| 951 | 1114 | ||
| 952 | void RasterizerOpenGL::SyncStencilTestState() { | 1115 | void RasterizerOpenGL::SyncStencilTestState() { |
| 953 | const auto& regs = system.GPU().Maxwell3D().regs; | 1116 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 954 | state.stencil.test_enabled = regs.stencil_enable != 0; | 1117 | if (!maxwell3d.dirty.stencil_test) { |
| 1118 | return; | ||
| 1119 | } | ||
| 1120 | const auto& regs = maxwell3d.regs; | ||
| 955 | 1121 | ||
| 1122 | state.stencil.test_enabled = regs.stencil_enable != 0; | ||
| 956 | if (!regs.stencil_enable) { | 1123 | if (!regs.stencil_enable) { |
| 957 | return; | 1124 | return; |
| 958 | } | 1125 | } |
| @@ -981,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() { | |||
| 981 | state.stencil.back.action_depth_fail = GL_KEEP; | 1148 | state.stencil.back.action_depth_fail = GL_KEEP; |
| 982 | state.stencil.back.action_depth_pass = GL_KEEP; | 1149 | state.stencil.back.action_depth_pass = GL_KEEP; |
| 983 | } | 1150 | } |
| 1151 | state.MarkDirtyStencilState(); | ||
| 1152 | maxwell3d.dirty.stencil_test = false; | ||
| 984 | } | 1153 | } |
| 985 | 1154 | ||
| 986 | void RasterizerOpenGL::SyncColorMask() { | 1155 | void RasterizerOpenGL::SyncColorMask() { |
| 987 | const auto& regs = system.GPU().Maxwell3D().regs; | 1156 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1157 | if (!maxwell3d.dirty.color_mask) { | ||
| 1158 | return; | ||
| 1159 | } | ||
| 1160 | const auto& regs = maxwell3d.regs; | ||
| 1161 | |||
| 988 | const std::size_t count = | 1162 | const std::size_t count = |
| 989 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; | 1163 | regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; |
| 990 | for (std::size_t i = 0; i < count; i++) { | 1164 | for (std::size_t i = 0; i < count; i++) { |
| @@ -995,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() { | |||
| 995 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; | 1169 | dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; |
| 996 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; | 1170 | dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; |
| 997 | } | 1171 | } |
| 1172 | |||
| 1173 | state.MarkDirtyColorMask(); | ||
| 1174 | maxwell3d.dirty.color_mask = false; | ||
| 998 | } | 1175 | } |
| 999 | 1176 | ||
| 1000 | void RasterizerOpenGL::SyncMultiSampleState() { | 1177 | void RasterizerOpenGL::SyncMultiSampleState() { |
| @@ -1009,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() { | |||
| 1009 | } | 1186 | } |
| 1010 | 1187 | ||
| 1011 | void RasterizerOpenGL::SyncBlendState() { | 1188 | void RasterizerOpenGL::SyncBlendState() { |
| 1012 | const auto& regs = system.GPU().Maxwell3D().regs; | 1189 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1190 | if (!maxwell3d.dirty.blend_state) { | ||
| 1191 | return; | ||
| 1192 | } | ||
| 1193 | const auto& regs = maxwell3d.regs; | ||
| 1013 | 1194 | ||
| 1014 | state.blend_color.red = regs.blend_color.r; | 1195 | state.blend_color.red = regs.blend_color.r; |
| 1015 | state.blend_color.green = regs.blend_color.g; | 1196 | state.blend_color.green = regs.blend_color.g; |
| @@ -1032,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1032 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 1213 | for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 1033 | state.blend[i].enabled = false; | 1214 | state.blend[i].enabled = false; |
| 1034 | } | 1215 | } |
| 1216 | maxwell3d.dirty.blend_state = false; | ||
| 1217 | state.MarkDirtyBlendState(); | ||
| 1035 | return; | 1218 | return; |
| 1036 | } | 1219 | } |
| 1037 | 1220 | ||
| @@ -1048,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() { | |||
| 1048 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); | 1231 | blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); |
| 1049 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); | 1232 | blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); |
| 1050 | } | 1233 | } |
| 1234 | |||
| 1235 | state.MarkDirtyBlendState(); | ||
| 1236 | maxwell3d.dirty.blend_state = false; | ||
| 1051 | } | 1237 | } |
| 1052 | 1238 | ||
| 1053 | void RasterizerOpenGL::SyncLogicOpState() { | 1239 | void RasterizerOpenGL::SyncLogicOpState() { |
| @@ -1099,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1099 | } | 1285 | } |
| 1100 | 1286 | ||
| 1101 | void RasterizerOpenGL::SyncPolygonOffset() { | 1287 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1102 | const auto& regs = system.GPU().Maxwell3D().regs; | 1288 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1289 | if (!maxwell3d.dirty.polygon_offset) { | ||
| 1290 | return; | ||
| 1291 | } | ||
| 1292 | const auto& regs = maxwell3d.regs; | ||
| 1293 | |||
| 1103 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | 1294 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; |
| 1104 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | 1295 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; |
| 1105 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | 1296 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; |
| 1106 | state.polygon_offset.units = regs.polygon_offset_units; | 1297 | state.polygon_offset.units = regs.polygon_offset_units; |
| 1107 | state.polygon_offset.factor = regs.polygon_offset_factor; | 1298 | state.polygon_offset.factor = regs.polygon_offset_factor; |
| 1108 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | 1299 | state.polygon_offset.clamp = regs.polygon_offset_clamp; |
| 1300 | |||
| 1301 | state.MarkDirtyPolygonOffset(); | ||
| 1302 | maxwell3d.dirty.polygon_offset = false; | ||
| 1109 | } | 1303 | } |
| 1110 | 1304 | ||
| 1111 | void RasterizerOpenGL::SyncAlphaTest() { | 1305 | void RasterizerOpenGL::SyncAlphaTest() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 40b571d58..8b123c48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | 58 | ||
| 59 | void DrawArrays() override; | 59 | void DrawArrays() override; |
| 60 | void Clear() override; | 60 | void Clear() override; |
| 61 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 61 | void FlushAll() override; | 62 | void FlushAll() override; |
| 62 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 63 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| @@ -108,17 +109,30 @@ private: | |||
| 108 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, | 109 | OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true, |
| 109 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); | 110 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 110 | 111 | ||
| 112 | void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, | ||
| 113 | bool using_depth_fb, bool using_stencil_fb); | ||
| 114 | |||
| 111 | /// Configures the current constbuffers to use for the draw command. | 115 | /// Configures the current constbuffers to use for the draw command. |
| 112 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 116 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 113 | const Shader& shader); | 117 | const Shader& shader); |
| 114 | 118 | ||
| 119 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 120 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 121 | |||
| 115 | /// Configures a constant buffer. | 122 | /// Configures a constant buffer. |
| 116 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 123 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 117 | const GLShader::ConstBufferEntry& entry); | 124 | const GLShader::ConstBufferEntry& entry); |
| 118 | 125 | ||
| 119 | /// Configures the current global memory entries to use for the draw command. | 126 | /// Configures the current global memory entries to use for the draw command. |
| 120 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 127 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 121 | const Shader& shader); | 128 | const Shader& shader); |
| 129 | |||
| 130 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 131 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 132 | |||
| 133 | /// Configures a constant buffer. | ||
| 134 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 135 | std::size_t size); | ||
| 122 | 136 | ||
| 123 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 137 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 124 | /// usage. | 138 | /// usage. |
| @@ -216,6 +230,7 @@ private: | |||
| 216 | GLuint SetupVertexFormat(); | 230 | GLuint SetupVertexFormat(); |
| 217 | 231 | ||
| 218 | void SetupVertexBuffer(GLuint vao); | 232 | void SetupVertexBuffer(GLuint vao); |
| 233 | void SetupVertexInstances(GLuint vao); | ||
| 219 | 234 | ||
| 220 | GLintptr SetupIndexBuffer(); | 235 | GLintptr SetupIndexBuffer(); |
| 221 | 236 | ||
| @@ -226,6 +241,8 @@ private: | |||
| 226 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 241 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 227 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 242 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 228 | 243 | ||
| 244 | OGLFramebuffer clear_framebuffer; | ||
| 245 | |||
| 229 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 246 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 230 | CachedPageMap cached_pages; | 247 | CachedPageMap cached_pages; |
| 231 | }; | 248 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 32dd9eae7..1c90facc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | |||
| 103 | /// Calculates the size of a program stream | 124 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 126 | constexpr std::size_t start_offset = 10; |
| @@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 128 | } | 149 | } |
| 129 | 150 | ||
| 130 | /// Hashes one (or two) program streams | 151 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | if (size_a == 0) { | 154 | if (size_a == 0) { |
| 134 | size_a = CalculateProgramSize(code); | 155 | size_a = CalculateProgramSize(code); |
| 135 | } | 156 | } |
| 136 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); |
| 137 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 158 | if (program_type != ProgramType::VertexA) { |
| 138 | return unique_identifier; | 159 | return unique_identifier; |
| 139 | } | 160 | } |
| 140 | // VertexA programs include two programs | 161 | // VertexA programs include two programs |
| @@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 152 | } | 173 | } |
| 153 | 174 | ||
| 154 | /// Creates an unspecialized program from code streams | 175 | /// Creates an unspecialized program from code streams |
| 155 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 156 | ProgramCode program_code, ProgramCode program_code_b) { | 177 | ProgramCode program_code, ProgramCode program_code_b) { |
| 157 | GLShader::ShaderSetup setup(program_code); | 178 | GLShader::ShaderSetup setup(program_code); |
| 158 | setup.program.size_a = CalculateProgramSize(program_code); | 179 | setup.program.size_a = CalculateProgramSize(program_code); |
| 159 | setup.program.size_b = 0; | 180 | setup.program.size_b = 0; |
| 160 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 181 | if (program_type == ProgramType::VertexA) { |
| 161 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 162 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 163 | // stage here. | 184 | // stage here. |
| @@ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr | |||
| 168 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 169 | 190 | ||
| 170 | switch (program_type) { | 191 | switch (program_type) { |
| 171 | case Maxwell::ShaderProgram::VertexA: | 192 | case ProgramType::VertexA: |
| 172 | case Maxwell::ShaderProgram::VertexB: | 193 | case ProgramType::VertexB: |
| 173 | return GLShader::GenerateVertexShader(device, setup); | 194 | return GLShader::GenerateVertexShader(device, setup); |
| 174 | case Maxwell::ShaderProgram::Geometry: | 195 | case ProgramType::Geometry: |
| 175 | return GLShader::GenerateGeometryShader(device, setup); | 196 | return GLShader::GenerateGeometryShader(device, setup); |
| 176 | case Maxwell::ShaderProgram::Fragment: | 197 | case ProgramType::Fragment: |
| 177 | return GLShader::GenerateFragmentShader(device, setup); | 198 | return GLShader::GenerateFragmentShader(device, setup); |
| 199 | case ProgramType::Compute: | ||
| 200 | return GLShader::GenerateComputeShader(device, setup); | ||
| 178 | default: | 201 | default: |
| 179 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 180 | UNREACHABLE(); | ||
| 181 | return {}; | 203 | return {}; |
| 182 | } | 204 | } |
| 183 | } | 205 | } |
| 184 | 206 | ||
| 185 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 186 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | 208 | ProgramType program_type, const ProgramVariant& variant, |
| 187 | bool hint_retrievable = false) { | 209 | bool hint_retrievable = false) { |
| 188 | auto base_bindings{variant.base_bindings}; | 210 | auto base_bindings{variant.base_bindings}; |
| 189 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| @@ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 194 | if (entries.shader_viewport_layer_array) { | 216 | if (entries.shader_viewport_layer_array) { |
| 195 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 217 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; |
| 196 | } | 218 | } |
| 197 | source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 219 | if (program_type == ProgramType::Compute) { |
| 220 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 221 | } | ||
| 222 | source += '\n'; | ||
| 223 | |||
| 224 | if (program_type != ProgramType::Compute) { | ||
| 225 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 226 | } | ||
| 198 | 227 | ||
| 199 | for (const auto& cbuf : entries.const_buffers) { | 228 | for (const auto& cbuf : entries.const_buffers) { |
| 200 | source += | 229 | source += |
| @@ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 221 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 250 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); |
| 222 | } | 251 | } |
| 223 | 252 | ||
| 224 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 253 | if (program_type == ProgramType::Geometry) { |
| 225 | const auto [glsl_topology, debug_name, max_vertices] = | 254 | const auto [glsl_topology, debug_name, max_vertices] = |
| 226 | GetPrimitiveDescription(primitive_mode); | 255 | GetPrimitiveDescription(primitive_mode); |
| 227 | 256 | ||
| 228 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 257 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; |
| 229 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 258 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 230 | } | 259 | } |
| 260 | if (program_type == ProgramType::Compute) { | ||
| 261 | source += "layout (local_size_variable) in;\n"; | ||
| 262 | } | ||
| 231 | 263 | ||
| 232 | source += code; | 264 | source += code; |
| 233 | 265 | ||
| @@ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 255 | 287 | ||
| 256 | } // Anonymous namespace | 288 | } // Anonymous namespace |
| 257 | 289 | ||
| 258 | CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 290 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 259 | GLShader::ProgramResult result) | 291 | GLShader::ProgramResult result) |
| 260 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 292 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 261 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 293 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| @@ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 268 | ProgramCode&& program_code_b) { | 300 | ProgramCode&& program_code_b) { |
| 269 | const auto code_size{CalculateProgramSize(program_code)}; | 301 | const auto code_size{CalculateProgramSize(program_code)}; |
| 270 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | 302 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 271 | auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | 303 | auto result{ |
| 304 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 272 | if (result.first.empty()) { | 305 | if (result.first.empty()) { |
| 273 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 306 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 274 | return {}; | 307 | return {}; |
| 275 | } | 308 | } |
| 276 | 309 | ||
| 277 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 310 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 278 | params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | 311 | params.unique_identifier, GetProgramType(program_type), |
| 279 | static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | 312 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 280 | std::move(program_code_b))); | 313 | std::move(program_code), std::move(program_code_b))); |
| 281 | 314 | ||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 315 | return std::shared_ptr<CachedShader>( |
| 316 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 283 | } | 317 | } |
| 284 | 318 | ||
| 285 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 319 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 286 | Maxwell::ShaderProgram program_type, | 320 | Maxwell::ShaderProgram program_type, |
| 287 | GLShader::ProgramResult result) { | 321 | GLShader::ProgramResult result) { |
| 288 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 322 | return std::shared_ptr<CachedShader>( |
| 323 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 324 | } | ||
| 325 | |||
| 326 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||
| 327 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 328 | |||
| 329 | const auto code_size{CalculateProgramSize(code)}; | ||
| 330 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 331 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 332 | std::move(code), {})); | ||
| 333 | |||
| 334 | return std::shared_ptr<CachedShader>( | ||
| 335 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 336 | } | ||
| 337 | |||
| 338 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||
| 339 | GLShader::ProgramResult result) { | ||
| 340 | return std::shared_ptr<CachedShader>( | ||
| 341 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 289 | } | 342 | } |
| 290 | 343 | ||
| 291 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 344 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 292 | GLuint handle{}; | 345 | GLuint handle{}; |
| 293 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 346 | if (program_type == ProgramType::Geometry) { |
| 294 | handle = GetGeometryShader(variant); | 347 | handle = GetGeometryShader(variant); |
| 295 | } else { | 348 | } else { |
| 296 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 349 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| @@ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 308 | handle = program->handle; | 361 | handle = program->handle; |
| 309 | } | 362 | } |
| 310 | 363 | ||
| 311 | auto base_bindings{variant.base_bindings}; | 364 | auto base_bindings = variant.base_bindings; |
| 312 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 365 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 366 | if (program_type != ProgramType::Compute) { | ||
| 367 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 368 | } | ||
| 313 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 369 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 314 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 370 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 315 | 371 | ||
| @@ -572,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 572 | } | 628 | } |
| 573 | 629 | ||
| 574 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 630 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 575 | if (!system.GPU().Maxwell3D().dirty_flags.shaders) { | 631 | if (!system.GPU().Maxwell3D().dirty.shaders) { |
| 576 | return last_shaders[static_cast<std::size_t>(program)]; | 632 | return last_shaders[static_cast<std::size_t>(program)]; |
| 577 | } | 633 | } |
| 578 | 634 | ||
| @@ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 589 | // No shader found - create a new one | 645 | // No shader found - create a new one |
| 590 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 646 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 591 | ProgramCode program_code_b; | 647 | ProgramCode program_code_b; |
| 592 | if (program == Maxwell::ShaderProgram::VertexA) { | 648 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 649 | if (is_program_a) { | ||
| 593 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 650 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 594 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 651 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 595 | memory_manager.GetPointer(program_addr_b)); | 652 | memory_manager.GetPointer(program_addr_b)); |
| 596 | } | 653 | } |
| 597 | 654 | ||
| 598 | const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 655 | const auto unique_identifier = |
| 656 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||
| 599 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 657 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| 600 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 658 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |
| 601 | host_ptr, unique_identifier}; | 659 | host_ptr, unique_identifier}; |
| @@ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 612 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 670 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 613 | } | 671 | } |
| 614 | 672 | ||
| 673 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 674 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 675 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 676 | auto kernel = TryGet(host_ptr); | ||
| 677 | if (kernel) { | ||
| 678 | return kernel; | ||
| 679 | } | ||
| 680 | |||
| 681 | // No kernel found - create a new one | ||
| 682 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 683 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 684 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 685 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 686 | host_ptr, unique_identifier}; | ||
| 687 | |||
| 688 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 689 | if (found == precompiled_shaders.end()) { | ||
| 690 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 691 | } else { | ||
| 692 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 693 | } | ||
| 694 | |||
| 695 | Register(kernel); | ||
| 696 | return kernel; | ||
| 697 | } | ||
| 698 | |||
| 615 | } // namespace OpenGL | 699 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..a3106a0ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -61,6 +61,11 @@ public: | |||
| 61 | Maxwell::ShaderProgram program_type, | 61 | Maxwell::ShaderProgram program_type, |
| 62 | GLShader::ProgramResult result); | 62 | GLShader::ProgramResult result); |
| 63 | 63 | ||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 65 | |||
| 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 67 | GLShader::ProgramResult result); | ||
| 68 | |||
| 64 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 65 | return cpu_addr; | 70 | return cpu_addr; |
| 66 | } | 71 | } |
| @@ -78,7 +83,7 @@ public: | |||
| 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 79 | 84 | ||
| 80 | private: | 85 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 82 | GLShader::ProgramResult result); | 87 | GLShader::ProgramResult result); |
| 83 | 88 | ||
| 84 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 89 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| @@ -104,7 +109,7 @@ private: | |||
| 104 | u8* host_ptr{}; | 109 | u8* host_ptr{}; |
| 105 | VAddr cpu_addr{}; | 110 | VAddr cpu_addr{}; |
| 106 | u64 unique_identifier{}; | 111 | u64 unique_identifier{}; |
| 107 | Maxwell::ShaderProgram program_type{}; | 112 | ProgramType program_type{}; |
| 108 | ShaderDiskCacheOpenGL& disk_cache; | 113 | ShaderDiskCacheOpenGL& disk_cache; |
| 109 | const PrecompiledPrograms& precompiled_programs; | 114 | const PrecompiledPrograms& precompiled_programs; |
| 110 | 115 | ||
| @@ -132,6 +137,9 @@ public: | |||
| 132 | /// Gets the current specified shader stage program | 137 | /// Gets the current specified shader stage program |
| 133 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 138 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 134 | 139 | ||
| 140 | /// Gets a compute kernel in the passed address | ||
| 141 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 142 | |||
| 135 | protected: | 143 | protected: |
| 136 | // We do not have to flush this cache as things in it are never modified by us. | 144 | // We do not have to flush this cache as things in it are never modified by us. |
| 137 | void FlushObjectInner(const Shader& object) override {} | 145 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 119073776..ffe26b241 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -37,7 +37,6 @@ using namespace std::string_literals; | |||
| 37 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 38 | 38 | ||
| 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 40 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 41 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 42 | 41 | ||
| 43 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 162 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 163 | } | 162 | } |
| 164 | 163 | ||
| 164 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 165 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 166 | } | ||
| 167 | |||
| 165 | class GLSLDecompiler final { | 168 | class GLSLDecompiler final { |
| 166 | public: | 169 | public: |
| 167 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 170 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 168 | std::string suffix) | 171 | std::string suffix) |
| 169 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 172 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 170 | 173 | ||
| @@ -248,25 +251,21 @@ public: | |||
| 248 | } | 251 | } |
| 249 | entries.clip_distances = ir.GetClipDistances(); | 252 | entries.clip_distances = ir.GetClipDistances(); |
| 250 | entries.shader_viewport_layer_array = | 253 | entries.shader_viewport_layer_array = |
| 251 | stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); | 254 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); |
| 252 | entries.shader_length = ir.GetLength(); | 255 | entries.shader_length = ir.GetLength(); |
| 253 | return entries; | 256 | return entries; |
| 254 | } | 257 | } |
| 255 | 258 | ||
| 256 | private: | 259 | private: |
| 257 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); | ||
| 258 | using OperationDecompilersArray = | ||
| 259 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 260 | |||
| 261 | void DeclareVertex() { | 260 | void DeclareVertex() { |
| 262 | if (stage != ShaderStage::Vertex) | 261 | if (!IsVertexShader(stage)) |
| 263 | return; | 262 | return; |
| 264 | 263 | ||
| 265 | DeclareVertexRedeclarations(); | 264 | DeclareVertexRedeclarations(); |
| 266 | } | 265 | } |
| 267 | 266 | ||
| 268 | void DeclareGeometry() { | 267 | void DeclareGeometry() { |
| 269 | if (stage != ShaderStage::Geometry) { | 268 | if (stage != ProgramType::Geometry) { |
| 270 | return; | 269 | return; |
| 271 | } | 270 | } |
| 272 | 271 | ||
| @@ -297,14 +296,14 @@ private: | |||
| 297 | break; | 296 | break; |
| 298 | } | 297 | } |
| 299 | } | 298 | } |
| 300 | if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { | 299 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { |
| 301 | if (ir.UsesLayer()) { | 300 | if (ir.UsesLayer()) { |
| 302 | code.AddLine("int gl_Layer;"); | 301 | code.AddLine("int gl_Layer;"); |
| 303 | } | 302 | } |
| 304 | if (ir.UsesViewportIndex()) { | 303 | if (ir.UsesViewportIndex()) { |
| 305 | code.AddLine("int gl_ViewportIndex;"); | 304 | code.AddLine("int gl_ViewportIndex;"); |
| 306 | } | 305 | } |
| 307 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && | 306 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && |
| 308 | !device.HasVertexViewportLayer()) { | 307 | !device.HasVertexViewportLayer()) { |
| 309 | LOG_ERROR( | 308 | LOG_ERROR( |
| 310 | Render_OpenGL, | 309 | Render_OpenGL, |
| @@ -341,11 +340,16 @@ private: | |||
| 341 | } | 340 | } |
| 342 | 341 | ||
| 343 | void DeclareLocalMemory() { | 342 | void DeclareLocalMemory() { |
| 344 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 343 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at |
| 345 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 344 | // specialization time. |
| 346 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 345 | const u64 local_memory_size = |
| 347 | code.AddNewLine(); | 346 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); |
| 347 | if (local_memory_size == 0) { | ||
| 348 | return; | ||
| 348 | } | 349 | } |
| 350 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 351 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | ||
| 352 | code.AddNewLine(); | ||
| 349 | } | 353 | } |
| 350 | 354 | ||
| 351 | void DeclareInternalFlags() { | 355 | void DeclareInternalFlags() { |
| @@ -399,12 +403,12 @@ private: | |||
| 399 | const u32 location{GetGenericAttributeIndex(index)}; | 403 | const u32 location{GetGenericAttributeIndex(index)}; |
| 400 | 404 | ||
| 401 | std::string name{GetInputAttribute(index)}; | 405 | std::string name{GetInputAttribute(index)}; |
| 402 | if (stage == ShaderStage::Geometry) { | 406 | if (stage == ProgramType::Geometry) { |
| 403 | name = "gs_" + name + "[]"; | 407 | name = "gs_" + name + "[]"; |
| 404 | } | 408 | } |
| 405 | 409 | ||
| 406 | std::string suffix; | 410 | std::string suffix; |
| 407 | if (stage == ShaderStage::Fragment) { | 411 | if (stage == ProgramType::Fragment) { |
| 408 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 412 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 409 | if (skip_unused && input_mode == AttributeUse::Unused) { | 413 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 410 | return; | 414 | return; |
| @@ -416,7 +420,7 @@ private: | |||
| 416 | } | 420 | } |
| 417 | 421 | ||
| 418 | void DeclareOutputAttributes() { | 422 | void DeclareOutputAttributes() { |
| 419 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 423 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 420 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 424 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 421 | DeclareOutputAttribute(ToGenericAttribute(i)); | 425 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 422 | } | 426 | } |
| @@ -538,7 +542,7 @@ private: | |||
| 538 | constexpr u32 element_stride{4}; | 542 | constexpr u32 element_stride{4}; |
| 539 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 543 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 540 | 544 | ||
| 541 | const bool declared{stage != ShaderStage::Fragment || | 545 | const bool declared{stage != ProgramType::Fragment || |
| 542 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 546 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; |
| 543 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 547 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; |
| 544 | code.AddLine("case 0x{:x}: return {};", address, value); | 548 | code.AddLine("case 0x{:x}: return {};", address, value); |
| @@ -642,7 +646,7 @@ private: | |||
| 642 | } | 646 | } |
| 643 | 647 | ||
| 644 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 648 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 645 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 649 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 646 | "Physical attributes in geometry shaders are not implemented"); | 650 | "Physical attributes in geometry shaders are not implemented"); |
| 647 | if (abuf->IsPhysicalBuffer()) { | 651 | if (abuf->IsPhysicalBuffer()) { |
| 648 | return fmt::format("readPhysicalAttribute(ftou({}))", | 652 | return fmt::format("readPhysicalAttribute(ftou({}))", |
| @@ -697,6 +701,9 @@ private: | |||
| 697 | } | 701 | } |
| 698 | 702 | ||
| 699 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 703 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 704 | if (stage == ProgramType::Compute) { | ||
| 705 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 706 | } | ||
| 700 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 707 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 701 | } | 708 | } |
| 702 | 709 | ||
| @@ -726,7 +733,7 @@ private: | |||
| 726 | 733 | ||
| 727 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 734 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 728 | const auto GeometryPass = [&](std::string_view name) { | 735 | const auto GeometryPass = [&](std::string_view name) { |
| 729 | if (stage == ShaderStage::Geometry && buffer) { | 736 | if (stage == ProgramType::Geometry && buffer) { |
| 730 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 737 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 731 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 738 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 732 | // this happens and what's its intent. | 739 | // this happens and what's its intent. |
| @@ -738,10 +745,10 @@ private: | |||
| 738 | switch (attribute) { | 745 | switch (attribute) { |
| 739 | case Attribute::Index::Position: | 746 | case Attribute::Index::Position: |
| 740 | switch (stage) { | 747 | switch (stage) { |
| 741 | case ShaderStage::Geometry: | 748 | case ProgramType::Geometry: |
| 742 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 749 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), |
| 743 | GetSwizzle(element)); | 750 | GetSwizzle(element)); |
| 744 | case ShaderStage::Fragment: | 751 | case ProgramType::Fragment: |
| 745 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 752 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); |
| 746 | default: | 753 | default: |
| 747 | UNREACHABLE(); | 754 | UNREACHABLE(); |
| @@ -762,7 +769,7 @@ private: | |||
| 762 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 769 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 763 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 770 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 764 | // shader. | 771 | // shader. |
| 765 | ASSERT(stage == ShaderStage::Vertex); | 772 | ASSERT(IsVertexShader(stage)); |
| 766 | switch (element) { | 773 | switch (element) { |
| 767 | case 2: | 774 | case 2: |
| 768 | // Config pack's first value is instance_id. | 775 | // Config pack's first value is instance_id. |
| @@ -774,7 +781,7 @@ private: | |||
| 774 | return "0"; | 781 | return "0"; |
| 775 | case Attribute::Index::FrontFacing: | 782 | case Attribute::Index::FrontFacing: |
| 776 | // TODO(Subv): Find out what the values are for the other elements. | 783 | // TODO(Subv): Find out what the values are for the other elements. |
| 777 | ASSERT(stage == ShaderStage::Fragment); | 784 | ASSERT(stage == ProgramType::Fragment); |
| 778 | switch (element) { | 785 | switch (element) { |
| 779 | case 3: | 786 | case 3: |
| 780 | return "itof(gl_FrontFacing ? -1 : 0)"; | 787 | return "itof(gl_FrontFacing ? -1 : 0)"; |
| @@ -796,7 +803,7 @@ private: | |||
| 796 | return value; | 803 | return value; |
| 797 | } | 804 | } |
| 798 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 805 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 799 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 806 | const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; |
| 800 | 807 | ||
| 801 | const std::string temporary = code.GenerateTemporary(); | 808 | const std::string temporary = code.GenerateTemporary(); |
| 802 | code.AddLine("{}float {} = {};", precise, temporary, value); | 809 | code.AddLine("{}float {} = {};", precise, temporary, value); |
| @@ -831,12 +838,12 @@ private: | |||
| 831 | UNIMPLEMENTED(); | 838 | UNIMPLEMENTED(); |
| 832 | return {}; | 839 | return {}; |
| 833 | case 1: | 840 | case 1: |
| 834 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 841 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 835 | return {}; | 842 | return {}; |
| 836 | } | 843 | } |
| 837 | return std::make_pair("gl_Layer", true); | 844 | return std::make_pair("gl_Layer", true); |
| 838 | case 2: | 845 | case 2: |
| 839 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 846 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 840 | return {}; | 847 | return {}; |
| 841 | } | 848 | } |
| 842 | return std::make_pair("gl_ViewportIndex", true); | 849 | return std::make_pair("gl_ViewportIndex", true); |
| @@ -1073,6 +1080,9 @@ private: | |||
| 1073 | target = result->first; | 1080 | target = result->first; |
| 1074 | is_integer = result->second; | 1081 | is_integer = result->second; |
| 1075 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1082 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1083 | if (stage == ProgramType::Compute) { | ||
| 1084 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1085 | } | ||
| 1076 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1086 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1077 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1087 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1078 | const std::string real = Visit(gmem->GetRealAddress()); | 1088 | const std::string real = Visit(gmem->GetRealAddress()); |
| @@ -1400,14 +1410,10 @@ private: | |||
| 1400 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); | 1410 | return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); |
| 1401 | } | 1411 | } |
| 1402 | 1412 | ||
| 1403 | std::string LogicalAll2(Operation operation) { | 1413 | std::string LogicalAnd2(Operation operation) { |
| 1404 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | 1414 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1405 | } | 1415 | } |
| 1406 | 1416 | ||
| 1407 | std::string LogicalAny2(Operation operation) { | ||
| 1408 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1409 | } | ||
| 1410 | |||
| 1411 | template <bool with_nan> | 1417 | template <bool with_nan> |
| 1412 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { | 1418 | std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { |
| 1413 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | 1419 | const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, |
| @@ -1630,7 +1636,7 @@ private: | |||
| 1630 | } | 1636 | } |
| 1631 | 1637 | ||
| 1632 | std::string Exit(Operation operation) { | 1638 | std::string Exit(Operation operation) { |
| 1633 | if (stage != ShaderStage::Fragment) { | 1639 | if (stage != ProgramType::Fragment) { |
| 1634 | code.AddLine("return;"); | 1640 | code.AddLine("return;"); |
| 1635 | return {}; | 1641 | return {}; |
| 1636 | } | 1642 | } |
| @@ -1681,7 +1687,7 @@ private: | |||
| 1681 | } | 1687 | } |
| 1682 | 1688 | ||
| 1683 | std::string EmitVertex(Operation operation) { | 1689 | std::string EmitVertex(Operation operation) { |
| 1684 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1690 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1685 | "EmitVertex is expected to be used in a geometry shader."); | 1691 | "EmitVertex is expected to be used in a geometry shader."); |
| 1686 | 1692 | ||
| 1687 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1693 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1692,7 +1698,7 @@ private: | |||
| 1692 | } | 1698 | } |
| 1693 | 1699 | ||
| 1694 | std::string EndPrimitive(Operation operation) { | 1700 | std::string EndPrimitive(Operation operation) { |
| 1695 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1701 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1696 | "EndPrimitive is expected to be used in a geometry shader."); | 1702 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1697 | 1703 | ||
| 1698 | code.AddLine("EndPrimitive();"); | 1704 | code.AddLine("EndPrimitive();"); |
| @@ -1714,7 +1720,7 @@ private: | |||
| 1714 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1720 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; |
| 1715 | } | 1721 | } |
| 1716 | 1722 | ||
| 1717 | static constexpr OperationDecompilersArray operation_decompilers = { | 1723 | static constexpr std::array operation_decompilers = { |
| 1718 | &GLSLDecompiler::Assign, | 1724 | &GLSLDecompiler::Assign, |
| 1719 | 1725 | ||
| 1720 | &GLSLDecompiler::Select, | 1726 | &GLSLDecompiler::Select, |
| @@ -1798,8 +1804,7 @@ private: | |||
| 1798 | &GLSLDecompiler::LogicalXor, | 1804 | &GLSLDecompiler::LogicalXor, |
| 1799 | &GLSLDecompiler::LogicalNegate, | 1805 | &GLSLDecompiler::LogicalNegate, |
| 1800 | &GLSLDecompiler::LogicalPick2, | 1806 | &GLSLDecompiler::LogicalPick2, |
| 1801 | &GLSLDecompiler::LogicalAll2, | 1807 | &GLSLDecompiler::LogicalAnd2, |
| 1802 | &GLSLDecompiler::LogicalAny2, | ||
| 1803 | 1808 | ||
| 1804 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | 1809 | &GLSLDecompiler::LogicalLessThan<Type::Float>, |
| 1805 | &GLSLDecompiler::LogicalEqual<Type::Float>, | 1810 | &GLSLDecompiler::LogicalEqual<Type::Float>, |
| @@ -1863,6 +1868,7 @@ private: | |||
| 1863 | &GLSLDecompiler::WorkGroupId<1>, | 1868 | &GLSLDecompiler::WorkGroupId<1>, |
| 1864 | &GLSLDecompiler::WorkGroupId<2>, | 1869 | &GLSLDecompiler::WorkGroupId<2>, |
| 1865 | }; | 1870 | }; |
| 1871 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1866 | 1872 | ||
| 1867 | std::string GetRegister(u32 index) const { | 1873 | std::string GetRegister(u32 index) const { |
| 1868 | return GetDeclarationWithSuffix(index, "gpr"); | 1874 | return GetDeclarationWithSuffix(index, "gpr"); |
| @@ -1927,7 +1933,7 @@ private: | |||
| 1927 | } | 1933 | } |
| 1928 | 1934 | ||
| 1929 | u32 GetNumPhysicalInputAttributes() const { | 1935 | u32 GetNumPhysicalInputAttributes() const { |
| 1930 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 1936 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1931 | } | 1937 | } |
| 1932 | 1938 | ||
| 1933 | u32 GetNumPhysicalAttributes() const { | 1939 | u32 GetNumPhysicalAttributes() const { |
| @@ -1940,7 +1946,7 @@ private: | |||
| 1940 | 1946 | ||
| 1941 | const Device& device; | 1947 | const Device& device; |
| 1942 | const ShaderIR& ir; | 1948 | const ShaderIR& ir; |
| 1943 | const ShaderStage stage; | 1949 | const ProgramType stage; |
| 1944 | const std::string suffix; | 1950 | const std::string suffix; |
| 1945 | const Header header; | 1951 | const Header header; |
| 1946 | 1952 | ||
| @@ -1971,7 +1977,7 @@ std::string GetCommonDeclarations() { | |||
| 1971 | MAX_CONSTBUFFER_ELEMENTS); | 1977 | MAX_CONSTBUFFER_ELEMENTS); |
| 1972 | } | 1978 | } |
| 1973 | 1979 | ||
| 1974 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 1980 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1975 | const std::string& suffix) { | 1981 | const std::string& suffix) { |
| 1976 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 1982 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1977 | decompiler.Decompile(); | 1983 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02586736d..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -85,6 +97,6 @@ struct ShaderEntries { | |||
| 85 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 86 | 98 | ||
| 87 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 88 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 89 | 101 | ||
| 90 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 7893d1e26..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | 22 | ||
| 24 | namespace Core { | 23 | namespace Core { |
| @@ -34,14 +33,11 @@ namespace OpenGL { | |||
| 34 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 35 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 36 | 35 | ||
| 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 38 | |||
| 39 | using ProgramCode = std::vector<u64>; | 36 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | 38 | using TextureBufferUsage = std::bitset<64>; |
| 43 | 39 | ||
| 44 | /// Allocated bindings used by an OpenGL shader program. | 40 | /// Allocated bindings used by an OpenGL shader program |
| 45 | struct BaseBindings { | 41 | struct BaseBindings { |
| 46 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 47 | u32 gmem{}; | 43 | u32 gmem{}; |
| @@ -126,7 +122,7 @@ namespace OpenGL { | |||
| 126 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 128 | public: | 124 | public: |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 130 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 131 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 132 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -141,30 +137,13 @@ public: | |||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 144 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 145 | } | 141 | } |
| 146 | 142 | ||
| 147 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 148 | return program_type; | 144 | return program_type; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 152 | switch (program_type) { | ||
| 153 | case Maxwell::ShaderProgram::VertexA: | ||
| 154 | case Maxwell::ShaderProgram::VertexB: | ||
| 155 | return Maxwell::ShaderStage::Vertex; | ||
| 156 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 157 | return Maxwell::ShaderStage::TesselationControl; | ||
| 158 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 159 | return Maxwell::ShaderStage::TesselationEval; | ||
| 160 | case Maxwell::ShaderProgram::Geometry: | ||
| 161 | return Maxwell::ShaderStage::Geometry; | ||
| 162 | case Maxwell::ShaderProgram::Fragment: | ||
| 163 | return Maxwell::ShaderStage::Fragment; | ||
| 164 | } | ||
| 165 | UNREACHABLE(); | ||
| 166 | } | ||
| 167 | |||
| 168 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 169 | return program_code; | 148 | return program_code; |
| 170 | } | 149 | } |
| @@ -175,7 +154,7 @@ public: | |||
| 175 | 154 | ||
| 176 | private: | 155 | private: |
| 177 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 178 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 179 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 180 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 181 | 160 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f9ee8429e..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 82 | |||
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 84 | ProgramResult program = | 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||
| 86 | out += program.first; | 85 | out += program.first; |
| 87 | 86 | ||
| 88 | out += R"( | 87 | out += R"( |
| @@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 116 | 115 | ||
| 117 | )"; | 116 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 119 | out += program.first; |
| 123 | 120 | ||
| 124 | out += R"( | 121 | out += R"( |
| @@ -130,4 +127,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 127 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| 130 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 131 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 132 | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 134 | out += GetCommonDeclarations(); | ||
| 135 | |||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 138 | out += program.first; | ||
| 139 | |||
| 140 | out += R"( | ||
| 141 | void main() { | ||
| 142 | execute_compute(); | ||
| 143 | } | ||
| 144 | )"; | ||
| 145 | return {std::move(out), std::move(program.second)}; | ||
| 146 | } | ||
| 147 | |||
| 133 | } // namespace OpenGL::GLShader | 148 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 7cbc590f8..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 54 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 56 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 57 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 25 | default: | 23 | return "compute"; |
| 26 | UNREACHABLE(); | ||
| 27 | } | 24 | } |
| 25 | UNIMPLEMENTED(); | ||
| 26 | return "unknown"; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0eae98afe..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -165,6 +165,25 @@ OpenGLState::OpenGLState() { | |||
| 165 | alpha_test.ref = 0.0f; | 165 | alpha_test.ref = 0.0f; |
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | void OpenGLState::SetDefaultViewports() { | ||
| 169 | for (auto& item : viewports) { | ||
| 170 | item.x = 0; | ||
| 171 | item.y = 0; | ||
| 172 | item.width = 0; | ||
| 173 | item.height = 0; | ||
| 174 | item.depth_range_near = 0.0f; | ||
| 175 | item.depth_range_far = 1.0f; | ||
| 176 | item.scissor.enabled = false; | ||
| 177 | item.scissor.x = 0; | ||
| 178 | item.scissor.y = 0; | ||
| 179 | item.scissor.width = 0; | ||
| 180 | item.scissor.height = 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | depth_clamp.far_plane = false; | ||
| 184 | depth_clamp.near_plane = false; | ||
| 185 | } | ||
| 186 | |||
| 168 | void OpenGLState::ApplyDefaultState() { | 187 | void OpenGLState::ApplyDefaultState() { |
| 169 | glEnable(GL_BLEND); | 188 | glEnable(GL_BLEND); |
| 170 | glDisable(GL_FRAMEBUFFER_SRGB); | 189 | glDisable(GL_FRAMEBUFFER_SRGB); |
| @@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 526 | } | 545 | } |
| 527 | } | 546 | } |
| 528 | 547 | ||
| 529 | void OpenGLState::Apply() const { | 548 | void OpenGLState::Apply() { |
| 530 | MICROPROFILE_SCOPE(OpenGL_State); | 549 | MICROPROFILE_SCOPE(OpenGL_State); |
| 531 | ApplyFramebufferState(); | 550 | ApplyFramebufferState(); |
| 532 | ApplyVertexArrayState(); | 551 | ApplyVertexArrayState(); |
| @@ -536,19 +555,31 @@ void OpenGLState::Apply() const { | |||
| 536 | ApplyPointSize(); | 555 | ApplyPointSize(); |
| 537 | ApplyFragmentColorClamp(); | 556 | ApplyFragmentColorClamp(); |
| 538 | ApplyMultisample(); | 557 | ApplyMultisample(); |
| 558 | if (dirty.color_mask) { | ||
| 559 | ApplyColorMask(); | ||
| 560 | dirty.color_mask = false; | ||
| 561 | } | ||
| 539 | ApplyDepthClamp(); | 562 | ApplyDepthClamp(); |
| 540 | ApplyColorMask(); | ||
| 541 | ApplyViewport(); | 563 | ApplyViewport(); |
| 542 | ApplyStencilTest(); | 564 | if (dirty.stencil_state) { |
| 565 | ApplyStencilTest(); | ||
| 566 | dirty.stencil_state = false; | ||
| 567 | } | ||
| 543 | ApplySRgb(); | 568 | ApplySRgb(); |
| 544 | ApplyCulling(); | 569 | ApplyCulling(); |
| 545 | ApplyDepth(); | 570 | ApplyDepth(); |
| 546 | ApplyPrimitiveRestart(); | 571 | ApplyPrimitiveRestart(); |
| 547 | ApplyBlending(); | 572 | if (dirty.blend_state) { |
| 573 | ApplyBlending(); | ||
| 574 | dirty.blend_state = false; | ||
| 575 | } | ||
| 548 | ApplyLogicOp(); | 576 | ApplyLogicOp(); |
| 549 | ApplyTextures(); | 577 | ApplyTextures(); |
| 550 | ApplySamplers(); | 578 | ApplySamplers(); |
| 551 | ApplyPolygonOffset(); | 579 | if (dirty.polygon_offset) { |
| 580 | ApplyPolygonOffset(); | ||
| 581 | dirty.polygon_offset = false; | ||
| 582 | } | ||
| 552 | ApplyAlphaTest(); | 583 | ApplyAlphaTest(); |
| 553 | } | 584 | } |
| 554 | 585 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -195,8 +195,9 @@ public: | |||
| 195 | s_rgb_used = false; | 195 | s_rgb_used = false; |
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | void SetDefaultViewports(); | ||
| 198 | /// Apply this state as the current OpenGL state | 199 | /// Apply this state as the current OpenGL state |
| 199 | void Apply() const; | 200 | void Apply(); |
| 200 | 201 | ||
| 201 | void ApplyFramebufferState() const; | 202 | void ApplyFramebufferState() const; |
| 202 | void ApplyVertexArrayState() const; | 203 | void ApplyVertexArrayState() const; |
| @@ -237,11 +238,41 @@ public: | |||
| 237 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test | 238 | /// Viewport does not affects glClearBuffer so emulate viewport using scissor test |
| 238 | void EmulateViewportWithScissor(); | 239 | void EmulateViewportWithScissor(); |
| 239 | 240 | ||
| 241 | void MarkDirtyBlendState() { | ||
| 242 | dirty.blend_state = true; | ||
| 243 | } | ||
| 244 | |||
| 245 | void MarkDirtyStencilState() { | ||
| 246 | dirty.stencil_state = true; | ||
| 247 | } | ||
| 248 | |||
| 249 | void MarkDirtyPolygonOffset() { | ||
| 250 | dirty.polygon_offset = true; | ||
| 251 | } | ||
| 252 | |||
| 253 | void MarkDirtyColorMask() { | ||
| 254 | dirty.color_mask = true; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AllDirty() { | ||
| 258 | dirty.blend_state = true; | ||
| 259 | dirty.stencil_state = true; | ||
| 260 | dirty.polygon_offset = true; | ||
| 261 | dirty.color_mask = true; | ||
| 262 | } | ||
| 263 | |||
| 240 | private: | 264 | private: |
| 241 | static OpenGLState cur_state; | 265 | static OpenGLState cur_state; |
| 242 | 266 | ||
| 243 | // Workaround for sRGB problems caused by QT not supporting srgb output | 267 | // Workaround for sRGB problems caused by QT not supporting srgb output |
| 244 | static bool s_rgb_used; | 268 | static bool s_rgb_used; |
| 269 | struct { | ||
| 270 | bool blend_state; | ||
| 271 | bool stencil_state; | ||
| 272 | bool viewport_state; | ||
| 273 | bool polygon_offset; | ||
| 274 | bool color_mask; | ||
| 275 | } dirty{}; | ||
| 245 | }; | 276 | }; |
| 246 | 277 | ||
| 247 | } // namespace OpenGL | 278 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6ecb02c45..408332f90 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -484,11 +484,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 484 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 484 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 485 | 485 | ||
| 486 | OpenGLState prev_state{OpenGLState::GetCurState()}; | 486 | OpenGLState prev_state{OpenGLState::GetCurState()}; |
| 487 | SCOPE_EXIT({ prev_state.Apply(); }); | 487 | SCOPE_EXIT({ |
| 488 | prev_state.AllDirty(); | ||
| 489 | prev_state.Apply(); | ||
| 490 | }); | ||
| 488 | 491 | ||
| 489 | OpenGLState state; | 492 | OpenGLState state; |
| 490 | state.draw.read_framebuffer = src_framebuffer.handle; | 493 | state.draw.read_framebuffer = src_framebuffer.handle; |
| 491 | state.draw.draw_framebuffer = dst_framebuffer.handle; | 494 | state.draw.draw_framebuffer = dst_framebuffer.handle; |
| 495 | state.AllDirty(); | ||
| 492 | state.Apply(); | 496 | state.Apply(); |
| 493 | 497 | ||
| 494 | u32 buffers{}; | 498 | u32 buffers{}; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9ecdddb0d..a05cef3b9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 108 | 108 | ||
| 109 | // Maintain the rasterizer's state as a priority | 109 | // Maintain the rasterizer's state as a priority |
| 110 | OpenGLState prev_state = OpenGLState::GetCurState(); | 110 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 111 | state.AllDirty(); | ||
| 111 | state.Apply(); | 112 | state.Apply(); |
| 112 | 113 | ||
| 113 | if (framebuffer) { | 114 | if (framebuffer) { |
| @@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers( | |||
| 140 | system.GetPerfStats().BeginSystemFrame(); | 141 | system.GetPerfStats().BeginSystemFrame(); |
| 141 | 142 | ||
| 142 | // Restore the rasterizer state | 143 | // Restore the rasterizer state |
| 144 | prev_state.AllDirty(); | ||
| 143 | prev_state.Apply(); | 145 | prev_state.Apply(); |
| 144 | } | 146 | } |
| 145 | 147 | ||
| @@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 206 | // Link shaders and get variable locations | 208 | // Link shaders and get variable locations |
| 207 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | 209 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 208 | state.draw.shader_program = shader.handle; | 210 | state.draw.shader_program = shader.handle; |
| 211 | state.AllDirty(); | ||
| 209 | state.Apply(); | 212 | state.Apply(); |
| 210 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 213 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| 211 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); | 214 | uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); |
| @@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 338 | // Workaround brigthness problems in SMO by enabling sRGB in the final output | 341 | // Workaround brigthness problems in SMO by enabling sRGB in the final output |
| 339 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 | 342 | // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 |
| 340 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); | 343 | state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); |
| 344 | state.AllDirty(); | ||
| 341 | state.Apply(); | 345 | state.Apply(); |
| 342 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); | 346 | glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); |
| 343 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 347 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 344 | // Restore default state | 348 | // Restore default state |
| 345 | state.framebuffer_srgb.enabled = false; | 349 | state.framebuffer_srgb.enabled = false; |
| 346 | state.texture_units[0].texture = 0; | 350 | state.texture_units[0].texture = 0; |
| 351 | state.AllDirty(); | ||
| 347 | state.Apply(); | 352 | state.Apply(); |
| 348 | // Clear sRGB state for the next frame | 353 | // Clear sRGB state for the next frame |
| 349 | OpenGLState::ClearsRGBUsed(); | 354 | OpenGLState::ClearsRGBUsed(); |
| @@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 388 | GLuint old_read_fb = state.draw.read_framebuffer; | 393 | GLuint old_read_fb = state.draw.read_framebuffer; |
| 389 | GLuint old_draw_fb = state.draw.draw_framebuffer; | 394 | GLuint old_draw_fb = state.draw.draw_framebuffer; |
| 390 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; | 395 | state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; |
| 396 | state.AllDirty(); | ||
| 391 | state.Apply(); | 397 | state.Apply(); |
| 392 | 398 | ||
| 393 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 399 | Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| @@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() { | |||
| 407 | screenshot_framebuffer.Release(); | 413 | screenshot_framebuffer.Release(); |
| 408 | state.draw.read_framebuffer = old_read_fb; | 414 | state.draw.read_framebuffer = old_read_fb; |
| 409 | state.draw.draw_framebuffer = old_draw_fb; | 415 | state.draw.draw_framebuffer = old_draw_fb; |
| 416 | state.AllDirty(); | ||
| 410 | state.Apply(); | 417 | state.Apply(); |
| 411 | glDeleteRenderbuffers(1, &renderbuffer); | 418 | glDeleteRenderbuffers(1, &renderbuffer); |
| 412 | 419 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 9b2d8e987..d267712c9 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -205,10 +205,6 @@ public: | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | private: | 207 | private: |
| 208 | using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); | ||
| 209 | using OperationDecompilersArray = | ||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||
| 211 | |||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 208 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | 209 | ||
| 214 | void AllocateBindings() { | 210 | void AllocateBindings() { |
| @@ -804,12 +800,7 @@ private: | |||
| 804 | return {}; | 800 | return {}; |
| 805 | } | 801 | } |
| 806 | 802 | ||
| 807 | Id LogicalAll2(Operation operation) { | 803 | Id LogicalAnd2(Operation operation) { |
| 808 | UNIMPLEMENTED(); | ||
| 809 | return {}; | ||
| 810 | } | ||
| 811 | |||
| 812 | Id LogicalAny2(Operation operation) { | ||
| 813 | UNIMPLEMENTED(); | 804 | UNIMPLEMENTED(); |
| 814 | return {}; | 805 | return {}; |
| 815 | } | 806 | } |
| @@ -1206,7 +1197,7 @@ private: | |||
| 1206 | return {}; | 1197 | return {}; |
| 1207 | } | 1198 | } |
| 1208 | 1199 | ||
| 1209 | static constexpr OperationDecompilersArray operation_decompilers = { | 1200 | static constexpr std::array operation_decompilers = { |
| 1210 | &SPIRVDecompiler::Assign, | 1201 | &SPIRVDecompiler::Assign, |
| 1211 | 1202 | ||
| 1212 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | 1203 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, |
| @@ -1291,8 +1282,7 @@ private: | |||
| 1291 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | 1282 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, |
| 1292 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | 1283 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, |
| 1293 | &SPIRVDecompiler::LogicalPick2, | 1284 | &SPIRVDecompiler::LogicalPick2, |
| 1294 | &SPIRVDecompiler::LogicalAll2, | 1285 | &SPIRVDecompiler::LogicalAnd2, |
| 1295 | &SPIRVDecompiler::LogicalAny2, | ||
| 1296 | 1286 | ||
| 1297 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | 1287 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, |
| 1298 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | 1288 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, |
| @@ -1357,6 +1347,7 @@ private: | |||
| 1357 | &SPIRVDecompiler::WorkGroupId<1>, | 1347 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1358 | &SPIRVDecompiler::WorkGroupId<2>, | 1348 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1359 | }; | 1349 | }; |
| 1350 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 1360 | 1351 | ||
| 1361 | const VKDevice& device; | 1352 | const VKDevice& device; |
| 1362 | const ShaderIR& ir; | 1353 | const ShaderIR& ir; |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 29c8895c5..afffd157f 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -46,12 +46,12 @@ void ShaderIR::Decode() { | |||
| 46 | coverage_end = shader_info.end; | 46 | coverage_end = shader_info.end; |
| 47 | if (shader_info.decompilable) { | 47 | if (shader_info.decompilable) { |
| 48 | disable_flow_stack = true; | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = ([this](NodeBlock& nodes, u32 label) { | 49 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { |
| 50 | if (label == exit_branch) { | 50 | if (label == exit_branch) { |
| 51 | return; | 51 | return; |
| 52 | } | 52 | } |
| 53 | basic_blocks.insert({label, nodes}); | 53 | basic_blocks.insert({label, nodes}); |
| 54 | }); | 54 | }; |
| 55 | const auto& blocks = shader_info.blocks; | 55 | const auto& blocks = shader_info.blocks; |
| 56 | NodeBlock current_block; | 56 | NodeBlock current_block; |
| 57 | u32 current_label = exit_branch; | 57 | u32 current_label = exit_branch; |
| @@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | |||
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 106 | const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { | 106 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |
| 107 | Node result = n; | 107 | Node result = n; |
| 108 | if (cond.cc != ConditionCode::T) { | 108 | if (cond.cc != ConditionCode::T) { |
| 109 | result = Conditional(GetConditionCode(cond.cc), {result}); | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| @@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | |||
| 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 118 | } | 118 | } |
| 119 | return result; | 119 | return result; |
| 120 | }); | 120 | }; |
| 121 | if (block.branch.address < 0) { | 121 | if (block.branch.address < 0) { |
| 122 | if (block.branch.kills) { | 122 | if (block.branch.kills) { |
| 123 | Node n = Operation(OperationCode::Discard); | 123 | Node n = Operation(OperationCode::Discard); |
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 4587dbd00..a82a6a15c 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | 23 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 24 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 25 | 25 | ||
| 26 | Node op_b = [&]() { | 26 | Tegra::Shader::PredCondition cond{}; |
| 27 | switch (opcode->get().GetId()) { | 27 | bool h_and{}; |
| 28 | case OpCode::Id::HSETP2_R: | 28 | Node op_b{}; |
| 29 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 29 | switch (opcode->get().GetId()) { |
| 30 | instr.hsetp2.negate_b); | 30 | case OpCode::Id::HSETP2_C: |
| 31 | default: | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | UNREACHABLE(); | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | return Immediate(0); | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), |
| 34 | } | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | }(); | 35 | break; |
| 36 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | 36 | case OpCode::Id::HSETP2_IMM: |
| 37 | 37 | cond = instr.hsetp2.cbuf_and_imm.cond; | |
| 38 | // We can't use the constant predicate as destination. | 38 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 39 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 39 | op_b = UnpackHalfImmediate(instr, true); |
| 40 | 40 | break; | |
| 41 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 41 | case OpCode::Id::HSETP2_R: |
| 42 | cond = instr.hsetp2.reg.cond; | ||
| 43 | h_and = instr.hsetp2.reg.h_and; | ||
| 44 | op_b = | ||
| 45 | UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, | ||
| 46 | instr.hsetp2.reg.negate_b), | ||
| 47 | instr.hsetp2.reg.type_b); | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | UNREACHABLE(); | ||
| 51 | op_b = Immediate(0); | ||
| 52 | } | ||
| 42 | 53 | ||
| 43 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | 54 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |
| 44 | const OperationCode pair_combiner = | 55 | const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); |
| 45 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 46 | |||
| 47 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); | ||
| 48 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 49 | 56 | ||
| 50 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 57 | const auto Write = [&](u64 dest, Node src) { |
| 51 | const Node value = Operation(combiner, first_pred, second_pred); | 58 | SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39)); |
| 52 | SetPredicate(bb, instr.hsetp2.pred3, value); | 59 | }; |
| 53 | 60 | ||
| 54 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 61 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |
| 55 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | 62 | const u64 first = instr.hsetp2.pred0; |
| 56 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | 63 | const u64 second = instr.hsetp2.pred3; |
| 57 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | 64 | if (h_and) { |
| 65 | const Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 66 | Write(first, joined); | ||
| 67 | Write(second, Operation(OperationCode::LogicalNegate, joined)); | ||
| 68 | } else { | ||
| 69 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); | ||
| 70 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); | ||
| 58 | } | 71 | } |
| 59 | 72 | ||
| 60 | return pc; | 73 | return pc; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ab207a33b..ed108bea8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 95 | const Node op_b = | 95 | const Node op_b = |
| 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | 96 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |
| 97 | 97 | ||
| 98 | SetTemporal(bb, 0, op_a); | 98 | SetTemporary(bb, 0, op_a); |
| 99 | SetTemporal(bb, 1, op_b); | 99 | SetTemporary(bb, 1, op_b); |
| 100 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 100 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | 101 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |
| 102 | break; | 102 | break; |
| 103 | } | 103 | } |
| 104 | default: | 104 | default: |
| @@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 136 | } | 136 | } |
| 137 | }(); | 137 | }(); |
| 138 | for (u32 i = 0; i < count; ++i) | 138 | for (u32 i = 0; i < count; ++i) |
| 139 | SetTemporal(bb, i, GetLmem(i * 4)); | 139 | SetTemporary(bb, i, GetLmem(i * 4)); |
| 140 | for (u32 i = 0; i < count; ++i) | 140 | for (u32 i = 0; i < count; ++i) |
| 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 141 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 142 | break; | 142 | break; |
| 143 | } | 143 | } |
| 144 | default: | 144 | default: |
| @@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 172 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 173 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 174 | 174 | ||
| 175 | SetTemporal(bb, i, gmem); | 175 | SetTemporary(bb, i, gmem); |
| 176 | } | 176 | } |
| 177 | for (u32 i = 0; i < count; ++i) { | 177 | for (u32 i = 0; i < count; ++i) { |
| 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 178 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 179 | } | 179 | } |
| 180 | break; | 180 | break; |
| 181 | } | 181 | } |
| @@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 253 | TrackAndGetGlobalMemory(bb, instr, true); | 253 | TrackAndGetGlobalMemory(bb, instr, true); |
| 254 | 254 | ||
| 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 255 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} |
| 256 | SetTemporal(bb, 0, real_address_base); | 256 | SetTemporary(bb, 0, real_address_base); |
| 257 | 257 | ||
| 258 | const u32 count = GetUniformTypeElementsCount(type); | 258 | const u32 count = GetUniformTypeElementsCount(type); |
| 259 | for (u32 i = 0; i < count; ++i) { | 259 | for (u32 i = 0; i < count; ++i) { |
| 260 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | 260 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); |
| 261 | } | 261 | } |
| 262 | for (u32 i = 0; i < count; ++i) { | 262 | for (u32 i = 0; i < count; ++i) { |
| 263 | const Node it_offset = Immediate(i * 4); | 263 | const Node it_offset = Immediate(i * 4); |
| @@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 265 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 266 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 267 | 267 | ||
| 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | 268 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); |
| 269 | } | 269 | } |
| 270 | break; | 270 | break; |
| 271 | } | 271 | } |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index e1ee5c190..0b934a069 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 181 | const Node value = | 181 | const Node value = |
| 182 | Operation(OperationCode::TextureQueryDimensions, meta, | 182 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 183 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| 184 | SetTemporal(bb, indexer++, value); | 184 | SetTemporary(bb, indexer++, value); |
| 185 | } | 185 | } |
| 186 | for (u32 i = 0; i < indexer; ++i) { | 186 | for (u32 i = 0; i < indexer; ++i) { |
| 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 187 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 188 | } | 188 | } |
| 189 | break; | 189 | break; |
| 190 | } | 190 | } |
| @@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 238 | auto params = coords; | 238 | auto params = coords; |
| 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | 239 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 240 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 241 | SetTemporal(bb, indexer++, value); | 241 | SetTemporary(bb, indexer++, value); |
| 242 | } | 242 | } |
| 243 | for (u32 i = 0; i < indexer; ++i) { | 243 | for (u32 i = 0; i < indexer; ++i) { |
| 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 244 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| @@ -336,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 336 | // Skip disabled components | 336 | // Skip disabled components |
| 337 | continue; | 337 | continue; |
| 338 | } | 338 | } |
| 339 | SetTemporal(bb, dest_elem++, components[elem]); | 339 | SetTemporary(bb, dest_elem++, components[elem]); |
| 340 | } | 340 | } |
| 341 | // After writing values in temporals, move them to the real registers | 341 | // After writing values in temporals, move them to the real registers |
| 342 | for (u32 i = 0; i < dest_elem; ++i) { | 342 | for (u32 i = 0; i < dest_elem; ++i) { |
| 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 343 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 344 | } | 344 | } |
| 345 | } | 345 | } |
| 346 | 346 | ||
| @@ -353,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | |||
| 353 | for (u32 component = 0; component < 4; ++component) { | 353 | for (u32 component = 0; component < 4; ++component) { |
| 354 | if (!instr.texs.IsComponentEnabled(component)) | 354 | if (!instr.texs.IsComponentEnabled(component)) |
| 355 | continue; | 355 | continue; |
| 356 | SetTemporal(bb, dest_elem++, components[component]); | 356 | SetTemporary(bb, dest_elem++, components[component]); |
| 357 | } | 357 | } |
| 358 | 358 | ||
| 359 | for (u32 i = 0; i < dest_elem; ++i) { | 359 | for (u32 i = 0; i < dest_elem; ++i) { |
| 360 | if (i < 2) { | 360 | if (i < 2) { |
| 361 | // Write the first two swizzle components to gpr0 and gpr0+1 | 361 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | 362 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |
| 363 | } else { | 363 | } else { |
| 364 | ASSERT(instr.texs.HasTwoDestinations()); | 364 | ASSERT(instr.texs.HasTwoDestinations()); |
| 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 365 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | 366 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |
| 367 | } | 367 | } |
| 368 | } | 368 | } |
| 369 | } | 369 | } |
| @@ -391,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 391 | return; | 391 | return; |
| 392 | } | 392 | } |
| 393 | 393 | ||
| 394 | SetTemporal(bb, 0, first_value); | 394 | SetTemporary(bb, 0, first_value); |
| 395 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | 395 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |
| 396 | 396 | ||
| 397 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | 397 | SetRegister(bb, instr.gpr0, GetTemporary(0)); |
| 398 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | 398 | SetRegister(bb, instr.gpr28, GetTemporary(1)); |
| 399 | } | 399 | } |
| 400 | 400 | ||
| 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 401 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 93dee77d1..206961909 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 73 | if (is_psl) { | 73 | if (is_psl) { |
| 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); | 74 | product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); |
| 75 | } | 75 | } |
| 76 | SetTemporal(bb, 0, product); | 76 | SetTemporary(bb, 0, product); |
| 77 | product = GetTemporal(0); | 77 | product = GetTemporary(0); |
| 78 | 78 | ||
| 79 | const Node original_c = op_c; | 79 | const Node original_c = op_c; |
| 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | 80 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error |
| @@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | |||
| 98 | } | 98 | } |
| 99 | }(); | 99 | }(); |
| 100 | 100 | ||
| 101 | SetTemporal(bb, 1, op_c); | 101 | SetTemporary(bb, 1, op_c); |
| 102 | op_c = GetTemporal(1); | 102 | op_c = GetTemporary(1); |
| 103 | 103 | ||
| 104 | // TODO(Rodrigo): Use an appropiate sign for this operation | 104 | // TODO(Rodrigo): Use an appropiate sign for this operation |
| 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); | 105 | Node sum = Operation(OperationCode::IAdd, product, op_c); |
| 106 | SetTemporal(bb, 2, sum); | 106 | SetTemporary(bb, 2, sum); |
| 107 | sum = GetTemporal(2); | 107 | sum = GetTemporary(2); |
| 108 | if (is_merge) { | 108 | if (is_merge) { |
| 109 | const Node a = BitfieldExtract(sum, 0, 16); | 109 | const Node a = BitfieldExtract(sum, 0, 16); |
| 110 | const Node b = | 110 | const Node b = |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 7427ed896..715184d67 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -101,8 +101,7 @@ enum class OperationCode { | |||
| 101 | LogicalXor, /// (bool a, bool b) -> bool | 101 | LogicalXor, /// (bool a, bool b) -> bool |
| 102 | LogicalNegate, /// (bool a) -> bool | 102 | LogicalNegate, /// (bool a) -> bool |
| 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool | 103 | LogicalPick2, /// (bool2 pair, uint index) -> bool |
| 104 | LogicalAll2, /// (bool2 a) -> bool | 104 | LogicalAnd2, /// (bool2 a) -> bool |
| 105 | LogicalAny2, /// (bool2 a) -> bool | ||
| 106 | 105 | ||
| 107 | LogicalFLessThan, /// (float a, float b) -> bool | 106 | LogicalFLessThan, /// (float a, float b) -> bool |
| 108 | LogicalFEqual, /// (float a, float b) -> bool | 107 | LogicalFEqual, /// (float a, float b) -> bool |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 6fccbbba3..b3dcd291c 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | 14 | Node Conditional(Node condition, std::vector<Node> code) { |
| 15 | return MakeNode<ConditionalNode>(condition, std::move(code)); | 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | Node Comment(std::string text) { | 18 | Node Comment(std::string text) { |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 78bd1cf1e..5e91fe129 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -61,7 +61,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = [&]() { | 64 | Node final_offset = [&] { |
| 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow | 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow |
| 66 | // tracking LDC calls. | 66 | // tracking LDC calls. |
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { |
| @@ -69,9 +69,9 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 69 | return Immediate(offset); | 69 | return Immediate(offset); |
| 70 | } | 70 | } |
| 71 | } | 71 | } |
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 72 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); |
| 73 | }(); | 73 | }(); |
| 74 | return MakeNode<CbufNode>(index, final_offset); | 74 | return MakeNode<CbufNode>(index, std::move(final_offset)); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | 77 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |
| @@ -89,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) { | |||
| 89 | 89 | ||
| 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | 90 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 91 | used_input_attributes.emplace(index); | 91 | used_input_attributes.emplace(index); |
| 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 92 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | 95 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |
| @@ -122,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 122 | } | 122 | } |
| 123 | used_output_attributes.insert(index); | 123 | used_output_attributes.insert(index); |
| 124 | 124 | ||
| 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); | 125 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | 128 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { |
| @@ -134,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | |||
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | Node ShaderIR::GetLocalMemory(Node address) { | 136 | Node ShaderIR::GetLocalMemory(Node address) { |
| 137 | return MakeNode<LmemNode>(address); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | Node ShaderIR::GetTemporal(u32 id) { | 140 | Node ShaderIR::GetTemporary(u32 id) { |
| 141 | return GetRegister(Register::ZeroIndex + 1 + id); | 141 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 142 | } | 142 | } |
| 143 | 143 | ||
| 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 144 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 145 | if (absolute) { | 145 | if (absolute) { |
| 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 146 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |
| 147 | } | 147 | } |
| 148 | if (negate) { | 148 | if (negate) { |
| 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | 149 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |
| 150 | } | 150 | } |
| 151 | return value; | 151 | return value; |
| 152 | } | 152 | } |
| @@ -155,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | |||
| 155 | if (!saturate) { | 155 | if (!saturate) { |
| 156 | return value; | 156 | return value; |
| 157 | } | 157 | } |
| 158 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 158 | |
| 159 | const Node positive_one = Immediate(1.0f); | 159 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 160 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | 160 | Node positive_one = Immediate(1.0f); |
| 161 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 162 | std::move(positive_one)); | ||
| 161 | } | 163 | } |
| 162 | 164 | ||
| 163 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | 165 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |
| 164 | switch (size) { | 166 | switch (size) { |
| 165 | case Register::Size::Byte: | 167 | case Register::Size::Byte: |
| 166 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 168 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 167 | Immediate(24)); | 169 | std::move(value), Immediate(24)); |
| 168 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 170 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 169 | Immediate(24)); | 171 | std::move(value), Immediate(24)); |
| 170 | return value; | 172 | return value; |
| 171 | case Register::Size::Short: | 173 | case Register::Size::Short: |
| 172 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | 174 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |
| 173 | Immediate(16)); | 175 | std::move(value), Immediate(16)); |
| 174 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | 176 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |
| 175 | Immediate(16)); | 177 | std::move(value), Immediate(16)); |
| 176 | case Register::Size::Word: | 178 | case Register::Size::Word: |
| 177 | // Default - do nothing | 179 | // Default - do nothing |
| 178 | return value; | 180 | return value; |
| @@ -188,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b | |||
| 188 | return value; | 190 | return value; |
| 189 | } | 191 | } |
| 190 | if (absolute) { | 192 | if (absolute) { |
| 191 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | 193 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |
| 192 | } | 194 | } |
| 193 | if (negate) { | 195 | if (negate) { |
| 194 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | 196 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |
| 195 | } | 197 | } |
| 196 | return value; | 198 | return value; |
| 197 | } | 199 | } |
| 198 | 200 | ||
| 199 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | 201 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |
| 200 | const Node value = Immediate(instr.half_imm.PackImmediates()); | 202 | Node value = Immediate(instr.half_imm.PackImmediates()); |
| 201 | if (!has_negation) { | 203 | if (!has_negation) { |
| 202 | return value; | 204 | return value; |
| 203 | } | 205 | } |
| 204 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 205 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 206 | 206 | ||
| 207 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); | 207 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 208 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 209 | |||
| 210 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 211 | std::move(second_negate)); | ||
| 208 | } | 212 | } |
| 209 | 213 | ||
| 210 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | 214 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |
| 211 | return Operation(OperationCode::HUnpack, type, value); | 215 | return Operation(OperationCode::HUnpack, type, std::move(value)); |
| 212 | } | 216 | } |
| 213 | 217 | ||
| 214 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 218 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -216,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 216 | case Tegra::Shader::HalfMerge::H0_H1: | 220 | case Tegra::Shader::HalfMerge::H0_H1: |
| 217 | return src; | 221 | return src; |
| 218 | case Tegra::Shader::HalfMerge::F32: | 222 | case Tegra::Shader::HalfMerge::F32: |
| 219 | return Operation(OperationCode::HMergeF32, src); | 223 | return Operation(OperationCode::HMergeF32, std::move(src)); |
| 220 | case Tegra::Shader::HalfMerge::Mrg_H0: | 224 | case Tegra::Shader::HalfMerge::Mrg_H0: |
| 221 | return Operation(OperationCode::HMergeH0, dest, src); | 225 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |
| 222 | case Tegra::Shader::HalfMerge::Mrg_H1: | 226 | case Tegra::Shader::HalfMerge::Mrg_H1: |
| 223 | return Operation(OperationCode::HMergeH1, dest, src); | 227 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |
| 224 | } | 228 | } |
| 225 | UNREACHABLE(); | 229 | UNREACHABLE(); |
| 226 | return src; | 230 | return src; |
| @@ -228,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 228 | 232 | ||
| 229 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 233 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 230 | if (absolute) { | 234 | if (absolute) { |
| 231 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); | 235 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |
| 232 | } | 236 | } |
| 233 | if (negate) { | 237 | if (negate) { |
| 234 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), | 238 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |
| 235 | GetPredicate(true)); | 239 | GetPredicate(true)); |
| 236 | } | 240 | } |
| 237 | return value; | 241 | return value; |
| @@ -241,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 241 | if (!saturate) { | 245 | if (!saturate) { |
| 242 | return value; | 246 | return value; |
| 243 | } | 247 | } |
| 244 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | 248 | |
| 245 | const Node positive_one = Immediate(1.0f); | 249 | Node positive_zero = Immediate(std::copysignf(0, 1)); |
| 246 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | 250 | Node positive_one = Immediate(1.0f); |
| 251 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 252 | std::move(positive_one)); | ||
| 247 | } | 253 | } |
| 248 | 254 | ||
| 249 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 255 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| @@ -271,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 271 | condition == PredCondition::LessEqualWithNan || | 277 | condition == PredCondition::LessEqualWithNan || |
| 272 | condition == PredCondition::GreaterThanWithNan || | 278 | condition == PredCondition::GreaterThanWithNan || |
| 273 | condition == PredCondition::GreaterEqualWithNan) { | 279 | condition == PredCondition::GreaterEqualWithNan) { |
| 274 | |||
| 275 | predicate = Operation(OperationCode::LogicalOr, predicate, | 280 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| 276 | Operation(OperationCode::LogicalFIsNan, op_a)); | 281 | Operation(OperationCode::LogicalFIsNan, op_a)); |
| 277 | predicate = Operation(OperationCode::LogicalOr, predicate, | 282 | predicate = Operation(OperationCode::LogicalOr, predicate, |
| @@ -300,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 300 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 305 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 301 | "Unknown predicate comparison operation"); | 306 | "Unknown predicate comparison operation"); |
| 302 | 307 | ||
| 303 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | 308 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| 309 | std::move(op_b)); | ||
| 304 | 310 | ||
| 305 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | 311 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || |
| 306 | condition == PredCondition::NotEqualWithNan || | 312 | condition == PredCondition::NotEqualWithNan || |
| @@ -330,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition | |||
| 330 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 336 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 331 | "Unknown predicate comparison operation"); | 337 | "Unknown predicate comparison operation"); |
| 332 | 338 | ||
| 333 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 339 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 334 | |||
| 335 | return predicate; | ||
| 336 | } | 340 | } |
| 337 | 341 | ||
| 338 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 342 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| @@ -358,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | |||
| 358 | } | 362 | } |
| 359 | 363 | ||
| 360 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | 364 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |
| 361 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | 365 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |
| 362 | } | 366 | } |
| 363 | 367 | ||
| 364 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | 368 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |
| 365 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |
| 366 | } | 370 | } |
| 367 | 371 | ||
| 368 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | 372 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |
| 369 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | 373 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |
| 370 | } | 374 | } |
| 371 | 375 | ||
| 372 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | 376 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |
| 373 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 377 | bb.push_back( |
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 374 | } | 379 | } |
| 375 | 380 | ||
| 376 | void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { | 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 377 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 378 | } | 383 | } |
| 379 | 384 | ||
| 380 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | 385 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |
| 381 | if (!sets_cc) { | 386 | if (!sets_cc) { |
| 382 | return; | 387 | return; |
| 383 | } | 388 | } |
| 384 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | 389 | Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); |
| 385 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 390 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 386 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 391 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 387 | } | 392 | } |
| 388 | 393 | ||
| @@ -390,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_ | |||
| 390 | if (!sets_cc) { | 395 | if (!sets_cc) { |
| 391 | return; | 396 | return; |
| 392 | } | 397 | } |
| 393 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | 398 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |
| 394 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | 399 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |
| 395 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | 400 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |
| 396 | } | 401 | } |
| 397 | 402 | ||
| 398 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 403 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 399 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 404 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |
| 400 | Immediate(bits)); | 405 | Immediate(offset), Immediate(bits)); |
| 401 | } | 406 | } |
| 402 | 407 | ||
| 403 | } // namespace VideoCommon::Shader | 408 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 126c78136..59a083d90 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -5,13 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 9 | #include <map> | 8 | #include <map> |
| 10 | #include <optional> | 9 | #include <optional> |
| 11 | #include <set> | 10 | #include <set> |
| 12 | #include <string> | ||
| 13 | #include <tuple> | 11 | #include <tuple> |
| 14 | #include <variant> | ||
| 15 | #include <vector> | 12 | #include <vector> |
| 16 | 13 | ||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -210,8 +207,8 @@ private: | |||
| 210 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 207 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 211 | /// Generates a node representing a local memory address | 208 | /// Generates a node representing a local memory address |
| 212 | Node GetLocalMemory(Node address); | 209 | Node GetLocalMemory(Node address); |
| 213 | /// Generates a temporal, internally it uses a post-RZ register | 210 | /// Generates a temporary, internally it uses a post-RZ register |
| 214 | Node GetTemporal(u32 id); | 211 | Node GetTemporary(u32 id); |
| 215 | 212 | ||
| 216 | /// Sets a register. src value must be a number-evaluated node. | 213 | /// Sets a register. src value must be a number-evaluated node. |
| 217 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | 214 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -221,8 +218,8 @@ private: | |||
| 221 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 218 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 222 | /// Sets a local memory address. address and value must be a number-evaluated node | 219 | /// Sets a local memory address. address and value must be a number-evaluated node |
| 223 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 220 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 224 | /// Sets a temporal. Internally it uses a post-RZ register | 221 | /// Sets a temporary. Internally it uses a post-RZ register |
| 225 | void SetTemporal(NodeBlock& bb, u32 id, Node value); | 222 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 226 | 223 | ||
| 227 | /// Sets internal flags from a float | 224 | /// Sets internal flags from a float |
| 228 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | 225 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index dc132a4a3..a53e02253 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -15,18 +15,20 @@ namespace { | |||
| 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 16 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 17 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 18 | const Node node = code.at(cursor); | 18 | Node node = code.at(cursor); |
| 19 | |||
| 19 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 20 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 20 | if (operation->GetCode() == operation_code) { | 21 | if (operation->GetCode() == operation_code) { |
| 21 | return {node, cursor}; | 22 | return {std::move(node), cursor}; |
| 22 | } | 23 | } |
| 23 | } | 24 | } |
| 25 | |||
| 24 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 26 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 25 | const auto& conditional_code = conditional->GetCode(); | 27 | const auto& conditional_code = conditional->GetCode(); |
| 26 | const auto [found, internal_cursor] = FindOperation( | 28 | auto [found, internal_cursor] = FindOperation( |
| 27 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | 29 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |
| 28 | if (found) { | 30 | if (found) { |
| 29 | return {found, cursor}; | 31 | return {std::move(found), cursor}; |
| 30 | } | 32 | } |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7f9623c62..a3a3770a7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -116,10 +116,10 @@ public: | |||
| 116 | std::lock_guard lock{mutex}; | 116 | std::lock_guard lock{mutex}; |
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | 117 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 118 | 118 | ||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | 119 | if (!maxwell3d.dirty.depth_buffer) { |
| 120 | return depth_buffer.view; | 120 | return depth_buffer.view; |
| 121 | } | 121 | } |
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | 122 | maxwell3d.dirty.depth_buffer = false; |
| 123 | 123 | ||
| 124 | const auto& regs{maxwell3d.regs}; | 124 | const auto& regs{maxwell3d.regs}; |
| 125 | const auto gpu_addr{regs.zeta.Address()}; | 125 | const auto gpu_addr{regs.zeta.Address()}; |
| @@ -145,10 +145,10 @@ public: | |||
| 145 | std::lock_guard lock{mutex}; | 145 | std::lock_guard lock{mutex}; |
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | 147 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | 148 | if (!maxwell3d.dirty.render_target[index]) { |
| 149 | return render_targets[index].view; | 149 | return render_targets[index].view; |
| 150 | } | 150 | } |
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | 151 | maxwell3d.dirty.render_target[index] = false; |
| 152 | 152 | ||
| 153 | const auto& regs{maxwell3d.regs}; | 153 | const auto& regs{maxwell3d.regs}; |
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
| @@ -274,10 +274,11 @@ protected: | |||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | 274 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 275 | const u32 index = surface->GetRenderTarget(); | 275 | const u32 index = surface->GetRenderTarget(); |
| 276 | if (index == DEPTH_RT) { | 276 | if (index == DEPTH_RT) { |
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | 277 | maxwell3d.dirty.depth_buffer = true; |
| 278 | } else { | 278 | } else { |
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | 279 | maxwell3d.dirty.render_target[index] = true; |
| 280 | } | 280 | } |
| 281 | maxwell3d.dirty.render_settings = true; | ||
| 281 | } | 282 | } |
| 282 | 283 | ||
| 283 | void Register(TSurface surface) { | 284 | void Register(TSurface surface) { |