diff options
Diffstat (limited to 'src')
19 files changed, 369 insertions, 44 deletions
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h index bec065543..59dc11c22 100644 --- a/src/core/hle/kernel/resource_limit.h +++ b/src/core/hle/kernel/resource_limit.h | |||
| @@ -14,7 +14,7 @@ namespace Kernel { | |||
| 14 | 14 | ||
| 15 | class KernelCore; | 15 | class KernelCore; |
| 16 | 16 | ||
| 17 | enum class ResourceType { | 17 | enum class ResourceType : u32 { |
| 18 | PhysicalMemory, | 18 | PhysicalMemory, |
| 19 | Threads, | 19 | Threads, |
| 20 | Events, | 20 | Events, |
| @@ -25,6 +25,10 @@ enum class ResourceType { | |||
| 25 | ResourceTypeCount | 25 | ResourceTypeCount |
| 26 | }; | 26 | }; |
| 27 | 27 | ||
| 28 | constexpr bool IsValidResourceType(ResourceType type) { | ||
| 29 | return type < ResourceType::ResourceTypeCount; | ||
| 30 | } | ||
| 31 | |||
| 28 | class ResourceLimit final : public Object { | 32 | class ResourceLimit final : public Object { |
| 29 | public: | 33 | public: |
| 30 | /** | 34 | /** |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index f287f7c97..1f19d5576 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -105,6 +105,38 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add | |||
| 105 | 105 | ||
| 106 | return RESULT_SUCCESS; | 106 | return RESULT_SUCCESS; |
| 107 | } | 107 | } |
| 108 | |||
/// Selects which value of a resource limit RetrieveResourceLimitValue reads.
enum class ResourceLimitValueType {
    /// Read the value returned by GetCurrentResourceValue().
    CurrentValue = 0,
    /// Read the value returned by GetMaxResourceValue().
    LimitValue = 1,
};
| 113 | |||
| 114 | ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type, | ||
| 115 | ResourceLimitValueType value_type) { | ||
| 116 | const auto type = static_cast<ResourceType>(resource_type); | ||
| 117 | if (!IsValidResourceType(type)) { | ||
| 118 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); | ||
| 119 | return ERR_INVALID_ENUM_VALUE; | ||
| 120 | } | ||
| 121 | |||
| 122 | const auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 123 | const auto* const current_process = kernel.CurrentProcess(); | ||
| 124 | ASSERT(current_process != nullptr); | ||
| 125 | |||
| 126 | const auto resource_limit_object = | ||
| 127 | current_process->GetHandleTable().Get<ResourceLimit>(resource_limit); | ||
| 128 | if (!resource_limit_object) { | ||
| 129 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", | ||
| 130 | resource_limit); | ||
| 131 | return ERR_INVALID_HANDLE; | ||
| 132 | } | ||
| 133 | |||
| 134 | if (value_type == ResourceLimitValueType::CurrentValue) { | ||
| 135 | return MakeResult(resource_limit_object->GetCurrentResourceValue(type)); | ||
| 136 | } | ||
| 137 | |||
| 138 | return MakeResult(resource_limit_object->GetMaxResourceValue(type)); | ||
| 139 | } | ||
| 108 | } // Anonymous namespace | 140 | } // Anonymous namespace |
| 109 | 141 | ||
| 110 | /// Set the process heap to a given Size. It can both extend and shrink the heap. | 142 | /// Set the process heap to a given Size. It can both extend and shrink the heap. |
| @@ -1346,6 +1378,87 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { | |||
| 1346 | return RESULT_SUCCESS; | 1378 | return RESULT_SUCCESS; |
| 1347 | } | 1379 | } |
| 1348 | 1380 | ||
| 1381 | static ResultCode CreateResourceLimit(Handle* out_handle) { | ||
| 1382 | LOG_DEBUG(Kernel_SVC, "called"); | ||
| 1383 | |||
| 1384 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 1385 | auto resource_limit = ResourceLimit::Create(kernel); | ||
| 1386 | |||
| 1387 | auto* const current_process = kernel.CurrentProcess(); | ||
| 1388 | ASSERT(current_process != nullptr); | ||
| 1389 | |||
| 1390 | const auto handle = current_process->GetHandleTable().Create(std::move(resource_limit)); | ||
| 1391 | if (handle.Failed()) { | ||
| 1392 | return handle.Code(); | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | *out_handle = *handle; | ||
| 1396 | return RESULT_SUCCESS; | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit, | ||
| 1400 | u32 resource_type) { | ||
| 1401 | LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); | ||
| 1402 | |||
| 1403 | const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type, | ||
| 1404 | ResourceLimitValueType::LimitValue); | ||
| 1405 | if (limit_value.Failed()) { | ||
| 1406 | return limit_value.Code(); | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | *out_value = static_cast<u64>(*limit_value); | ||
| 1410 | return RESULT_SUCCESS; | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit, | ||
| 1414 | u32 resource_type) { | ||
| 1415 | LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); | ||
| 1416 | |||
| 1417 | const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type, | ||
| 1418 | ResourceLimitValueType::CurrentValue); | ||
| 1419 | if (current_value.Failed()) { | ||
| 1420 | return current_value.Code(); | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | *out_value = static_cast<u64>(*current_value); | ||
| 1424 | return RESULT_SUCCESS; | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) { | ||
| 1428 | LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit, | ||
| 1429 | resource_type, value); | ||
| 1430 | |||
| 1431 | const auto type = static_cast<ResourceType>(resource_type); | ||
| 1432 | if (!IsValidResourceType(type)) { | ||
| 1433 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); | ||
| 1434 | return ERR_INVALID_ENUM_VALUE; | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 1438 | auto* const current_process = kernel.CurrentProcess(); | ||
| 1439 | ASSERT(current_process != nullptr); | ||
| 1440 | |||
| 1441 | auto resource_limit_object = | ||
| 1442 | current_process->GetHandleTable().Get<ResourceLimit>(resource_limit); | ||
| 1443 | if (!resource_limit_object) { | ||
| 1444 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", | ||
| 1445 | resource_limit); | ||
| 1446 | return ERR_INVALID_HANDLE; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); | ||
| 1450 | if (set_result.IsError()) { | ||
| 1451 | LOG_ERROR( | ||
| 1452 | Kernel_SVC, | ||
| 1453 | "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", | ||
| 1454 | resource_limit_object->GetMaxResourceValue(type), resource_type, | ||
| 1455 | resource_limit_object->GetCurrentResourceValue(type)); | ||
| 1456 | return set_result; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | return RESULT_SUCCESS; | ||
| 1460 | } | ||
| 1461 | |||
| 1349 | namespace { | 1462 | namespace { |
| 1350 | struct FunctionDef { | 1463 | struct FunctionDef { |
| 1351 | using Func = void(); | 1464 | using Func = void(); |
| @@ -1405,8 +1518,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 1405 | {0x2D, nullptr, "UnmapPhysicalMemory"}, | 1518 | {0x2D, nullptr, "UnmapPhysicalMemory"}, |
| 1406 | {0x2E, nullptr, "GetFutureThreadInfo"}, | 1519 | {0x2E, nullptr, "GetFutureThreadInfo"}, |
| 1407 | {0x2F, nullptr, "GetLastThreadInfo"}, | 1520 | {0x2F, nullptr, "GetLastThreadInfo"}, |
| 1408 | {0x30, nullptr, "GetResourceLimitLimitValue"}, | 1521 | {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, |
| 1409 | {0x31, nullptr, "GetResourceLimitCurrentValue"}, | 1522 | {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, |
| 1410 | {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, | 1523 | {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, |
| 1411 | {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, | 1524 | {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, |
| 1412 | {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, | 1525 | {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, |
| @@ -1482,8 +1595,8 @@ static const FunctionDef SVC_Table[] = { | |||
| 1482 | {0x7A, nullptr, "StartProcess"}, | 1595 | {0x7A, nullptr, "StartProcess"}, |
| 1483 | {0x7B, nullptr, "TerminateProcess"}, | 1596 | {0x7B, nullptr, "TerminateProcess"}, |
| 1484 | {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, | 1597 | {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, |
| 1485 | {0x7D, nullptr, "CreateResourceLimit"}, | 1598 | {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, |
| 1486 | {0x7E, nullptr, "SetResourceLimitLimitValue"}, | 1599 | {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, |
| 1487 | {0x7F, nullptr, "CallSecureMonitor"}, | 1600 | {0x7F, nullptr, "CallSecureMonitor"}, |
| 1488 | }; | 1601 | }; |
| 1489 | 1602 | ||
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 233a99fb0..fa1116624 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h | |||
| @@ -43,6 +43,14 @@ void SvcWrap() { | |||
| 43 | FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); | 43 | FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | template <ResultCode func(u32*)> | ||
| 47 | void SvcWrap() { | ||
| 48 | u32 param = 0; | ||
| 49 | const u32 retval = func(¶m).raw; | ||
| 50 | Core::CurrentArmInterface().SetReg(1, param); | ||
| 51 | FuncReturn(retval); | ||
| 52 | } | ||
| 53 | |||
| 46 | template <ResultCode func(u32*, u32)> | 54 | template <ResultCode func(u32*, u32)> |
| 47 | void SvcWrap() { | 55 | void SvcWrap() { |
| 48 | u32 param_1 = 0; | 56 | u32 param_1 = 0; |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 28e8c13aa..8b9c548cc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB | |||
| 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { | 34 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { |
| 35 | MICROPROFILE_SCOPE(ProcessCommandLists); | 35 | MICROPROFILE_SCOPE(ProcessCommandLists); |
| 36 | 36 | ||
| 37 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 38 | maxwell_3d->dirty_flags.OnMemoryWrite(); | ||
| 39 | |||
| 37 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { | 40 | auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { |
| 38 | LOG_TRACE(HW_GPU, | 41 | LOG_TRACE(HW_GPU, |
| 39 | "Processing method {:08X} on subchannel {} value " | 42 | "Processing method {:08X} on subchannel {} value " |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 74e44c7fe..8d0700d13 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 6 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| 9 | 11 | ||
| @@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 47 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); | 49 | u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); |
| 48 | 50 | ||
| 49 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { | 51 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { |
| 52 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 53 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 54 | |||
| 50 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); | 55 | rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); |
| 51 | // We have to invalidate the destination region to evict any outdated surfaces from the | 56 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 52 | // cache. We do this before actually writing the new data because the destination address | 57 | // cache. We do this before actually writing the new data because the destination address |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 585290d9f..2adbc9eaf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "core/core.h" | ||
| 6 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 8 | #include "video_core/engines/kepler_memory.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 9 | 11 | ||
| 10 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| @@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 47 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); | 49 | rasterizer.InvalidateRegion(dest_address, sizeof(u32)); |
| 48 | 50 | ||
| 49 | Memory::Write32(dest_address, data); | 51 | Memory::Write32(dest_address, data); |
| 52 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 50 | 53 | ||
| 51 | state.write_offset++; | 54 | state.write_offset++; |
| 52 | } | 55 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2bc534be3..f0a5470b9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 135 | 135 | ||
| 136 | if (regs.reg_array[method] != value) { | 136 | if (regs.reg_array[method] != value) { |
| 137 | regs.reg_array[method] = value; | 137 | regs.reg_array[method] = value; |
| 138 | // Vertex format | ||
| 138 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | 139 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && |
| 139 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | 140 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { |
| 140 | dirty_flags.vertex_attrib_format = true; | 141 | dirty_flags.vertex_attrib_format = true; |
| 141 | } | 142 | } |
| 143 | |||
| 144 | // Vertex buffer | ||
| 145 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && | ||
| 146 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { | ||
| 147 | dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | ||
| 148 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | ||
| 149 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { | ||
| 150 | dirty_flags.vertex_array |= | ||
| 151 | 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | ||
| 152 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | ||
| 153 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { | ||
| 154 | dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 155 | } | ||
| 142 | } | 156 | } |
| 143 | 157 | ||
| 144 | switch (method) { | 158 | switch (method) { |
| @@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 270 | query_result.timestamp = CoreTiming::GetTicks(); | 284 | query_result.timestamp = CoreTiming::GetTicks(); |
| 271 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 285 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); |
| 272 | } | 286 | } |
| 287 | dirty_flags.OnMemoryWrite(); | ||
| 273 | break; | 288 | break; |
| 274 | } | 289 | } |
| 275 | default: | 290 | default: |
| @@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 346 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 361 | memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |
| 347 | 362 | ||
| 348 | Memory::Write32(*address, value); | 363 | Memory::Write32(*address, value); |
| 364 | dirty_flags.OnMemoryWrite(); | ||
| 349 | 365 | ||
| 350 | // Increment the current buffer position. | 366 | // Increment the current buffer position. |
| 351 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; | 367 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4f137e693..9324d9710 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -590,10 +590,18 @@ public: | |||
| 590 | 590 | ||
| 591 | float clear_color[4]; | 591 | float clear_color[4]; |
| 592 | float clear_depth; | 592 | float clear_depth; |
| 593 | |||
| 593 | INSERT_PADDING_WORDS(0x3); | 594 | INSERT_PADDING_WORDS(0x3); |
| 595 | |||
| 594 | s32 clear_stencil; | 596 | s32 clear_stencil; |
| 595 | 597 | ||
| 596 | INSERT_PADDING_WORDS(0x17); | 598 | INSERT_PADDING_WORDS(0x7); |
| 599 | |||
| 600 | u32 polygon_offset_point_enable; | ||
| 601 | u32 polygon_offset_line_enable; | ||
| 602 | u32 polygon_offset_fill_enable; | ||
| 603 | |||
| 604 | INSERT_PADDING_WORDS(0xD); | ||
| 597 | 605 | ||
| 598 | std::array<ScissorTest, NumViewports> scissor_test; | 606 | std::array<ScissorTest, NumViewports> scissor_test; |
| 599 | 607 | ||
| @@ -728,6 +736,7 @@ public: | |||
| 728 | u32 frag_color_clamp; | 736 | u32 frag_color_clamp; |
| 729 | 737 | ||
| 730 | union { | 738 | union { |
| 739 | BitField<0, 1, u32> y_negate; | ||
| 731 | BitField<4, 1, u32> triangle_rast_flip; | 740 | BitField<4, 1, u32> triangle_rast_flip; |
| 732 | } screen_y_control; | 741 | } screen_y_control; |
| 733 | 742 | ||
| @@ -761,7 +770,11 @@ public: | |||
| 761 | } | 770 | } |
| 762 | } tsc; | 771 | } tsc; |
| 763 | 772 | ||
| 764 | INSERT_PADDING_WORDS(0x3); | 773 | INSERT_PADDING_WORDS(0x1); |
| 774 | |||
| 775 | float polygon_offset_factor; | ||
| 776 | |||
| 777 | INSERT_PADDING_WORDS(0x1); | ||
| 765 | 778 | ||
| 766 | struct { | 779 | struct { |
| 767 | u32 tic_address_high; | 780 | u32 tic_address_high; |
| @@ -786,7 +799,9 @@ public: | |||
| 786 | 799 | ||
| 787 | u32 framebuffer_srgb; | 800 | u32 framebuffer_srgb; |
| 788 | 801 | ||
| 789 | INSERT_PADDING_WORDS(0x12); | 802 | float polygon_offset_units; |
| 803 | |||
| 804 | INSERT_PADDING_WORDS(0x11); | ||
| 790 | 805 | ||
| 791 | union { | 806 | union { |
| 792 | BitField<2, 1, u32> coord_origin; | 807 | BitField<2, 1, u32> coord_origin; |
| @@ -863,7 +878,9 @@ public: | |||
| 863 | 878 | ||
| 864 | INSERT_PADDING_WORDS(0x7); | 879 | INSERT_PADDING_WORDS(0x7); |
| 865 | 880 | ||
| 866 | INSERT_PADDING_WORDS(0x20); | 881 | INSERT_PADDING_WORDS(0x1F); |
| 882 | |||
| 883 | float polygon_offset_clamp; | ||
| 867 | 884 | ||
| 868 | struct { | 885 | struct { |
| 869 | u32 is_instanced[NumVertexArrays]; | 886 | u32 is_instanced[NumVertexArrays]; |
| @@ -879,7 +896,13 @@ public: | |||
| 879 | 896 | ||
| 880 | Cull cull; | 897 | Cull cull; |
| 881 | 898 | ||
| 882 | INSERT_PADDING_WORDS(0x28); | 899 | u32 pixel_center_integer; |
| 900 | |||
| 901 | INSERT_PADDING_WORDS(0x1); | ||
| 902 | |||
| 903 | u32 viewport_transform_enabled; | ||
| 904 | |||
| 905 | INSERT_PADDING_WORDS(0x25); | ||
| 883 | 906 | ||
| 884 | struct { | 907 | struct { |
| 885 | u32 enable; | 908 | u32 enable; |
| @@ -1044,6 +1067,11 @@ public: | |||
| 1044 | 1067 | ||
| 1045 | struct DirtyFlags { | 1068 | struct DirtyFlags { |
| 1046 | bool vertex_attrib_format = true; | 1069 | bool vertex_attrib_format = true; |
| 1070 | u32 vertex_array = 0xFFFFFFFF; | ||
| 1071 | |||
| 1072 | void OnMemoryWrite() { | ||
| 1073 | vertex_array = 0xFFFFFFFF; | ||
| 1074 | } | ||
| 1047 | }; | 1075 | }; |
| 1048 | 1076 | ||
| 1049 | DirtyFlags dirty_flags; | 1077 | DirtyFlags dirty_flags; |
| @@ -1136,6 +1164,9 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D); | |||
| 1136 | ASSERT_REG_POSITION(clear_color[0], 0x360); | 1164 | ASSERT_REG_POSITION(clear_color[0], 0x360); |
| 1137 | ASSERT_REG_POSITION(clear_depth, 0x364); | 1165 | ASSERT_REG_POSITION(clear_depth, 0x364); |
| 1138 | ASSERT_REG_POSITION(clear_stencil, 0x368); | 1166 | ASSERT_REG_POSITION(clear_stencil, 0x368); |
| 1167 | ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); | ||
| 1168 | ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); | ||
| 1169 | ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); | ||
| 1139 | ASSERT_REG_POSITION(scissor_test, 0x380); | 1170 | ASSERT_REG_POSITION(scissor_test, 0x380); |
| 1140 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); | 1171 | ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); |
| 1141 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); | 1172 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); |
| @@ -1174,6 +1205,7 @@ ASSERT_REG_POSITION(point_size, 0x546); | |||
| 1174 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1205 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1175 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1206 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1176 | ASSERT_REG_POSITION(tsc, 0x557); | 1207 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1208 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | ||
| 1177 | ASSERT_REG_POSITION(tic, 0x55D); | 1209 | ASSERT_REG_POSITION(tic, 0x55D); |
| 1178 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); | 1210 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); |
| 1179 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); | 1211 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); |
| @@ -1181,13 +1213,17 @@ ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567); | |||
| 1181 | ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); | 1213 | ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); |
| 1182 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); | 1214 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); |
| 1183 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); | 1215 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); |
| 1216 | ASSERT_REG_POSITION(polygon_offset_units, 0x56F); | ||
| 1184 | ASSERT_REG_POSITION(point_coord_replace, 0x581); | 1217 | ASSERT_REG_POSITION(point_coord_replace, 0x581); |
| 1185 | ASSERT_REG_POSITION(code_address, 0x582); | 1218 | ASSERT_REG_POSITION(code_address, 0x582); |
| 1186 | ASSERT_REG_POSITION(draw, 0x585); | 1219 | ASSERT_REG_POSITION(draw, 0x585); |
| 1187 | ASSERT_REG_POSITION(primitive_restart, 0x591); | 1220 | ASSERT_REG_POSITION(primitive_restart, 0x591); |
| 1188 | ASSERT_REG_POSITION(index_array, 0x5F2); | 1221 | ASSERT_REG_POSITION(index_array, 0x5F2); |
| 1222 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | ||
| 1189 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1223 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
| 1190 | ASSERT_REG_POSITION(cull, 0x646); | 1224 | ASSERT_REG_POSITION(cull, 0x646); |
| 1225 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); | ||
| 1226 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); | ||
| 1191 | ASSERT_REG_POSITION(logic_op, 0x671); | 1227 | ASSERT_REG_POSITION(logic_op, 0x671); |
| 1192 | ASSERT_REG_POSITION(clear_buffers, 0x674); | 1228 | ASSERT_REG_POSITION(clear_buffers, 0x674); |
| 1193 | ASSERT_REG_POSITION(color_mask, 0x680); | 1229 | ASSERT_REG_POSITION(color_mask, 0x680); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index b8a78cf82..a34e884fe 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -2,7 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 5 | #include "core/memory.h" | 6 | #include "core/memory.h" |
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 6 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 7 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 8 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| @@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 54 | return; | 56 | return; |
| 55 | } | 57 | } |
| 56 | 58 | ||
| 59 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 60 | Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 61 | |||
| 57 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 62 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { |
| 58 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 63 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| 59 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 64 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 075192c3f..46a6c0308 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s | |||
| 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); | 76 | return std::make_tuple(uploaded_ptr, uploaded_offset); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void OGLBufferCache::Map(std::size_t max_size) { | 79 | bool OGLBufferCache::Map(std::size_t max_size) { |
| 80 | bool invalidate; | 80 | bool invalidate; |
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 81 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = |
| 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 82 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); |
| @@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) { | |||
| 85 | if (invalidate) { | 85 | if (invalidate) { |
| 86 | InvalidateAll(); | 86 | InvalidateAll(); |
| 87 | } | 87 | } |
| 88 | return invalidate; | ||
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | void OGLBufferCache::Unmap() { | 91 | void OGLBufferCache::Unmap() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 91fca3f6c..c11acfb79 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -50,7 +50,7 @@ public: | |||
| 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | 50 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. |
| 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); | 51 | std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); |
| 52 | 52 | ||
| 53 | void Map(std::size_t max_size); | 53 | bool Map(std::size_t max_size); |
| 54 | void Unmap(); | 54 | void Unmap(); |
| 55 | 55 | ||
| 56 | GLuint GetHandle() const; | 56 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 630a58e49..98fb5a9aa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 176 | } | 176 | } |
| 177 | state.draw.vertex_array = VAO.handle; | 177 | state.draw.vertex_array = VAO.handle; |
| 178 | state.ApplyVertexBufferState(); | 178 | state.ApplyVertexBufferState(); |
| 179 | |||
| 180 | // Rebinding the VAO invalidates the vertex buffer bindings. | ||
| 181 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | ||
| 179 | } | 182 | } |
| 180 | 183 | ||
| 181 | void RasterizerOpenGL::SetupVertexBuffer() { | 184 | void RasterizerOpenGL::SetupVertexBuffer() { |
| 182 | MICROPROFILE_SCOPE(OpenGL_VB); | 185 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 183 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 184 | const auto& regs = gpu.regs; | 186 | const auto& regs = gpu.regs; |
| 185 | 187 | ||
| 188 | if (!gpu.dirty_flags.vertex_array) | ||
| 189 | return; | ||
| 190 | |||
| 191 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 192 | |||
| 186 | // Upload all guest vertex arrays sequentially to our buffer | 193 | // Upload all guest vertex arrays sequentially to our buffer |
| 187 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 194 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 195 | if (~gpu.dirty_flags.vertex_array & (1u << index)) | ||
| 196 | continue; | ||
| 197 | |||
| 188 | const auto& vertex_array = regs.vertex_array[index]; | 198 | const auto& vertex_array = regs.vertex_array[index]; |
| 189 | if (!vertex_array.IsEnabled()) | 199 | if (!vertex_array.IsEnabled()) |
| 190 | continue; | 200 | continue; |
| @@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 211 | 221 | ||
| 212 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... | 222 | // Implicit set by glBindVertexBuffer. Stupid glstate handling... |
| 213 | state.draw.vertex_buffer = buffer_cache.GetHandle(); | 223 | state.draw.vertex_buffer = buffer_cache.GetHandle(); |
| 224 | |||
| 225 | gpu.dirty_flags.vertex_array = 0; | ||
| 214 | } | 226 | } |
| 215 | 227 | ||
| 216 | DrawParameters RasterizerOpenGL::SetupDraw() { | 228 | DrawParameters RasterizerOpenGL::SetupDraw() { |
| @@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 600 | return; | 612 | return; |
| 601 | 613 | ||
| 602 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 614 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 603 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 615 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 604 | const auto& regs = gpu.regs; | 616 | const auto& regs = gpu.regs; |
| 605 | 617 | ||
| 606 | ScopeAcquireGLContext acquire_context{emu_window}; | 618 | ScopeAcquireGLContext acquire_context{emu_window}; |
| @@ -620,7 +632,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 620 | SyncTransformFeedback(); | 632 | SyncTransformFeedback(); |
| 621 | SyncPointState(); | 633 | SyncPointState(); |
| 622 | CheckAlphaTests(); | 634 | CheckAlphaTests(); |
| 623 | 635 | SyncPolygonOffset(); | |
| 624 | // TODO(bunnei): Sync framebuffer_scale uniform here | 636 | // TODO(bunnei): Sync framebuffer_scale uniform here |
| 625 | // TODO(bunnei): Sync scissorbox uniform(s) here | 637 | // TODO(bunnei): Sync scissorbox uniform(s) here |
| 626 | 638 | ||
| @@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 653 | // Add space for at least 18 constant buffers | 665 | // Add space for at least 18 constant buffers |
| 654 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 666 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); |
| 655 | 667 | ||
| 656 | buffer_cache.Map(buffer_size); | 668 | bool invalidate = buffer_cache.Map(buffer_size); |
| 669 | if (invalidate) { | ||
| 670 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 671 | gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF; | ||
| 672 | } | ||
| 657 | 673 | ||
| 658 | SetupVertexFormat(); | 674 | SetupVertexFormat(); |
| 659 | SetupVertexBuffer(); | 675 | SetupVertexBuffer(); |
| @@ -969,13 +985,25 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | |||
| 969 | 985 | ||
| 970 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 986 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { |
| 971 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 987 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 972 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) { | 988 | const bool geometry_shaders_enabled = |
| 973 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; | 989 | regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); |
| 990 | const std::size_t viewport_count = | ||
| 991 | geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; | ||
| 992 | for (std::size_t i = 0; i < viewport_count; i++) { | ||
| 974 | auto& viewport = current_state.viewports[i]; | 993 | auto& viewport = current_state.viewports[i]; |
| 975 | viewport.x = viewport_rect.left; | 994 | const auto& src = regs.viewports[i]; |
| 976 | viewport.y = viewport_rect.bottom; | 995 | if (regs.viewport_transform_enabled) { |
| 977 | viewport.width = viewport_rect.GetWidth(); | 996 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; |
| 978 | viewport.height = viewport_rect.GetHeight(); | 997 | viewport.x = viewport_rect.left; |
| 998 | viewport.y = viewport_rect.bottom; | ||
| 999 | viewport.width = viewport_rect.GetWidth(); | ||
| 1000 | viewport.height = viewport_rect.GetHeight(); | ||
| 1001 | } else { | ||
| 1002 | viewport.x = src.x; | ||
| 1003 | viewport.y = src.y; | ||
| 1004 | viewport.width = src.width; | ||
| 1005 | viewport.height = src.height; | ||
| 1006 | } | ||
| 979 | viewport.depth_range_far = regs.viewports[i].depth_range_far; | 1007 | viewport.depth_range_far = regs.viewports[i].depth_range_far; |
| 980 | viewport.depth_range_near = regs.viewports[i].depth_range_near; | 1008 | viewport.depth_range_near = regs.viewports[i].depth_range_near; |
| 981 | } | 1009 | } |
| @@ -1149,7 +1177,11 @@ void RasterizerOpenGL::SyncLogicOpState() { | |||
| 1149 | 1177 | ||
| 1150 | void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { | 1178 | void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { |
| 1151 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1179 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1152 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) { | 1180 | const bool geometry_shaders_enabled = |
| 1181 | regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); | ||
| 1182 | const std::size_t viewport_count = | ||
| 1183 | geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; | ||
| 1184 | for (std::size_t i = 0; i < viewport_count; i++) { | ||
| 1153 | const auto& src = regs.scissor_test[i]; | 1185 | const auto& src = regs.scissor_test[i]; |
| 1154 | auto& dst = current_state.viewports[i].scissor; | 1186 | auto& dst = current_state.viewports[i].scissor; |
| 1155 | dst.enabled = (src.enable != 0); | 1187 | dst.enabled = (src.enable != 0); |
| @@ -1179,6 +1211,16 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1179 | state.point.size = regs.point_size; | 1211 | state.point.size = regs.point_size; |
| 1180 | } | 1212 | } |
| 1181 | 1213 | ||
| 1214 | void RasterizerOpenGL::SyncPolygonOffset() { | ||
| 1215 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | ||
| 1216 | state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; | ||
| 1217 | state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; | ||
| 1218 | state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; | ||
| 1219 | state.polygon_offset.units = regs.polygon_offset_units; | ||
| 1220 | state.polygon_offset.factor = regs.polygon_offset_factor; | ||
| 1221 | state.polygon_offset.clamp = regs.polygon_offset_clamp; | ||
| 1222 | } | ||
| 1223 | |||
| 1182 | void RasterizerOpenGL::CheckAlphaTests() { | 1224 | void RasterizerOpenGL::CheckAlphaTests() { |
| 1183 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1225 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1184 | 1226 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f4354289c..dfb4616f2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -183,6 +183,9 @@ private: | |||
| 183 | /// Syncs Color Mask | 183 | /// Syncs Color Mask |
| 184 | void SyncColorMask(); | 184 | void SyncColorMask(); |
| 185 | 185 | ||
| 186 | /// Syncs the polygon offsets | ||
| 187 | void SyncPolygonOffset(); | ||
| 188 | |||
| 186 | /// Check asserts for alpha testing. | 189 | /// Check asserts for alpha testing. |
| 187 | void CheckAlphaTests(); | 190 | void CheckAlphaTests(); |
| 188 | 191 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 05fe2d370..7c0935a4e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -867,7 +867,8 @@ private: | |||
| 867 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 867 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 868 | // shader. | 868 | // shader. |
| 869 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | 869 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); |
| 870 | return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; | 870 | // Config pack's first value is instance_id. |
| 871 | return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; | ||
| 871 | case Attribute::Index::FrontFacing: | 872 | case Attribute::Index::FrontFacing: |
| 872 | // TODO(Subv): Find out what the values are for the other elements. | 873 | // TODO(Subv): Find out what the values are for the other elements. |
| 873 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | 874 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); |
| @@ -3400,6 +3401,10 @@ private: | |||
| 3400 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | 3401 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, |
| 3401 | 1); | 3402 | 1); |
| 3402 | } | 3403 | } |
| 3404 | if (instr.generates_cc.Value() != 0) { | ||
| 3405 | regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate); | ||
| 3406 | LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete"); | ||
| 3407 | } | ||
| 3403 | break; | 3408 | break; |
| 3404 | } | 3409 | } |
| 3405 | case OpCode::Type::IntegerSet: { | 3410 | case OpCode::Type::IntegerSet: { |
| @@ -3653,6 +3658,11 @@ private: | |||
| 3653 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); | 3658 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); |
| 3654 | break; | 3659 | break; |
| 3655 | } | 3660 | } |
| 3661 | case Tegra::Shader::SystemVariable::Ydirection: { | ||
| 3662 | // Config pack's third value is Y_NEGATE's state. | ||
| 3663 | regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1); | ||
| 3664 | break; | ||
| 3665 | } | ||
| 3656 | default: { | 3666 | default: { |
| 3657 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | 3667 | UNIMPLEMENTED_MSG("Unhandled system move: {}", |
| 3658 | static_cast<u32>(instr.sys20.Value())); | 3668 | static_cast<u32>(instr.sys20.Value())); |
| @@ -3676,11 +3686,17 @@ private: | |||
| 3676 | "BRA with constant buffers are not implemented"); | 3686 | "BRA with constant buffers are not implemented"); |
| 3677 | 3687 | ||
| 3678 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 3688 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 3679 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3680 | "BRA condition code used: {}", static_cast<u32>(cc)); | ||
| 3681 | |||
| 3682 | const u32 target = offset + instr.bra.GetBranchTarget(); | 3689 | const u32 target = offset + instr.bra.GetBranchTarget(); |
| 3683 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | 3690 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 3691 | const std::string condition_code = regs.GetConditionCode(cc); | ||
| 3692 | shader.AddLine("if (" + condition_code + "){"); | ||
| 3693 | shader.scope++; | ||
| 3694 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3695 | shader.scope--; | ||
| 3696 | shader.AddLine('}'); | ||
| 3697 | } else { | ||
| 3698 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3699 | } | ||
| 3684 | break; | 3700 | break; |
| 3685 | } | 3701 | } |
| 3686 | case OpCode::Id::IPA: { | 3702 | case OpCode::Id::IPA: { |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index eea090e52..23ed91e27 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -24,8 +24,7 @@ layout (location = 0) out vec4 position; | |||
| 24 | 24 | ||
| 25 | layout(std140) uniform vs_config { | 25 | layout(std140) uniform vs_config { |
| 26 | vec4 viewport_flip; | 26 | vec4 viewport_flip; |
| 27 | uvec4 instance_id; | 27 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 28 | uvec4 flip_stage; | ||
| 29 | uvec4 alpha_test; | 28 | uvec4 alpha_test; |
| 30 | }; | 29 | }; |
| 31 | )"; | 30 | )"; |
| @@ -63,7 +62,8 @@ void main() { | |||
| 63 | out += R"( | 62 | out += R"( |
| 64 | 63 | ||
| 65 | // Check if the flip stage is VertexB | 64 | // Check if the flip stage is VertexB |
| 66 | if (flip_stage[0] == 1) { | 65 | // Config pack's second value is flip_stage |
| 66 | if (config_pack[1] == 1) { | ||
| 67 | // Viewport can be flipped, which is unsupported by glViewport | 67 | // Viewport can be flipped, which is unsupported by glViewport |
| 68 | position.xy *= viewport_flip.xy; | 68 | position.xy *= viewport_flip.xy; |
| 69 | } | 69 | } |
| @@ -71,7 +71,7 @@ void main() { | |||
| 71 | 71 | ||
| 72 | // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 | 72 | // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 |
| 73 | // For now, this is here to bring order in lieu of proper emulation | 73 | // For now, this is here to bring order in lieu of proper emulation |
| 74 | if (flip_stage[0] == 1) { | 74 | if (config_pack[1] == 1) { |
| 75 | position.w = 1.0; | 75 | position.w = 1.0; |
| 76 | } | 76 | } |
| 77 | } | 77 | } |
| @@ -101,8 +101,7 @@ layout (location = 0) out vec4 position; | |||
| 101 | 101 | ||
| 102 | layout (std140) uniform gs_config { | 102 | layout (std140) uniform gs_config { |
| 103 | vec4 viewport_flip; | 103 | vec4 viewport_flip; |
| 104 | uvec4 instance_id; | 104 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 105 | uvec4 flip_stage; | ||
| 106 | uvec4 alpha_test; | 105 | uvec4 alpha_test; |
| 107 | }; | 106 | }; |
| 108 | 107 | ||
| @@ -139,8 +138,7 @@ layout (location = 0) in vec4 position; | |||
| 139 | 138 | ||
| 140 | layout (std140) uniform fs_config { | 139 | layout (std140) uniform fs_config { |
| 141 | vec4 viewport_flip; | 140 | vec4 viewport_flip; |
| 142 | uvec4 instance_id; | 141 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 143 | uvec4 flip_stage; | ||
| 144 | uvec4 alpha_test; | 142 | uvec4 alpha_test; |
| 145 | }; | 143 | }; |
| 146 | 144 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8b8869ecb..6a30c28d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -27,16 +27,18 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh | |||
| 27 | alpha_test.func = func; | 27 | alpha_test.func = func; |
| 28 | alpha_test.ref = regs.alpha_test_ref; | 28 | alpha_test.ref = regs.alpha_test_ref; |
| 29 | 29 | ||
| 30 | // We only assign the instance to the first component of the vector, the rest is just padding. | 30 | instance_id = state.current_instance; |
| 31 | instance_id[0] = state.current_instance; | ||
| 32 | 31 | ||
| 33 | // Assign in which stage the position has to be flipped | 32 | // Assign in which stage the position has to be flipped |
| 34 | // (the last stage before the fragment shader). | 33 | // (the last stage before the fragment shader). |
| 35 | if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { | 34 | if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { |
| 36 | flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); | 35 | flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); |
| 37 | } else { | 36 | } else { |
| 38 | flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); | 37 | flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); |
| 39 | } | 38 | } |
| 39 | |||
| 40 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. | ||
| 41 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f; | ||
| 40 | } | 42 | } |
| 41 | 43 | ||
| 42 | } // namespace OpenGL::GLShader | 44 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 9a5d7e289..b757f5f44 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -21,8 +21,11 @@ using Tegra::Engines::Maxwell3D; | |||
| 21 | struct MaxwellUniformData { | 21 | struct MaxwellUniformData { |
| 22 | void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); | 22 | void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); |
| 23 | alignas(16) GLvec4 viewport_flip; | 23 | alignas(16) GLvec4 viewport_flip; |
| 24 | alignas(16) GLuvec4 instance_id; | 24 | struct alignas(16) { |
| 25 | alignas(16) GLuvec4 flip_stage; | 25 | GLuint instance_id; |
| 26 | GLuint flip_stage; | ||
| 27 | GLfloat y_direction; | ||
| 28 | }; | ||
| 26 | struct alignas(16) { | 29 | struct alignas(16) { |
| 27 | GLuint enabled; | 30 | GLuint enabled; |
| 28 | GLuint func; | 31 | GLuint func; |
| @@ -30,7 +33,7 @@ struct MaxwellUniformData { | |||
| 30 | GLuint padding; | 33 | GLuint padding; |
| 31 | } alpha_test; | 34 | } alpha_test; |
| 32 | }; | 35 | }; |
| 33 | static_assert(sizeof(MaxwellUniformData) == 64, "MaxwellUniformData structure size is incorrect"); | 36 | static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect"); |
| 34 | static_assert(sizeof(MaxwellUniformData) < 16384, | 37 | static_assert(sizeof(MaxwellUniformData) < 16384, |
| 35 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | 38 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); |
| 36 | 39 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 934f4db78..b3bfad6a0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -92,6 +92,13 @@ OpenGLState::OpenGLState() { | |||
| 92 | 92 | ||
| 93 | point.size = 1; | 93 | point.size = 1; |
| 94 | fragment_color_clamp.enabled = false; | 94 | fragment_color_clamp.enabled = false; |
| 95 | |||
| 96 | polygon_offset.fill_enable = false; | ||
| 97 | polygon_offset.line_enable = false; | ||
| 98 | polygon_offset.point_enable = false; | ||
| 99 | polygon_offset.factor = 0.0f; | ||
| 100 | polygon_offset.units = 0.0f; | ||
| 101 | polygon_offset.clamp = 0.0f; | ||
| 95 | } | 102 | } |
| 96 | 103 | ||
| 97 | void OpenGLState::ApplyDefaultState() { | 104 | void OpenGLState::ApplyDefaultState() { |
| @@ -406,6 +413,55 @@ void OpenGLState::ApplyLogicOp() const { | |||
| 406 | } | 413 | } |
| 407 | } | 414 | } |
| 408 | 415 | ||
| 416 | void OpenGLState::ApplyPolygonOffset() const { | ||
| 417 | |||
| 418 | const bool fill_enable_changed = | ||
| 419 | polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; | ||
| 420 | const bool line_enable_changed = | ||
| 421 | polygon_offset.line_enable != cur_state.polygon_offset.line_enable; | ||
| 422 | const bool point_enable_changed = | ||
| 423 | polygon_offset.point_enable != cur_state.polygon_offset.point_enable; | ||
| 424 | const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; | ||
| 425 | const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; | ||
| 426 | const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; | ||
| 427 | |||
| 428 | if (fill_enable_changed) { | ||
| 429 | if (polygon_offset.fill_enable) { | ||
| 430 | glEnable(GL_POLYGON_OFFSET_FILL); | ||
| 431 | } else { | ||
| 432 | glDisable(GL_POLYGON_OFFSET_FILL); | ||
| 433 | } | ||
| 434 | } | ||
| 435 | |||
| 436 | if (line_enable_changed) { | ||
| 437 | if (polygon_offset.line_enable) { | ||
| 438 | glEnable(GL_POLYGON_OFFSET_LINE); | ||
| 439 | } else { | ||
| 440 | glDisable(GL_POLYGON_OFFSET_LINE); | ||
| 441 | } | ||
| 442 | } | ||
| 443 | |||
| 444 | if (point_enable_changed) { | ||
| 445 | if (polygon_offset.point_enable) { | ||
| 446 | glEnable(GL_POLYGON_OFFSET_POINT); | ||
| 447 | } else { | ||
| 448 | glDisable(GL_POLYGON_OFFSET_POINT); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | |||
| 452 | if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && | ||
| 453 | (factor_changed || units_changed || clamp_changed)) { | ||
| 454 | |||
| 455 | if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { | ||
| 456 | glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); | ||
| 457 | } else { | ||
| 458 | glPolygonOffset(polygon_offset.factor, polygon_offset.units); | ||
| 459 | UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, | ||
| 460 | "Unimplemented Depth polygon offset clamp."); | ||
| 461 | } | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 409 | void OpenGLState::ApplyTextures() const { | 465 | void OpenGLState::ApplyTextures() const { |
| 410 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { | 466 | for (std::size_t i = 0; i < std::size(texture_units); ++i) { |
| 411 | const auto& texture_unit = texture_units[i]; | 467 | const auto& texture_unit = texture_units[i]; |
| @@ -532,6 +588,7 @@ void OpenGLState::Apply() const { | |||
| 532 | ApplyLogicOp(); | 588 | ApplyLogicOp(); |
| 533 | ApplyTextures(); | 589 | ApplyTextures(); |
| 534 | ApplySamplers(); | 590 | ApplySamplers(); |
| 591 | ApplyPolygonOffset(); | ||
| 535 | cur_state = *this; | 592 | cur_state = *this; |
| 536 | } | 593 | } |
| 537 | 594 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 032fc43f0..0bf19ed07 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -176,6 +176,15 @@ public: | |||
| 176 | float size; // GL_POINT_SIZE | 176 | float size; // GL_POINT_SIZE |
| 177 | } point; | 177 | } point; |
| 178 | 178 | ||
| 179 | struct { | ||
| 180 | bool point_enable; | ||
| 181 | bool line_enable; | ||
| 182 | bool fill_enable; | ||
| 183 | GLfloat units; | ||
| 184 | GLfloat factor; | ||
| 185 | GLfloat clamp; | ||
| 186 | } polygon_offset; | ||
| 187 | |||
| 179 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE | 188 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE |
| 180 | 189 | ||
| 181 | OpenGLState(); | 190 | OpenGLState(); |
| @@ -226,6 +235,7 @@ private: | |||
| 226 | void ApplyLogicOp() const; | 235 | void ApplyLogicOp() const; |
| 227 | void ApplyTextures() const; | 236 | void ApplyTextures() const; |
| 228 | void ApplySamplers() const; | 237 | void ApplySamplers() const; |
| 238 | void ApplyPolygonOffset() const; | ||
| 229 | }; | 239 | }; |
| 230 | 240 | ||
| 231 | } // namespace OpenGL | 241 | } // namespace OpenGL |