summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/kernel/resource_limit.h 6
-rw-r--r-- src/core/hle/kernel/svc.cpp 121
-rw-r--r-- src/core/hle/kernel/svc_wrap.h 8
-rw-r--r-- src/video_core/command_processor.cpp 3
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h 46
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 5
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 66
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 26
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 14
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 9
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 57
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 10
19 files changed, 369 insertions, 44 deletions
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index bec065543..59dc11c22 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -14,7 +14,7 @@ namespace Kernel {
14 14
15class KernelCore; 15class KernelCore;
16 16
17enum class ResourceType { 17enum class ResourceType : u32 {
18 PhysicalMemory, 18 PhysicalMemory,
19 Threads, 19 Threads,
20 Events, 20 Events,
@@ -25,6 +25,10 @@ enum class ResourceType {
25 ResourceTypeCount 25 ResourceTypeCount
26}; 26};
27 27
28constexpr bool IsValidResourceType(ResourceType type) {
29 return type < ResourceType::ResourceTypeCount;
30}
31
28class ResourceLimit final : public Object { 32class ResourceLimit final : public Object {
29public: 33public:
30 /** 34 /**
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index f287f7c97..1f19d5576 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -105,6 +105,38 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
105 105
106 return RESULT_SUCCESS; 106 return RESULT_SUCCESS;
107} 107}
108
/// Selects which of the two values tracked by a resource limit a query should return.
enum class ResourceLimitValueType {
    /// Read the value reported by GetCurrentResourceValue().
    CurrentValue,
    /// Read the value reported by GetMaxResourceValue().
    LimitValue,
};
113
114ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type,
115 ResourceLimitValueType value_type) {
116 const auto type = static_cast<ResourceType>(resource_type);
117 if (!IsValidResourceType(type)) {
118 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
119 return ERR_INVALID_ENUM_VALUE;
120 }
121
122 const auto& kernel = Core::System::GetInstance().Kernel();
123 const auto* const current_process = kernel.CurrentProcess();
124 ASSERT(current_process != nullptr);
125
126 const auto resource_limit_object =
127 current_process->GetHandleTable().Get<ResourceLimit>(resource_limit);
128 if (!resource_limit_object) {
129 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
130 resource_limit);
131 return ERR_INVALID_HANDLE;
132 }
133
134 if (value_type == ResourceLimitValueType::CurrentValue) {
135 return MakeResult(resource_limit_object->GetCurrentResourceValue(type));
136 }
137
138 return MakeResult(resource_limit_object->GetMaxResourceValue(type));
139}
108} // Anonymous namespace 140} // Anonymous namespace
109 141
110/// Set the process heap to a given Size. It can both extend and shrink the heap. 142/// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -1346,6 +1378,87 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) {
1346 return RESULT_SUCCESS; 1378 return RESULT_SUCCESS;
1347} 1379}
1348 1380
1381static ResultCode CreateResourceLimit(Handle* out_handle) {
1382 LOG_DEBUG(Kernel_SVC, "called");
1383
1384 auto& kernel = Core::System::GetInstance().Kernel();
1385 auto resource_limit = ResourceLimit::Create(kernel);
1386
1387 auto* const current_process = kernel.CurrentProcess();
1388 ASSERT(current_process != nullptr);
1389
1390 const auto handle = current_process->GetHandleTable().Create(std::move(resource_limit));
1391 if (handle.Failed()) {
1392 return handle.Code();
1393 }
1394
1395 *out_handle = *handle;
1396 return RESULT_SUCCESS;
1397}
1398
1399static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit,
1400 u32 resource_type) {
1401 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
1402
1403 const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type,
1404 ResourceLimitValueType::LimitValue);
1405 if (limit_value.Failed()) {
1406 return limit_value.Code();
1407 }
1408
1409 *out_value = static_cast<u64>(*limit_value);
1410 return RESULT_SUCCESS;
1411}
1412
1413static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit,
1414 u32 resource_type) {
1415 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type);
1416
1417 const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type,
1418 ResourceLimitValueType::CurrentValue);
1419 if (current_value.Failed()) {
1420 return current_value.Code();
1421 }
1422
1423 *out_value = static_cast<u64>(*current_value);
1424 return RESULT_SUCCESS;
1425}
1426
1427static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) {
1428 LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit,
1429 resource_type, value);
1430
1431 const auto type = static_cast<ResourceType>(resource_type);
1432 if (!IsValidResourceType(type)) {
1433 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
1434 return ERR_INVALID_ENUM_VALUE;
1435 }
1436
1437 auto& kernel = Core::System::GetInstance().Kernel();
1438 auto* const current_process = kernel.CurrentProcess();
1439 ASSERT(current_process != nullptr);
1440
1441 auto resource_limit_object =
1442 current_process->GetHandleTable().Get<ResourceLimit>(resource_limit);
1443 if (!resource_limit_object) {
1444 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
1445 resource_limit);
1446 return ERR_INVALID_HANDLE;
1447 }
1448
1449 const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
1450 if (set_result.IsError()) {
1451 LOG_ERROR(
1452 Kernel_SVC,
1453 "Attempted to lower resource limit ({}) for category '{}' below its current value ({})",
1454 resource_limit_object->GetMaxResourceValue(type), resource_type,
1455 resource_limit_object->GetCurrentResourceValue(type));
1456 return set_result;
1457 }
1458
1459 return RESULT_SUCCESS;
1460}
1461
1349namespace { 1462namespace {
1350struct FunctionDef { 1463struct FunctionDef {
1351 using Func = void(); 1464 using Func = void();
@@ -1405,8 +1518,8 @@ static const FunctionDef SVC_Table[] = {
1405 {0x2D, nullptr, "UnmapPhysicalMemory"}, 1518 {0x2D, nullptr, "UnmapPhysicalMemory"},
1406 {0x2E, nullptr, "GetFutureThreadInfo"}, 1519 {0x2E, nullptr, "GetFutureThreadInfo"},
1407 {0x2F, nullptr, "GetLastThreadInfo"}, 1520 {0x2F, nullptr, "GetLastThreadInfo"},
1408 {0x30, nullptr, "GetResourceLimitLimitValue"}, 1521 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
1409 {0x31, nullptr, "GetResourceLimitCurrentValue"}, 1522 {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"},
1410 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, 1523 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
1411 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, 1524 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
1412 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, 1525 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
@@ -1482,8 +1595,8 @@ static const FunctionDef SVC_Table[] = {
1482 {0x7A, nullptr, "StartProcess"}, 1595 {0x7A, nullptr, "StartProcess"},
1483 {0x7B, nullptr, "TerminateProcess"}, 1596 {0x7B, nullptr, "TerminateProcess"},
1484 {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, 1597 {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"},
1485 {0x7D, nullptr, "CreateResourceLimit"}, 1598 {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"},
1486 {0x7E, nullptr, "SetResourceLimitLimitValue"}, 1599 {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"},
1487 {0x7F, nullptr, "CallSecureMonitor"}, 1600 {0x7F, nullptr, "CallSecureMonitor"},
1488}; 1601};
1489 1602
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 233a99fb0..fa1116624 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -43,6 +43,14 @@ void SvcWrap() {
43 FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); 43 FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
44} 44}
45 45
46template <ResultCode func(u32*)>
47void SvcWrap() {
48 u32 param = 0;
49 const u32 retval = func(&param).raw;
50 Core::CurrentArmInterface().SetReg(1, param);
51 FuncReturn(retval);
52}
53
46template <ResultCode func(u32*, u32)> 54template <ResultCode func(u32*, u32)>
47void SvcWrap() { 55void SvcWrap() {
48 u32 param_1 = 0; 56 u32 param_1 = 0;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 28e8c13aa..8b9c548cc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { 34void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
35 MICROPROFILE_SCOPE(ProcessCommandLists); 35 MICROPROFILE_SCOPE(ProcessCommandLists);
36 36
37 // On entering GPU code, assume all memory may be touched by the ARM core.
38 maxwell_3d->dirty_flags.OnMemoryWrite();
39
37 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { 40 auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
38 LOG_TRACE(HW_GPU, 41 LOG_TRACE(HW_GPU,
39 "Processing method {:08X} on subchannel {} value " 42 "Processing method {:08X} on subchannel {} value "
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 74e44c7fe..8d0700d13 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,8 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
9 11
@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
47 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
48 50
49 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { 51 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
52 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54
50 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
51 // We have to invalidate the destination region to evict any outdated surfaces from the 56 // We have to invalidate the destination region to evict any outdated surfaces from the
52 // cache. We do this before actually writing the new data because the destination address 57 // cache. We do this before actually writing the new data because the destination address
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 585290d9f..2adbc9eaf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -3,8 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/core.h"
6#include "core/memory.h" 7#include "core/memory.h"
7#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
9 11
10namespace Tegra::Engines { 12namespace Tegra::Engines {
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
47 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 49 rasterizer.InvalidateRegion(dest_address, sizeof(u32));
48 50
49 Memory::Write32(dest_address, data); 51 Memory::Write32(dest_address, data);
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
50 53
51 state.write_offset++; 54 state.write_offset++;
52} 55}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2bc534be3..f0a5470b9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
135 135
136 if (regs.reg_array[method] != value) { 136 if (regs.reg_array[method] != value) {
137 regs.reg_array[method] = value; 137 regs.reg_array[method] = value;
138 // Vertex format
138 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 139 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
139 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { 140 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
140 dirty_flags.vertex_attrib_format = true; 141 dirty_flags.vertex_attrib_format = true;
141 } 142 }
143
144 // Vertex buffer
145 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
146 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
147 dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
148 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
149 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
150 dirty_flags.vertex_array |=
151 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
152 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
153 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
154 dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
155 }
142 } 156 }
143 157
144 switch (method) { 158 switch (method) {
@@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
270 query_result.timestamp = CoreTiming::GetTicks(); 284 query_result.timestamp = CoreTiming::GetTicks();
271 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 285 Memory::WriteBlock(*address, &query_result, sizeof(query_result));
272 } 286 }
287 dirty_flags.OnMemoryWrite();
273 break; 288 break;
274 } 289 }
275 default: 290 default:
@@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
346 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 361 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
347 362
348 Memory::Write32(*address, value); 363 Memory::Write32(*address, value);
364 dirty_flags.OnMemoryWrite();
349 365
350 // Increment the current buffer position. 366 // Increment the current buffer position.
351 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 367 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4f137e693..9324d9710 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -590,10 +590,18 @@ public:
590 590
591 float clear_color[4]; 591 float clear_color[4];
592 float clear_depth; 592 float clear_depth;
593
593 INSERT_PADDING_WORDS(0x3); 594 INSERT_PADDING_WORDS(0x3);
595
594 s32 clear_stencil; 596 s32 clear_stencil;
595 597
596 INSERT_PADDING_WORDS(0x17); 598 INSERT_PADDING_WORDS(0x7);
599
600 u32 polygon_offset_point_enable;
601 u32 polygon_offset_line_enable;
602 u32 polygon_offset_fill_enable;
603
604 INSERT_PADDING_WORDS(0xD);
597 605
598 std::array<ScissorTest, NumViewports> scissor_test; 606 std::array<ScissorTest, NumViewports> scissor_test;
599 607
@@ -728,6 +736,7 @@ public:
728 u32 frag_color_clamp; 736 u32 frag_color_clamp;
729 737
730 union { 738 union {
739 BitField<0, 1, u32> y_negate;
731 BitField<4, 1, u32> triangle_rast_flip; 740 BitField<4, 1, u32> triangle_rast_flip;
732 } screen_y_control; 741 } screen_y_control;
733 742
@@ -761,7 +770,11 @@ public:
761 } 770 }
762 } tsc; 771 } tsc;
763 772
764 INSERT_PADDING_WORDS(0x3); 773 INSERT_PADDING_WORDS(0x1);
774
775 float polygon_offset_factor;
776
777 INSERT_PADDING_WORDS(0x1);
765 778
766 struct { 779 struct {
767 u32 tic_address_high; 780 u32 tic_address_high;
@@ -786,7 +799,9 @@ public:
786 799
787 u32 framebuffer_srgb; 800 u32 framebuffer_srgb;
788 801
789 INSERT_PADDING_WORDS(0x12); 802 float polygon_offset_units;
803
804 INSERT_PADDING_WORDS(0x11);
790 805
791 union { 806 union {
792 BitField<2, 1, u32> coord_origin; 807 BitField<2, 1, u32> coord_origin;
@@ -863,7 +878,9 @@ public:
863 878
864 INSERT_PADDING_WORDS(0x7); 879 INSERT_PADDING_WORDS(0x7);
865 880
866 INSERT_PADDING_WORDS(0x20); 881 INSERT_PADDING_WORDS(0x1F);
882
883 float polygon_offset_clamp;
867 884
868 struct { 885 struct {
869 u32 is_instanced[NumVertexArrays]; 886 u32 is_instanced[NumVertexArrays];
@@ -879,7 +896,13 @@ public:
879 896
880 Cull cull; 897 Cull cull;
881 898
882 INSERT_PADDING_WORDS(0x28); 899 u32 pixel_center_integer;
900
901 INSERT_PADDING_WORDS(0x1);
902
903 u32 viewport_transform_enabled;
904
905 INSERT_PADDING_WORDS(0x25);
883 906
884 struct { 907 struct {
885 u32 enable; 908 u32 enable;
@@ -1044,6 +1067,11 @@ public:
1044 1067
1045 struct DirtyFlags { 1068 struct DirtyFlags {
1046 bool vertex_attrib_format = true; 1069 bool vertex_attrib_format = true;
1070 u32 vertex_array = 0xFFFFFFFF;
1071
1072 void OnMemoryWrite() {
1073 vertex_array = 0xFFFFFFFF;
1074 }
1047 }; 1075 };
1048 1076
1049 DirtyFlags dirty_flags; 1077 DirtyFlags dirty_flags;
@@ -1136,6 +1164,9 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
1136ASSERT_REG_POSITION(clear_color[0], 0x360); 1164ASSERT_REG_POSITION(clear_color[0], 0x360);
1137ASSERT_REG_POSITION(clear_depth, 0x364); 1165ASSERT_REG_POSITION(clear_depth, 0x364);
1138ASSERT_REG_POSITION(clear_stencil, 0x368); 1166ASSERT_REG_POSITION(clear_stencil, 0x368);
1167ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1168ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1169ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
1139ASSERT_REG_POSITION(scissor_test, 0x380); 1170ASSERT_REG_POSITION(scissor_test, 0x380);
1140ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); 1171ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1141ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1172ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
@@ -1174,6 +1205,7 @@ ASSERT_REG_POSITION(point_size, 0x546);
1174ASSERT_REG_POSITION(zeta_enable, 0x54E); 1205ASSERT_REG_POSITION(zeta_enable, 0x54E);
1175ASSERT_REG_POSITION(multisample_control, 0x54F); 1206ASSERT_REG_POSITION(multisample_control, 0x54F);
1176ASSERT_REG_POSITION(tsc, 0x557); 1207ASSERT_REG_POSITION(tsc, 0x557);
1208ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1177ASSERT_REG_POSITION(tic, 0x55D); 1209ASSERT_REG_POSITION(tic, 0x55D);
1178ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); 1210ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
1179ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); 1211ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1181,13 +1213,17 @@ ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
1181ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); 1213ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
1182ASSERT_REG_POSITION(stencil_back_func_func, 0x569); 1214ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
1183ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); 1215ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
1216ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
1184ASSERT_REG_POSITION(point_coord_replace, 0x581); 1217ASSERT_REG_POSITION(point_coord_replace, 0x581);
1185ASSERT_REG_POSITION(code_address, 0x582); 1218ASSERT_REG_POSITION(code_address, 0x582);
1186ASSERT_REG_POSITION(draw, 0x585); 1219ASSERT_REG_POSITION(draw, 0x585);
1187ASSERT_REG_POSITION(primitive_restart, 0x591); 1220ASSERT_REG_POSITION(primitive_restart, 0x591);
1188ASSERT_REG_POSITION(index_array, 0x5F2); 1221ASSERT_REG_POSITION(index_array, 0x5F2);
1222ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1189ASSERT_REG_POSITION(instanced_arrays, 0x620); 1223ASSERT_REG_POSITION(instanced_arrays, 0x620);
1190ASSERT_REG_POSITION(cull, 0x646); 1224ASSERT_REG_POSITION(cull, 0x646);
1225ASSERT_REG_POSITION(pixel_center_integer, 0x649);
1226ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
1191ASSERT_REG_POSITION(logic_op, 0x671); 1227ASSERT_REG_POSITION(logic_op, 0x671);
1192ASSERT_REG_POSITION(clear_buffers, 0x674); 1228ASSERT_REG_POSITION(clear_buffers, 0x674);
1193ASSERT_REG_POSITION(color_mask, 0x680); 1229ASSERT_REG_POSITION(color_mask, 0x680);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index b8a78cf82..a34e884fe 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,7 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
5#include "core/memory.h" 6#include "core/memory.h"
7#include "video_core/engines/maxwell_3d.h"
6#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
7#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
8#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
54 return; 56 return;
55 } 57 }
56 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
61
57 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
58 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
59 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 075192c3f..46a6c0308 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
76 return std::make_tuple(uploaded_ptr, uploaded_offset); 76 return std::make_tuple(uploaded_ptr, uploaded_offset);
77} 77}
78 78
79void OGLBufferCache::Map(std::size_t max_size) { 79bool OGLBufferCache::Map(std::size_t max_size) {
80 bool invalidate; 80 bool invalidate;
81 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 81 std::tie(buffer_ptr, buffer_offset_base, invalidate) =
82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 82 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
85 if (invalidate) { 85 if (invalidate) {
86 InvalidateAll(); 86 InvalidateAll();
87 } 87 }
88 return invalidate;
88} 89}
89 90
90void OGLBufferCache::Unmap() { 91void OGLBufferCache::Unmap() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 91fca3f6c..c11acfb79 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -50,7 +50,7 @@ public:
50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset. 50 /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); 51 std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
52 52
53 void Map(std::size_t max_size); 53 bool Map(std::size_t max_size);
54 void Unmap(); 54 void Unmap();
55 55
56 GLuint GetHandle() const; 56 GLuint GetHandle() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 630a58e49..98fb5a9aa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
176 } 176 }
177 state.draw.vertex_array = VAO.handle; 177 state.draw.vertex_array = VAO.handle;
178 state.ApplyVertexBufferState(); 178 state.ApplyVertexBufferState();
179
180 // Rebinding the VAO invalidates the vertex buffer bindings.
181 gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
179} 182}
180 183
181void RasterizerOpenGL::SetupVertexBuffer() { 184void RasterizerOpenGL::SetupVertexBuffer() {
182 MICROPROFILE_SCOPE(OpenGL_VB); 185 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
183 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
184 const auto& regs = gpu.regs; 186 const auto& regs = gpu.regs;
185 187
188 if (!gpu.dirty_flags.vertex_array)
189 return;
190
191 MICROPROFILE_SCOPE(OpenGL_VB);
192
186 // Upload all guest vertex arrays sequentially to our buffer 193 // Upload all guest vertex arrays sequentially to our buffer
187 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
195 if (~gpu.dirty_flags.vertex_array & (1u << index))
196 continue;
197
188 const auto& vertex_array = regs.vertex_array[index]; 198 const auto& vertex_array = regs.vertex_array[index];
189 if (!vertex_array.IsEnabled()) 199 if (!vertex_array.IsEnabled())
190 continue; 200 continue;
@@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
211 221
212 // Implicit set by glBindVertexBuffer. Stupid glstate handling... 222 // Implicit set by glBindVertexBuffer. Stupid glstate handling...
213 state.draw.vertex_buffer = buffer_cache.GetHandle(); 223 state.draw.vertex_buffer = buffer_cache.GetHandle();
224
225 gpu.dirty_flags.vertex_array = 0;
214} 226}
215 227
216DrawParameters RasterizerOpenGL::SetupDraw() { 228DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
600 return; 612 return;
601 613
602 MICROPROFILE_SCOPE(OpenGL_Drawing); 614 MICROPROFILE_SCOPE(OpenGL_Drawing);
603 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 615 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
604 const auto& regs = gpu.regs; 616 const auto& regs = gpu.regs;
605 617
606 ScopeAcquireGLContext acquire_context{emu_window}; 618 ScopeAcquireGLContext acquire_context{emu_window};
@@ -620,7 +632,7 @@ void RasterizerOpenGL::DrawArrays() {
620 SyncTransformFeedback(); 632 SyncTransformFeedback();
621 SyncPointState(); 633 SyncPointState();
622 CheckAlphaTests(); 634 CheckAlphaTests();
623 635 SyncPolygonOffset();
624 // TODO(bunnei): Sync framebuffer_scale uniform here 636 // TODO(bunnei): Sync framebuffer_scale uniform here
625 // TODO(bunnei): Sync scissorbox uniform(s) here 637 // TODO(bunnei): Sync scissorbox uniform(s) here
626 638
@@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
653 // Add space for at least 18 constant buffers 665 // Add space for at least 18 constant buffers
654 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 666 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
655 667
656 buffer_cache.Map(buffer_size); 668 bool invalidate = buffer_cache.Map(buffer_size);
669 if (invalidate) {
670 // As all cached buffers are invalidated, we need to recheck their state.
671 gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
672 }
657 673
658 SetupVertexFormat(); 674 SetupVertexFormat();
659 SetupVertexBuffer(); 675 SetupVertexBuffer();
@@ -969,13 +985,25 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
969 985
970void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 986void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
971 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 987 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
972 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) { 988 const bool geometry_shaders_enabled =
973 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 989 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
990 const std::size_t viewport_count =
991 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
992 for (std::size_t i = 0; i < viewport_count; i++) {
974 auto& viewport = current_state.viewports[i]; 993 auto& viewport = current_state.viewports[i];
975 viewport.x = viewport_rect.left; 994 const auto& src = regs.viewports[i];
976 viewport.y = viewport_rect.bottom; 995 if (regs.viewport_transform_enabled) {
977 viewport.width = viewport_rect.GetWidth(); 996 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
978 viewport.height = viewport_rect.GetHeight(); 997 viewport.x = viewport_rect.left;
998 viewport.y = viewport_rect.bottom;
999 viewport.width = viewport_rect.GetWidth();
1000 viewport.height = viewport_rect.GetHeight();
1001 } else {
1002 viewport.x = src.x;
1003 viewport.y = src.y;
1004 viewport.width = src.width;
1005 viewport.height = src.height;
1006 }
979 viewport.depth_range_far = regs.viewports[i].depth_range_far; 1007 viewport.depth_range_far = regs.viewports[i].depth_range_far;
980 viewport.depth_range_near = regs.viewports[i].depth_range_near; 1008 viewport.depth_range_near = regs.viewports[i].depth_range_near;
981 } 1009 }
@@ -1149,7 +1177,11 @@ void RasterizerOpenGL::SyncLogicOpState() {
1149 1177
1150void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1178void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1151 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1179 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1152 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) { 1180 const bool geometry_shaders_enabled =
1181 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
1182 const std::size_t viewport_count =
1183 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
1184 for (std::size_t i = 0; i < viewport_count; i++) {
1153 const auto& src = regs.scissor_test[i]; 1185 const auto& src = regs.scissor_test[i];
1154 auto& dst = current_state.viewports[i].scissor; 1186 auto& dst = current_state.viewports[i].scissor;
1155 dst.enabled = (src.enable != 0); 1187 dst.enabled = (src.enable != 0);
@@ -1179,6 +1211,16 @@ void RasterizerOpenGL::SyncPointState() {
1179 state.point.size = regs.point_size; 1211 state.point.size = regs.point_size;
1180} 1212}
1181 1213
1214void RasterizerOpenGL::SyncPolygonOffset() {
1215 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1216 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1217 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1218 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1219 state.polygon_offset.units = regs.polygon_offset_units;
1220 state.polygon_offset.factor = regs.polygon_offset_factor;
1221 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1222}
1223
1182void RasterizerOpenGL::CheckAlphaTests() { 1224void RasterizerOpenGL::CheckAlphaTests() {
1183 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1225 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1184 1226
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f4354289c..dfb4616f2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -183,6 +183,9 @@ private:
183 /// Syncs Color Mask 183 /// Syncs Color Mask
184 void SyncColorMask(); 184 void SyncColorMask();
185 185
186 /// Syncs the polygon offsets
187 void SyncPolygonOffset();
188
186 /// Check asserts for alpha testing. 189 /// Check asserts for alpha testing.
187 void CheckAlphaTests(); 190 void CheckAlphaTests();
188 191
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 05fe2d370..7c0935a4e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -867,7 +867,8 @@ private:
867 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 867 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
868 // shader. 868 // shader.
869 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 869 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
870 return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; 870 // Config pack's first value is instance_id.
871 return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))";
871 case Attribute::Index::FrontFacing: 872 case Attribute::Index::FrontFacing:
872 // TODO(Subv): Find out what the values are for the other elements. 873 // TODO(Subv): Find out what the values are for the other elements.
873 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 874 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
@@ -3400,6 +3401,10 @@ private:
3400 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, 3401 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
3401 1); 3402 1);
3402 } 3403 }
3404 if (instr.generates_cc.Value() != 0) {
3405 regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate);
3406 LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete");
3407 }
3403 break; 3408 break;
3404 } 3409 }
3405 case OpCode::Type::IntegerSet: { 3410 case OpCode::Type::IntegerSet: {
@@ -3653,6 +3658,11 @@ private:
3653 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); 3658 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
3654 break; 3659 break;
3655 } 3660 }
3661 case Tegra::Shader::SystemVariable::Ydirection: {
3662 // Config pack's third value is Y_NEGATE's state.
3663 regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
3664 break;
3665 }
3656 default: { 3666 default: {
3657 UNIMPLEMENTED_MSG("Unhandled system move: {}", 3667 UNIMPLEMENTED_MSG("Unhandled system move: {}",
3658 static_cast<u32>(instr.sys20.Value())); 3668 static_cast<u32>(instr.sys20.Value()));
@@ -3676,11 +3686,17 @@ private:
3676 "BRA with constant buffers are not implemented"); 3686 "BRA with constant buffers are not implemented");
3677 3687
3678 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 3688 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3679 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3680 "BRA condition code used: {}", static_cast<u32>(cc));
3681
3682 const u32 target = offset + instr.bra.GetBranchTarget(); 3689 const u32 target = offset + instr.bra.GetBranchTarget();
3683 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); 3690 if (cc != Tegra::Shader::ConditionCode::T) {
3691 const std::string condition_code = regs.GetConditionCode(cc);
3692 shader.AddLine("if (" + condition_code + "){");
3693 shader.scope++;
3694 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3695 shader.scope--;
3696 shader.AddLine('}');
3697 } else {
3698 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3699 }
3684 break; 3700 break;
3685 } 3701 }
3686 case OpCode::Id::IPA: { 3702 case OpCode::Id::IPA: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index eea090e52..23ed91e27 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -24,8 +24,7 @@ layout (location = 0) out vec4 position;
24 24
25layout(std140) uniform vs_config { 25layout(std140) uniform vs_config {
26 vec4 viewport_flip; 26 vec4 viewport_flip;
27 uvec4 instance_id; 27 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
28 uvec4 flip_stage;
29 uvec4 alpha_test; 28 uvec4 alpha_test;
30}; 29};
31)"; 30)";
@@ -63,7 +62,8 @@ void main() {
63 out += R"( 62 out += R"(
64 63
65 // Check if the flip stage is VertexB 64 // Check if the flip stage is VertexB
66 if (flip_stage[0] == 1) { 65 // Config pack's second value is flip_stage
66 if (config_pack[1] == 1) {
67 // Viewport can be flipped, which is unsupported by glViewport 67 // Viewport can be flipped, which is unsupported by glViewport
68 position.xy *= viewport_flip.xy; 68 position.xy *= viewport_flip.xy;
69 } 69 }
@@ -71,7 +71,7 @@ void main() {
71 71
72 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 72 // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
73 // For now, this is here to bring order in lieu of proper emulation 73 // For now, this is here to bring order in lieu of proper emulation
74 if (flip_stage[0] == 1) { 74 if (config_pack[1] == 1) {
75 position.w = 1.0; 75 position.w = 1.0;
76 } 76 }
77} 77}
@@ -101,8 +101,7 @@ layout (location = 0) out vec4 position;
101 101
102layout (std140) uniform gs_config { 102layout (std140) uniform gs_config {
103 vec4 viewport_flip; 103 vec4 viewport_flip;
104 uvec4 instance_id; 104 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
105 uvec4 flip_stage;
106 uvec4 alpha_test; 105 uvec4 alpha_test;
107}; 106};
108 107
@@ -139,8 +138,7 @@ layout (location = 0) in vec4 position;
139 138
140layout (std140) uniform fs_config { 139layout (std140) uniform fs_config {
141 vec4 viewport_flip; 140 vec4 viewport_flip;
142 uvec4 instance_id; 141 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
143 uvec4 flip_stage;
144 uvec4 alpha_test; 142 uvec4 alpha_test;
145}; 143};
146 144
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8b8869ecb..6a30c28d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -27,16 +27,18 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
27 alpha_test.func = func; 27 alpha_test.func = func;
28 alpha_test.ref = regs.alpha_test_ref; 28 alpha_test.ref = regs.alpha_test_ref;
29 29
30 // We only assign the instance to the first component of the vector, the rest is just padding. 30 instance_id = state.current_instance;
31 instance_id[0] = state.current_instance;
32 31
33 // Assign in which stage the position has to be flipped 32 // Assign in which stage the position has to be flipped
34 // (the last stage before the fragment shader). 33 // (the last stage before the fragment shader).
35 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { 34 if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
36 flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); 35 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
37 } else { 36 } else {
38 flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); 37 flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
39 } 38 }
39
40 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
41 y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
40} 42}
41 43
42} // namespace OpenGL::GLShader 44} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 9a5d7e289..b757f5f44 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -21,8 +21,11 @@ using Tegra::Engines::Maxwell3D;
21struct MaxwellUniformData { 21struct MaxwellUniformData {
22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); 22 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
23 alignas(16) GLvec4 viewport_flip; 23 alignas(16) GLvec4 viewport_flip;
24 alignas(16) GLuvec4 instance_id; 24 struct alignas(16) {
25 alignas(16) GLuvec4 flip_stage; 25 GLuint instance_id;
26 GLuint flip_stage;
27 GLfloat y_direction;
28 };
26 struct alignas(16) { 29 struct alignas(16) {
27 GLuint enabled; 30 GLuint enabled;
28 GLuint func; 31 GLuint func;
@@ -30,7 +33,7 @@ struct MaxwellUniformData {
30 GLuint padding; 33 GLuint padding;
31 } alpha_test; 34 } alpha_test;
32}; 35};
33static_assert(sizeof(MaxwellUniformData) == 64, "MaxwellUniformData structure size is incorrect"); 36static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
34static_assert(sizeof(MaxwellUniformData) < 16384, 37static_assert(sizeof(MaxwellUniformData) < 16384,
35 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); 38 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
36 39
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 934f4db78..b3bfad6a0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -92,6 +92,13 @@ OpenGLState::OpenGLState() {
92 92
93 point.size = 1; 93 point.size = 1;
94 fragment_color_clamp.enabled = false; 94 fragment_color_clamp.enabled = false;
95
96 polygon_offset.fill_enable = false;
97 polygon_offset.line_enable = false;
98 polygon_offset.point_enable = false;
99 polygon_offset.factor = 0.0f;
100 polygon_offset.units = 0.0f;
101 polygon_offset.clamp = 0.0f;
95} 102}
96 103
97void OpenGLState::ApplyDefaultState() { 104void OpenGLState::ApplyDefaultState() {
@@ -406,6 +413,55 @@ void OpenGLState::ApplyLogicOp() const {
406 } 413 }
407} 414}
408 415
416void OpenGLState::ApplyPolygonOffset() const {
417
418 const bool fill_enable_changed =
419 polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
420 const bool line_enable_changed =
421 polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
422 const bool point_enable_changed =
423 polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
424 const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
425 const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
426 const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
427
428 if (fill_enable_changed) {
429 if (polygon_offset.fill_enable) {
430 glEnable(GL_POLYGON_OFFSET_FILL);
431 } else {
432 glDisable(GL_POLYGON_OFFSET_FILL);
433 }
434 }
435
436 if (line_enable_changed) {
437 if (polygon_offset.line_enable) {
438 glEnable(GL_POLYGON_OFFSET_LINE);
439 } else {
440 glDisable(GL_POLYGON_OFFSET_LINE);
441 }
442 }
443
444 if (point_enable_changed) {
445 if (polygon_offset.point_enable) {
446 glEnable(GL_POLYGON_OFFSET_POINT);
447 } else {
448 glDisable(GL_POLYGON_OFFSET_POINT);
449 }
450 }
451
452 if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
453 (factor_changed || units_changed || clamp_changed)) {
454
455 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
456 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
457 } else {
458 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
459 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
460 "Unimplemented Depth polygon offset clamp.");
461 }
462 }
463}
464
409void OpenGLState::ApplyTextures() const { 465void OpenGLState::ApplyTextures() const {
410 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 466 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
411 const auto& texture_unit = texture_units[i]; 467 const auto& texture_unit = texture_units[i];
@@ -532,6 +588,7 @@ void OpenGLState::Apply() const {
532 ApplyLogicOp(); 588 ApplyLogicOp();
533 ApplyTextures(); 589 ApplyTextures();
534 ApplySamplers(); 590 ApplySamplers();
591 ApplyPolygonOffset();
535 cur_state = *this; 592 cur_state = *this;
536} 593}
537 594
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 032fc43f0..0bf19ed07 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -176,6 +176,15 @@ public:
176 float size; // GL_POINT_SIZE 176 float size; // GL_POINT_SIZE
177 } point; 177 } point;
178 178
179 struct {
180 bool point_enable;
181 bool line_enable;
182 bool fill_enable;
183 GLfloat units;
184 GLfloat factor;
185 GLfloat clamp;
186 } polygon_offset;
187
179 std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE 188 std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
180 189
181 OpenGLState(); 190 OpenGLState();
@@ -226,6 +235,7 @@ private:
226 void ApplyLogicOp() const; 235 void ApplyLogicOp() const;
227 void ApplyTextures() const; 236 void ApplyTextures() const;
228 void ApplySamplers() const; 237 void ApplySamplers() const;
238 void ApplyPolygonOffset() const;
229}; 239};
230 240
231} // namespace OpenGL 241} // namespace OpenGL