summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp48
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h2
-rw-r--r--src/core/hle/service/vi/vi.cpp46
-rw-r--r--src/video_core/engines/maxwell_3d.h77
-rw-r--r--src/video_core/engines/shader_bytecode.h57
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
-rw-r--r--src/video_core/shader/decode/conversion.cpp113
-rw-r--r--src/video_core/shader/decode/texture.cpp14
-rw-r--r--src/video_core/shader/decode/video.cpp58
-rw-r--r--src/video_core/shader/shader_ir.h3
-rw-r--r--src/video_core/texture_cache/texture_cache.h66
-rw-r--r--src/video_core/textures/astc.cpp241
-rw-r--r--src/video_core/textures/texture.h15
-rw-r--r--src/yuzu/game_list.cpp6
-rw-r--r--src/yuzu/game_list_p.h24
21 files changed, 623 insertions, 200 deletions
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 32b6f4b27..f1e3d832a 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
28 buffer.slot = slot; 28 buffer.slot = slot;
29 buffer.igbp_buffer = igbp_buffer; 29 buffer.igbp_buffer = igbp_buffer;
30 buffer.status = Buffer::Status::Free; 30 buffer.status = Buffer::Status::Free;
31 free_buffers.push_back(slot);
31 32
32 queue.emplace_back(buffer); 33 queue.emplace_back(buffer);
33 buffer_wait_event.writable->Signal(); 34 buffer_wait_event.writable->Signal();
@@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
35 36
36std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, 37std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
37 u32 height) { 38 u32 height) {
38 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
39 // Only consider free buffers. Buffers become free once again after they've been Acquired
40 // and Released by the compositor, see the NVFlinger::Compose method.
41 if (buffer.status != Buffer::Status::Free) {
42 return false;
43 }
44 39
45 // Make sure that the parameters match. 40 if (free_buffers.empty()) {
46 return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; 41 return {};
47 }); 42 }
43
44 auto f_itr = free_buffers.begin();
45 auto itr = queue.end();
46
47 while (f_itr != free_buffers.end()) {
48 auto slot = *f_itr;
49 itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
50 // Only consider free buffers. Buffers become free once again after they've been
51 // Acquired and Released by the compositor, see the NVFlinger::Compose method.
52 if (buffer.status != Buffer::Status::Free) {
53 return false;
54 }
55
56 if (buffer.slot != slot) {
57 return false;
58 }
59
60 // Make sure that the parameters match.
61 return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
62 });
63
64 if (itr != queue.end()) {
65 free_buffers.erase(f_itr);
66 break;
67 }
68 ++f_itr;
69 }
48 70
49 if (itr == queue.end()) { 71 if (itr == queue.end()) {
50 return {}; 72 return {};
@@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
99 ASSERT(itr != queue.end()); 121 ASSERT(itr != queue.end());
100 ASSERT(itr->status == Buffer::Status::Acquired); 122 ASSERT(itr->status == Buffer::Status::Acquired);
101 itr->status = Buffer::Status::Free; 123 itr->status = Buffer::Status::Free;
124 free_buffers.push_back(slot);
102 125
103 buffer_wait_event.writable->Signal(); 126 buffer_wait_event.writable->Signal();
104} 127}
105 128
129void BufferQueue::Disconnect() {
130 queue.clear();
131 queue_sequence.clear();
132 id = 1;
133 layer_id = 1;
134}
135
106u32 BufferQueue::Query(QueryType type) { 136u32 BufferQueue::Query(QueryType type) {
107 LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type)); 137 LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));
108 138
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index f4bbfd945..d5f31e567 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -87,6 +87,7 @@ public:
87 Service::Nvidia::MultiFence& multi_fence); 87 Service::Nvidia::MultiFence& multi_fence);
88 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 88 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
89 void ReleaseBuffer(u32 slot); 89 void ReleaseBuffer(u32 slot);
90 void Disconnect();
90 u32 Query(QueryType type); 91 u32 Query(QueryType type);
91 92
92 u32 GetId() const { 93 u32 GetId() const {
@@ -101,6 +102,7 @@ private:
101 u32 id; 102 u32 id;
102 u64 layer_id; 103 u64 layer_id;
103 104
105 std::list<u32> free_buffers;
104 std::vector<Buffer> queue; 106 std::vector<Buffer> queue;
105 std::list<u32> queue_sequence; 107 std::list<u32> queue_sequence;
106 Kernel::EventPair buffer_wait_event; 108 Kernel::EventPair buffer_wait_event;
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 519da74e0..fdc62d05b 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -513,7 +513,8 @@ private:
513 513
514 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 514 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
515 515
516 if (transaction == TransactionId::Connect) { 516 switch (transaction) {
517 case TransactionId::Connect: {
517 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 518 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
518 IGBPConnectResponseParcel response{ 519 IGBPConnectResponseParcel response{
519 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * 520 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
@@ -521,14 +522,18 @@ private:
521 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * 522 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
522 Settings::values.resolution_factor)}; 523 Settings::values.resolution_factor)};
523 ctx.WriteBuffer(response.Serialize()); 524 ctx.WriteBuffer(response.Serialize());
524 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 525 break;
526 }
527 case TransactionId::SetPreallocatedBuffer: {
525 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 528 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
526 529
527 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); 530 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
528 531
529 IGBPSetPreallocatedBufferResponseParcel response{}; 532 IGBPSetPreallocatedBufferResponseParcel response{};
530 ctx.WriteBuffer(response.Serialize()); 533 ctx.WriteBuffer(response.Serialize());
531 } else if (transaction == TransactionId::DequeueBuffer) { 534 break;
535 }
536 case TransactionId::DequeueBuffer: {
532 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 537 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
533 const u32 width{request.data.width}; 538 const u32 width{request.data.width};
534 const u32 height{request.data.height}; 539 const u32 height{request.data.height};
@@ -556,14 +561,18 @@ private:
556 }, 561 },
557 buffer_queue.GetWritableBufferWaitEvent()); 562 buffer_queue.GetWritableBufferWaitEvent());
558 } 563 }
559 } else if (transaction == TransactionId::RequestBuffer) { 564 break;
565 }
566 case TransactionId::RequestBuffer: {
560 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 567 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
561 568
562 auto& buffer = buffer_queue.RequestBuffer(request.slot); 569 auto& buffer = buffer_queue.RequestBuffer(request.slot);
563 570
564 IGBPRequestBufferResponseParcel response{buffer}; 571 IGBPRequestBufferResponseParcel response{buffer};
565 ctx.WriteBuffer(response.Serialize()); 572 ctx.WriteBuffer(response.Serialize());
566 } else if (transaction == TransactionId::QueueBuffer) { 573 break;
574 }
575 case TransactionId::QueueBuffer: {
567 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 576 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
568 577
569 buffer_queue.QueueBuffer(request.data.slot, request.data.transform, 578 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
@@ -572,7 +581,9 @@ private:
572 581
573 IGBPQueueBufferResponseParcel response{1280, 720}; 582 IGBPQueueBufferResponseParcel response{1280, 720};
574 ctx.WriteBuffer(response.Serialize()); 583 ctx.WriteBuffer(response.Serialize());
575 } else if (transaction == TransactionId::Query) { 584 break;
585 }
586 case TransactionId::Query: {
576 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 587 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
577 588
578 const u32 value = 589 const u32 value =
@@ -580,15 +591,30 @@ private:
580 591
581 IGBPQueryResponseParcel response{value}; 592 IGBPQueryResponseParcel response{value};
582 ctx.WriteBuffer(response.Serialize()); 593 ctx.WriteBuffer(response.Serialize());
583 } else if (transaction == TransactionId::CancelBuffer) { 594 break;
595 }
596 case TransactionId::CancelBuffer: {
584 LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer"); 597 LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer");
585 } else if (transaction == TransactionId::Disconnect || 598 break;
586 transaction == TransactionId::DetachBuffer) { 599 }
600 case TransactionId::Disconnect: {
601 LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect");
602 const auto buffer = ctx.ReadBuffer();
603
604 buffer_queue.Disconnect();
605
606 IGBPEmptyResponseParcel response{};
607 ctx.WriteBuffer(response.Serialize());
608 break;
609 }
610 case TransactionId::DetachBuffer: {
587 const auto buffer = ctx.ReadBuffer(); 611 const auto buffer = ctx.ReadBuffer();
588 612
589 IGBPEmptyResponseParcel response{}; 613 IGBPEmptyResponseParcel response{};
590 ctx.WriteBuffer(response.Serialize()); 614 ctx.WriteBuffer(response.Serialize());
591 } else { 615 break;
616 }
617 default:
592 ASSERT_MSG(false, "Unimplemented"); 618 ASSERT_MSG(false, "Unimplemented");
593 } 619 }
594 620
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d24c9f657..2977a7d81 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -312,6 +312,35 @@ public:
312 } 312 }
313 }; 313 };
314 314
315 struct MsaaSampleLocation {
316 union {
317 BitField<0, 4, u32> x0;
318 BitField<4, 4, u32> y0;
319 BitField<8, 4, u32> x1;
320 BitField<12, 4, u32> y1;
321 BitField<16, 4, u32> x2;
322 BitField<20, 4, u32> y2;
323 BitField<24, 4, u32> x3;
324 BitField<28, 4, u32> y3;
325 };
326
327 constexpr std::pair<u32, u32> Location(int index) const {
328 switch (index) {
329 case 0:
330 return {x0, y0};
331 case 1:
332 return {x1, y1};
333 case 2:
334 return {x2, y2};
335 case 3:
336 return {x3, y3};
337 default:
338 UNREACHABLE();
339 return {0, 0};
340 }
341 }
342 };
343
315 enum class DepthMode : u32 { 344 enum class DepthMode : u32 {
316 MinusOneToOne = 0, 345 MinusOneToOne = 0,
317 ZeroToOne = 1, 346 ZeroToOne = 1,
@@ -793,7 +822,13 @@ public:
793 822
794 u32 rt_separate_frag_data; 823 u32 rt_separate_frag_data;
795 824
796 INSERT_UNION_PADDING_WORDS(0xC); 825 INSERT_UNION_PADDING_WORDS(0x1);
826
827 u32 multisample_raster_enable;
828 u32 multisample_raster_samples;
829 std::array<u32, 4> multisample_sample_mask;
830
831 INSERT_UNION_PADDING_WORDS(0x5);
797 832
798 struct { 833 struct {
799 u32 address_high; 834 u32 address_high;
@@ -830,7 +865,16 @@ public:
830 865
831 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 866 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
832 867
833 INSERT_UNION_PADDING_WORDS(0xF); 868 std::array<MsaaSampleLocation, 4> multisample_sample_locations;
869
870 INSERT_UNION_PADDING_WORDS(0x2);
871
872 union {
873 BitField<0, 1, u32> enable;
874 BitField<4, 3, u32> target;
875 } multisample_coverage_to_color;
876
877 INSERT_UNION_PADDING_WORDS(0x8);
834 878
835 struct { 879 struct {
836 union { 880 union {
@@ -922,7 +966,10 @@ public:
922 BitField<4, 1, u32> triangle_rast_flip; 966 BitField<4, 1, u32> triangle_rast_flip;
923 } screen_y_control; 967 } screen_y_control;
924 968
925 INSERT_UNION_PADDING_WORDS(0x21); 969 float line_width_smooth;
970 float line_width_aliased;
971
972 INSERT_UNION_PADDING_WORDS(0x1F);
926 973
927 u32 vb_element_base; 974 u32 vb_element_base;
928 u32 vb_base_instance; 975 u32 vb_base_instance;
@@ -943,7 +990,7 @@ public:
943 990
944 CounterReset counter_reset; 991 CounterReset counter_reset;
945 992
946 INSERT_UNION_PADDING_WORDS(0x1); 993 u32 multisample_enable;
947 994
948 u32 zeta_enable; 995 u32 zeta_enable;
949 996
@@ -980,7 +1027,7 @@ public:
980 1027
981 float polygon_offset_factor; 1028 float polygon_offset_factor;
982 1029
983 INSERT_UNION_PADDING_WORDS(0x1); 1030 u32 line_smooth_enable;
984 1031
985 struct { 1032 struct {
986 u32 tic_address_high; 1033 u32 tic_address_high;
@@ -1007,7 +1054,11 @@ public:
1007 1054
1008 float polygon_offset_units; 1055 float polygon_offset_units;
1009 1056
1010 INSERT_UNION_PADDING_WORDS(0x11); 1057 INSERT_UNION_PADDING_WORDS(0x4);
1058
1059 Tegra::Texture::MsaaMode multisample_mode;
1060
1061 INSERT_UNION_PADDING_WORDS(0xC);
1011 1062
1012 union { 1063 union {
1013 BitField<2, 1, u32> coord_origin; 1064 BitField<2, 1, u32> coord_origin;
@@ -1507,12 +1558,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1507ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1558ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
1508ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); 1559ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1509ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1560ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1510ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1511ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1561ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1562ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1563ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
1564ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
1565ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
1512ASSERT_REG_POSITION(zeta, 0x3F8); 1566ASSERT_REG_POSITION(zeta, 0x3F8);
1513ASSERT_REG_POSITION(clear_flags, 0x43E); 1567ASSERT_REG_POSITION(clear_flags, 0x43E);
1514ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1568ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1515ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1569ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1570ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
1571ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
1516ASSERT_REG_POSITION(rt_control, 0x487); 1572ASSERT_REG_POSITION(rt_control, 0x487);
1517ASSERT_REG_POSITION(zeta_width, 0x48a); 1573ASSERT_REG_POSITION(zeta_width, 0x48a);
1518ASSERT_REG_POSITION(zeta_height, 0x48b); 1574ASSERT_REG_POSITION(zeta_height, 0x48b);
@@ -1538,6 +1594,8 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
1538ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); 1594ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
1539ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); 1595ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
1540ASSERT_REG_POSITION(screen_y_control, 0x4EB); 1596ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1597ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
1598ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
1541ASSERT_REG_POSITION(vb_element_base, 0x50D); 1599ASSERT_REG_POSITION(vb_element_base, 0x50D);
1542ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1600ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1543ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1601ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
@@ -1545,11 +1603,13 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545);
1545ASSERT_REG_POSITION(point_size, 0x546); 1603ASSERT_REG_POSITION(point_size, 0x546);
1546ASSERT_REG_POSITION(point_sprite_enable, 0x548); 1604ASSERT_REG_POSITION(point_sprite_enable, 0x548);
1547ASSERT_REG_POSITION(counter_reset, 0x54C); 1605ASSERT_REG_POSITION(counter_reset, 0x54C);
1606ASSERT_REG_POSITION(multisample_enable, 0x54D);
1548ASSERT_REG_POSITION(zeta_enable, 0x54E); 1607ASSERT_REG_POSITION(zeta_enable, 0x54E);
1549ASSERT_REG_POSITION(multisample_control, 0x54F); 1608ASSERT_REG_POSITION(multisample_control, 0x54F);
1550ASSERT_REG_POSITION(condition, 0x554); 1609ASSERT_REG_POSITION(condition, 0x554);
1551ASSERT_REG_POSITION(tsc, 0x557); 1610ASSERT_REG_POSITION(tsc, 0x557);
1552ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1611ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
1612ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
1553ASSERT_REG_POSITION(tic, 0x55D); 1613ASSERT_REG_POSITION(tic, 0x55D);
1554ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); 1614ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
1555ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); 1615ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1558,6 +1618,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
1558ASSERT_REG_POSITION(stencil_back_func_func, 0x569); 1618ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
1559ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); 1619ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
1560ASSERT_REG_POSITION(polygon_offset_units, 0x56F); 1620ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
1621ASSERT_REG_POSITION(multisample_mode, 0x574);
1561ASSERT_REG_POSITION(point_coord_replace, 0x581); 1622ASSERT_REG_POSITION(point_coord_replace, 0x581);
1562ASSERT_REG_POSITION(code_address, 0x582); 1623ASSERT_REG_POSITION(code_address, 0x582);
1563ASSERT_REG_POSITION(draw, 0x585); 1624ASSERT_REG_POSITION(draw, 0x585);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 498936f0c..c66c66f6c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -290,6 +290,23 @@ enum class VmadShr : u64 {
290 Shr15 = 2, 290 Shr15 = 2,
291}; 291};
292 292
293enum class VmnmxType : u64 {
294 Bits8,
295 Bits16,
296 Bits32,
297};
298
299enum class VmnmxOperation : u64 {
300 Mrg_16H = 0,
301 Mrg_16L = 1,
302 Mrg_8B0 = 2,
303 Mrg_8B2 = 3,
304 Acc = 4,
305 Min = 5,
306 Max = 6,
307 Nop = 7,
308};
309
293enum class XmadMode : u64 { 310enum class XmadMode : u64 {
294 None = 0, 311 None = 0,
295 CLo = 1, 312 CLo = 1,
@@ -1651,6 +1668,42 @@ union Instruction {
1651 } vmad; 1668 } vmad;
1652 1669
1653 union { 1670 union {
1671 BitField<54, 1, u64> is_dest_signed;
1672 BitField<48, 1, u64> is_src_a_signed;
1673 BitField<49, 1, u64> is_src_b_signed;
1674 BitField<37, 2, u64> src_format_a;
1675 BitField<29, 2, u64> src_format_b;
1676 BitField<56, 1, u64> mx;
1677 BitField<55, 1, u64> sat;
1678 BitField<36, 2, u64> selector_a;
1679 BitField<28, 2, u64> selector_b;
1680 BitField<50, 1, u64> is_op_b_register;
1681 BitField<51, 3, VmnmxOperation> operation;
1682
1683 VmnmxType SourceFormatA() const {
1684 switch (src_format_a) {
1685 case 0b11:
1686 return VmnmxType::Bits32;
1687 case 0b10:
1688 return VmnmxType::Bits16;
1689 default:
1690 return VmnmxType::Bits8;
1691 }
1692 }
1693
1694 VmnmxType SourceFormatB() const {
1695 switch (src_format_b) {
1696 case 0b11:
1697 return VmnmxType::Bits32;
1698 case 0b10:
1699 return VmnmxType::Bits16;
1700 default:
1701 return VmnmxType::Bits8;
1702 }
1703 }
1704 } vmnmx;
1705
1706 union {
1654 BitField<20, 16, u64> imm20_16; 1707 BitField<20, 16, u64> imm20_16;
1655 BitField<35, 1, u64> high_b_rr; // used on RR 1708 BitField<35, 1, u64> high_b_rr; // used on RR
1656 BitField<36, 1, u64> product_shift_left; 1709 BitField<36, 1, u64> product_shift_left;
@@ -1763,6 +1816,7 @@ public:
1763 MEMBAR, 1816 MEMBAR,
1764 VMAD, 1817 VMAD,
1765 VSETP, 1818 VSETP,
1819 VMNMX,
1766 FFMA_IMM, // Fused Multiply and Add 1820 FFMA_IMM, // Fused Multiply and Add
1767 FFMA_CR, 1821 FFMA_CR,
1768 FFMA_RC, 1822 FFMA_RC,
@@ -2070,6 +2124,7 @@ private:
2070 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), 2124 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2071 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), 2125 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2072 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), 2126 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2127 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2073 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 2128 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2074 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 2129 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2075 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 2130 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -2170,7 +2225,7 @@ private:
2170 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), 2225 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
2171 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), 2226 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
2172 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), 2227 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
2173 INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), 2228 INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
2174 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), 2229 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
2175 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), 2230 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
2176 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), 2231 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 368f399df..f31d960c7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
345 345
346 texture_cache.GuardRenderTargets(true); 346 texture_cache.GuardRenderTargets(true);
347 347
348 View depth_surface = texture_cache.GetDepthBufferSurface(true); 348 View depth_surface = texture_cache.GetDepthBufferSurface();
349 349
350 const auto& regs = gpu.regs; 350 const auto& regs = gpu.regs;
351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); 351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
354 FramebufferCacheKey key; 354 FramebufferCacheKey key;
355 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); 355 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
356 for (std::size_t index = 0; index < colors_count; ++index) { 356 for (std::size_t index = 0; index < colors_count; ++index) {
357 View color_surface{texture_cache.GetColorBufferSurface(index, true)}; 357 View color_surface{texture_cache.GetColorBufferSurface(index)};
358 if (!color_surface) { 358 if (!color_surface) {
359 continue; 359 continue;
360 } 360 }
@@ -387,12 +387,12 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
387 View color_surface; 387 View color_surface;
388 if (using_color_fb) { 388 if (using_color_fb) {
389 const std::size_t index = regs.clear_buffers.RT; 389 const std::size_t index = regs.clear_buffers.RT;
390 color_surface = texture_cache.GetColorBufferSurface(index, true); 390 color_surface = texture_cache.GetColorBufferSurface(index);
391 texture_cache.MarkColorBufferInUse(index); 391 texture_cache.MarkColorBufferInUse(index);
392 } 392 }
393 View depth_surface; 393 View depth_surface;
394 if (using_depth_fb || using_stencil_fb) { 394 if (using_depth_fb || using_stencil_fb) {
395 depth_surface = texture_cache.GetDepthBufferSurface(true); 395 depth_surface = texture_cache.GetDepthBufferSurface();
396 texture_cache.MarkDepthBufferInUse(); 396 texture_cache.MarkDepthBufferInUse();
397 } 397 }
398 texture_cache.GuardRenderTargets(false); 398 texture_cache.GuardRenderTargets(false);
@@ -496,6 +496,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
496 SyncPrimitiveRestart(); 496 SyncPrimitiveRestart();
497 SyncScissorTest(); 497 SyncScissorTest();
498 SyncPointState(); 498 SyncPointState();
499 SyncLineState();
499 SyncPolygonOffset(); 500 SyncPolygonOffset();
500 SyncAlphaTest(); 501 SyncAlphaTest();
501 SyncFramebufferSRGB(); 502 SyncFramebufferSRGB();
@@ -1311,6 +1312,19 @@ void RasterizerOpenGL::SyncPointState() {
1311 glDisable(GL_PROGRAM_POINT_SIZE); 1312 glDisable(GL_PROGRAM_POINT_SIZE);
1312} 1313}
1313 1314
1315void RasterizerOpenGL::SyncLineState() {
1316 auto& gpu = system.GPU().Maxwell3D();
1317 auto& flags = gpu.dirty.flags;
1318 if (!flags[Dirty::LineWidth]) {
1319 return;
1320 }
1321 flags[Dirty::LineWidth] = false;
1322
1323 const auto& regs = gpu.regs;
1324 oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
1325 glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
1326}
1327
1314void RasterizerOpenGL::SyncPolygonOffset() { 1328void RasterizerOpenGL::SyncPolygonOffset() {
1315 auto& gpu = system.GPU().Maxwell3D(); 1329 auto& gpu = system.GPU().Maxwell3D();
1316 auto& flags = gpu.dirty.flags; 1330 auto& flags = gpu.dirty.flags;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 212dad852..435da4425 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -171,6 +171,9 @@ private:
171 /// Syncs the point state to match the guest state 171 /// Syncs the point state to match the guest state
172 void SyncPointState(); 172 void SyncPointState();
173 173
174 /// Syncs the line state to match the guest state
175 void SyncLineState();
176
174 /// Syncs the rasterizer enable state to match the guest state 177 /// Syncs the rasterizer enable state to match the guest state
175 void SyncRasterizeEnable(); 178 void SyncRasterizeEnable();
176 179
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 160ae4340..1f1f01313 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1819,15 +1819,15 @@ private:
1819 } 1819 }
1820 1820
1821 Expression HMergeH0(Operation operation) { 1821 Expression HMergeH0(Operation operation) {
1822 std::string dest = VisitOperand(operation, 0).AsUint(); 1822 const std::string dest = VisitOperand(operation, 0).AsUint();
1823 std::string src = VisitOperand(operation, 1).AsUint(); 1823 const std::string src = VisitOperand(operation, 1).AsUint();
1824 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; 1824 return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
1825 } 1825 }
1826 1826
1827 Expression HMergeH1(Operation operation) { 1827 Expression HMergeH1(Operation operation) {
1828 std::string dest = VisitOperand(operation, 0).AsUint(); 1828 const std::string dest = VisitOperand(operation, 0).AsUint();
1829 std::string src = VisitOperand(operation, 1).AsUint(); 1829 const std::string src = VisitOperand(operation, 1).AsUint();
1830 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; 1830 return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
1831 } 1831 }
1832 1832
1833 Expression HPack2(Operation operation) { 1833 Expression HPack2(Operation operation) {
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 255ac3147..d24fad3de 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -185,6 +185,12 @@ void SetupDirtyPointSize(Tables& tables) {
185 tables[0][OFF(point_sprite_enable)] = PointSize; 185 tables[0][OFF(point_sprite_enable)] = PointSize;
186} 186}
187 187
188void SetupDirtyLineWidth(Tables& tables) {
189 tables[0][OFF(line_width_smooth)] = LineWidth;
190 tables[0][OFF(line_width_aliased)] = LineWidth;
191 tables[0][OFF(line_smooth_enable)] = LineWidth;
192}
193
188void SetupDirtyClipControl(Tables& tables) { 194void SetupDirtyClipControl(Tables& tables) {
189 auto& table = tables[0]; 195 auto& table = tables[0];
190 table[OFF(screen_y_control)] = ClipControl; 196 table[OFF(screen_y_control)] = ClipControl;
@@ -233,6 +239,7 @@ void StateTracker::Initialize() {
233 SetupDirtyLogicOp(tables); 239 SetupDirtyLogicOp(tables);
234 SetupDirtyFragmentClampColor(tables); 240 SetupDirtyFragmentClampColor(tables);
235 SetupDirtyPointSize(tables); 241 SetupDirtyPointSize(tables);
242 SetupDirtyLineWidth(tables);
236 SetupDirtyClipControl(tables); 243 SetupDirtyClipControl(tables);
237 SetupDirtyDepthClampEnabled(tables); 244 SetupDirtyDepthClampEnabled(tables);
238 SetupDirtyMisc(tables); 245 SetupDirtyMisc(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index b882d75c3..0f823288e 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -78,6 +78,7 @@ enum : u8 {
78 LogicOp, 78 LogicOp,
79 FragmentClampColor, 79 FragmentClampColor,
80 PointSize, 80 PointSize,
81 LineWidth,
81 ClipControl, 82 ClipControl,
82 DepthClampEnabled, 83 DepthClampEnabled,
83 84
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 36590a6d0..0b4d999d7 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -411,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default;
411void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { 411void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
412 ASSERT(params.num_levels == 1); 412 ASSERT(params.num_levels == 1);
413 413
414 const GLuint texture = surface.GetTexture();
415 if (params.num_layers > 1) { 414 if (params.num_layers > 1) {
416 // Layered framebuffer attachments 415 // Layered framebuffer attachments
417 UNIMPLEMENTED_IF(params.base_layer != 0); 416 UNIMPLEMENTED_IF(params.base_layer != 0);
418 417
419 switch (params.target) { 418 switch (params.target) {
420 case SurfaceTarget::Texture2DArray: 419 case SurfaceTarget::Texture2DArray:
421 glFramebufferTexture(target, attachment, texture, params.base_level); 420 glFramebufferTexture(target, attachment, GetTexture(), params.base_level);
422 break; 421 break;
423 default: 422 default:
424 UNIMPLEMENTED(); 423 UNIMPLEMENTED();
@@ -427,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
427 } 426 }
428 427
429 const GLenum view_target = surface.GetTarget(); 428 const GLenum view_target = surface.GetTarget();
429 const GLuint texture = surface.GetTexture();
430 switch (surface.GetSurfaceParams().target) { 430 switch (surface.GetSurfaceParams().target) {
431 case SurfaceTarget::Texture1D: 431 case SurfaceTarget::Texture1D:
432 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); 432 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index af9420c19..33cbc0bb6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -609,7 +609,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
609 Texceptions texceptions; 609 Texceptions texceptions;
610 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { 610 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
611 if (update_rendertargets) { 611 if (update_rendertargets) {
612 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); 612 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
613 } 613 }
614 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { 614 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
615 texceptions[rt] = true; 615 texceptions[rt] = true;
@@ -617,7 +617,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
617 } 617 }
618 618
619 if (update_rendertargets) { 619 if (update_rendertargets) {
620 zeta_attachment = texture_cache.GetDepthBufferSurface(true); 620 zeta_attachment = texture_cache.GetDepthBufferSurface();
621 } 621 }
622 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { 622 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
623 texceptions[ZETA_TEXCEPTION_INDEX] = true; 623 texceptions[ZETA_TEXCEPTION_INDEX] = true;
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index c72690b2b..b9989c88c 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -2,6 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <limits>
6#include <optional>
7#include <utility>
8
5#include "common/assert.h" 9#include "common/assert.h"
6#include "common/common_types.h" 10#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h" 11#include "video_core/engines/shader_bytecode.h"
@@ -15,9 +19,49 @@ using Tegra::Shader::OpCode;
15using Tegra::Shader::Register; 19using Tegra::Shader::Register;
16 20
17namespace { 21namespace {
22
18constexpr OperationCode GetFloatSelector(u64 selector) { 23constexpr OperationCode GetFloatSelector(u64 selector) {
19 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; 24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
20} 25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53 // Signed destination, clamp to [-128, 127] for instance
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58 // Avoid shifting by 32, that is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
21} // Anonymous namespace 65} // Anonymous namespace
22 66
23u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { 67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
@@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
28 case OpCode::Id::I2I_R: 72 case OpCode::Id::I2I_R:
29 case OpCode::Id::I2I_C: 73 case OpCode::Id::I2I_C:
30 case OpCode::Id::I2I_IMM: { 74 case OpCode::Id::I2I_IMM: {
31 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); 75 const bool src_signed = instr.conversion.is_input_signed;
32 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 76 const bool dst_signed = instr.conversion.is_output_signed;
33 UNIMPLEMENTED_IF(instr.alu.saturate_d); 77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
34 80
35 const bool input_signed = instr.conversion.is_input_signed; 81 Node value = [this, instr, opcode] {
36 const bool output_signed = instr.conversion.is_output_signed;
37
38 Node value = [&]() {
39 switch (opcode->get().GetId()) { 82 switch (opcode->get().GetId()) {
40 case OpCode::Id::I2I_R: 83 case OpCode::Id::I2I_R:
41 return GetRegister(instr.gpr20); 84 return GetRegister(instr.gpr20);
@@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
48 return Immediate(0); 91 return Immediate(0);
49 } 92 }
50 }(); 93 }();
51 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
52 94
53 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, 95 // Ensure the source selector is valid
54 input_signed); 96 switch (instr.conversion.src_size) {
55 if (input_signed != output_signed) { 97 case Register::Size::Byte:
56 value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); 98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122 // Simplify generated expressions, this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141 // No saturation, we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
57 } 144 }
58 145
59 SetInternalFlagsFromInteger(bb, value, instr.generates_cc); 146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
60 SetRegister(bb, instr.gpr0, value); 147 SetRegister(bb, instr.gpr0, std::move(value));
61 break; 148 break;
62 } 149 }
63 case OpCode::Id::I2F_R: 150 case OpCode::Id::I2F_R:
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 48350e042..6c4a1358b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
780 // When lod is used always is in gpr20 780 // When lod is used always is in gpr20
781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
782 782
783 // Fill empty entries from the guest sampler
784 const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
785 if (type_coord_count != entry_coord_count) {
786 LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
787
788 // When the size is higher we insert zeroes
789 for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
790 coords.push_back(GetRegister(Register::ZeroIndex));
791 }
792
793 // Then we ensure the size matches the number of entries (dropping unused values)
794 coords.resize(entry_coord_count);
795 }
796
797 Node4 values; 783 Node4 values;
798 for (u32 element = 0; element < values.size(); ++element) { 784 for (u32 element = 0; element < values.size(); ++element) {
799 auto coords_copy = coords; 785 auto coords_copy = coords;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
10 10
11namespace VideoCommon::Shader { 11namespace VideoCommon::Shader {
12 12
13using std::move;
13using Tegra::Shader::Instruction; 14using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 15using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 16using Tegra::Shader::Pred;
16using Tegra::Shader::VideoType; 17using Tegra::Shader::VideoType;
17using Tegra::Shader::VmadShr; 18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
18 21
19u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { 22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 23 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 24 const auto opcode = OpCode::Decode(instr);
22 25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
23 const Node op_a = 31 const Node op_a =
24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, 32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25 instr.video.type_a, instr.video.byte_height_a); 33 instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
109 } 117 }
110} 118}
111 119
120void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
121 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
124 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
125 UNIMPLEMENTED_IF(instr.vmnmx.sat);
126 UNIMPLEMENTED_IF(instr.generates_cc);
127
128 Node op_a = GetRegister(instr.gpr8);
129 Node op_b = GetRegister(instr.gpr20);
130 Node op_c = GetRegister(instr.gpr39);
131
132 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
133 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
134
135 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
136 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
137
138 switch (instr.vmnmx.operation) {
139 case VmnmxOperation::Mrg_16H:
140 value = BitfieldInsert(move(op_c), move(value), 16, 16);
141 break;
142 case VmnmxOperation::Mrg_16L:
143 value = BitfieldInsert(move(op_c), move(value), 0, 16);
144 break;
145 case VmnmxOperation::Mrg_8B0:
146 value = BitfieldInsert(move(op_c), move(value), 0, 8);
147 break;
148 case VmnmxOperation::Mrg_8B2:
149 value = BitfieldInsert(move(op_c), move(value), 16, 8);
150 break;
151 case VmnmxOperation::Acc:
152 value = Operation(OperationCode::IAdd, move(value), move(op_c));
153 break;
154 case VmnmxOperation::Min:
155 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
156 break;
157 case VmnmxOperation::Max:
158 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
159 break;
160 case VmnmxOperation::Nop:
161 break;
162 default:
163 UNREACHABLE();
164 break;
165 }
166
167 SetRegister(bb, instr.gpr0, move(value));
168}
169
112} // namespace VideoCommon::Shader 170} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ca6c976c9..c6e7bdf50 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -354,6 +354,9 @@ private:
354 /// Marks the usage of a input or output attribute. 354 /// Marks the usage of a input or output attribute.
355 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); 355 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
356 356
357 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
358 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
359
357 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 360 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
358 const Node4& components); 361 const Node4& components);
359 362
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 88fe3e25f..cfc7fe6e9 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -108,7 +108,7 @@ public:
108 } 108 }
109 109
110 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 110 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
111 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 111 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
112 if (guard_samplers) { 112 if (guard_samplers) {
113 sampled_textures.push_back(surface); 113 sampled_textures.push_back(surface);
114 } 114 }
@@ -128,7 +128,7 @@ public:
128 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 128 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
129 } 129 }
130 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; 130 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
131 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 131 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
132 if (guard_samplers) { 132 if (guard_samplers) {
133 sampled_textures.push_back(surface); 133 sampled_textures.push_back(surface);
134 } 134 }
@@ -143,7 +143,7 @@ public:
143 return any_rt; 143 return any_rt;
144 } 144 }
145 145
146 TView GetDepthBufferSurface(bool preserve_contents) { 146 TView GetDepthBufferSurface() {
147 std::lock_guard lock{mutex}; 147 std::lock_guard lock{mutex};
148 auto& maxwell3d = system.GPU().Maxwell3D(); 148 auto& maxwell3d = system.GPU().Maxwell3D();
149 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { 149 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -164,7 +164,7 @@ public:
164 return {}; 164 return {};
165 } 165 }
166 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; 166 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
167 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); 167 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
168 if (depth_buffer.target) 168 if (depth_buffer.target)
169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
170 depth_buffer.target = surface_view.first; 170 depth_buffer.target = surface_view.first;
@@ -174,7 +174,7 @@ public:
174 return surface_view.second; 174 return surface_view.second;
175 } 175 }
176 176
177 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { 177 TView GetColorBufferSurface(std::size_t index) {
178 std::lock_guard lock{mutex}; 178 std::lock_guard lock{mutex};
179 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 179 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
180 auto& maxwell3d = system.GPU().Maxwell3D(); 180 auto& maxwell3d = system.GPU().Maxwell3D();
@@ -204,9 +204,8 @@ public:
204 return {}; 204 return {};
205 } 205 }
206 206
207 auto surface_view = 207 auto surface_view = GetSurface(gpu_addr, *cpu_addr,
208 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), 208 SurfaceParams::CreateForFramebuffer(system, index), true);
209 preserve_contents, true);
210 if (render_targets[index].target) 209 if (render_targets[index].target)
211 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 210 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
212 render_targets[index].target = surface_view.first; 211 render_targets[index].target = surface_view.first;
@@ -260,9 +259,9 @@ public:
260 const std::optional<VAddr> src_cpu_addr = 259 const std::optional<VAddr> src_cpu_addr =
261 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); 260 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
262 std::pair<TSurface, TView> dst_surface = 261 std::pair<TSurface, TView> dst_surface =
263 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); 262 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
264 std::pair<TSurface, TView> src_surface = 263 std::pair<TSurface, TView> src_surface =
265 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); 264 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
266 ImageBlit(src_surface.second, dst_surface.second, copy_config); 265 ImageBlit(src_surface.second, dst_surface.second, copy_config);
267 dst_surface.first->MarkAsModified(true, Tick()); 266 dst_surface.first->MarkAsModified(true, Tick());
268 } 267 }
@@ -451,22 +450,18 @@ private:
451 * @param overlaps The overlapping surfaces registered in the cache. 450 * @param overlaps The overlapping surfaces registered in the cache.
452 * @param params The parameters for the new surface. 451 * @param params The parameters for the new surface.
453 * @param gpu_addr The starting address of the new surface. 452 * @param gpu_addr The starting address of the new surface.
454 * @param preserve_contents Indicates that the new surface should be loaded from memory or left
455 * blank.
456 * @param untopological Indicates to the recycler that the texture has no way to match the 453 * @param untopological Indicates to the recycler that the texture has no way to match the
457 * overlaps due to topological reasons. 454 * overlaps due to topological reasons.
458 **/ 455 **/
459 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 456 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
460 const SurfaceParams& params, const GPUVAddr gpu_addr, 457 const SurfaceParams& params, const GPUVAddr gpu_addr,
461 const bool preserve_contents,
462 const MatchTopologyResult untopological) { 458 const MatchTopologyResult untopological) {
463 const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
464 for (auto& surface : overlaps) { 459 for (auto& surface : overlaps) {
465 Unregister(surface); 460 Unregister(surface);
466 } 461 }
467 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { 462 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
468 case RecycleStrategy::Ignore: { 463 case RecycleStrategy::Ignore: {
469 return InitializeSurface(gpu_addr, params, do_load); 464 return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
470 } 465 }
471 case RecycleStrategy::Flush: { 466 case RecycleStrategy::Flush: {
472 std::sort(overlaps.begin(), overlaps.end(), 467 std::sort(overlaps.begin(), overlaps.end(),
@@ -476,7 +471,7 @@ private:
476 for (auto& surface : overlaps) { 471 for (auto& surface : overlaps) {
477 FlushSurface(surface); 472 FlushSurface(surface);
478 } 473 }
479 return InitializeSurface(gpu_addr, params, preserve_contents); 474 return InitializeSurface(gpu_addr, params);
480 } 475 }
481 case RecycleStrategy::BufferCopy: { 476 case RecycleStrategy::BufferCopy: {
482 auto new_surface = GetUncachedSurface(gpu_addr, params); 477 auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -485,7 +480,7 @@ private:
485 } 480 }
486 default: { 481 default: {
487 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); 482 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
488 return InitializeSurface(gpu_addr, params, do_load); 483 return InitializeSurface(gpu_addr, params);
489 } 484 }
490 } 485 }
491 } 486 }
@@ -621,14 +616,11 @@ private:
621 * @param params The parameters on the new surface. 616 * @param params The parameters on the new surface.
622 * @param gpu_addr The starting address of the new surface. 617 * @param gpu_addr The starting address of the new surface.
623 * @param cache_addr The starting address of the new surface on physical memory. 618 * @param cache_addr The starting address of the new surface on physical memory.
624 * @param preserve_contents Indicates that the new surface should be loaded from memory or
625 * left blank.
626 */ 619 */
627 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 620 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
628 const SurfaceParams& params, 621 const SurfaceParams& params,
629 const GPUVAddr gpu_addr, 622 const GPUVAddr gpu_addr,
630 const VAddr cpu_addr, 623 const VAddr cpu_addr) {
631 bool preserve_contents) {
632 if (params.target == SurfaceTarget::Texture3D) { 624 if (params.target == SurfaceTarget::Texture3D) {
633 bool failed = false; 625 bool failed = false;
634 if (params.num_levels > 1) { 626 if (params.num_levels > 1) {
@@ -677,7 +669,7 @@ private:
677 return std::nullopt; 669 return std::nullopt;
678 } 670 }
679 Unregister(surface); 671 Unregister(surface);
680 return InitializeSurface(gpu_addr, params, preserve_contents); 672 return InitializeSurface(gpu_addr, params);
681 } 673 }
682 return std::nullopt; 674 return std::nullopt;
683 } 675 }
@@ -688,7 +680,7 @@ private:
688 return {{surface, surface->GetMainView()}}; 680 return {{surface, surface->GetMainView()}};
689 } 681 }
690 } 682 }
691 return InitializeSurface(gpu_addr, params, preserve_contents); 683 return InitializeSurface(gpu_addr, params);
692 } 684 }
693 } 685 }
694 686
@@ -711,13 +703,10 @@ private:
711 * 703 *
712 * @param gpu_addr The starting address of the candidate surface. 704 * @param gpu_addr The starting address of the candidate surface.
713 * @param params The parameters on the candidate surface. 705 * @param params The parameters on the candidate surface.
714 * @param preserve_contents Indicates that the new surface should be loaded from memory or
715 * left blank.
716 * @param is_render Whether or not the surface is a render target. 706 * @param is_render Whether or not the surface is a render target.
717 **/ 707 **/
718 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, 708 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
719 const SurfaceParams& params, bool preserve_contents, 709 const SurfaceParams& params, bool is_render) {
720 bool is_render) {
721 // Step 1 710 // Step 1
722 // Check Level 1 Cache for a fast structural match. If candidate surface 711 // Check Level 1 Cache for a fast structural match. If candidate surface
723 // matches at certain level we are pretty much done. 712 // matches at certain level we are pretty much done.
@@ -726,8 +715,7 @@ private:
726 const auto topological_result = current_surface->MatchesTopology(params); 715 const auto topological_result = current_surface->MatchesTopology(params);
727 if (topological_result != MatchTopologyResult::FullMatch) { 716 if (topological_result != MatchTopologyResult::FullMatch) {
728 std::vector<TSurface> overlaps{current_surface}; 717 std::vector<TSurface> overlaps{current_surface};
729 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 718 return RecycleSurface(overlaps, params, gpu_addr, topological_result);
730 topological_result);
731 } 719 }
732 720
733 const auto struct_result = current_surface->MatchesStructure(params); 721 const auto struct_result = current_surface->MatchesStructure(params);
@@ -752,7 +740,7 @@ private:
752 740
753 // If none are found, we are done. we just load the surface and create it. 741 // If none are found, we are done. we just load the surface and create it.
754 if (overlaps.empty()) { 742 if (overlaps.empty()) {
755 return InitializeSurface(gpu_addr, params, preserve_contents); 743 return InitializeSurface(gpu_addr, params);
756 } 744 }
757 745
758 // Step 3 746 // Step 3
@@ -762,15 +750,13 @@ private:
762 for (const auto& surface : overlaps) { 750 for (const auto& surface : overlaps) {
763 const auto topological_result = surface->MatchesTopology(params); 751 const auto topological_result = surface->MatchesTopology(params);
764 if (topological_result != MatchTopologyResult::FullMatch) { 752 if (topological_result != MatchTopologyResult::FullMatch) {
765 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 753 return RecycleSurface(overlaps, params, gpu_addr, topological_result);
766 topological_result);
767 } 754 }
768 } 755 }
769 756
770 // Check if it's a 3D texture 757 // Check if it's a 3D texture
771 if (params.block_depth > 0) { 758 if (params.block_depth > 0) {
772 auto surface = 759 auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
773 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
774 if (surface) { 760 if (surface) {
775 return *surface; 761 return *surface;
776 } 762 }
@@ -790,8 +776,7 @@ private:
790 return *view; 776 return *view;
791 } 777 }
792 } 778 }
793 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 779 return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
794 MatchTopologyResult::FullMatch);
795 } 780 }
796 // Now we check if the candidate is a mipmap/layer of the overlap 781 // Now we check if the candidate is a mipmap/layer of the overlap
797 std::optional<TView> view = 782 std::optional<TView> view =
@@ -815,7 +800,7 @@ private:
815 pair.first->EmplaceView(params, gpu_addr, candidate_size); 800 pair.first->EmplaceView(params, gpu_addr, candidate_size);
816 if (mirage_view) 801 if (mirage_view)
817 return {pair.first, *mirage_view}; 802 return {pair.first, *mirage_view};
818 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 803 return RecycleSurface(overlaps, params, gpu_addr,
819 MatchTopologyResult::FullMatch); 804 MatchTopologyResult::FullMatch);
820 } 805 }
821 return {current_surface, *view}; 806 return {current_surface, *view};
@@ -831,8 +816,7 @@ private:
831 } 816 }
832 } 817 }
833 // We failed all the tests, recycle the overlaps into a new texture. 818 // We failed all the tests, recycle the overlaps into a new texture.
834 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 819 return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
835 MatchTopologyResult::FullMatch);
836 } 820 }
837 821
838 /** 822 /**
@@ -990,10 +974,10 @@ private:
990 } 974 }
991 975
992 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, 976 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
993 bool preserve_contents) { 977 bool do_load = true) {
994 auto new_surface{GetUncachedSurface(gpu_addr, params)}; 978 auto new_surface{GetUncachedSurface(gpu_addr, params)};
995 Register(new_surface); 979 Register(new_surface);
996 if (preserve_contents) { 980 if (do_load) {
997 LoadSurface(new_surface); 981 LoadSurface(new_surface);
998 } 982 }
999 return {new_surface, new_surface->GetMainView()}; 983 return {new_surface, new_surface->GetMainView()};
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 062b4f252..365bde2f1 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -20,6 +20,8 @@
20#include <cstring> 20#include <cstring>
21#include <vector> 21#include <vector>
22 22
23#include <boost/container/static_vector.hpp>
24
23#include "common/common_types.h" 25#include "common/common_types.h"
24 26
25#include "video_core/textures/astc.h" 27#include "video_core/textures/astc.h"
@@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) {
39 41
40class InputBitStream { 42class InputBitStream {
41public: 43public:
42 explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) 44 constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
43 : m_CurByte(ptr), m_NextBit(start_offset % 8) {} 45 : cur_byte{ptr}, next_bit{start_offset % 8} {}
44 46
45 std::size_t GetBitsRead() const { 47 constexpr std::size_t GetBitsRead() const {
46 return m_BitsRead; 48 return bits_read;
47 } 49 }
48 50
49 u32 ReadBit() { 51 constexpr bool ReadBit() {
50 u32 bit = *m_CurByte >> m_NextBit++; 52 const bool bit = (*cur_byte >> next_bit++) & 1;
51 while (m_NextBit >= 8) { 53 while (next_bit >= 8) {
52 m_NextBit -= 8; 54 next_bit -= 8;
53 m_CurByte++; 55 cur_byte++;
54 } 56 }
55 57
56 m_BitsRead++; 58 bits_read++;
57 return bit & 1; 59 return bit;
58 } 60 }
59 61
60 u32 ReadBits(std::size_t nBits) { 62 constexpr u32 ReadBits(std::size_t nBits) {
61 u32 ret = 0; 63 u32 ret = 0;
62 for (std::size_t i = 0; i < nBits; ++i) { 64 for (std::size_t i = 0; i < nBits; ++i) {
63 ret |= (ReadBit() & 1) << i; 65 ret |= (ReadBit() & 1) << i;
@@ -66,7 +68,7 @@ public:
66 } 68 }
67 69
68 template <std::size_t nBits> 70 template <std::size_t nBits>
69 u32 ReadBits() { 71 constexpr u32 ReadBits() {
70 u32 ret = 0; 72 u32 ret = 0;
71 for (std::size_t i = 0; i < nBits; ++i) { 73 for (std::size_t i = 0; i < nBits; ++i) {
72 ret |= (ReadBit() & 1) << i; 74 ret |= (ReadBit() & 1) << i;
@@ -75,64 +77,58 @@ public:
75 } 77 }
76 78
77private: 79private:
78 const u8* m_CurByte; 80 const u8* cur_byte;
79 std::size_t m_NextBit = 0; 81 std::size_t next_bit = 0;
80 std::size_t m_BitsRead = 0; 82 std::size_t bits_read = 0;
81}; 83};
82 84
83class OutputBitStream { 85class OutputBitStream {
84public: 86public:
85 explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) 87 constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0)
86 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 88 : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {}
87
88 ~OutputBitStream() = default;
89 89
90 s32 GetBitsWritten() const { 90 constexpr std::size_t GetBitsWritten() const {
91 return m_BitsWritten; 91 return bits_written;
92 } 92 }
93 93
94 void WriteBitsR(u32 val, u32 nBits) { 94 constexpr void WriteBitsR(u32 val, u32 nBits) {
95 for (u32 i = 0; i < nBits; i++) { 95 for (u32 i = 0; i < nBits; i++) {
96 WriteBit((val >> (nBits - i - 1)) & 1); 96 WriteBit((val >> (nBits - i - 1)) & 1);
97 } 97 }
98 } 98 }
99 99
100 void WriteBits(u32 val, u32 nBits) { 100 constexpr void WriteBits(u32 val, u32 nBits) {
101 for (u32 i = 0; i < nBits; i++) { 101 for (u32 i = 0; i < nBits; i++) {
102 WriteBit((val >> i) & 1); 102 WriteBit((val >> i) & 1);
103 } 103 }
104 } 104 }
105 105
106private: 106private:
107 void WriteBit(s32 b) { 107 constexpr void WriteBit(bool b) {
108 108 if (bits_written >= num_bits) {
109 if (done)
110 return; 109 return;
110 }
111 111
112 const u32 mask = 1 << m_NextBit++; 112 const u32 mask = 1 << next_bit++;
113 113
114 // clear the bit 114 // clear the bit
115 *m_CurByte &= static_cast<u8>(~mask); 115 *cur_byte &= static_cast<u8>(~mask);
116 116
117 // Write the bit, if necessary 117 // Write the bit, if necessary
118 if (b) 118 if (b)
119 *m_CurByte |= static_cast<u8>(mask); 119 *cur_byte |= static_cast<u8>(mask);
120 120
121 // Next byte? 121 // Next byte?
122 if (m_NextBit >= 8) { 122 if (next_bit >= 8) {
123 m_CurByte += 1; 123 cur_byte += 1;
124 m_NextBit = 0; 124 next_bit = 0;
125 } 125 }
126
127 done = done || ++m_BitsWritten >= m_NumBits;
128 } 126 }
129 127
130 s32 m_BitsWritten = 0; 128 u8* cur_byte;
131 const s32 m_NumBits; 129 std::size_t num_bits;
132 u8* m_CurByte; 130 std::size_t bits_written = 0;
133 s32 m_NextBit = 0; 131 std::size_t next_bit = 0;
134
135 bool done = false;
136}; 132};
137 133
138template <typename IntType> 134template <typename IntType>
@@ -195,9 +191,13 @@ struct IntegerEncodedValue {
195 u32 trit_value; 191 u32 trit_value;
196 }; 192 };
197}; 193};
194using IntegerEncodedVector = boost::container::static_vector<
195 IntegerEncodedValue, 64,
196 boost::container::static_vector_options<
197 boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
198 boost::container::throw_on_overflow<false>>::type>;
198 199
199static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 200static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
200 u32 nBitsPerValue) {
201 // Implement the algorithm in section C.2.12 201 // Implement the algorithm in section C.2.12
202 u32 m[5]; 202 u32 m[5];
203 u32 t[5]; 203 u32 t[5];
@@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu
255 } 255 }
256} 256}
257 257
258static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 258static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
259 u32 nBitsPerValue) { 259 u32 nBitsPerValue) {
260 // Implement the algorithm in section C.2.12 260 // Implement the algorithm in section C.2.12
261 u32 m[3]; 261 u32 m[3];
@@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues();
343// Fills result with the values that are encoded in the given 343// Fills result with the values that are encoded in the given
344// bitstream. We must know beforehand what the maximum possible 344// bitstream. We must know beforehand what the maximum possible
345// value is, and how many values we're decoding. 345// value is, and how many values we're decoding.
346static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, 346static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
347 u32 maxRange, u32 nValues) { 347 u32 nValues) {
348 // Determine encoding parameters 348 // Determine encoding parameters
349 IntegerEncodedValue val = EncodingsValues[maxRange]; 349 IntegerEncodedValue val = EncodingsValues[maxRange];
350 350
@@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
634// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] 634// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
635// is the same as [(numBits - 1):0] and repeats all the way down. 635// is the same as [(numBits - 1):0] and repeats all the way down.
636template <typename IntType> 636template <typename IntType>
637static IntType Replicate(IntType val, u32 numBits, u32 toBit) { 637static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
638 if (numBits == 0) 638 if (numBits == 0) {
639 return 0; 639 return 0;
640 if (toBit == 0) 640 }
641 if (toBit == 0) {
641 return 0; 642 return 0;
642 IntType v = val & static_cast<IntType>((1 << numBits) - 1); 643 }
644 const IntType v = val & static_cast<IntType>((1 << numBits) - 1);
643 IntType res = v; 645 IntType res = v;
644 u32 reslen = numBits; 646 u32 reslen = numBits;
645 while (reslen < toBit) { 647 while (reslen < toBit) {
@@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
656 return res; 658 return res;
657} 659}
658 660
661static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
662 return std::size_t(1) << num_bits;
663}
664
665template <typename IntType, u32 num_bits, u32 to_bit>
666static constexpr auto MakeReplicateTable() {
667 std::array<IntType, NumReplicateEntries(num_bits)> table{};
668 for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
669 table[value] = Replicate(value, num_bits, to_bit);
670 }
671 return table;
672}
673
674static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
675static constexpr u32 ReplicateByteTo16(std::size_t value) {
676 return REPLICATE_BYTE_TO_16_TABLE[value];
677}
678
679static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
680static constexpr u32 ReplicateBitTo7(std::size_t value) {
681 return REPLICATE_BIT_TO_7_TABLE[value];
682}
683
684static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
685static constexpr u32 ReplicateBitTo9(std::size_t value) {
686 return REPLICATE_BIT_TO_9_TABLE[value];
687}
688
689static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
690static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
691static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
692static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
693static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
694static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
695static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
696static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
697/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
698/// to the runtime implementation
699static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
700 switch (num_bits) {
701 case 1:
702 return REPLICATE_1_BIT_TO_8_TABLE[value];
703 case 2:
704 return REPLICATE_2_BIT_TO_8_TABLE[value];
705 case 3:
706 return REPLICATE_3_BIT_TO_8_TABLE[value];
707 case 4:
708 return REPLICATE_4_BIT_TO_8_TABLE[value];
709 case 5:
710 return REPLICATE_5_BIT_TO_8_TABLE[value];
711 case 6:
712 return REPLICATE_6_BIT_TO_8_TABLE[value];
713 case 7:
714 return REPLICATE_7_BIT_TO_8_TABLE[value];
715 case 8:
716 return REPLICATE_8_BIT_TO_8_TABLE[value];
717 default:
718 return Replicate(value, num_bits, 8);
719 }
720}
721
722static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
723static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
724static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
725static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
726static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
727static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
728 switch (num_bits) {
729 case 1:
730 return REPLICATE_1_BIT_TO_6_TABLE[value];
731 case 2:
732 return REPLICATE_2_BIT_TO_6_TABLE[value];
733 case 3:
734 return REPLICATE_3_BIT_TO_6_TABLE[value];
735 case 4:
736 return REPLICATE_4_BIT_TO_6_TABLE[value];
737 case 5:
738 return REPLICATE_5_BIT_TO_6_TABLE[value];
739 default:
740 return Replicate(value, num_bits, 6);
741 }
742}
743
659class Pixel { 744class Pixel {
660protected: 745protected:
661 using ChannelType = s16; 746 using ChannelType = s16;
@@ -674,10 +759,10 @@ public:
674 // significant bits when going from larger to smaller bit depth 759 // significant bits when going from larger to smaller bit depth
675 // or by repeating the most significant bits when going from 760 // or by repeating the most significant bits when going from
676 // smaller to larger bit depths. 761 // smaller to larger bit depths.
677 void ChangeBitDepth(const u8 (&depth)[4]) { 762 void ChangeBitDepth() {
678 for (u32 i = 0; i < 4; i++) { 763 for (u32 i = 0; i < 4; i++) {
679 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); 764 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]);
680 m_BitDepth[i] = depth[i]; 765 m_BitDepth[i] = 8;
681 } 766 }
682 } 767 }
683 768
@@ -689,28 +774,23 @@ public:
689 774
690 // Changes the bit depth of a single component. See the comment 775 // Changes the bit depth of a single component. See the comment
691 // above for how we do this. 776 // above for how we do this.
692 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { 777 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) {
693 assert(newDepth <= 8);
694 assert(oldDepth <= 8); 778 assert(oldDepth <= 8);
695 779
696 if (oldDepth == newDepth) { 780 if (oldDepth == 8) {
697 // Do nothing 781 // Do nothing
698 return val; 782 return val;
699 } else if (oldDepth == 0 && newDepth != 0) { 783 } else if (oldDepth == 0) {
700 return static_cast<ChannelType>((1 << newDepth) - 1); 784 return static_cast<ChannelType>((1 << 8) - 1);
701 } else if (newDepth > oldDepth) { 785 } else if (8 > oldDepth) {
702 return Replicate(val, oldDepth, newDepth); 786 return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth));
703 } else { 787 } else {
704 // oldDepth > newDepth 788 // oldDepth > newDepth
705 if (newDepth == 0) { 789 const u8 bitsWasted = static_cast<u8>(oldDepth - 8);
706 return 0xFF; 790 u16 v = static_cast<u16>(val);
707 } else { 791 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
708 u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); 792 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1));
709 u16 v = static_cast<u16>(val); 793 return static_cast<u8>(v);
710 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
711 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
712 return static_cast<u8>(v);
713 }
714 } 794 }
715 795
716 assert(false && "We shouldn't get here."); 796 assert(false && "We shouldn't get here.");
@@ -760,8 +840,7 @@ public:
760 // up in the most-significant byte. 840 // up in the most-significant byte.
761 u32 Pack() const { 841 u32 Pack() const {
762 Pixel eightBit(*this); 842 Pixel eightBit(*this);
763 const u8 eightBitDepth[4] = {8, 8, 8, 8}; 843 eightBit.ChangeBitDepth();
764 eightBit.ChangeBitDepth(eightBitDepth);
765 844
766 u32 r = 0; 845 u32 r = 0;
767 r |= eightBit.A(); 846 r |= eightBit.A();
@@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
816 } 895 }
817 896
818 // We now have enough to decode our integer sequence. 897 // We now have enough to decode our integer sequence.
819 std::vector<IntegerEncodedValue> decodedColorValues; 898 IntegerEncodedVector decodedColorValues;
820 decodedColorValues.reserve(32);
821 899
822 InputBitStream colorStream(data); 900 InputBitStream colorStream(data);
823 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 901 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
@@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
839 917
840 u32 A = 0, B = 0, C = 0, D = 0; 918 u32 A = 0, B = 0, C = 0, D = 0;
841 // A is just the lsb replicated 9 times. 919 // A is just the lsb replicated 9 times.
842 A = Replicate(bitval & 1, 1, 9); 920 A = ReplicateBitTo9(bitval & 1);
843 921
844 switch (val.encoding) { 922 switch (val.encoding) {
845 // Replicate bits 923 // Replicate bits
846 case IntegerEncoding::JustBits: 924 case IntegerEncoding::JustBits:
847 out[outIdx++] = Replicate(bitval, bitlen, 8); 925 out[outIdx++] = FastReplicateTo8(bitval, bitlen);
848 break; 926 break;
849 927
850 // Use algorithm in C.2.13 928 // Use algorithm in C.2.13
@@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
962 u32 bitval = val.bit_value; 1040 u32 bitval = val.bit_value;
963 u32 bitlen = val.num_bits; 1041 u32 bitlen = val.num_bits;
964 1042
965 u32 A = Replicate(bitval & 1, 1, 7); 1043 u32 A = ReplicateBitTo7(bitval & 1);
966 u32 B = 0, C = 0, D = 0; 1044 u32 B = 0, C = 0, D = 0;
967 1045
968 u32 result = 0; 1046 u32 result = 0;
969 switch (val.encoding) { 1047 switch (val.encoding) {
970 case IntegerEncoding::JustBits: 1048 case IntegerEncoding::JustBits:
971 result = Replicate(bitval, bitlen, 6); 1049 result = FastReplicateTo6(bitval, bitlen);
972 break; 1050 break;
973 1051
974 case IntegerEncoding::Trit: { 1052 case IntegerEncoding::Trit: {
@@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1047 return result; 1125 return result;
1048} 1126}
1049 1127
1050static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, 1128static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights,
1051 const TexelWeightParams& params, const u32 blockWidth, 1129 const TexelWeightParams& params, const u32 blockWidth,
1052 const u32 blockHeight) { 1130 const u32 blockHeight) {
1053 u32 weightIdx = 0; 1131 u32 weightIdx = 0;
@@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1545 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1546 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1547 1625
1548 std::vector<IntegerEncodedValue> texelWeightValues; 1626 IntegerEncodedVector texelWeightValues;
1549 texelWeightValues.reserve(64);
1550 1627
1551 InputBitStream weightStream(texelWeightData); 1628 InputBitStream weightStream(texelWeightData);
1552 1629
@@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1568 Pixel p; 1645 Pixel p;
1569 for (u32 c = 0; c < 4; c++) { 1646 for (u32 c = 0; c < 4; c++) {
1570 u32 C0 = endpos32s[partition][0].Component(c); 1647 u32 C0 = endpos32s[partition][0].Component(c);
1571 C0 = Replicate(C0, 8, 16); 1648 C0 = ReplicateByteTo16(C0);
1572 u32 C1 = endpos32s[partition][1].Component(c); 1649 u32 C1 = endpos32s[partition][1].Component(c);
1573 C1 = Replicate(C1, 8, 16); 1650 C1 = ReplicateByteTo16(C1);
1574 1651
1575 u32 plane = 0; 1652 u32 plane = 0;
1576 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { 1653 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 59b8a5e66..eba05aced 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -131,6 +131,20 @@ enum class SwizzleSource : u32 {
131 OneFloat = 7, 131 OneFloat = 7,
132}; 132};
133 133
134enum class MsaaMode : u32 {
135 Msaa1x1 = 0,
136 Msaa2x1 = 1,
137 Msaa2x2 = 2,
138 Msaa4x2 = 3,
139 Msaa4x2_D3D = 4,
140 Msaa2x1_D3D = 5,
141 Msaa4x4 = 6,
142 Msaa2x2_VC4 = 8,
143 Msaa2x2_VC12 = 9,
144 Msaa4x2_VC8 = 10,
145 Msaa4x2_VC24 = 11,
146};
147
134union TextureHandle { 148union TextureHandle {
135 TextureHandle(u32 raw) : raw{raw} {} 149 TextureHandle(u32 raw) : raw{raw} {}
136 150
@@ -197,6 +211,7 @@ struct TICEntry {
197 union { 211 union {
198 BitField<0, 4, u32> res_min_mip_level; 212 BitField<0, 4, u32> res_min_mip_level;
199 BitField<4, 4, u32> res_max_mip_level; 213 BitField<4, 4, u32> res_max_mip_level;
214 BitField<8, 4, MsaaMode> msaa_mode;
200 BitField<12, 12, u32> min_lod_clamp; 215 BitField<12, 12, u32> min_lod_clamp;
201 }; 216 };
202 217
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index a2b88c787..dccbabcbf 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); 315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type"));
316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); 316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size"));
317 } 317 }
318 item_model->setSortRole(GameListItemPath::TitleRole); 318 item_model->setSortRole(GameListItemPath::SortRole);
319 319
320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); 320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons);
321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); 321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
@@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) {
441 if (children_total > 0) { 441 if (children_total > 0) {
442 search_field->setFocus(); 442 search_field->setFocus();
443 } 443 }
444 item_model->sort(tree_view->header()->sortIndicatorSection(),
445 tree_view->header()->sortIndicatorOrder());
444} 446}
445 447
446void GameList::PopupContextMenu(const QPoint& menu_location) { 448void GameList::PopupContextMenu(const QPoint& menu_location) {
@@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() {
666 // so make it as large as possible as default. 668 // so make it as large as possible as default.
667 header->resizeSection(COLUMN_NAME, header->width()); 669 header->resizeSection(COLUMN_NAME, header->width());
668 } 670 }
669
670 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
671} 671}
672 672
673const QStringList GameList::supported_file_extensions = { 673const QStringList GameList::supported_file_extensions = {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 7cde72d1b..3e6d5a7cd 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -65,10 +65,10 @@ public:
65 */ 65 */
66class GameListItemPath : public GameListItem { 66class GameListItemPath : public GameListItem {
67public: 67public:
68 static const int TitleRole = SortRole; 68 static const int TitleRole = SortRole + 1;
69 static const int FullPathRole = SortRole + 1; 69 static const int FullPathRole = SortRole + 2;
70 static const int ProgramIdRole = SortRole + 2; 70 static const int ProgramIdRole = SortRole + 3;
71 static const int FileTypeRole = SortRole + 3; 71 static const int FileTypeRole = SortRole + 4;
72 72
73 GameListItemPath() = default; 73 GameListItemPath() = default;
74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, 74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data,
@@ -95,7 +95,7 @@ public:
95 } 95 }
96 96
97 QVariant data(int role) const override { 97 QVariant data(int role) const override {
98 if (role == Qt::DisplayRole) { 98 if (role == Qt::DisplayRole || role == SortRole) {
99 std::string filename; 99 std::string filename;
100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, 100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename,
101 nullptr); 101 nullptr);
@@ -110,6 +110,9 @@ public:
110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id); 110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
111 const int row2_id = UISettings::values.row_2_text_id; 111 const int row2_id = UISettings::values.row_2_text_id;
112 112
113 if (role == SortRole)
114 return row1.toLower();
115
113 if (row2_id == 4) // None 116 if (row2_id == 4) // None
114 return row1; 117 return row1;
115 118
@@ -123,6 +126,13 @@ public:
123 126
124 return GameListItem::data(role); 127 return GameListItem::data(role);
125 } 128 }
129
130 /**
131 * Override to prevent automatic sorting.
132 */
133 bool operator<(const QStandardItem& other) const override {
134 return false;
135 }
126}; 136};
127 137
128class GameListItemCompat : public GameListItem { 138class GameListItemCompat : public GameListItem {
@@ -289,6 +299,10 @@ public:
289 int type() const override { 299 int type() const override {
290 return static_cast<int>(GameListItemType::AddDir); 300 return static_cast<int>(GameListItemType::AddDir);
291 } 301 }
302
303 bool operator<(const QStandardItem& other) const override {
304 return false;
305 }
292}; 306};
293 307
294class GameList; 308class GameList;