summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/engines/fermi_2d.h 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 13
-rw-r--r-- src/video_core/engines/maxwell_3d.h 28
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 2
-rw-r--r-- src/video_core/engines/maxwell_dma.h 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 213
-rw-r--r-- src/video_core/engines/shader_header.h 103
-rw-r--r-- src/video_core/gpu.h 1
-rw-r--r-- src/video_core/macro_interpreter.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 16
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 30
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 52
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 50
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 6
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 223
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp 2
-rw-r--r-- src/video_core/textures/decoders.cpp 53
24 files changed, 651 insertions, 195 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4a79ce39c..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(video_core STATIC
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h
17 gpu.cpp 18 gpu.cpp
18 gpu.h 19 gpu.h
19 macro_interpreter.cpp 20 macro_interpreter.cpp
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index dcf9ef8b9..021b83eaa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -26,7 +26,7 @@ public:
26 void WriteReg(u32 method, u32 value); 26 void WriteReg(u32 method, u32 value);
27 27
28 struct Regs { 28 struct Regs {
29 static constexpr size_t NUM_REGS = 0x258; 29 static constexpr std::size_t NUM_REGS = 0x258;
30 30
31 struct Surface { 31 struct Surface {
32 RenderTargetFormat format; 32 RenderTargetFormat format;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 329079ddd..8afd26fe9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() {
248 248
249void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { 249void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
250 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 250 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
251 auto& shader = state.shader_stages[static_cast<size_t>(stage)]; 251 auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
252 auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)]; 252 auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
253 253
254 auto& buffer = shader.const_buffers[bind_data.index]; 254 auto& buffer = shader.const_buffers[bind_data.index];
255 255
@@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
316std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { 316std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
317 std::vector<Texture::FullTextureInfo> textures; 317 std::vector<Texture::FullTextureInfo> textures;
318 318
319 auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; 319 auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
320 auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; 320 auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
321 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 321 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
322 322
323 GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; 323 GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
324 324
325 // Offset into the texture constbuffer where the texture info begins. 325 // Offset into the texture constbuffer where the texture info begins.
326 static constexpr size_t TextureInfoOffset = 0x20; 326 static constexpr std::size_t TextureInfoOffset = 0x20;
327 327
328 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 328 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
329 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 329 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
@@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
360 return textures; 360 return textures;
361} 361}
362 362
363Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const { 363Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
364 auto& shader = state.shader_stages[static_cast<size_t>(stage)]; 364 std::size_t offset) const {
365 auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
365 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; 366 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
366 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 367 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
367 368
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d3be900a4..b81b0723d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -34,17 +34,17 @@ public:
34 /// Register structure of the Maxwell3D engine. 34 /// Register structure of the Maxwell3D engine.
35 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. 35 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
36 struct Regs { 36 struct Regs {
37 static constexpr size_t NUM_REGS = 0xE00; 37 static constexpr std::size_t NUM_REGS = 0xE00;
38 38
39 static constexpr size_t NumRenderTargets = 8; 39 static constexpr std::size_t NumRenderTargets = 8;
40 static constexpr size_t NumViewports = 16; 40 static constexpr std::size_t NumViewports = 16;
41 static constexpr size_t NumCBData = 16; 41 static constexpr std::size_t NumCBData = 16;
42 static constexpr size_t NumVertexArrays = 32; 42 static constexpr std::size_t NumVertexArrays = 32;
43 static constexpr size_t NumVertexAttributes = 32; 43 static constexpr std::size_t NumVertexAttributes = 32;
44 static constexpr size_t MaxShaderProgram = 6; 44 static constexpr std::size_t MaxShaderProgram = 6;
45 static constexpr size_t MaxShaderStage = 5; 45 static constexpr std::size_t MaxShaderStage = 5;
46 // Maximum number of const buffers per shader stage. 46 // Maximum number of const buffers per shader stage.
47 static constexpr size_t MaxConstBuffers = 18; 47 static constexpr std::size_t MaxConstBuffers = 18;
48 48
49 enum class QueryMode : u32 { 49 enum class QueryMode : u32 {
50 Write = 0, 50 Write = 0,
@@ -443,9 +443,9 @@ public:
443 } 443 }
444 }; 444 };
445 445
446 bool IsShaderConfigEnabled(size_t index) const { 446 bool IsShaderConfigEnabled(std::size_t index) const {
447 // The VertexB is always enabled. 447 // The VertexB is always enabled.
448 if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { 448 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
449 return true; 449 return true;
450 } 450 }
451 return shader_config[index].enable != 0; 451 return shader_config[index].enable != 0;
@@ -571,7 +571,7 @@ public:
571 BitField<25, 3, u32> map_7; 571 BitField<25, 3, u32> map_7;
572 }; 572 };
573 573
574 u32 GetMap(size_t index) const { 574 u32 GetMap(std::size_t index) const {
575 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, 575 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
576 map_4, map_5, map_6, map_7}; 576 map_4, map_5, map_6, map_7};
577 ASSERT(index < maps.size()); 577 ASSERT(index < maps.size());
@@ -925,7 +925,7 @@ public:
925 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; 925 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
926 926
927 /// Returns the texture information for a specific texture in a specific shader stage. 927 /// Returns the texture information for a specific texture in a specific shader stage.
928 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; 928 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
929 929
930private: 930private:
931 VideoCore::RasterizerInterface& rasterizer; 931 VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c24d33d5c..aa7481b8c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() {
50 ASSERT(regs.dst_params.pos_y == 0); 50 ASSERT(regs.dst_params.pos_y == 0);
51 51
52 if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { 52 if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
53 size_t copy_size = regs.x_count; 53 std::size_t copy_size = regs.x_count;
54 54
55 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 55 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
56 // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). 56 // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 7882f16e0..311ccb616 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -23,7 +23,7 @@ public:
23 void WriteReg(u32 method, u32 value); 23 void WriteReg(u32 method, u32 value);
24 24
25 struct Regs { 25 struct Regs {
26 static constexpr size_t NUM_REGS = 0x1D6; 26 static constexpr std::size_t NUM_REGS = 0x1D6;
27 27
28 struct Parameters { 28 struct Parameters {
29 union { 29 union {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d6e2397f2..7e1de0fa1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -20,10 +20,10 @@ namespace Tegra::Shader {
20 20
21struct Register { 21struct Register {
22 /// Number of registers 22 /// Number of registers
23 static constexpr size_t NumRegisters = 256; 23 static constexpr std::size_t NumRegisters = 256;
24 24
25 /// Register 255 is special cased to always be 0 25 /// Register 255 is special cased to always be 0
26 static constexpr size_t ZeroIndex = 255; 26 static constexpr std::size_t ZeroIndex = 255;
27 27
28 enum class Size : u64 { 28 enum class Size : u64 {
29 Byte = 0, 29 Byte = 0,
@@ -240,6 +240,41 @@ enum class FlowCondition : u64 {
240 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? 240 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
241}; 241};
242 242
243enum class ControlCode : u64 {
244 F = 0,
245 LT = 1,
246 EQ = 2,
247 LE = 3,
248 GT = 4,
249 NE = 5,
250 GE = 6,
251 Num = 7,
252 Nan = 8,
253 LTU = 9,
254 EQU = 10,
255 LEU = 11,
256 GTU = 12,
257 NEU = 13,
258 GEU = 14,
259 //
260 OFF = 16,
261 LO = 17,
262 SFF = 18,
263 LS = 19,
264 HI = 20,
265 SFT = 21,
266 HS = 22,
267 OFT = 23,
268 CSM_TA = 24,
269 CSM_TR = 25,
270 CSM_MX = 26,
271 FCSM_TA = 27,
272 FCSM_TR = 28,
273 FCSM_MX = 29,
274 RLE = 30,
275 RGT = 31,
276};
277
243enum class PredicateResultMode : u64 { 278enum class PredicateResultMode : u64 {
244 None = 0x0, 279 None = 0x0,
245 NotZero = 0x3, 280 NotZero = 0x3,
@@ -271,6 +306,15 @@ enum class TextureProcessMode : u64 {
271 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 306 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
272}; 307};
273 308
309enum class TextureMiscMode : u64 {
310 DC,
311 AOFFI, // Uses Offset
312 NDV,
313 NODEP,
314 MZ,
315 PTP,
316};
317
274enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; 318enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
275enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; 319enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
276 320
@@ -546,6 +590,15 @@ union Instruction {
546 } pset; 590 } pset;
547 591
548 union { 592 union {
593 BitField<0, 3, u64> pred0;
594 BitField<3, 3, u64> pred3;
595 BitField<8, 5, ControlCode> cc; // flag in cc
596 BitField<39, 3, u64> pred39;
597 BitField<42, 1, u64> neg_pred39;
598 BitField<45, 4, PredOperation> op; // op with pred39
599 } csetp;
600
601 union {
549 BitField<39, 3, u64> pred39; 602 BitField<39, 3, u64> pred39;
550 BitField<42, 1, u64> neg_pred; 603 BitField<42, 1, u64> neg_pred;
551 BitField<43, 1, u64> neg_a; 604 BitField<43, 1, u64> neg_a;
@@ -590,42 +643,127 @@ union Instruction {
590 BitField<28, 1, u64> array; 643 BitField<28, 1, u64> array;
591 BitField<29, 2, TextureType> texture_type; 644 BitField<29, 2, TextureType> texture_type;
592 BitField<31, 4, u64> component_mask; 645 BitField<31, 4, u64> component_mask;
646 BitField<49, 1, u64> nodep_flag;
647 BitField<50, 1, u64> dc_flag;
648 BitField<54, 1, u64> aoffi_flag;
593 BitField<55, 3, TextureProcessMode> process_mode; 649 BitField<55, 3, TextureProcessMode> process_mode;
594 650
595 bool IsComponentEnabled(size_t component) const { 651 bool IsComponentEnabled(std::size_t component) const {
596 return ((1ull << component) & component_mask) != 0; 652 return ((1ull << component) & component_mask) != 0;
597 } 653 }
654
655 TextureProcessMode GetTextureProcessMode() const {
656 return process_mode;
657 }
658
659 bool UsesMiscMode(TextureMiscMode mode) const {
660 switch (mode) {
661 case TextureMiscMode::DC:
662 return dc_flag != 0;
663 case TextureMiscMode::NODEP:
664 return nodep_flag != 0;
665 case TextureMiscMode::AOFFI:
666 return aoffi_flag != 0;
667 default:
668 break;
669 }
670 return false;
671 }
598 } tex; 672 } tex;
599 673
600 union { 674 union {
601 BitField<22, 6, TextureQueryType> query_type; 675 BitField<22, 6, TextureQueryType> query_type;
602 BitField<31, 4, u64> component_mask; 676 BitField<31, 4, u64> component_mask;
677 BitField<49, 1, u64> nodep_flag;
678
679 bool UsesMiscMode(TextureMiscMode mode) const {
680 switch (mode) {
681 case TextureMiscMode::NODEP:
682 return nodep_flag != 0;
683 default:
684 break;
685 }
686 return false;
687 }
603 } txq; 688 } txq;
604 689
605 union { 690 union {
606 BitField<28, 1, u64> array; 691 BitField<28, 1, u64> array;
607 BitField<29, 2, TextureType> texture_type; 692 BitField<29, 2, TextureType> texture_type;
608 BitField<31, 4, u64> component_mask; 693 BitField<31, 4, u64> component_mask;
694 BitField<35, 1, u64> ndv_flag;
695 BitField<49, 1, u64> nodep_flag;
609 696
610 bool IsComponentEnabled(size_t component) const { 697 bool IsComponentEnabled(std::size_t component) const {
611 return ((1ull << component) & component_mask) != 0; 698 return ((1ull << component) & component_mask) != 0;
612 } 699 }
700
701 bool UsesMiscMode(TextureMiscMode mode) const {
702 switch (mode) {
703 case TextureMiscMode::NDV:
704 return (ndv_flag != 0);
705 case TextureMiscMode::NODEP:
706 return (nodep_flag != 0);
707 default:
708 break;
709 }
710 return false;
711 }
613 } tmml; 712 } tmml;
614 713
615 union { 714 union {
616 BitField<28, 1, u64> array; 715 BitField<28, 1, u64> array;
617 BitField<29, 2, TextureType> texture_type; 716 BitField<29, 2, TextureType> texture_type;
717 BitField<35, 1, u64> ndv_flag;
718 BitField<49, 1, u64> nodep_flag;
719 BitField<50, 1, u64> dc_flag;
720 BitField<54, 2, u64> info;
618 BitField<56, 2, u64> component; 721 BitField<56, 2, u64> component;
722
723 bool UsesMiscMode(TextureMiscMode mode) const {
724 switch (mode) {
725 case TextureMiscMode::NDV:
726 return ndv_flag != 0;
727 case TextureMiscMode::NODEP:
728 return nodep_flag != 0;
729 case TextureMiscMode::DC:
730 return dc_flag != 0;
731 case TextureMiscMode::AOFFI:
732 return info == 1;
733 case TextureMiscMode::PTP:
734 return info == 2;
735 default:
736 break;
737 }
738 return false;
739 }
619 } tld4; 740 } tld4;
620 741
621 union { 742 union {
743 BitField<49, 1, u64> nodep_flag;
744 BitField<50, 1, u64> dc_flag;
745 BitField<51, 1, u64> aoffi_flag;
622 BitField<52, 2, u64> component; 746 BitField<52, 2, u64> component;
747
748 bool UsesMiscMode(TextureMiscMode mode) const {
749 switch (mode) {
750 case TextureMiscMode::DC:
751 return dc_flag != 0;
752 case TextureMiscMode::NODEP:
753 return nodep_flag != 0;
754 case TextureMiscMode::AOFFI:
755 return aoffi_flag != 0;
756 default:
757 break;
758 }
759 return false;
760 }
623 } tld4s; 761 } tld4s;
624 762
625 union { 763 union {
626 BitField<0, 8, Register> gpr0; 764 BitField<0, 8, Register> gpr0;
627 BitField<28, 8, Register> gpr28; 765 BitField<28, 8, Register> gpr28;
628 BitField<49, 1, u64> nodep; 766 BitField<49, 1, u64> nodep_flag;
629 BitField<50, 3, u64> component_mask_selector; 767 BitField<50, 3, u64> component_mask_selector;
630 BitField<53, 4, u64> texture_info; 768 BitField<53, 4, u64> texture_info;
631 769
@@ -645,6 +783,37 @@ union Instruction {
645 UNREACHABLE(); 783 UNREACHABLE();
646 } 784 }
647 785
786 TextureProcessMode GetTextureProcessMode() const {
787 switch (texture_info) {
788 case 0:
789 case 2:
790 case 6:
791 case 8:
792 case 9:
793 case 11:
794 return TextureProcessMode::LZ;
795 case 3:
796 case 5:
797 case 13:
798 return TextureProcessMode::LL;
799 default:
800 break;
801 }
802 return TextureProcessMode::None;
803 }
804
805 bool UsesMiscMode(TextureMiscMode mode) const {
806 switch (mode) {
807 case TextureMiscMode::DC:
808 return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
809 case TextureMiscMode::NODEP:
810 return nodep_flag != 0;
811 default:
812 break;
813 }
814 return false;
815 }
816
648 bool IsArrayTexture() const { 817 bool IsArrayTexture() const {
649 // TEXS only supports Texture2D arrays. 818 // TEXS only supports Texture2D arrays.
650 return texture_info >= 7 && texture_info <= 9; 819 return texture_info >= 7 && texture_info <= 9;
@@ -654,7 +823,7 @@ union Instruction {
654 return gpr28.Value() != Register::ZeroIndex; 823 return gpr28.Value() != Register::ZeroIndex;
655 } 824 }
656 825
657 bool IsComponentEnabled(size_t component) const { 826 bool IsComponentEnabled(std::size_t component) const {
658 static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ 827 static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
659 {}, 828 {},
660 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, 829 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -662,7 +831,7 @@ union Instruction {
662 {0x7, 0xb, 0xd, 0xe, 0xf}, 831 {0x7, 0xb, 0xd, 0xe, 0xf},
663 }}; 832 }};
664 833
665 size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; 834 std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
666 index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; 835 index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
667 836
668 u32 mask = mask_lut[index][component_mask_selector]; 837 u32 mask = mask_lut[index][component_mask_selector];
@@ -673,6 +842,7 @@ union Instruction {
673 } texs; 842 } texs;
674 843
675 union { 844 union {
845 BitField<49, 1, u64> nodep_flag;
676 BitField<53, 4, u64> texture_info; 846 BitField<53, 4, u64> texture_info;
677 847
678 TextureType GetTextureType() const { 848 TextureType GetTextureType() const {
@@ -693,6 +863,26 @@ union Instruction {
693 UNREACHABLE(); 863 UNREACHABLE();
694 } 864 }
695 865
866 TextureProcessMode GetTextureProcessMode() const {
867 if (texture_info == 1 || texture_info == 5 || texture_info == 12)
868 return TextureProcessMode::LL;
869 return TextureProcessMode::LZ;
870 }
871
872 bool UsesMiscMode(TextureMiscMode mode) const {
873 switch (mode) {
874 case TextureMiscMode::AOFFI:
875 return texture_info == 12 || texture_info == 4;
876 case TextureMiscMode::MZ:
877 return texture_info == 5;
878 case TextureMiscMode::NODEP:
879 return nodep_flag != 0;
880 default:
881 break;
882 }
883 return false;
884 }
885
696 bool IsArrayTexture() const { 886 bool IsArrayTexture() const {
697 // TEXS only supports Texture2D arrays. 887 // TEXS only supports Texture2D arrays.
698 return texture_info == 8; 888 return texture_info == 8;
@@ -735,6 +925,7 @@ union Instruction {
735 BitField<36, 5, u64> index; 925 BitField<36, 5, u64> index;
736 } cbuf36; 926 } cbuf36;
737 927
928 BitField<47, 1, u64> generates_cc;
738 BitField<61, 1, u64> is_b_imm; 929 BitField<61, 1, u64> is_b_imm;
739 BitField<60, 1, u64> is_b_gpr; 930 BitField<60, 1, u64> is_b_gpr;
740 BitField<59, 1, u64> is_c_gpr; 931 BitField<59, 1, u64> is_c_gpr;
@@ -859,6 +1050,7 @@ public:
859 ISET_IMM, 1050 ISET_IMM,
860 PSETP, 1051 PSETP,
861 PSET, 1052 PSET,
1053 CSETP,
862 XMAD_IMM, 1054 XMAD_IMM,
863 XMAD_CR, 1055 XMAD_CR,
864 XMAD_RC, 1056 XMAD_RC,
@@ -947,7 +1139,7 @@ public:
947private: 1139private:
948 struct Detail { 1140 struct Detail {
949 private: 1141 private:
950 static constexpr size_t opcode_bitsize = 16; 1142 static constexpr std::size_t opcode_bitsize = 16;
951 1143
952 /** 1144 /**
953 * Generates the mask and the expected value after masking from a given bitstring. 1145 * Generates the mask and the expected value after masking from a given bitstring.
@@ -956,8 +1148,8 @@ private:
956 */ 1148 */
957 static auto GetMaskAndExpect(const char* const bitstring) { 1149 static auto GetMaskAndExpect(const char* const bitstring) {
958 u16 mask = 0, expect = 0; 1150 u16 mask = 0, expect = 0;
959 for (size_t i = 0; i < opcode_bitsize; i++) { 1151 for (std::size_t i = 0; i < opcode_bitsize; i++) {
960 const size_t bit_position = opcode_bitsize - i - 1; 1152 const std::size_t bit_position = opcode_bitsize - i - 1;
961 switch (bitstring[i]) { 1153 switch (bitstring[i]) {
962 case '0': 1154 case '0':
963 mask |= 1 << bit_position; 1155 mask |= 1 << bit_position;
@@ -1095,6 +1287,7 @@ private:
1095 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), 1287 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
1096 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), 1288 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
1097 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 1289 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
1290 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
1098 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), 1291 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
1099 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), 1292 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
1100 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), 1293 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra::Shader {
12
13enum class OutputTopology : u32 {
14 PointList = 1,
15 LineStrip = 6,
16 TriangleStrip = 7,
17};
18
19// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header {
22 union {
23 BitField<0, 5, u32> sph_type;
24 BitField<5, 5, u32> version;
25 BitField<10, 4, u32> shader_type;
26 BitField<14, 1, u32> mrt_enable;
27 BitField<15, 1, u32> kills_pixels;
28 BitField<16, 1, u32> does_global_store;
29 BitField<17, 4, u32> sass_version;
30 BitField<21, 5, u32> reserved;
31 BitField<26, 1, u32> does_load_or_store;
32 BitField<27, 1, u32> does_fp64;
33 BitField<28, 4, u32> stream_out_mask;
34 } common0;
35
36 union {
37 BitField<0, 24, u32> shader_local_memory_low_size;
38 BitField<24, 8, u32> per_patch_attribute_count;
39 } common1;
40
41 union {
42 BitField<0, 24, u32> shader_local_memory_high_size;
43 BitField<24, 8, u32> threads_per_input_primitive;
44 } common2;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_crs_size;
48 BitField<24, 4, OutputTopology> output_topology;
49 BitField<28, 4, u32> reserved;
50 } common3;
51
52 union {
53 BitField<0, 12, u32> max_output_vertices;
54 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
55 BitField<24, 4, u32> reserved;
56 BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
57 } common4;
58
59 union {
60 struct {
61 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
69 INSERT_PADDING_BYTES(1); // OmapSystemValuesB
70 INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
71 INSERT_PADDING_BYTES(2); // OmapColor
72 INSERT_PADDING_BYTES(2); // OmapSystemValuesC
73 INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
74 INSERT_PADDING_BYTES(1); // OmapReserved
75 } vtg;
76
77 struct {
78 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
79 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
80 INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
81 INSERT_PADDING_BYTES(2); // ImapColor
82 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
83 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
84 INSERT_PADDING_BYTES(2); // ImapReserved
85 struct {
86 u32 target;
87 union {
88 BitField<0, 1, u32> sample_mask;
89 BitField<1, 1, u32> depth;
90 BitField<2, 30, u32> reserved;
91 };
92 } omap;
93 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
94 const u32 bit = render_target * 4 + component;
95 return omap.target & (1 << bit);
96 }
97 } ps;
98 };
99};
100
101static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
102
103} // namespace Tegra::Shader
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 7329ca766..5cc1e19ca 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {
42 R32_UINT = 0xE4, 42 R32_UINT = 0xE4,
43 R32_FLOAT = 0xE5, 43 R32_FLOAT = 0xE5,
44 B5G6R5_UNORM = 0xE8, 44 B5G6R5_UNORM = 0xE8,
45 BGR5A1_UNORM = 0xE9,
45 RG8_UNORM = 0xEA, 46 RG8_UNORM = 0xEA,
46 RG8_SNORM = 0xEB, 47 RG8_SNORM = 0xEB,
47 R16_UNORM = 0xEE, 48 R16_UNORM = 0xEE,
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 7d836b816..cee0baaf3 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -152,7 +152,7 @@ private:
152 boost::optional<u32> 152 boost::optional<u32>
153 delayed_pc; ///< Program counter to execute at after the delay slot is executed. 153 delayed_pc; ///< Program counter to execute at after the delay slot is executed.
154 154
155 static constexpr size_t NumMacroRegisters = 8; 155 static constexpr std::size_t NumMacroRegisters = 8;
156 156
157 /// General purpose macro registers. 157 /// General purpose macro registers.
158 std::array<u32, NumMacroRegisters> registers = {}; 158 std::array<u32, NumMacroRegisters> registers = {};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0b5d18bcb..578aca789 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -12,10 +12,10 @@
12 12
13namespace OpenGL { 13namespace OpenGL {
14 14
15OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} 15OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
16 16
17GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, 17GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
18 bool cache) { 18 std::size_t alignment, bool cache) {
19 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 19 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
20 const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 20 const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
21 21
@@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz
53 return uploaded_offset; 53 return uploaded_offset;
54} 54}
55 55
56GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { 56GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
57 std::size_t alignment) {
57 AlignBuffer(alignment); 58 AlignBuffer(alignment);
58 std::memcpy(buffer_ptr, raw_pointer, size); 59 std::memcpy(buffer_ptr, raw_pointer, size);
59 GLintptr uploaded_offset = buffer_offset; 60 GLintptr uploaded_offset = buffer_offset;
@@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size,
63 return uploaded_offset; 64 return uploaded_offset;
64} 65}
65 66
66void OGLBufferCache::Map(size_t max_size) { 67void OGLBufferCache::Map(std::size_t max_size) {
67 bool invalidate; 68 bool invalidate;
68 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 69 std::tie(buffer_ptr, buffer_offset_base, invalidate) =
69 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 70 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const {
81 return stream_buffer.GetHandle(); 82 return stream_buffer.GetHandle();
82} 83}
83 84
84void OGLBufferCache::AlignBuffer(size_t alignment) { 85void OGLBufferCache::AlignBuffer(std::size_t alignment) {
85 // Align the offset, not the mapped pointer 86 // Align the offset, not the mapped pointer
86 GLintptr offset_aligned = 87 GLintptr offset_aligned =
87 static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); 88 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
88 buffer_ptr += offset_aligned - buffer_offset; 89 buffer_ptr += offset_aligned - buffer_offset;
89 buffer_offset = offset_aligned; 90 buffer_offset = offset_aligned;
90} 91}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6da862902..6c18461f4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -19,32 +19,32 @@ struct CachedBufferEntry final {
19 return addr; 19 return addr;
20 } 20 }
21 21
22 size_t GetSizeInBytes() const { 22 std::size_t GetSizeInBytes() const {
23 return size; 23 return size;
24 } 24 }
25 25
26 VAddr addr; 26 VAddr addr;
27 size_t size; 27 std::size_t size;
28 GLintptr offset; 28 GLintptr offset;
29 size_t alignment; 29 std::size_t alignment;
30}; 30};
31 31
32class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 32class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
33public: 33public:
34 explicit OGLBufferCache(size_t size); 34 explicit OGLBufferCache(std::size_t size);
35 35
36 GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, 36 GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
37 bool cache = true); 37 bool cache = true);
38 38
39 GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); 39 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
40 40
41 void Map(size_t max_size); 41 void Map(std::size_t max_size);
42 void Unmap(); 42 void Unmap();
43 43
44 GLuint GetHandle() const; 44 GLuint GetHandle() const;
45 45
46protected: 46protected:
47 void AlignBuffer(size_t alignment); 47 void AlignBuffer(std::size_t alignment);
48 48
49private: 49private:
50 OGLStreamBuffer stream_buffer; 50 OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7e1bba67d..274c2dbcf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,
46RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) 46RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
47 : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { 47 : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
48 // Create sampler objects 48 // Create sampler objects
49 for (size_t i = 0; i < texture_samplers.size(); ++i) { 49 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
50 texture_samplers[i].Create(); 50 texture_samplers[i].Create();
51 state.texture_units[i].sampler = texture_samplers[i].sampler.handle; 51 state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
52 } 52 }
@@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() {
181 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; 181 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
182 u32 current_texture_bindpoint = 0; 182 u32 current_texture_bindpoint = 0;
183 183
184 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 184 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
185 const auto& shader_config = gpu.regs.shader_config[index]; 185 const auto& shader_config = gpu.regs.shader_config[index];
186 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 186 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
187 187
@@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() {
190 continue; 190 continue;
191 } 191 }
192 192
193 const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 193 const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
194 194
195 GLShader::MaxwellUniformData ubo{}; 195 GLShader::MaxwellUniformData ubo{};
196 ubo.SetFromRegs(gpu.state.shader_stages[stage]); 196 ubo.SetFromRegs(gpu.state.shader_stages[stage]);
197 const GLintptr offset = buffer_cache.UploadHostMemory( 197 const GLintptr offset = buffer_cache.UploadHostMemory(
198 &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment)); 198 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
199 199
200 // Bind the buffer 200 // Bind the buffer
201 glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); 201 glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
@@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() {
238 shader_program_manager->UseTrivialGeometryShader(); 238 shader_program_manager->UseTrivialGeometryShader();
239} 239}
240 240
241size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 241std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
242 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 242 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
243 243
244 size_t size = 0; 244 std::size_t size = 0;
245 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 245 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
246 if (!regs.vertex_array[index].IsEnabled()) 246 if (!regs.vertex_array[index].IsEnabled())
247 continue; 247 continue;
@@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
299 299
300void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, 300void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
301 bool preserve_contents, 301 bool preserve_contents,
302 boost::optional<size_t> single_color_target) { 302 boost::optional<std::size_t> single_color_target) {
303 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 303 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
304 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 304 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
305 305
@@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
330 } else { 330 } else {
331 // Multiple color attachments are enabled 331 // Multiple color attachments are enabled
332 std::array<GLenum, Maxwell::NumRenderTargets> buffers; 332 std::array<GLenum, Maxwell::NumRenderTargets> buffers;
333 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { 333 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
334 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); 334 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
335 buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); 335 buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
336 glFramebufferTexture2D( 336 glFramebufferTexture2D(
@@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
342 } 342 }
343 } else { 343 } else {
344 // No color attachments are enabled - zero out all of them 344 // No color attachments are enabled - zero out all of them
345 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { 345 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
346 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, 346 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
347 GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, 347 GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
348 0, 0); 348 0, 0);
@@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() {
462 state.draw.vertex_buffer = buffer_cache.GetHandle(); 462 state.draw.vertex_buffer = buffer_cache.GetHandle();
463 state.Apply(); 463 state.Apply();
464 464
465 size_t buffer_size = CalculateVertexArraysSize(); 465 std::size_t buffer_size = CalculateVertexArraysSize();
466 466
467 if (is_indexed) { 467 if (is_indexed) {
468 buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; 468 buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
469 } 469 }
470 470
471 // Uniform space for the 5 shader stages 471 // Uniform space for the 5 shader stages
472 buffer_size = 472 buffer_size =
473 Common::AlignUp<size_t>(buffer_size, 4) + 473 Common::AlignUp<std::size_t>(buffer_size, 4) +
474 (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; 474 (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
475 475
476 // Add space for at least 18 constant buffers 476 // Add space for at least 18 constant buffers
@@ -644,7 +644,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
644 MICROPROFILE_SCOPE(OpenGL_UBO); 644 MICROPROFILE_SCOPE(OpenGL_UBO);
645 const auto& gpu = Core::System::GetInstance().GPU(); 645 const auto& gpu = Core::System::GetInstance().GPU();
646 const auto& maxwell3d = gpu.Maxwell3D(); 646 const auto& maxwell3d = gpu.Maxwell3D();
647 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; 647 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
648 const auto& entries = shader->GetShaderEntries().const_buffer_entries; 648 const auto& entries = shader->GetShaderEntries().const_buffer_entries;
649 649
650 constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; 650 constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
@@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
667 continue; 667 continue;
668 } 668 }
669 669
670 size_t size = 0; 670 std::size_t size = 0;
671 671
672 if (used_buffer.IsIndirect()) { 672 if (used_buffer.IsIndirect()) {
673 // Buffer is accessed indirectly, so upload the entire thing 673 // Buffer is accessed indirectly, so upload the entire thing
@@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
689 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); 689 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
690 690
691 GLintptr const_buffer_offset = buffer_cache.UploadMemory( 691 GLintptr const_buffer_offset = buffer_cache.UploadMemory(
692 buffer.address, size, static_cast<size_t>(uniform_buffer_alignment)); 692 buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
693 693
694 // Now configure the bindpoint of the buffer inside the shader 694 // Now configure the bindpoint of the buffer inside the shader
695 glUniformBlockBinding(shader->GetProgramHandle(), 695 glUniformBlockBinding(shader->GetProgramHandle(),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 163412882..bf9560bdc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -73,7 +73,7 @@ public:
73 }; 73 };
74 74
75 /// Maximum supported size that a constbuffer can have in bytes. 75 /// Maximum supported size that a constbuffer can have in bytes.
76 static constexpr size_t MaxConstbufferSize = 0x10000; 76 static constexpr std::size_t MaxConstbufferSize = 0x10000;
77 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, 77 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
78 "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); 78 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
79 79
@@ -106,7 +106,7 @@ private:
106 */ 106 */
107 void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, 107 void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
108 bool preserve_contents = true, 108 bool preserve_contents = true,
109 boost::optional<size_t> single_color_target = {}); 109 boost::optional<std::size_t> single_color_target = {});
110 110
111 /* 111 /*
112 * Configures the current constbuffers to use for the draw command. 112 * Configures the current constbuffers to use for the draw command.
@@ -180,12 +180,12 @@ private:
180 180
181 std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; 181 std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
182 182
183 static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 183 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
184 OGLBufferCache buffer_cache; 184 OGLBufferCache buffer_cache;
185 OGLFramebuffer framebuffer; 185 OGLFramebuffer framebuffer;
186 GLint uniform_buffer_alignment; 186 GLint uniform_buffer_alignment;
187 187
188 size_t CalculateVertexArraysSize() const; 188 std::size_t CalculateVertexArraysSize() const;
189 189
190 void SetupVertexArrays(); 190 void SetupVertexArrays();
191 191
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 32001e44b..86682d7cb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
75 return params; 75 return params;
76} 76}
77 77
78/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { 78/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
79 const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; 79 const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
80 SurfaceParams params{}; 80 SurfaceParams params{};
81 params.addr = TryGetCpuAddr(config.Address()); 81 params.addr = TryGetCpuAddr(config.Address());
@@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
167 {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S 167 {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
168 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI 168 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
169 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI 169 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
170 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
170 171
171 // Depth formats 172 // Depth formats
172 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F 173 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
203} 204}
204 205
205static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 206static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
206 ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); 207 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
207 auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; 208 auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
208 ASSERT(component_type == format.component_type); 209 ASSERT(component_type == format.component_type);
209 210
@@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
213static bool IsPixelFormatASTC(PixelFormat format) { 214static bool IsPixelFormatASTC(PixelFormat format) {
214 switch (format) { 215 switch (format) {
215 case PixelFormat::ASTC_2D_4X4: 216 case PixelFormat::ASTC_2D_4X4:
217 case PixelFormat::ASTC_2D_8X8:
216 return true; 218 return true;
217 default: 219 default:
218 return false; 220 return false;
@@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
223 switch (format) { 225 switch (format) {
224 case PixelFormat::ASTC_2D_4X4: 226 case PixelFormat::ASTC_2D_4X4:
225 return {4, 4}; 227 return {4, 4};
228 case PixelFormat::ASTC_2D_8X8:
229 return {8, 8};
226 default: 230 default:
227 LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); 231 LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
228 UNREACHABLE(); 232 UNREACHABLE();
@@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) {
256} 260}
257 261
258template <bool morton_to_gl, PixelFormat format> 262template <bool morton_to_gl, PixelFormat format>
259void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size, 263void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
260 VAddr addr) { 264 VAddr addr) {
261 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; 265 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
262 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); 266 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
267 const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; 271 const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
268 const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( 272 const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
269 addr, tile_size, bytes_per_pixel, stride, height, block_height); 273 addr, tile_size, bytes_per_pixel, stride, height, block_height);
270 const size_t size_to_copy{std::min(gl_buffer_size, data.size())}; 274 const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
271 memcpy(gl_buffer, data.data(), size_to_copy); 275 memcpy(gl_buffer, data.data(), size_to_copy);
272 } else { 276 } else {
273 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should 277 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
@@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
278 } 282 }
279} 283}
280 284
281static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), 285static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
282 SurfaceParams::MaxPixelFormat> 286 SurfaceParams::MaxPixelFormat>
283 morton_to_gl_fns = { 287 morton_to_gl_fns = {
284 // clang-format off 288 // clang-format off
@@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
327 MortonCopy<true, PixelFormat::RG8S>, 331 MortonCopy<true, PixelFormat::RG8S>,
328 MortonCopy<true, PixelFormat::RG32UI>, 332 MortonCopy<true, PixelFormat::RG32UI>,
329 MortonCopy<true, PixelFormat::R32UI>, 333 MortonCopy<true, PixelFormat::R32UI>,
334 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
330 MortonCopy<true, PixelFormat::Z32F>, 335 MortonCopy<true, PixelFormat::Z32F>,
331 MortonCopy<true, PixelFormat::Z16>, 336 MortonCopy<true, PixelFormat::Z16>,
332 MortonCopy<true, PixelFormat::Z24S8>, 337 MortonCopy<true, PixelFormat::Z24S8>,
@@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
335 // clang-format on 340 // clang-format on
336}; 341};
337 342
338static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), 343static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
339 SurfaceParams::MaxPixelFormat> 344 SurfaceParams::MaxPixelFormat>
340 gl_to_morton_fns = { 345 gl_to_morton_fns = {
341 // clang-format off 346 // clang-format off
@@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
386 MortonCopy<false, PixelFormat::RG8S>, 391 MortonCopy<false, PixelFormat::RG8S>,
387 MortonCopy<false, PixelFormat::RG32UI>, 392 MortonCopy<false, PixelFormat::RG32UI>,
388 MortonCopy<false, PixelFormat::R32UI>, 393 MortonCopy<false, PixelFormat::R32UI>,
394 nullptr,
389 MortonCopy<false, PixelFormat::Z32F>, 395 MortonCopy<false, PixelFormat::Z32F>,
390 MortonCopy<false, PixelFormat::Z16>, 396 MortonCopy<false, PixelFormat::Z16>,
391 MortonCopy<false, PixelFormat::Z24S8>, 397 MortonCopy<false, PixelFormat::Z24S8>,
@@ -513,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
513 S8Z24 input_pixel{}; 519 S8Z24 input_pixel{};
514 Z24S8 output_pixel{}; 520 Z24S8 output_pixel{};
515 constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; 521 constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
516 for (size_t y = 0; y < height; ++y) { 522 for (std::size_t y = 0; y < height; ++y) {
517 for (size_t x = 0; x < width; ++x) { 523 for (std::size_t x = 0; x < width; ++x) {
518 const size_t offset{bpp * (y * width + x)}; 524 const std::size_t offset{bpp * (y * width + x)};
519 std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); 525 std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
520 output_pixel.s8.Assign(input_pixel.s8); 526 output_pixel.s8.Assign(input_pixel.s8);
521 output_pixel.z24.Assign(input_pixel.z24); 527 output_pixel.z24.Assign(input_pixel.z24);
@@ -526,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
526 532
527static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { 533static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
528 constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; 534 constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
529 for (size_t y = 0; y < height; ++y) { 535 for (std::size_t y = 0; y < height; ++y) {
530 for (size_t x = 0; x < width; ++x) { 536 for (std::size_t x = 0; x < width; ++x) {
531 const size_t offset{bpp * (y * width + x)}; 537 const std::size_t offset{bpp * (y * width + x)};
532 const u8 temp{data[offset]}; 538 const u8 temp{data[offset]};
533 data[offset] = data[offset + 1]; 539 data[offset] = data[offset + 1];
534 data[offset + 1] = temp; 540 data[offset + 1] = temp;
@@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
544static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, 550static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
545 u32 width, u32 height) { 551 u32 width, u32 height) {
546 switch (pixel_format) { 552 switch (pixel_format) {
547 case PixelFormat::ASTC_2D_4X4: { 553 case PixelFormat::ASTC_2D_4X4:
554 case PixelFormat::ASTC_2D_8X8: {
548 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 555 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
549 u32 block_width{}; 556 u32 block_width{};
550 u32 block_height{}; 557 u32 block_height{};
@@ -591,13 +598,13 @@ void CachedSurface::LoadGLBuffer() {
591 UNREACHABLE(); 598 UNREACHABLE();
592 } 599 }
593 600
594 gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); 601 gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
595 morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( 602 morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
596 params.width, params.block_height, params.height, gl_buffer.data(), copy_size, 603 params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
597 params.addr); 604 params.addr);
598 } else { 605 } else {
599 const u8* const texture_src_data_end{texture_src_data + 606 const u8* const texture_src_data_end{texture_src_data +
600 (static_cast<size_t>(params.depth) * copy_size)}; 607 (static_cast<std::size_t>(params.depth) * copy_size)};
601 gl_buffer.assign(texture_src_data, texture_src_data_end); 608 gl_buffer.assign(texture_src_data, texture_src_data_end);
602 } 609 }
603 610
@@ -616,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
616 623
617 MICROPROFILE_SCOPE(OpenGL_TextureUL); 624 MICROPROFILE_SCOPE(OpenGL_TextureUL);
618 625
619 ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height * 626 ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
620 GetGLBytesPerPixel(params.pixel_format) * params.depth); 627 GetGLBytesPerPixel(params.pixel_format) * params.depth);
621 628
622 const auto& rect{params.GetRect()}; 629 const auto& rect{params.GetRect()};
@@ -624,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
624 // Load data from memory to the surface 631 // Load data from memory to the surface
625 const GLint x0 = static_cast<GLint>(rect.left); 632 const GLint x0 = static_cast<GLint>(rect.left);
626 const GLint y0 = static_cast<GLint>(rect.bottom); 633 const GLint y0 = static_cast<GLint>(rect.bottom);
627 const size_t buffer_offset = 634 const std::size_t buffer_offset =
628 static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * 635 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
636 static_cast<std::size_t>(x0)) *
629 GetGLBytesPerPixel(params.pixel_format); 637 GetGLBytesPerPixel(params.pixel_format);
630 638
631 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); 639 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -727,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
727 return GetSurface(depth_params, preserve_contents); 735 return GetSurface(depth_params, preserve_contents);
728} 736}
729 737
730Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { 738Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
731 const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; 739 const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
732 740
733 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 741 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
@@ -825,7 +833,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
825 auto source_format = GetFormatTuple(params.pixel_format, params.component_type); 833 auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
826 auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); 834 auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
827 835
828 size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); 836 std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
829 837
830 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); 838 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
831 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); 839 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -849,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
849 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " 857 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
850 "reinterpretation but the texture is tiled."); 858 "reinterpretation but the texture is tiled.");
851 } 859 }
852 size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); 860 std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
853 std::vector<u8> data(remaining_size); 861 std::vector<u8> data(remaining_size);
854 Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); 862 Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
855 glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, 863 glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 57ea8593b..d7a4bc37f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -70,19 +70,20 @@ struct SurfaceParams {
70 RG8S = 42, 70 RG8S = 42,
71 RG32UI = 43, 71 RG32UI = 43,
72 R32UI = 44, 72 R32UI = 44,
73 ASTC_2D_8X8 = 45,
73 74
74 MaxColorFormat, 75 MaxColorFormat,
75 76
76 // Depth formats 77 // Depth formats
77 Z32F = 45, 78 Z32F = 46,
78 Z16 = 46, 79 Z16 = 47,
79 80
80 MaxDepthFormat, 81 MaxDepthFormat,
81 82
82 // DepthStencil formats 83 // DepthStencil formats
83 Z24S8 = 47, 84 Z24S8 = 48,
84 S8Z24 = 48, 85 S8Z24 = 49,
85 Z32FS8 = 49, 86 Z32FS8 = 50,
86 87
87 MaxDepthStencilFormat, 88 MaxDepthStencilFormat,
88 89
@@ -90,7 +91,7 @@ struct SurfaceParams {
90 Invalid = 255, 91 Invalid = 255,
91 }; 92 };
92 93
93 static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); 94 static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
94 95
95 enum class ComponentType { 96 enum class ComponentType {
96 Invalid = 0, 97 Invalid = 0,
@@ -192,6 +193,7 @@ struct SurfaceParams {
192 1, // RG8S 193 1, // RG8S
193 1, // RG32UI 194 1, // RG32UI
194 1, // R32UI 195 1, // R32UI
196 4, // ASTC_2D_8X8
195 1, // Z32F 197 1, // Z32F
196 1, // Z16 198 1, // Z16
197 1, // Z24S8 199 1, // Z24S8
@@ -199,8 +201,8 @@ struct SurfaceParams {
199 1, // Z32FS8 201 1, // Z32FS8
200 }}; 202 }};
201 203
202 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 204 ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
203 return compression_factor_table[static_cast<size_t>(format)]; 205 return compression_factor_table[static_cast<std::size_t>(format)];
204 } 206 }
205 207
206 static constexpr u32 GetFormatBpp(PixelFormat format) { 208 static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -253,6 +255,7 @@ struct SurfaceParams {
253 16, // RG8S 255 16, // RG8S
254 64, // RG32UI 256 64, // RG32UI
255 32, // R32UI 257 32, // R32UI
258 16, // ASTC_2D_8X8
256 32, // Z32F 259 32, // Z32F
257 16, // Z16 260 16, // Z16
258 32, // Z24S8 261 32, // Z24S8
@@ -260,8 +263,8 @@ struct SurfaceParams {
260 64, // Z32FS8 263 64, // Z32FS8
261 }}; 264 }};
262 265
263 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 266 ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
264 return bpp_table[static_cast<size_t>(format)]; 267 return bpp_table[static_cast<std::size_t>(format)];
265 } 268 }
266 269
267 u32 GetFormatBpp() const { 270 u32 GetFormatBpp() const {
@@ -316,6 +319,8 @@ struct SurfaceParams {
316 return PixelFormat::R11FG11FB10F; 319 return PixelFormat::R11FG11FB10F;
317 case Tegra::RenderTargetFormat::B5G6R5_UNORM: 320 case Tegra::RenderTargetFormat::B5G6R5_UNORM:
318 return PixelFormat::B5G6R5U; 321 return PixelFormat::B5G6R5U;
322 case Tegra::RenderTargetFormat::BGR5A1_UNORM:
323 return PixelFormat::A1B5G5R5U;
319 case Tegra::RenderTargetFormat::RGBA32_UINT: 324 case Tegra::RenderTargetFormat::RGBA32_UINT:
320 return PixelFormat::RGBA32UI; 325 return PixelFormat::RGBA32UI;
321 case Tegra::RenderTargetFormat::R8_UNORM: 326 case Tegra::RenderTargetFormat::R8_UNORM:
@@ -522,6 +527,8 @@ struct SurfaceParams {
522 return PixelFormat::BC6H_SF16; 527 return PixelFormat::BC6H_SF16;
523 case Tegra::Texture::TextureFormat::ASTC_2D_4X4: 528 case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
524 return PixelFormat::ASTC_2D_4X4; 529 return PixelFormat::ASTC_2D_4X4;
530 case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
531 return PixelFormat::ASTC_2D_8X8;
525 case Tegra::Texture::TextureFormat::R16_G16: 532 case Tegra::Texture::TextureFormat::R16_G16:
526 switch (component_type) { 533 switch (component_type) {
527 case Tegra::Texture::ComponentType::FLOAT: 534 case Tegra::Texture::ComponentType::FLOAT:
@@ -576,6 +583,7 @@ struct SurfaceParams {
576 case Tegra::RenderTargetFormat::RG16_UNORM: 583 case Tegra::RenderTargetFormat::RG16_UNORM:
577 case Tegra::RenderTargetFormat::R16_UNORM: 584 case Tegra::RenderTargetFormat::R16_UNORM:
578 case Tegra::RenderTargetFormat::B5G6R5_UNORM: 585 case Tegra::RenderTargetFormat::B5G6R5_UNORM:
586 case Tegra::RenderTargetFormat::BGR5A1_UNORM:
579 case Tegra::RenderTargetFormat::RG8_UNORM: 587 case Tegra::RenderTargetFormat::RG8_UNORM:
580 case Tegra::RenderTargetFormat::RGBA16_UNORM: 588 case Tegra::RenderTargetFormat::RGBA16_UNORM:
581 return ComponentType::UNorm; 589 return ComponentType::UNorm;
@@ -636,16 +644,18 @@ struct SurfaceParams {
636 } 644 }
637 645
638 static SurfaceType GetFormatType(PixelFormat pixel_format) { 646 static SurfaceType GetFormatType(PixelFormat pixel_format) {
639 if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) { 647 if (static_cast<std::size_t>(pixel_format) <
648 static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
640 return SurfaceType::ColorTexture; 649 return SurfaceType::ColorTexture;
641 } 650 }
642 651
643 if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) { 652 if (static_cast<std::size_t>(pixel_format) <
653 static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
644 return SurfaceType::Depth; 654 return SurfaceType::Depth;
645 } 655 }
646 656
647 if (static_cast<size_t>(pixel_format) < 657 if (static_cast<std::size_t>(pixel_format) <
648 static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { 658 static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
649 return SurfaceType::DepthStencil; 659 return SurfaceType::DepthStencil;
650 } 660 }
651 661
@@ -659,7 +669,7 @@ struct SurfaceParams {
659 MathUtil::Rectangle<u32> GetRect() const; 669 MathUtil::Rectangle<u32> GetRect() const;
660 670
661 /// Returns the size of this surface in bytes, adjusted for compression 671 /// Returns the size of this surface in bytes, adjusted for compression
662 size_t SizeInBytes() const { 672 std::size_t SizeInBytes() const {
663 const u32 compression_factor{GetCompressionFactor(pixel_format)}; 673 const u32 compression_factor{GetCompressionFactor(pixel_format)};
664 ASSERT(width % compression_factor == 0); 674 ASSERT(width % compression_factor == 0);
665 ASSERT(height % compression_factor == 0); 675 ASSERT(height % compression_factor == 0);
@@ -671,7 +681,7 @@ struct SurfaceParams {
671 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); 681 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
672 682
673 /// Creates SurfaceParams from a framebuffer configuration 683 /// Creates SurfaceParams from a framebuffer configuration
674 static SurfaceParams CreateForFramebuffer(size_t index); 684 static SurfaceParams CreateForFramebuffer(std::size_t index);
675 685
676 /// Creates SurfaceParams for a depth buffer configuration 686 /// Creates SurfaceParams for a depth buffer configuration
677 static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, 687 static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -694,7 +704,7 @@ struct SurfaceParams {
694 u32 height; 704 u32 height;
695 u32 depth; 705 u32 depth;
696 u32 unaligned_height; 706 u32 unaligned_height;
697 size_t size_in_bytes; 707 std::size_t size_in_bytes;
698 SurfaceTarget target; 708 SurfaceTarget target;
699}; 709};
700 710
@@ -711,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
711namespace std { 721namespace std {
712template <> 722template <>
713struct hash<SurfaceReserveKey> { 723struct hash<SurfaceReserveKey> {
714 size_t operator()(const SurfaceReserveKey& k) const { 724 std::size_t operator()(const SurfaceReserveKey& k) const {
715 return k.Hash(); 725 return k.Hash();
716 } 726 }
717}; 727};
@@ -727,7 +737,7 @@ public:
727 return params.addr; 737 return params.addr;
728 } 738 }
729 739
730 size_t GetSizeInBytes() const { 740 std::size_t GetSizeInBytes() const {
731 return params.size_in_bytes; 741 return params.size_in_bytes;
732 } 742 }
733 743
@@ -775,7 +785,7 @@ public:
775 Surface GetDepthBufferSurface(bool preserve_contents); 785 Surface GetDepthBufferSurface(bool preserve_contents);
776 786
777 /// Get the color surface based on the framebuffer configuration and the specified render target 787 /// Get the color surface based on the framebuffer configuration and the specified render target
778 Surface GetColorBufferSurface(size_t index, bool preserve_contents); 788 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
779 789
780 /// Flushes the surface to Switch memory 790 /// Flushes the surface to Switch memory
781 void FlushSurface(const Surface& surface); 791 void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 61080f5cc..894fe6eae 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -14,7 +14,7 @@ namespace OpenGL {
14/// Gets the address for the specified shader stage program 14/// Gets the address for the specified shader stage program
15static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 15static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
16 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 16 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
17 const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; 17 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
18 return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 18 return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
19 shader_config.offset); 19 shader_config.offset);
20} 20}
@@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {
28 28
29/// Helper function to set shader uniform block bindings for a single shader stage 29/// Helper function to set shader uniform block bindings for a single shader stage
30static void SetShaderUniformBlockBinding(GLuint shader, const char* name, 30static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
31 Maxwell::ShaderStage binding, size_t expected_size) { 31 Maxwell::ShaderStage binding, std::size_t expected_size) {
32 const GLuint ub_index = glGetUniformBlockIndex(shader, name); 32 const GLuint ub_index = glGetUniformBlockIndex(shader, name);
33 if (ub_index == GL_INVALID_INDEX) { 33 if (ub_index == GL_INVALID_INDEX) {
34 return; 34 return;
@@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
36 36
37 GLint ub_size = 0; 37 GLint ub_size = 0;
38 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); 38 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
39 ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, 39 ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
40 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); 40 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
41 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); 41 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
42} 42}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6e6febcbc..9bafe43a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -28,7 +28,7 @@ public:
28 } 28 }
29 29
30 /// Gets the size of the shader in guest memory, required for cache management 30 /// Gets the size of the shader in guest memory, required for cache management
31 size_t GetSizeInBytes() const { 31 std::size_t GetSizeInBytes() const {
32 return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); 32 return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
33 } 33 }
34 34
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7a5321b9c..00cd05e62 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
12#include "common/assert.h" 12#include "common/assert.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/shader_bytecode.h" 14#include "video_core/engines/shader_bytecode.h"
15#include "video_core/engines/shader_header.h"
15#include "video_core/renderer_opengl/gl_rasterizer.h" 16#include "video_core/renderer_opengl/gl_rasterizer.h"
16#include "video_core/renderer_opengl/gl_shader_decompiler.h" 17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
17 18
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
26using Tegra::Shader::SubOp; 27using Tegra::Shader::SubOp;
27 28
28constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 29constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
29constexpr u32 PROGRAM_HEADER_SIZE = 0x50; 30constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
30 31
31class DecompileFail : public std::runtime_error { 32class DecompileFail : public std::runtime_error {
32public: 33public:
@@ -189,7 +190,7 @@ public:
189 190
190private: 191private:
191 void AppendIndentation() { 192 void AppendIndentation() {
192 shader_source.append(static_cast<size_t>(scope) * 4, ' '); 193 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
193 } 194 }
194 195
195 std::string shader_source; 196 std::string shader_source;
@@ -208,7 +209,7 @@ public:
208 UnsignedInteger, 209 UnsignedInteger,
209 }; 210 };
210 211
211 GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} 212 GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
212 213
213 /// Gets the GLSL type string for a register 214 /// Gets the GLSL type string for a register
214 static std::string GetTypeString() { 215 static std::string GetTypeString() {
@@ -226,15 +227,23 @@ public:
226 } 227 }
227 228
228 /// Returns the index of the register 229 /// Returns the index of the register
229 size_t GetIndex() const { 230 std::size_t GetIndex() const {
230 return index; 231 return index;
231 } 232 }
232 233
233private: 234private:
234 const size_t index; 235 const std::size_t index;
235 const std::string& suffix; 236 const std::string& suffix;
236}; 237};
237 238
239enum class InternalFlag : u64 {
240 ZeroFlag = 0,
241 CarryFlag = 1,
242 OverflowFlag = 2,
243 NaNFlag = 3,
244 Amount
245};
246
238/** 247/**
239 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state 248 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
240 * of all registers (e.g. whether they are currently being used as Floats or Integers), and 249 * of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -328,13 +337,19 @@ public:
328 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, 337 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
329 const std::string& value, u64 dest_num_components, 338 const std::string& value, u64 dest_num_components,
330 u64 value_num_components, bool is_saturated = false, 339 u64 value_num_components, bool is_saturated = false,
331 u64 dest_elem = 0, Register::Size size = Register::Size::Word) { 340 u64 dest_elem = 0, Register::Size size = Register::Size::Word,
341 bool sets_cc = false) {
332 ASSERT_MSG(!is_saturated, "Unimplemented"); 342 ASSERT_MSG(!is_saturated, "Unimplemented");
333 343
334 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; 344 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
335 345
336 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', 346 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
337 dest_num_components, value_num_components, dest_elem); 347 dest_num_components, value_num_components, dest_elem);
348
349 if (sets_cc) {
350 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
351 SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
352 }
338 } 353 }
339 354
340 /** 355 /**
@@ -351,6 +366,26 @@ public:
351 shader.AddLine(dest + " = " + src + ';'); 366 shader.AddLine(dest + " = " + src + ';');
352 } 367 }
353 368
369 std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
370 switch (cc) {
371 case Tegra::Shader::ControlCode::NEU:
372 return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
373 default:
374 LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc));
375 UNREACHABLE();
376 return "false";
377 }
378 }
379
380 std::string GetInternalFlag(const InternalFlag ii) const {
381 const u32 code = static_cast<u32>(ii);
382 return "internalFlag_" + std::to_string(code) + suffix;
383 }
384
385 void SetInternalFlag(const InternalFlag ii, const std::string& value) const {
386 shader.AddLine(GetInternalFlag(ii) + " = " + value + ';');
387 }
388
354 /** 389 /**
355 * Writes code that does a output attribute assignment to register operation. Output attributes 390 * Writes code that does a output attribute assignment to register operation. Output attributes
356 * are stored as floats, so this may require conversion. 391 * are stored as floats, so this may require conversion.
@@ -414,6 +449,12 @@ public:
414 } 449 }
415 declarations.AddNewLine(); 450 declarations.AddNewLine();
416 451
452 for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
453 const InternalFlag code = static_cast<InternalFlag>(ii);
454 declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
455 }
456 declarations.AddNewLine();
457
417 for (const auto element : declr_input_attribute) { 458 for (const auto element : declr_input_attribute) {
418 // TODO(bunnei): Use proper number of elements for these 459 // TODO(bunnei): Use proper number of elements for these
419 u32 idx = 460 u32 idx =
@@ -468,7 +509,7 @@ public:
468 /// necessary. 509 /// necessary.
469 std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, 510 std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
470 bool is_array) { 511 bool is_array) {
471 const size_t offset = static_cast<size_t>(sampler.index.Value()); 512 const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
472 513
473 // If this sampler has already been used, return the existing mapping. 514 // If this sampler has already been used, return the existing mapping.
474 const auto itr = 515 const auto itr =
@@ -481,7 +522,7 @@ public:
481 } 522 }
482 523
483 // Otherwise create a new mapping for this sampler 524 // Otherwise create a new mapping for this sampler
484 const size_t next_index = used_samplers.size(); 525 const std::size_t next_index = used_samplers.size();
485 const SamplerEntry entry{stage, offset, next_index, type, is_array}; 526 const SamplerEntry entry{stage, offset, next_index, type, is_array};
486 used_samplers.emplace_back(entry); 527 used_samplers.emplace_back(entry);
487 return entry.GetName(); 528 return entry.GetName();
@@ -531,7 +572,7 @@ private:
531 void BuildRegisterList() { 572 void BuildRegisterList() {
532 regs.reserve(Register::NumRegisters); 573 regs.reserve(Register::NumRegisters);
533 574
534 for (size_t index = 0; index < Register::NumRegisters; ++index) { 575 for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
535 regs.emplace_back(index, suffix); 576 regs.emplace_back(index, suffix);
536 } 577 }
537 } 578 }
@@ -674,7 +715,7 @@ public:
674 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) 715 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
675 : subroutines(subroutines), program_code(program_code), main_offset(main_offset), 716 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
676 stage(stage), suffix(suffix) { 717 stage(stage), suffix(suffix) {
677 718 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
678 Generate(suffix); 719 Generate(suffix);
679 } 720 }
680 721
@@ -688,23 +729,6 @@ public:
688 } 729 }
689 730
690private: 731private:
691 // Shader program header for a Fragment Shader.
692 struct FragmentHeader {
693 INSERT_PADDING_WORDS(5);
694 INSERT_PADDING_WORDS(13);
695 u32 enabled_color_outputs;
696 union {
697 BitField<0, 1, u32> writes_samplemask;
698 BitField<1, 1, u32> writes_depth;
699 };
700
701 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
702 const u32 bit = render_target * 4 + component;
703 return enabled_color_outputs & (1 << bit);
704 }
705 };
706 static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
707
708 /// Gets the Subroutine object corresponding to the specified address. 732 /// Gets the Subroutine object corresponding to the specified address.
709 const Subroutine& GetSubroutine(u32 begin, u32 end) const { 733 const Subroutine& GetSubroutine(u32 begin, u32 end) const {
710 const auto iter = subroutines.find(Subroutine{begin, end, suffix}); 734 const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -862,7 +886,7 @@ private:
862 */ 886 */
863 bool IsSchedInstruction(u32 offset) const { 887 bool IsSchedInstruction(u32 offset) const {
864 // sched instructions appear once every 4 instructions. 888 // sched instructions appear once every 4 instructions.
865 static constexpr size_t SchedPeriod = 4; 889 static constexpr std::size_t SchedPeriod = 4;
866 u32 absolute_offset = offset - main_offset; 890 u32 absolute_offset = offset - main_offset;
867 891
868 return (absolute_offset % SchedPeriod) == 0; 892 return (absolute_offset % SchedPeriod) == 0;
@@ -930,7 +954,7 @@ private:
930 std::string result; 954 std::string result;
931 result += '('; 955 result += '(';
932 956
933 for (size_t i = 0; i < shift_amounts.size(); ++i) { 957 for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
934 if (i) 958 if (i)
935 result += '|'; 959 result += '|';
936 result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + 960 result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -954,9 +978,7 @@ private:
954 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 978 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
955 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 979 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
956 980
957 ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); 981 std::size_t written_components = 0;
958
959 size_t written_components = 0;
960 for (u32 component = 0; component < 4; ++component) { 982 for (u32 component = 0; component < 4; ++component) {
961 if (!instr.texs.IsComponentEnabled(component)) { 983 if (!instr.texs.IsComponentEnabled(component)) {
962 continue; 984 continue;
@@ -1010,10 +1032,8 @@ private:
1010 /// Writes the output values from a fragment shader to the corresponding GLSL output variables. 1032 /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
1011 void EmitFragmentOutputsWrite() { 1033 void EmitFragmentOutputsWrite() {
1012 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 1034 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
1013 FragmentHeader header;
1014 std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
1015 1035
1016 ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); 1036 ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
1017 1037
1018 // Write the color outputs using the data in the shader registers, disabled 1038 // Write the color outputs using the data in the shader registers, disabled
1019 // rendertargets/components are skipped in the register assignment. 1039 // rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1042,7 @@ private:
1022 ++render_target) { 1042 ++render_target) {
1023 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 1043 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1024 for (u32 component = 0; component < 4; ++component) { 1044 for (u32 component = 0; component < 4; ++component) {
1025 if (header.IsColorComponentOutputEnabled(render_target, component)) { 1045 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1026 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, 1046 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1027 regs.GetRegisterAsFloat(current_reg))); 1047 regs.GetRegisterAsFloat(current_reg)));
1028 ++current_reg; 1048 ++current_reg;
@@ -1030,7 +1050,7 @@ private:
1030 } 1050 }
1031 } 1051 }
1032 1052
1033 if (header.writes_depth) { 1053 if (header.ps.omap.depth) {
1034 // The depth output is always 2 registers after the last color output, and current_reg 1054 // The depth output is always 2 registers after the last color output, and current_reg
1035 // already contains one past the last color register. 1055 // already contains one past the last color register.
1036 1056
@@ -1510,8 +1530,6 @@ private:
1510 case OpCode::Id::LEA_IMM: 1530 case OpCode::Id::LEA_IMM:
1511 case OpCode::Id::LEA_RZ: 1531 case OpCode::Id::LEA_RZ:
1512 case OpCode::Id::LEA_HI: { 1532 case OpCode::Id::LEA_HI: {
1513 std::string op_a;
1514 std::string op_b;
1515 std::string op_c; 1533 std::string op_c;
1516 1534
1517 switch (opcode->GetId()) { 1535 switch (opcode->GetId()) {
@@ -1642,7 +1660,8 @@ private:
1642 } 1660 }
1643 1661
1644 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1662 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1645 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); 1663 1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
1664 instr.generates_cc.Value() != 0);
1646 break; 1665 break;
1647 } 1666 }
1648 case OpCode::Id::I2F_R: 1667 case OpCode::Id::I2F_R:
@@ -1781,8 +1800,8 @@ private:
1781 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 1800 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
1782 Tegra::Shader::IpaSampleMode::Default}; 1801 Tegra::Shader::IpaSampleMode::Default};
1783 1802
1784 u32 next_element = instr.attribute.fmt20.element; 1803 u64 next_element = instr.attribute.fmt20.element;
1785 u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); 1804 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
1786 1805
1787 const auto LoadNextElement = [&](u32 reg_offset) { 1806 const auto LoadNextElement = [&](u32 reg_offset) {
1788 regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, 1807 regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
@@ -1846,8 +1865,8 @@ private:
1846 ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, 1865 ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
1847 "Unaligned attribute loads are not supported"); 1866 "Unaligned attribute loads are not supported");
1848 1867
1849 u32 next_element = instr.attribute.fmt20.element; 1868 u64 next_element = instr.attribute.fmt20.element;
1850 u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); 1869 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
1851 1870
1852 const auto StoreNextElement = [&](u32 reg_offset) { 1871 const auto StoreNextElement = [&](u32 reg_offset) {
1853 regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), 1872 regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
@@ -1873,6 +1892,13 @@ private:
1873 Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; 1892 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
1874 std::string coord; 1893 std::string coord;
1875 1894
1895 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
1896 "NODEP is not implemented");
1897 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
1898 "AOFFI is not implemented");
1899 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
1900 "DC is not implemented");
1901
1876 switch (texture_type) { 1902 switch (texture_type) {
1877 case Tegra::Shader::TextureType::Texture1D: { 1903 case Tegra::Shader::TextureType::Texture1D: {
1878 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 1904 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -1937,8 +1963,8 @@ private:
1937 UNREACHABLE(); 1963 UNREACHABLE();
1938 } 1964 }
1939 } 1965 }
1940 size_t dest_elem{}; 1966 std::size_t dest_elem{};
1941 for (size_t elem = 0; elem < 4; ++elem) { 1967 for (std::size_t elem = 0; elem < 4; ++elem) {
1942 if (!instr.tex.IsComponentEnabled(elem)) { 1968 if (!instr.tex.IsComponentEnabled(elem)) {
1943 // Skip disabled components 1969 // Skip disabled components
1944 continue; 1970 continue;
@@ -1955,6 +1981,11 @@ private:
1955 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; 1981 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
1956 bool is_array{instr.texs.IsArrayTexture()}; 1982 bool is_array{instr.texs.IsArrayTexture()};
1957 1983
1984 ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
1985 "NODEP is not implemented");
1986 ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
1987 "DC is not implemented");
1988
1958 switch (texture_type) { 1989 switch (texture_type) {
1959 case Tegra::Shader::TextureType::Texture2D: { 1990 case Tegra::Shader::TextureType::Texture2D: {
1960 if (is_array) { 1991 if (is_array) {
@@ -1990,6 +2021,13 @@ private:
1990 std::string coord; 2021 std::string coord;
1991 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; 2022 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
1992 const bool is_array{instr.tlds.IsArrayTexture()}; 2023 const bool is_array{instr.tlds.IsArrayTexture()};
2024
2025 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2026 "NODEP is not implemented");
2027 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2028 "AOFFI is not implemented");
2029 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
2030 "MZ is not implemented");
1993 2031
1994 switch (texture_type) { 2032 switch (texture_type) {
1995 case Tegra::Shader::TextureType::Texture1D: { 2033 case Tegra::Shader::TextureType::Texture1D: {
@@ -2024,6 +2062,17 @@ private:
2024 ASSERT(instr.tld4.array == 0); 2062 ASSERT(instr.tld4.array == 0);
2025 std::string coord; 2063 std::string coord;
2026 2064
2065 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2066 "NODEP is not implemented");
2067 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2068 "AOFFI is not implemented");
2069 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
2070 "DC is not implemented");
2071 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
2072 "NDV is not implemented");
2073 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
2074 "PTP is not implemented");
2075
2027 switch (instr.tld4.texture_type) { 2076 switch (instr.tld4.texture_type) {
2028 case Tegra::Shader::TextureType::Texture2D: { 2077 case Tegra::Shader::TextureType::Texture2D: {
2029 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2078 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -2047,8 +2096,8 @@ private:
2047 const std::string texture = "textureGather(" + sampler + ", coords, " + 2096 const std::string texture = "textureGather(" + sampler + ", coords, " +
2048 std::to_string(instr.tld4.component) + ')'; 2097 std::to_string(instr.tld4.component) + ')';
2049 2098
2050 size_t dest_elem{}; 2099 std::size_t dest_elem{};
2051 for (size_t elem = 0; elem < 4; ++elem) { 2100 for (std::size_t elem = 0; elem < 4; ++elem) {
2052 if (!instr.tex.IsComponentEnabled(elem)) { 2101 if (!instr.tex.IsComponentEnabled(elem)) {
2053 // Skip disabled components 2102 // Skip disabled components
2054 continue; 2103 continue;
@@ -2061,6 +2110,13 @@ private:
2061 break; 2110 break;
2062 } 2111 }
2063 case OpCode::Id::TLD4S: { 2112 case OpCode::Id::TLD4S: {
2113 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2114 "NODEP is not implemented");
2115 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2116 "AOFFI is not implemented");
2117 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
2118 "DC is not implemented");
2119
2064 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 2120 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2065 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); 2121 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
2066 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. 2122 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
@@ -2073,6 +2129,9 @@ private:
2073 break; 2129 break;
2074 } 2130 }
2075 case OpCode::Id::TXQ: { 2131 case OpCode::Id::TXQ: {
2132 ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2133 "NODEP is not implemented");
2134
2076 // TODO: the new commits on the texture refactor, change the way samplers work. 2135 // TODO: the new commits on the texture refactor, change the way samplers work.
2077 // Sadly, not all texture instructions specify the type of texture their sampler 2136 // Sadly, not all texture instructions specify the type of texture their sampler
2078 // uses. This must be fixed at a later instance. 2137 // uses. This must be fixed at a later instance.
@@ -2093,6 +2152,11 @@ private:
2093 break; 2152 break;
2094 } 2153 }
2095 case OpCode::Id::TMML: { 2154 case OpCode::Id::TMML: {
2155 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2156 "NODEP is not implemented");
2157 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
2158 "NDV is not implemented");
2159
2096 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 2160 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2097 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2161 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2098 const bool is_array = instr.tmml.array != 0; 2162 const bool is_array = instr.tmml.array != 0;
@@ -2259,31 +2323,55 @@ private:
2259 break; 2323 break;
2260 } 2324 }
2261 case OpCode::Type::PredicateSetPredicate: { 2325 case OpCode::Type::PredicateSetPredicate: {
2262 const std::string op_a = 2326 switch (opcode->GetId()) {
2263 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); 2327 case OpCode::Id::PSETP: {
2264 const std::string op_b = 2328 const std::string op_a =
2265 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); 2329 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
2330 const std::string op_b =
2331 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
2266 2332
2267 // We can't use the constant predicate as destination. 2333 // We can't use the constant predicate as destination.
2268 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); 2334 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
2269 2335
2270 const std::string second_pred = 2336 const std::string second_pred =
2271 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); 2337 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
2272 2338
2273 const std::string combiner = GetPredicateCombiner(instr.psetp.op); 2339 const std::string combiner = GetPredicateCombiner(instr.psetp.op);
2274 2340
2275 const std::string predicate = 2341 const std::string predicate =
2276 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; 2342 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
2277 2343
2278 // Set the primary predicate to the result of Predicate OP SecondPredicate 2344 // Set the primary predicate to the result of Predicate OP SecondPredicate
2279 SetPredicate(instr.psetp.pred3, 2345 SetPredicate(instr.psetp.pred3,
2280 '(' + predicate + ") " + combiner + " (" + second_pred + ')'); 2346 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
2281 2347
2282 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 2348 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
2283 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, 2349 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
2284 // if enabled 2350 // if enabled
2285 SetPredicate(instr.psetp.pred0, 2351 SetPredicate(instr.psetp.pred0,
2286 "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); 2352 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
2353 }
2354 break;
2355 }
2356 case OpCode::Id::CSETP: {
2357 const std::string pred =
2358 GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
2359 const std::string combiner = GetPredicateCombiner(instr.csetp.op);
2360 const std::string controlCode = regs.GetControlCode(instr.csetp.cc);
2361 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
2362 SetPredicate(instr.csetp.pred3,
2363 '(' + controlCode + ") " + combiner + " (" + pred + ')');
2364 }
2365 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
2366 SetPredicate(instr.csetp.pred0,
2367 "!(" + controlCode + ") " + combiner + " (" + pred + ')');
2368 }
2369 break;
2370 }
2371 default: {
2372 LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
2373 UNREACHABLE();
2374 }
2287 } 2375 }
2288 break; 2376 break;
2289 } 2377 }
@@ -2673,6 +2761,7 @@ private:
2673private: 2761private:
2674 const std::set<Subroutine>& subroutines; 2762 const std::set<Subroutine>& subroutines;
2675 const ProgramCode& program_code; 2763 const ProgramCode& program_code;
2764 Tegra::Shader::Header header;
2676 const u32 main_offset; 2765 const u32 main_offset;
2677 Maxwell3D::Regs::ShaderStage stage; 2766 Maxwell3D::Regs::ShaderStage stage;
2678 const std::string& suffix; 2767 const std::string& suffix;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index a43e2997b..d53b93ad5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -13,7 +13,7 @@
13 13
14namespace OpenGL::GLShader { 14namespace OpenGL::GLShader {
15 15
16constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; 16constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
17using ProgramCode = std::vector<u64>; 17using ProgramCode = std::vector<u64>;
18 18
19class ConstBufferEntry { 19class ConstBufferEntry {
@@ -51,7 +51,7 @@ public:
51 } 51 }
52 52
53 std::string GetName() const { 53 std::string GetName() const {
54 return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index); 54 return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
55 } 55 }
56 56
57 u32 GetHash() const { 57 u32 GetHash() const {
@@ -74,15 +74,15 @@ class SamplerEntry {
74 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 74 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
75 75
76public: 76public:
77 SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, 77 SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
78 Tegra::Shader::TextureType type, bool is_array) 78 Tegra::Shader::TextureType type, bool is_array)
79 : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} 79 : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
80 80
81 size_t GetOffset() const { 81 std::size_t GetOffset() const {
82 return offset; 82 return offset;
83 } 83 }
84 84
85 size_t GetIndex() const { 85 std::size_t GetIndex() const {
86 return sampler_index; 86 return sampler_index;
87 } 87 }
88 88
@@ -91,7 +91,7 @@ public:
91 } 91 }
92 92
93 std::string GetName() const { 93 std::string GetName() const {
94 return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + 94 return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
95 std::to_string(sampler_index); 95 std::to_string(sampler_index);
96 } 96 }
97 97
@@ -133,7 +133,7 @@ public:
133 } 133 }
134 134
135 static std::string GetArrayName(Maxwell::ShaderStage stage) { 135 static std::string GetArrayName(Maxwell::ShaderStage stage) {
136 return TextureSamplerNames[static_cast<size_t>(stage)]; 136 return TextureSamplerNames[static_cast<std::size_t>(stage)];
137 } 137 }
138 138
139private: 139private:
@@ -143,9 +143,9 @@ private:
143 143
144 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling 144 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
145 /// instruction. 145 /// instruction.
146 size_t offset; 146 std::size_t offset;
147 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. 147 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
148 size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. 148 std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
149 Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) 149 Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
150 bool is_array; ///< Whether the texture is being sampled as an array texture or not. 150 bool is_array; ///< Whether the texture is being sampled as an array texture or not.
151}; 151};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 533e42caa..b86cd96e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,7 +12,7 @@
12namespace OpenGL::GLShader { 12namespace OpenGL::GLShader {
13 13
14/// Number of OpenGL texture samplers that can be used in the fragment shader 14/// Number of OpenGL texture samplers that can be used in the fragment shader
15static constexpr size_t NumTextureSamplers = 32; 15static constexpr std::size_t NumTextureSamplers = 32;
16 16
17using Tegra::Engines::Maxwell3D; 17using Tegra::Engines::Maxwell3D;
18 18
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 6f70deb96..af99132ba 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -272,7 +272,7 @@ void OpenGLState::Apply() const {
272 } 272 }
273 273
274 // Clip distance 274 // Clip distance
275 for (size_t i = 0; i < clip_distance.size(); ++i) { 275 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
276 if (clip_distance[i] != cur_state.clip_distance[i]) { 276 if (clip_distance[i] != cur_state.clip_distance[i]) {
277 if (clip_distance[i]) { 277 if (clip_distance[i]) {
278 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); 278 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index aadf68f16..664f3ca20 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
61 mapped_size = size; 61 mapped_size = size;
62 62
63 if (alignment > 0) { 63 if (alignment > 0) {
64 buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); 64 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
65 } 65 }
66 66
67 bool invalidate = false; 67 bool invalidate = false;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 272294c62..20ba6d4f6 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
46 } 46 }
47} 47}
48 48
49template <std::size_t N, std::size_t M>
50struct alignas(64) SwizzleTable {
51 constexpr SwizzleTable() {
52 for (u32 y = 0; y < N; ++y) {
53 for (u32 x = 0; x < M; ++x) {
54 const u32 x2 = x * 16;
55 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
56 ((x2 % 32) / 16) * 32 + (y % 2) * 16);
57 }
58 }
59 }
60 const std::array<u16, M>& operator[](std::size_t index) const {
61 return values[index];
62 }
63 std::array<std::array<u16, M>, N> values{};
64};
65
66constexpr auto swizzle_table = SwizzleTable<8, 4>();
67
68void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data,
69 u8* unswizzled_data, bool unswizzle, u32 block_height) {
70 std::array<u8*, 2> data_ptrs;
71 const std::size_t stride{width * bytes_per_pixel};
72 const std::size_t image_width_in_gobs{(stride + 63) / 64};
73 const std::size_t copy_size{16};
74 for (std::size_t y = 0; y < height; ++y) {
75 const std::size_t initial_gob =
76 (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs +
77 (y % (8 * block_height) / 8) * 512;
78 const std::size_t pixel_base{y * width * bytes_per_pixel};
79 const auto& table = swizzle_table[y % 8];
80 for (std::size_t xb = 0; xb < stride; xb += copy_size) {
81 const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height};
82 const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
83 const std::size_t pixel_index{xb + pixel_base};
84 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
85 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
86 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
87 }
88 }
89}
90
49u32 BytesPerPixel(TextureFormat format) { 91u32 BytesPerPixel(TextureFormat format) {
50 switch (format) { 92 switch (format) {
51 case TextureFormat::DXT1: 93 case TextureFormat::DXT1:
@@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) {
63 case TextureFormat::R32_G32_B32: 105 case TextureFormat::R32_G32_B32:
64 return 12; 106 return 12;
65 case TextureFormat::ASTC_2D_4X4: 107 case TextureFormat::ASTC_2D_4X4:
108 case TextureFormat::ASTC_2D_8X8:
66 case TextureFormat::A8R8G8B8: 109 case TextureFormat::A8R8G8B8:
67 case TextureFormat::A2B10G10R10: 110 case TextureFormat::A2B10G10R10:
68 case TextureFormat::BF10GF11RF11: 111 case TextureFormat::BF10GF11RF11:
@@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) {
91std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, 134std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
92 u32 height, u32 block_height) { 135 u32 height, u32 block_height) {
93 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); 136 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
94 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, 137 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
95 Memory::GetPointer(address), unswizzled_data.data(), true, block_height); 138 FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel,
139 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
140 } else {
141 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
142 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
143 }
96 return unswizzled_data; 144 return unswizzled_data;
97} 145}
98 146
@@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
111 case TextureFormat::BC6H_UF16: 159 case TextureFormat::BC6H_UF16:
112 case TextureFormat::BC6H_SF16: 160 case TextureFormat::BC6H_SF16:
113 case TextureFormat::ASTC_2D_4X4: 161 case TextureFormat::ASTC_2D_4X4:
162 case TextureFormat::ASTC_2D_8X8:
114 case TextureFormat::A8R8G8B8: 163 case TextureFormat::A8R8G8B8:
115 case TextureFormat::A2B10G10R10: 164 case TextureFormat::A2B10G10R10:
116 case TextureFormat::A1B5G5R5: 165 case TextureFormat::A1B5G5R5: