diff options
Diffstat (limited to 'src/video_core')
24 files changed, 651 insertions, 195 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4a79ce39c..f5ae57039 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -14,6 +14,7 @@ add_library(video_core STATIC | |||
| 14 | engines/maxwell_dma.cpp | 14 | engines/maxwell_dma.cpp |
| 15 | engines/maxwell_dma.h | 15 | engines/maxwell_dma.h |
| 16 | engines/shader_bytecode.h | 16 | engines/shader_bytecode.h |
| 17 | engines/shader_header.h | ||
| 17 | gpu.cpp | 18 | gpu.cpp |
| 18 | gpu.h | 19 | gpu.h |
| 19 | macro_interpreter.cpp | 20 | macro_interpreter.cpp |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index dcf9ef8b9..021b83eaa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -26,7 +26,7 @@ public: | |||
| 26 | void WriteReg(u32 method, u32 value); | 26 | void WriteReg(u32 method, u32 value); |
| 27 | 27 | ||
| 28 | struct Regs { | 28 | struct Regs { |
| 29 | static constexpr size_t NUM_REGS = 0x258; | 29 | static constexpr std::size_t NUM_REGS = 0x258; |
| 30 | 30 | ||
| 31 | struct Surface { | 31 | struct Surface { |
| 32 | RenderTargetFormat format; | 32 | RenderTargetFormat format; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 329079ddd..8afd26fe9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() { | |||
| 248 | 248 | ||
| 249 | void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | 249 | void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { |
| 250 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. | 250 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. |
| 251 | auto& shader = state.shader_stages[static_cast<size_t>(stage)]; | 251 | auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 252 | auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)]; | 252 | auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)]; |
| 253 | 253 | ||
| 254 | auto& buffer = shader.const_buffers[bind_data.index]; | 254 | auto& buffer = shader.const_buffers[bind_data.index]; |
| 255 | 255 | ||
| @@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | |||
| 316 | std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { | 316 | std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { |
| 317 | std::vector<Texture::FullTextureInfo> textures; | 317 | std::vector<Texture::FullTextureInfo> textures; |
| 318 | 318 | ||
| 319 | auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; | 319 | auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 320 | auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; | 320 | auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; |
| 321 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | 321 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); |
| 322 | 322 | ||
| 323 | GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; | 323 | GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; |
| 324 | 324 | ||
| 325 | // Offset into the texture constbuffer where the texture info begins. | 325 | // Offset into the texture constbuffer where the texture info begins. |
| 326 | static constexpr size_t TextureInfoOffset = 0x20; | 326 | static constexpr std::size_t TextureInfoOffset = 0x20; |
| 327 | 327 | ||
| 328 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | 328 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; |
| 329 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | 329 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { |
| @@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | |||
| 360 | return textures; | 360 | return textures; |
| 361 | } | 361 | } |
| 362 | 362 | ||
| 363 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const { | 363 | Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, |
| 364 | auto& shader = state.shader_stages[static_cast<size_t>(stage)]; | 364 | std::size_t offset) const { |
| 365 | auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 365 | auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | 366 | auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; |
| 366 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | 367 | ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); |
| 367 | 368 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d3be900a4..b81b0723d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -34,17 +34,17 @@ public: | |||
| 34 | /// Register structure of the Maxwell3D engine. | 34 | /// Register structure of the Maxwell3D engine. |
| 35 | /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. | 35 | /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. |
| 36 | struct Regs { | 36 | struct Regs { |
| 37 | static constexpr size_t NUM_REGS = 0xE00; | 37 | static constexpr std::size_t NUM_REGS = 0xE00; |
| 38 | 38 | ||
| 39 | static constexpr size_t NumRenderTargets = 8; | 39 | static constexpr std::size_t NumRenderTargets = 8; |
| 40 | static constexpr size_t NumViewports = 16; | 40 | static constexpr std::size_t NumViewports = 16; |
| 41 | static constexpr size_t NumCBData = 16; | 41 | static constexpr std::size_t NumCBData = 16; |
| 42 | static constexpr size_t NumVertexArrays = 32; | 42 | static constexpr std::size_t NumVertexArrays = 32; |
| 43 | static constexpr size_t NumVertexAttributes = 32; | 43 | static constexpr std::size_t NumVertexAttributes = 32; |
| 44 | static constexpr size_t MaxShaderProgram = 6; | 44 | static constexpr std::size_t MaxShaderProgram = 6; |
| 45 | static constexpr size_t MaxShaderStage = 5; | 45 | static constexpr std::size_t MaxShaderStage = 5; |
| 46 | // Maximum number of const buffers per shader stage. | 46 | // Maximum number of const buffers per shader stage. |
| 47 | static constexpr size_t MaxConstBuffers = 18; | 47 | static constexpr std::size_t MaxConstBuffers = 18; |
| 48 | 48 | ||
| 49 | enum class QueryMode : u32 { | 49 | enum class QueryMode : u32 { |
| 50 | Write = 0, | 50 | Write = 0, |
| @@ -443,9 +443,9 @@ public: | |||
| 443 | } | 443 | } |
| 444 | }; | 444 | }; |
| 445 | 445 | ||
| 446 | bool IsShaderConfigEnabled(size_t index) const { | 446 | bool IsShaderConfigEnabled(std::size_t index) const { |
| 447 | // The VertexB is always enabled. | 447 | // The VertexB is always enabled. |
| 448 | if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { | 448 | if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { |
| 449 | return true; | 449 | return true; |
| 450 | } | 450 | } |
| 451 | return shader_config[index].enable != 0; | 451 | return shader_config[index].enable != 0; |
| @@ -571,7 +571,7 @@ public: | |||
| 571 | BitField<25, 3, u32> map_7; | 571 | BitField<25, 3, u32> map_7; |
| 572 | }; | 572 | }; |
| 573 | 573 | ||
| 574 | u32 GetMap(size_t index) const { | 574 | u32 GetMap(std::size_t index) const { |
| 575 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, | 575 | const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, |
| 576 | map_4, map_5, map_6, map_7}; | 576 | map_4, map_5, map_6, map_7}; |
| 577 | ASSERT(index < maps.size()); | 577 | ASSERT(index < maps.size()); |
| @@ -925,7 +925,7 @@ public: | |||
| 925 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; | 925 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; |
| 926 | 926 | ||
| 927 | /// Returns the texture information for a specific texture in a specific shader stage. | 927 | /// Returns the texture information for a specific texture in a specific shader stage. |
| 928 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; | 928 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |
| 929 | 929 | ||
| 930 | private: | 930 | private: |
| 931 | VideoCore::RasterizerInterface& rasterizer; | 931 | VideoCore::RasterizerInterface& rasterizer; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c24d33d5c..aa7481b8c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 50 | ASSERT(regs.dst_params.pos_y == 0); | 50 | ASSERT(regs.dst_params.pos_y == 0); |
| 51 | 51 | ||
| 52 | if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { | 52 | if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { |
| 53 | size_t copy_size = regs.x_count; | 53 | std::size_t copy_size = regs.x_count; |
| 54 | 54 | ||
| 55 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 55 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D |
| 56 | // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). | 56 | // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 7882f16e0..311ccb616 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -23,7 +23,7 @@ public: | |||
| 23 | void WriteReg(u32 method, u32 value); | 23 | void WriteReg(u32 method, u32 value); |
| 24 | 24 | ||
| 25 | struct Regs { | 25 | struct Regs { |
| 26 | static constexpr size_t NUM_REGS = 0x1D6; | 26 | static constexpr std::size_t NUM_REGS = 0x1D6; |
| 27 | 27 | ||
| 28 | struct Parameters { | 28 | struct Parameters { |
| 29 | union { | 29 | union { |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6e2397f2..7e1de0fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -20,10 +20,10 @@ namespace Tegra::Shader { | |||
| 20 | 20 | ||
| 21 | struct Register { | 21 | struct Register { |
| 22 | /// Number of registers | 22 | /// Number of registers |
| 23 | static constexpr size_t NumRegisters = 256; | 23 | static constexpr std::size_t NumRegisters = 256; |
| 24 | 24 | ||
| 25 | /// Register 255 is special cased to always be 0 | 25 | /// Register 255 is special cased to always be 0 |
| 26 | static constexpr size_t ZeroIndex = 255; | 26 | static constexpr std::size_t ZeroIndex = 255; |
| 27 | 27 | ||
| 28 | enum class Size : u64 { | 28 | enum class Size : u64 { |
| 29 | Byte = 0, | 29 | Byte = 0, |
| @@ -240,6 +240,41 @@ enum class FlowCondition : u64 { | |||
| 240 | Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? | 240 | Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? |
| 241 | }; | 241 | }; |
| 242 | 242 | ||
| 243 | enum class ControlCode : u64 { | ||
| 244 | F = 0, | ||
| 245 | LT = 1, | ||
| 246 | EQ = 2, | ||
| 247 | LE = 3, | ||
| 248 | GT = 4, | ||
| 249 | NE = 5, | ||
| 250 | GE = 6, | ||
| 251 | Num = 7, | ||
| 252 | Nan = 8, | ||
| 253 | LTU = 9, | ||
| 254 | EQU = 10, | ||
| 255 | LEU = 11, | ||
| 256 | GTU = 12, | ||
| 257 | NEU = 13, | ||
| 258 | GEU = 14, | ||
| 259 | // | ||
| 260 | OFF = 16, | ||
| 261 | LO = 17, | ||
| 262 | SFF = 18, | ||
| 263 | LS = 19, | ||
| 264 | HI = 20, | ||
| 265 | SFT = 21, | ||
| 266 | HS = 22, | ||
| 267 | OFT = 23, | ||
| 268 | CSM_TA = 24, | ||
| 269 | CSM_TR = 25, | ||
| 270 | CSM_MX = 26, | ||
| 271 | FCSM_TA = 27, | ||
| 272 | FCSM_TR = 28, | ||
| 273 | FCSM_MX = 29, | ||
| 274 | RLE = 30, | ||
| 275 | RGT = 31, | ||
| 276 | }; | ||
| 277 | |||
| 243 | enum class PredicateResultMode : u64 { | 278 | enum class PredicateResultMode : u64 { |
| 244 | None = 0x0, | 279 | None = 0x0, |
| 245 | NotZero = 0x3, | 280 | NotZero = 0x3, |
| @@ -271,6 +306,15 @@ enum class TextureProcessMode : u64 { | |||
| 271 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL | 306 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL |
| 272 | }; | 307 | }; |
| 273 | 308 | ||
| 309 | enum class TextureMiscMode : u64 { | ||
| 310 | DC, | ||
| 311 | AOFFI, // Uses Offset | ||
| 312 | NDV, | ||
| 313 | NODEP, | ||
| 314 | MZ, | ||
| 315 | PTP, | ||
| 316 | }; | ||
| 317 | |||
| 274 | enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; | 318 | enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; |
| 275 | enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; | 319 | enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; |
| 276 | 320 | ||
| @@ -546,6 +590,15 @@ union Instruction { | |||
| 546 | } pset; | 590 | } pset; |
| 547 | 591 | ||
| 548 | union { | 592 | union { |
| 593 | BitField<0, 3, u64> pred0; | ||
| 594 | BitField<3, 3, u64> pred3; | ||
| 595 | BitField<8, 5, ControlCode> cc; // flag in cc | ||
| 596 | BitField<39, 3, u64> pred39; | ||
| 597 | BitField<42, 1, u64> neg_pred39; | ||
| 598 | BitField<45, 4, PredOperation> op; // op with pred39 | ||
| 599 | } csetp; | ||
| 600 | |||
| 601 | union { | ||
| 549 | BitField<39, 3, u64> pred39; | 602 | BitField<39, 3, u64> pred39; |
| 550 | BitField<42, 1, u64> neg_pred; | 603 | BitField<42, 1, u64> neg_pred; |
| 551 | BitField<43, 1, u64> neg_a; | 604 | BitField<43, 1, u64> neg_a; |
| @@ -590,42 +643,127 @@ union Instruction { | |||
| 590 | BitField<28, 1, u64> array; | 643 | BitField<28, 1, u64> array; |
| 591 | BitField<29, 2, TextureType> texture_type; | 644 | BitField<29, 2, TextureType> texture_type; |
| 592 | BitField<31, 4, u64> component_mask; | 645 | BitField<31, 4, u64> component_mask; |
| 646 | BitField<49, 1, u64> nodep_flag; | ||
| 647 | BitField<50, 1, u64> dc_flag; | ||
| 648 | BitField<54, 1, u64> aoffi_flag; | ||
| 593 | BitField<55, 3, TextureProcessMode> process_mode; | 649 | BitField<55, 3, TextureProcessMode> process_mode; |
| 594 | 650 | ||
| 595 | bool IsComponentEnabled(size_t component) const { | 651 | bool IsComponentEnabled(std::size_t component) const { |
| 596 | return ((1ull << component) & component_mask) != 0; | 652 | return ((1ull << component) & component_mask) != 0; |
| 597 | } | 653 | } |
| 654 | |||
| 655 | TextureProcessMode GetTextureProcessMode() const { | ||
| 656 | return process_mode; | ||
| 657 | } | ||
| 658 | |||
| 659 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 660 | switch (mode) { | ||
| 661 | case TextureMiscMode::DC: | ||
| 662 | return dc_flag != 0; | ||
| 663 | case TextureMiscMode::NODEP: | ||
| 664 | return nodep_flag != 0; | ||
| 665 | case TextureMiscMode::AOFFI: | ||
| 666 | return aoffi_flag != 0; | ||
| 667 | default: | ||
| 668 | break; | ||
| 669 | } | ||
| 670 | return false; | ||
| 671 | } | ||
| 598 | } tex; | 672 | } tex; |
| 599 | 673 | ||
| 600 | union { | 674 | union { |
| 601 | BitField<22, 6, TextureQueryType> query_type; | 675 | BitField<22, 6, TextureQueryType> query_type; |
| 602 | BitField<31, 4, u64> component_mask; | 676 | BitField<31, 4, u64> component_mask; |
| 677 | BitField<49, 1, u64> nodep_flag; | ||
| 678 | |||
| 679 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 680 | switch (mode) { | ||
| 681 | case TextureMiscMode::NODEP: | ||
| 682 | return nodep_flag != 0; | ||
| 683 | default: | ||
| 684 | break; | ||
| 685 | } | ||
| 686 | return false; | ||
| 687 | } | ||
| 603 | } txq; | 688 | } txq; |
| 604 | 689 | ||
| 605 | union { | 690 | union { |
| 606 | BitField<28, 1, u64> array; | 691 | BitField<28, 1, u64> array; |
| 607 | BitField<29, 2, TextureType> texture_type; | 692 | BitField<29, 2, TextureType> texture_type; |
| 608 | BitField<31, 4, u64> component_mask; | 693 | BitField<31, 4, u64> component_mask; |
| 694 | BitField<35, 1, u64> ndv_flag; | ||
| 695 | BitField<49, 1, u64> nodep_flag; | ||
| 609 | 696 | ||
| 610 | bool IsComponentEnabled(size_t component) const { | 697 | bool IsComponentEnabled(std::size_t component) const { |
| 611 | return ((1ull << component) & component_mask) != 0; | 698 | return ((1ull << component) & component_mask) != 0; |
| 612 | } | 699 | } |
| 700 | |||
| 701 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 702 | switch (mode) { | ||
| 703 | case TextureMiscMode::NDV: | ||
| 704 | return (ndv_flag != 0); | ||
| 705 | case TextureMiscMode::NODEP: | ||
| 706 | return (nodep_flag != 0); | ||
| 707 | default: | ||
| 708 | break; | ||
| 709 | } | ||
| 710 | return false; | ||
| 711 | } | ||
| 613 | } tmml; | 712 | } tmml; |
| 614 | 713 | ||
| 615 | union { | 714 | union { |
| 616 | BitField<28, 1, u64> array; | 715 | BitField<28, 1, u64> array; |
| 617 | BitField<29, 2, TextureType> texture_type; | 716 | BitField<29, 2, TextureType> texture_type; |
| 717 | BitField<35, 1, u64> ndv_flag; | ||
| 718 | BitField<49, 1, u64> nodep_flag; | ||
| 719 | BitField<50, 1, u64> dc_flag; | ||
| 720 | BitField<54, 2, u64> info; | ||
| 618 | BitField<56, 2, u64> component; | 721 | BitField<56, 2, u64> component; |
| 722 | |||
| 723 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 724 | switch (mode) { | ||
| 725 | case TextureMiscMode::NDV: | ||
| 726 | return ndv_flag != 0; | ||
| 727 | case TextureMiscMode::NODEP: | ||
| 728 | return nodep_flag != 0; | ||
| 729 | case TextureMiscMode::DC: | ||
| 730 | return dc_flag != 0; | ||
| 731 | case TextureMiscMode::AOFFI: | ||
| 732 | return info == 1; | ||
| 733 | case TextureMiscMode::PTP: | ||
| 734 | return info == 2; | ||
| 735 | default: | ||
| 736 | break; | ||
| 737 | } | ||
| 738 | return false; | ||
| 739 | } | ||
| 619 | } tld4; | 740 | } tld4; |
| 620 | 741 | ||
| 621 | union { | 742 | union { |
| 743 | BitField<49, 1, u64> nodep_flag; | ||
| 744 | BitField<50, 1, u64> dc_flag; | ||
| 745 | BitField<51, 1, u64> aoffi_flag; | ||
| 622 | BitField<52, 2, u64> component; | 746 | BitField<52, 2, u64> component; |
| 747 | |||
| 748 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 749 | switch (mode) { | ||
| 750 | case TextureMiscMode::DC: | ||
| 751 | return dc_flag != 0; | ||
| 752 | case TextureMiscMode::NODEP: | ||
| 753 | return nodep_flag != 0; | ||
| 754 | case TextureMiscMode::AOFFI: | ||
| 755 | return aoffi_flag != 0; | ||
| 756 | default: | ||
| 757 | break; | ||
| 758 | } | ||
| 759 | return false; | ||
| 760 | } | ||
| 623 | } tld4s; | 761 | } tld4s; |
| 624 | 762 | ||
| 625 | union { | 763 | union { |
| 626 | BitField<0, 8, Register> gpr0; | 764 | BitField<0, 8, Register> gpr0; |
| 627 | BitField<28, 8, Register> gpr28; | 765 | BitField<28, 8, Register> gpr28; |
| 628 | BitField<49, 1, u64> nodep; | 766 | BitField<49, 1, u64> nodep_flag; |
| 629 | BitField<50, 3, u64> component_mask_selector; | 767 | BitField<50, 3, u64> component_mask_selector; |
| 630 | BitField<53, 4, u64> texture_info; | 768 | BitField<53, 4, u64> texture_info; |
| 631 | 769 | ||
| @@ -645,6 +783,37 @@ union Instruction { | |||
| 645 | UNREACHABLE(); | 783 | UNREACHABLE(); |
| 646 | } | 784 | } |
| 647 | 785 | ||
| 786 | TextureProcessMode GetTextureProcessMode() const { | ||
| 787 | switch (texture_info) { | ||
| 788 | case 0: | ||
| 789 | case 2: | ||
| 790 | case 6: | ||
| 791 | case 8: | ||
| 792 | case 9: | ||
| 793 | case 11: | ||
| 794 | return TextureProcessMode::LZ; | ||
| 795 | case 3: | ||
| 796 | case 5: | ||
| 797 | case 13: | ||
| 798 | return TextureProcessMode::LL; | ||
| 799 | default: | ||
| 800 | break; | ||
| 801 | } | ||
| 802 | return TextureProcessMode::None; | ||
| 803 | } | ||
| 804 | |||
| 805 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 806 | switch (mode) { | ||
| 807 | case TextureMiscMode::DC: | ||
| 808 | return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; | ||
| 809 | case TextureMiscMode::NODEP: | ||
| 810 | return nodep_flag != 0; | ||
| 811 | default: | ||
| 812 | break; | ||
| 813 | } | ||
| 814 | return false; | ||
| 815 | } | ||
| 816 | |||
| 648 | bool IsArrayTexture() const { | 817 | bool IsArrayTexture() const { |
| 649 | // TEXS only supports Texture2D arrays. | 818 | // TEXS only supports Texture2D arrays. |
| 650 | return texture_info >= 7 && texture_info <= 9; | 819 | return texture_info >= 7 && texture_info <= 9; |
| @@ -654,7 +823,7 @@ union Instruction { | |||
| 654 | return gpr28.Value() != Register::ZeroIndex; | 823 | return gpr28.Value() != Register::ZeroIndex; |
| 655 | } | 824 | } |
| 656 | 825 | ||
| 657 | bool IsComponentEnabled(size_t component) const { | 826 | bool IsComponentEnabled(std::size_t component) const { |
| 658 | static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ | 827 | static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ |
| 659 | {}, | 828 | {}, |
| 660 | {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, | 829 | {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, |
| @@ -662,7 +831,7 @@ union Instruction { | |||
| 662 | {0x7, 0xb, 0xd, 0xe, 0xf}, | 831 | {0x7, 0xb, 0xd, 0xe, 0xf}, |
| 663 | }}; | 832 | }}; |
| 664 | 833 | ||
| 665 | size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; | 834 | std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; |
| 666 | index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; | 835 | index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; |
| 667 | 836 | ||
| 668 | u32 mask = mask_lut[index][component_mask_selector]; | 837 | u32 mask = mask_lut[index][component_mask_selector]; |
| @@ -673,6 +842,7 @@ union Instruction { | |||
| 673 | } texs; | 842 | } texs; |
| 674 | 843 | ||
| 675 | union { | 844 | union { |
| 845 | BitField<49, 1, u64> nodep_flag; | ||
| 676 | BitField<53, 4, u64> texture_info; | 846 | BitField<53, 4, u64> texture_info; |
| 677 | 847 | ||
| 678 | TextureType GetTextureType() const { | 848 | TextureType GetTextureType() const { |
| @@ -693,6 +863,26 @@ union Instruction { | |||
| 693 | UNREACHABLE(); | 863 | UNREACHABLE(); |
| 694 | } | 864 | } |
| 695 | 865 | ||
| 866 | TextureProcessMode GetTextureProcessMode() const { | ||
| 867 | if (texture_info == 1 || texture_info == 5 || texture_info == 12) | ||
| 868 | return TextureProcessMode::LL; | ||
| 869 | return TextureProcessMode::LZ; | ||
| 870 | } | ||
| 871 | |||
| 872 | bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 873 | switch (mode) { | ||
| 874 | case TextureMiscMode::AOFFI: | ||
| 875 | return texture_info == 12 || texture_info == 4; | ||
| 876 | case TextureMiscMode::MZ: | ||
| 877 | return texture_info == 5; | ||
| 878 | case TextureMiscMode::NODEP: | ||
| 879 | return nodep_flag != 0; | ||
| 880 | default: | ||
| 881 | break; | ||
| 882 | } | ||
| 883 | return false; | ||
| 884 | } | ||
| 885 | |||
| 696 | bool IsArrayTexture() const { | 886 | bool IsArrayTexture() const { |
| 697 | // TEXS only supports Texture2D arrays. | 887 | // TEXS only supports Texture2D arrays. |
| 698 | return texture_info == 8; | 888 | return texture_info == 8; |
| @@ -735,6 +925,7 @@ union Instruction { | |||
| 735 | BitField<36, 5, u64> index; | 925 | BitField<36, 5, u64> index; |
| 736 | } cbuf36; | 926 | } cbuf36; |
| 737 | 927 | ||
| 928 | BitField<47, 1, u64> generates_cc; | ||
| 738 | BitField<61, 1, u64> is_b_imm; | 929 | BitField<61, 1, u64> is_b_imm; |
| 739 | BitField<60, 1, u64> is_b_gpr; | 930 | BitField<60, 1, u64> is_b_gpr; |
| 740 | BitField<59, 1, u64> is_c_gpr; | 931 | BitField<59, 1, u64> is_c_gpr; |
| @@ -859,6 +1050,7 @@ public: | |||
| 859 | ISET_IMM, | 1050 | ISET_IMM, |
| 860 | PSETP, | 1051 | PSETP, |
| 861 | PSET, | 1052 | PSET, |
| 1053 | CSETP, | ||
| 862 | XMAD_IMM, | 1054 | XMAD_IMM, |
| 863 | XMAD_CR, | 1055 | XMAD_CR, |
| 864 | XMAD_RC, | 1056 | XMAD_RC, |
| @@ -947,7 +1139,7 @@ public: | |||
| 947 | private: | 1139 | private: |
| 948 | struct Detail { | 1140 | struct Detail { |
| 949 | private: | 1141 | private: |
| 950 | static constexpr size_t opcode_bitsize = 16; | 1142 | static constexpr std::size_t opcode_bitsize = 16; |
| 951 | 1143 | ||
| 952 | /** | 1144 | /** |
| 953 | * Generates the mask and the expected value after masking from a given bitstring. | 1145 | * Generates the mask and the expected value after masking from a given bitstring. |
| @@ -956,8 +1148,8 @@ private: | |||
| 956 | */ | 1148 | */ |
| 957 | static auto GetMaskAndExpect(const char* const bitstring) { | 1149 | static auto GetMaskAndExpect(const char* const bitstring) { |
| 958 | u16 mask = 0, expect = 0; | 1150 | u16 mask = 0, expect = 0; |
| 959 | for (size_t i = 0; i < opcode_bitsize; i++) { | 1151 | for (std::size_t i = 0; i < opcode_bitsize; i++) { |
| 960 | const size_t bit_position = opcode_bitsize - i - 1; | 1152 | const std::size_t bit_position = opcode_bitsize - i - 1; |
| 961 | switch (bitstring[i]) { | 1153 | switch (bitstring[i]) { |
| 962 | case '0': | 1154 | case '0': |
| 963 | mask |= 1 << bit_position; | 1155 | mask |= 1 << bit_position; |
| @@ -1095,6 +1287,7 @@ private: | |||
| 1095 | INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), | 1287 | INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), |
| 1096 | INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), | 1288 | INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), |
| 1097 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), | 1289 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), |
| 1290 | INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), | ||
| 1098 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), | 1291 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), |
| 1099 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), | 1292 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), |
| 1100 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), | 1293 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..a885ee3cf --- /dev/null +++ b/src/video_core/engines/shader_header.h | |||
| @@ -0,0 +1,103 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Tegra::Shader { | ||
| 12 | |||
| 13 | enum class OutputTopology : u32 { | ||
| 14 | PointList = 1, | ||
| 15 | LineStrip = 6, | ||
| 16 | TriangleStrip = 7, | ||
| 17 | }; | ||
| 18 | |||
| 19 | // Documentation in: | ||
| 20 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture | ||
| 21 | struct Header { | ||
| 22 | union { | ||
| 23 | BitField<0, 5, u32> sph_type; | ||
| 24 | BitField<5, 5, u32> version; | ||
| 25 | BitField<10, 4, u32> shader_type; | ||
| 26 | BitField<14, 1, u32> mrt_enable; | ||
| 27 | BitField<15, 1, u32> kills_pixels; | ||
| 28 | BitField<16, 1, u32> does_global_store; | ||
| 29 | BitField<17, 4, u32> sass_version; | ||
| 30 | BitField<21, 5, u32> reserved; | ||
| 31 | BitField<26, 1, u32> does_load_or_store; | ||
| 32 | BitField<27, 1, u32> does_fp64; | ||
| 33 | BitField<28, 4, u32> stream_out_mask; | ||
| 34 | } common0; | ||
| 35 | |||
| 36 | union { | ||
| 37 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 38 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 39 | } common1; | ||
| 40 | |||
| 41 | union { | ||
| 42 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 43 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 44 | } common2; | ||
| 45 | |||
| 46 | union { | ||
| 47 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 48 | BitField<24, 4, OutputTopology> output_topology; | ||
| 49 | BitField<28, 4, u32> reserved; | ||
| 50 | } common3; | ||
| 51 | |||
| 52 | union { | ||
| 53 | BitField<0, 12, u32> max_output_vertices; | ||
| 54 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 55 | BitField<24, 4, u32> reserved; | ||
| 56 | BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 57 | } common4; | ||
| 58 | |||
| 59 | union { | ||
| 60 | struct { | ||
| 61 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA | ||
| 62 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | ||
| 63 | INSERT_PADDING_BYTES(16); // ImapGenericVector[32] | ||
| 64 | INSERT_PADDING_BYTES(2); // ImapColor | ||
| 65 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | ||
| 66 | INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] | ||
| 67 | INSERT_PADDING_BYTES(1); // ImapReserved | ||
| 68 | INSERT_PADDING_BYTES(3); // OmapSystemValuesA | ||
| 69 | INSERT_PADDING_BYTES(1); // OmapSystemValuesB | ||
| 70 | INSERT_PADDING_BYTES(16); // OmapGenericVector[32] | ||
| 71 | INSERT_PADDING_BYTES(2); // OmapColor | ||
| 72 | INSERT_PADDING_BYTES(2); // OmapSystemValuesC | ||
| 73 | INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] | ||
| 74 | INSERT_PADDING_BYTES(1); // OmapReserved | ||
| 75 | } vtg; | ||
| 76 | |||
| 77 | struct { | ||
| 78 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA | ||
| 79 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | ||
| 80 | INSERT_PADDING_BYTES(32); // ImapGenericVector[32] | ||
| 81 | INSERT_PADDING_BYTES(2); // ImapColor | ||
| 82 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | ||
| 83 | INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] | ||
| 84 | INSERT_PADDING_BYTES(2); // ImapReserved | ||
| 85 | struct { | ||
| 86 | u32 target; | ||
| 87 | union { | ||
| 88 | BitField<0, 1, u32> sample_mask; | ||
| 89 | BitField<1, 1, u32> depth; | ||
| 90 | BitField<2, 30, u32> reserved; | ||
| 91 | }; | ||
| 92 | } omap; | ||
| 93 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||
| 94 | const u32 bit = render_target * 4 + component; | ||
| 95 | return omap.target & (1 << bit); | ||
| 96 | } | ||
| 97 | } ps; | ||
| 98 | }; | ||
| 99 | }; | ||
| 100 | |||
| 101 | static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); | ||
| 102 | |||
| 103 | } // namespace Tegra::Shader | ||
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 7329ca766..5cc1e19ca 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 { | |||
| 42 | R32_UINT = 0xE4, | 42 | R32_UINT = 0xE4, |
| 43 | R32_FLOAT = 0xE5, | 43 | R32_FLOAT = 0xE5, |
| 44 | B5G6R5_UNORM = 0xE8, | 44 | B5G6R5_UNORM = 0xE8, |
| 45 | BGR5A1_UNORM = 0xE9, | ||
| 45 | RG8_UNORM = 0xEA, | 46 | RG8_UNORM = 0xEA, |
| 46 | RG8_SNORM = 0xEB, | 47 | RG8_SNORM = 0xEB, |
| 47 | R16_UNORM = 0xEE, | 48 | R16_UNORM = 0xEE, |
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 7d836b816..cee0baaf3 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h | |||
| @@ -152,7 +152,7 @@ private: | |||
| 152 | boost::optional<u32> | 152 | boost::optional<u32> |
| 153 | delayed_pc; ///< Program counter to execute at after the delay slot is executed. | 153 | delayed_pc; ///< Program counter to execute at after the delay slot is executed. |
| 154 | 154 | ||
| 155 | static constexpr size_t NumMacroRegisters = 8; | 155 | static constexpr std::size_t NumMacroRegisters = 8; |
| 156 | 156 | ||
| 157 | /// General purpose macro registers. | 157 | /// General purpose macro registers. |
| 158 | std::array<u32, NumMacroRegisters> registers = {}; | 158 | std::array<u32, NumMacroRegisters> registers = {}; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0b5d18bcb..578aca789 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -12,10 +12,10 @@ | |||
| 12 | 12 | ||
| 13 | namespace OpenGL { | 13 | namespace OpenGL { |
| 14 | 14 | ||
| 15 | OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} | 15 | OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} |
| 16 | 16 | ||
| 17 | GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, | 17 | GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, |
| 18 | bool cache) { | 18 | std::size_t alignment, bool cache) { |
| 19 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | 19 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
| 20 | const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; | 20 | const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; |
| 21 | 21 | ||
| @@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz | |||
| 53 | return uploaded_offset; | 53 | return uploaded_offset; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { | 56 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, |
| 57 | std::size_t alignment) { | ||
| 57 | AlignBuffer(alignment); | 58 | AlignBuffer(alignment); |
| 58 | std::memcpy(buffer_ptr, raw_pointer, size); | 59 | std::memcpy(buffer_ptr, raw_pointer, size); |
| 59 | GLintptr uploaded_offset = buffer_offset; | 60 | GLintptr uploaded_offset = buffer_offset; |
| @@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, | |||
| 63 | return uploaded_offset; | 64 | return uploaded_offset; |
| 64 | } | 65 | } |
| 65 | 66 | ||
| 66 | void OGLBufferCache::Map(size_t max_size) { | 67 | void OGLBufferCache::Map(std::size_t max_size) { |
| 67 | bool invalidate; | 68 | bool invalidate; |
| 68 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 69 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = |
| 69 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | 70 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); |
| @@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const { | |||
| 81 | return stream_buffer.GetHandle(); | 82 | return stream_buffer.GetHandle(); |
| 82 | } | 83 | } |
| 83 | 84 | ||
| 84 | void OGLBufferCache::AlignBuffer(size_t alignment) { | 85 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { |
| 85 | // Align the offset, not the mapped pointer | 86 | // Align the offset, not the mapped pointer |
| 86 | GLintptr offset_aligned = | 87 | GLintptr offset_aligned = |
| 87 | static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); | 88 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); |
| 88 | buffer_ptr += offset_aligned - buffer_offset; | 89 | buffer_ptr += offset_aligned - buffer_offset; |
| 89 | buffer_offset = offset_aligned; | 90 | buffer_offset = offset_aligned; |
| 90 | } | 91 | } |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6da862902..6c18461f4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -19,32 +19,32 @@ struct CachedBufferEntry final { | |||
| 19 | return addr; | 19 | return addr; |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | size_t GetSizeInBytes() const { | 22 | std::size_t GetSizeInBytes() const { |
| 23 | return size; | 23 | return size; |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | VAddr addr; | 26 | VAddr addr; |
| 27 | size_t size; | 27 | std::size_t size; |
| 28 | GLintptr offset; | 28 | GLintptr offset; |
| 29 | size_t alignment; | 29 | std::size_t alignment; |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 32 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { |
| 33 | public: | 33 | public: |
| 34 | explicit OGLBufferCache(size_t size); | 34 | explicit OGLBufferCache(std::size_t size); |
| 35 | 35 | ||
| 36 | GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, | 36 | GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
| 37 | bool cache = true); | 37 | bool cache = true); |
| 38 | 38 | ||
| 39 | GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); | 39 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); |
| 40 | 40 | ||
| 41 | void Map(size_t max_size); | 41 | void Map(std::size_t max_size); |
| 42 | void Unmap(); | 42 | void Unmap(); |
| 43 | 43 | ||
| 44 | GLuint GetHandle() const; | 44 | GLuint GetHandle() const; |
| 45 | 45 | ||
| 46 | protected: | 46 | protected: |
| 47 | void AlignBuffer(size_t alignment); | 47 | void AlignBuffer(std::size_t alignment); |
| 48 | 48 | ||
| 49 | private: | 49 | private: |
| 50 | OGLStreamBuffer stream_buffer; | 50 | OGLStreamBuffer stream_buffer; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e1bba67d..274c2dbcf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, | |||
| 46 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) | 46 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) |
| 47 | : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { | 47 | : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { |
| 48 | // Create sampler objects | 48 | // Create sampler objects |
| 49 | for (size_t i = 0; i < texture_samplers.size(); ++i) { | 49 | for (std::size_t i = 0; i < texture_samplers.size(); ++i) { |
| 50 | texture_samplers[i].Create(); | 50 | texture_samplers[i].Create(); |
| 51 | state.texture_units[i].sampler = texture_samplers[i].sampler.handle; | 51 | state.texture_units[i].sampler = texture_samplers[i].sampler.handle; |
| 52 | } | 52 | } |
| @@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 181 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | 181 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; |
| 182 | u32 current_texture_bindpoint = 0; | 182 | u32 current_texture_bindpoint = 0; |
| 183 | 183 | ||
| 184 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 184 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 185 | const auto& shader_config = gpu.regs.shader_config[index]; | 185 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 186 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 186 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 187 | 187 | ||
| @@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 190 | continue; | 190 | continue; |
| 191 | } | 191 | } |
| 192 | 192 | ||
| 193 | const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 | 193 | const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 |
| 194 | 194 | ||
| 195 | GLShader::MaxwellUniformData ubo{}; | 195 | GLShader::MaxwellUniformData ubo{}; |
| 196 | ubo.SetFromRegs(gpu.state.shader_stages[stage]); | 196 | ubo.SetFromRegs(gpu.state.shader_stages[stage]); |
| 197 | const GLintptr offset = buffer_cache.UploadHostMemory( | 197 | const GLintptr offset = buffer_cache.UploadHostMemory( |
| 198 | &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment)); | 198 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); |
| 199 | 199 | ||
| 200 | // Bind the buffer | 200 | // Bind the buffer |
| 201 | glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); | 201 | glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); |
| @@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 238 | shader_program_manager->UseTrivialGeometryShader(); | 238 | shader_program_manager->UseTrivialGeometryShader(); |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 241 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| 242 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 242 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 243 | 243 | ||
| 244 | size_t size = 0; | 244 | std::size_t size = 0; |
| 245 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 245 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 246 | if (!regs.vertex_array[index].IsEnabled()) | 246 | if (!regs.vertex_array[index].IsEnabled()) |
| 247 | continue; | 247 | continue; |
| @@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | |||
| 299 | 299 | ||
| 300 | void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, | 300 | void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, |
| 301 | bool preserve_contents, | 301 | bool preserve_contents, |
| 302 | boost::optional<size_t> single_color_target) { | 302 | boost::optional<std::size_t> single_color_target) { |
| 303 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | 303 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); |
| 304 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 304 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 305 | 305 | ||
| @@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep | |||
| 330 | } else { | 330 | } else { |
| 331 | // Multiple color attachments are enabled | 331 | // Multiple color attachments are enabled |
| 332 | std::array<GLenum, Maxwell::NumRenderTargets> buffers; | 332 | std::array<GLenum, Maxwell::NumRenderTargets> buffers; |
| 333 | for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | 333 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 334 | Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); | 334 | Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); |
| 335 | buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); | 335 | buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); |
| 336 | glFramebufferTexture2D( | 336 | glFramebufferTexture2D( |
| @@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep | |||
| 342 | } | 342 | } |
| 343 | } else { | 343 | } else { |
| 344 | // No color attachments are enabled - zero out all of them | 344 | // No color attachments are enabled - zero out all of them |
| 345 | for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | 345 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 346 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, | 346 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, |
| 347 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, | 347 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, |
| 348 | 0, 0); | 348 | 0, 0); |
| @@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 462 | state.draw.vertex_buffer = buffer_cache.GetHandle(); | 462 | state.draw.vertex_buffer = buffer_cache.GetHandle(); |
| 463 | state.Apply(); | 463 | state.Apply(); |
| 464 | 464 | ||
| 465 | size_t buffer_size = CalculateVertexArraysSize(); | 465 | std::size_t buffer_size = CalculateVertexArraysSize(); |
| 466 | 466 | ||
| 467 | if (is_indexed) { | 467 | if (is_indexed) { |
| 468 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; | 468 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; |
| 469 | } | 469 | } |
| 470 | 470 | ||
| 471 | // Uniform space for the 5 shader stages | 471 | // Uniform space for the 5 shader stages |
| 472 | buffer_size = | 472 | buffer_size = |
| 473 | Common::AlignUp<size_t>(buffer_size, 4) + | 473 | Common::AlignUp<std::size_t>(buffer_size, 4) + |
| 474 | (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; | 474 | (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; |
| 475 | 475 | ||
| 476 | // Add space for at least 18 constant buffers | 476 | // Add space for at least 18 constant buffers |
| @@ -644,7 +644,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 644 | MICROPROFILE_SCOPE(OpenGL_UBO); | 644 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 645 | const auto& gpu = Core::System::GetInstance().GPU(); | 645 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 646 | const auto& maxwell3d = gpu.Maxwell3D(); | 646 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 647 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; | 647 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; |
| 648 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; | 648 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; |
| 649 | 649 | ||
| 650 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; | 650 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; |
| @@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 667 | continue; | 667 | continue; |
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | size_t size = 0; | 670 | std::size_t size = 0; |
| 671 | 671 | ||
| 672 | if (used_buffer.IsIndirect()) { | 672 | if (used_buffer.IsIndirect()) { |
| 673 | // Buffer is accessed indirectly, so upload the entire thing | 673 | // Buffer is accessed indirectly, so upload the entire thing |
| @@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 689 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | 689 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); |
| 690 | 690 | ||
| 691 | GLintptr const_buffer_offset = buffer_cache.UploadMemory( | 691 | GLintptr const_buffer_offset = buffer_cache.UploadMemory( |
| 692 | buffer.address, size, static_cast<size_t>(uniform_buffer_alignment)); | 692 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); |
| 693 | 693 | ||
| 694 | // Now configure the bindpoint of the buffer inside the shader | 694 | // Now configure the bindpoint of the buffer inside the shader |
| 695 | glUniformBlockBinding(shader->GetProgramHandle(), | 695 | glUniformBlockBinding(shader->GetProgramHandle(), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 163412882..bf9560bdc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -73,7 +73,7 @@ public: | |||
| 73 | }; | 73 | }; |
| 74 | 74 | ||
| 75 | /// Maximum supported size that a constbuffer can have in bytes. | 75 | /// Maximum supported size that a constbuffer can have in bytes. |
| 76 | static constexpr size_t MaxConstbufferSize = 0x10000; | 76 | static constexpr std::size_t MaxConstbufferSize = 0x10000; |
| 77 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | 77 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, |
| 78 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | 78 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); |
| 79 | 79 | ||
| @@ -106,7 +106,7 @@ private: | |||
| 106 | */ | 106 | */ |
| 107 | void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, | 107 | void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, |
| 108 | bool preserve_contents = true, | 108 | bool preserve_contents = true, |
| 109 | boost::optional<size_t> single_color_target = {}); | 109 | boost::optional<std::size_t> single_color_target = {}); |
| 110 | 110 | ||
| 111 | /* | 111 | /* |
| 112 | * Configures the current constbuffers to use for the draw command. | 112 | * Configures the current constbuffers to use for the draw command. |
| @@ -180,12 +180,12 @@ private: | |||
| 180 | 180 | ||
| 181 | std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; | 181 | std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; |
| 182 | 182 | ||
| 183 | static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 183 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 184 | OGLBufferCache buffer_cache; | 184 | OGLBufferCache buffer_cache; |
| 185 | OGLFramebuffer framebuffer; | 185 | OGLFramebuffer framebuffer; |
| 186 | GLint uniform_buffer_alignment; | 186 | GLint uniform_buffer_alignment; |
| 187 | 187 | ||
| 188 | size_t CalculateVertexArraysSize() const; | 188 | std::size_t CalculateVertexArraysSize() const; |
| 189 | 189 | ||
| 190 | void SetupVertexArrays(); | 190 | void SetupVertexArrays(); |
| 191 | 191 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 32001e44b..86682d7cb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | |||
| 75 | return params; | 75 | return params; |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { | 78 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { |
| 79 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; | 79 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; |
| 80 | SurfaceParams params{}; | 80 | SurfaceParams params{}; |
| 81 | params.addr = TryGetCpuAddr(config.Address()); | 81 | params.addr = TryGetCpuAddr(config.Address()); |
| @@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form | |||
| 167 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | 167 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S |
| 168 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI | 168 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI |
| 169 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI | 169 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI |
| 170 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||
| 170 | 171 | ||
| 171 | // Depth formats | 172 | // Depth formats |
| 172 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | 173 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F |
| @@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) { | |||
| 203 | } | 204 | } |
| 204 | 205 | ||
| 205 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | 206 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { |
| 206 | ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); | 207 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); |
| 207 | auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; | 208 | auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; |
| 208 | ASSERT(component_type == format.component_type); | 209 | ASSERT(component_type == format.component_type); |
| 209 | 210 | ||
| @@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | |||
| 213 | static bool IsPixelFormatASTC(PixelFormat format) { | 214 | static bool IsPixelFormatASTC(PixelFormat format) { |
| 214 | switch (format) { | 215 | switch (format) { |
| 215 | case PixelFormat::ASTC_2D_4X4: | 216 | case PixelFormat::ASTC_2D_4X4: |
| 217 | case PixelFormat::ASTC_2D_8X8: | ||
| 216 | return true; | 218 | return true; |
| 217 | default: | 219 | default: |
| 218 | return false; | 220 | return false; |
| @@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | |||
| 223 | switch (format) { | 225 | switch (format) { |
| 224 | case PixelFormat::ASTC_2D_4X4: | 226 | case PixelFormat::ASTC_2D_4X4: |
| 225 | return {4, 4}; | 227 | return {4, 4}; |
| 228 | case PixelFormat::ASTC_2D_8X8: | ||
| 229 | return {8, 8}; | ||
| 226 | default: | 230 | default: |
| 227 | LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); | 231 | LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); |
| 228 | UNREACHABLE(); | 232 | UNREACHABLE(); |
| @@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) { | |||
| 256 | } | 260 | } |
| 257 | 261 | ||
| 258 | template <bool morton_to_gl, PixelFormat format> | 262 | template <bool morton_to_gl, PixelFormat format> |
| 259 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size, | 263 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size, |
| 260 | VAddr addr) { | 264 | VAddr addr) { |
| 261 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 265 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 262 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 266 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| @@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t | |||
| 267 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | 271 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
| 268 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( | 272 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( |
| 269 | addr, tile_size, bytes_per_pixel, stride, height, block_height); | 273 | addr, tile_size, bytes_per_pixel, stride, height, block_height); |
| 270 | const size_t size_to_copy{std::min(gl_buffer_size, data.size())}; | 274 | const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; |
| 271 | memcpy(gl_buffer, data.data(), size_to_copy); | 275 | memcpy(gl_buffer, data.data(), size_to_copy); |
| 272 | } else { | 276 | } else { |
| 273 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | 277 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should |
| @@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t | |||
| 278 | } | 282 | } |
| 279 | } | 283 | } |
| 280 | 284 | ||
| 281 | static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | 285 | static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), |
| 282 | SurfaceParams::MaxPixelFormat> | 286 | SurfaceParams::MaxPixelFormat> |
| 283 | morton_to_gl_fns = { | 287 | morton_to_gl_fns = { |
| 284 | // clang-format off | 288 | // clang-format off |
| @@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | |||
| 327 | MortonCopy<true, PixelFormat::RG8S>, | 331 | MortonCopy<true, PixelFormat::RG8S>, |
| 328 | MortonCopy<true, PixelFormat::RG32UI>, | 332 | MortonCopy<true, PixelFormat::RG32UI>, |
| 329 | MortonCopy<true, PixelFormat::R32UI>, | 333 | MortonCopy<true, PixelFormat::R32UI>, |
| 334 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | ||
| 330 | MortonCopy<true, PixelFormat::Z32F>, | 335 | MortonCopy<true, PixelFormat::Z32F>, |
| 331 | MortonCopy<true, PixelFormat::Z16>, | 336 | MortonCopy<true, PixelFormat::Z16>, |
| 332 | MortonCopy<true, PixelFormat::Z24S8>, | 337 | MortonCopy<true, PixelFormat::Z24S8>, |
| @@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | |||
| 335 | // clang-format on | 340 | // clang-format on |
| 336 | }; | 341 | }; |
| 337 | 342 | ||
| 338 | static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | 343 | static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), |
| 339 | SurfaceParams::MaxPixelFormat> | 344 | SurfaceParams::MaxPixelFormat> |
| 340 | gl_to_morton_fns = { | 345 | gl_to_morton_fns = { |
| 341 | // clang-format off | 346 | // clang-format off |
| @@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | |||
| 386 | MortonCopy<false, PixelFormat::RG8S>, | 391 | MortonCopy<false, PixelFormat::RG8S>, |
| 387 | MortonCopy<false, PixelFormat::RG32UI>, | 392 | MortonCopy<false, PixelFormat::RG32UI>, |
| 388 | MortonCopy<false, PixelFormat::R32UI>, | 393 | MortonCopy<false, PixelFormat::R32UI>, |
| 394 | nullptr, | ||
| 389 | MortonCopy<false, PixelFormat::Z32F>, | 395 | MortonCopy<false, PixelFormat::Z32F>, |
| 390 | MortonCopy<false, PixelFormat::Z16>, | 396 | MortonCopy<false, PixelFormat::Z16>, |
| 391 | MortonCopy<false, PixelFormat::Z24S8>, | 397 | MortonCopy<false, PixelFormat::Z24S8>, |
| @@ -513,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | |||
| 513 | S8Z24 input_pixel{}; | 519 | S8Z24 input_pixel{}; |
| 514 | Z24S8 output_pixel{}; | 520 | Z24S8 output_pixel{}; |
| 515 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; | 521 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; |
| 516 | for (size_t y = 0; y < height; ++y) { | 522 | for (std::size_t y = 0; y < height; ++y) { |
| 517 | for (size_t x = 0; x < width; ++x) { | 523 | for (std::size_t x = 0; x < width; ++x) { |
| 518 | const size_t offset{bpp * (y * width + x)}; | 524 | const std::size_t offset{bpp * (y * width + x)}; |
| 519 | std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); | 525 | std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); |
| 520 | output_pixel.s8.Assign(input_pixel.s8); | 526 | output_pixel.s8.Assign(input_pixel.s8); |
| 521 | output_pixel.z24.Assign(input_pixel.z24); | 527 | output_pixel.z24.Assign(input_pixel.z24); |
| @@ -526,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | |||
| 526 | 532 | ||
| 527 | static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { | 533 | static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { |
| 528 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; | 534 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; |
| 529 | for (size_t y = 0; y < height; ++y) { | 535 | for (std::size_t y = 0; y < height; ++y) { |
| 530 | for (size_t x = 0; x < width; ++x) { | 536 | for (std::size_t x = 0; x < width; ++x) { |
| 531 | const size_t offset{bpp * (y * width + x)}; | 537 | const std::size_t offset{bpp * (y * width + x)}; |
| 532 | const u8 temp{data[offset]}; | 538 | const u8 temp{data[offset]}; |
| 533 | data[offset] = data[offset + 1]; | 539 | data[offset] = data[offset + 1]; |
| 534 | data[offset + 1] = temp; | 540 | data[offset + 1] = temp; |
| @@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { | |||
| 544 | static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | 550 | static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, |
| 545 | u32 width, u32 height) { | 551 | u32 width, u32 height) { |
| 546 | switch (pixel_format) { | 552 | switch (pixel_format) { |
| 547 | case PixelFormat::ASTC_2D_4X4: { | 553 | case PixelFormat::ASTC_2D_4X4: |
| 554 | case PixelFormat::ASTC_2D_8X8: { | ||
| 548 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | 555 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. |
| 549 | u32 block_width{}; | 556 | u32 block_width{}; |
| 550 | u32 block_height{}; | 557 | u32 block_height{}; |
| @@ -591,13 +598,13 @@ void CachedSurface::LoadGLBuffer() { | |||
| 591 | UNREACHABLE(); | 598 | UNREACHABLE(); |
| 592 | } | 599 | } |
| 593 | 600 | ||
| 594 | gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); | 601 | gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size); |
| 595 | morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( | 602 | morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( |
| 596 | params.width, params.block_height, params.height, gl_buffer.data(), copy_size, | 603 | params.width, params.block_height, params.height, gl_buffer.data(), copy_size, |
| 597 | params.addr); | 604 | params.addr); |
| 598 | } else { | 605 | } else { |
| 599 | const u8* const texture_src_data_end{texture_src_data + | 606 | const u8* const texture_src_data_end{texture_src_data + |
| 600 | (static_cast<size_t>(params.depth) * copy_size)}; | 607 | (static_cast<std::size_t>(params.depth) * copy_size)}; |
| 601 | gl_buffer.assign(texture_src_data, texture_src_data_end); | 608 | gl_buffer.assign(texture_src_data, texture_src_data_end); |
| 602 | } | 609 | } |
| 603 | 610 | ||
| @@ -616,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle | |||
| 616 | 623 | ||
| 617 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | 624 | MICROPROFILE_SCOPE(OpenGL_TextureUL); |
| 618 | 625 | ||
| 619 | ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height * | 626 | ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height * |
| 620 | GetGLBytesPerPixel(params.pixel_format) * params.depth); | 627 | GetGLBytesPerPixel(params.pixel_format) * params.depth); |
| 621 | 628 | ||
| 622 | const auto& rect{params.GetRect()}; | 629 | const auto& rect{params.GetRect()}; |
| @@ -624,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle | |||
| 624 | // Load data from memory to the surface | 631 | // Load data from memory to the surface |
| 625 | const GLint x0 = static_cast<GLint>(rect.left); | 632 | const GLint x0 = static_cast<GLint>(rect.left); |
| 626 | const GLint y0 = static_cast<GLint>(rect.bottom); | 633 | const GLint y0 = static_cast<GLint>(rect.bottom); |
| 627 | const size_t buffer_offset = | 634 | const std::size_t buffer_offset = |
| 628 | static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * | 635 | static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width + |
| 636 | static_cast<std::size_t>(x0)) * | ||
| 629 | GetGLBytesPerPixel(params.pixel_format); | 637 | GetGLBytesPerPixel(params.pixel_format); |
| 630 | 638 | ||
| 631 | const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); | 639 | const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); |
| @@ -727,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { | |||
| 727 | return GetSurface(depth_params, preserve_contents); | 735 | return GetSurface(depth_params, preserve_contents); |
| 728 | } | 736 | } |
| 729 | 737 | ||
| 730 | Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { | 738 | Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { |
| 731 | const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; | 739 | const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; |
| 732 | 740 | ||
| 733 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 741 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| @@ -825,7 +833,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | |||
| 825 | auto source_format = GetFormatTuple(params.pixel_format, params.component_type); | 833 | auto source_format = GetFormatTuple(params.pixel_format, params.component_type); |
| 826 | auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); | 834 | auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); |
| 827 | 835 | ||
| 828 | size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); | 836 | std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); |
| 829 | 837 | ||
| 830 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); | 838 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); |
| 831 | glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); | 839 | glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); |
| @@ -849,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | |||
| 849 | LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " | 857 | LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " |
| 850 | "reinterpretation but the texture is tiled."); | 858 | "reinterpretation but the texture is tiled."); |
| 851 | } | 859 | } |
| 852 | size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | 860 | std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); |
| 853 | std::vector<u8> data(remaining_size); | 861 | std::vector<u8> data(remaining_size); |
| 854 | Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); | 862 | Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); |
| 855 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, | 863 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 57ea8593b..d7a4bc37f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -70,19 +70,20 @@ struct SurfaceParams { | |||
| 70 | RG8S = 42, | 70 | RG8S = 42, |
| 71 | RG32UI = 43, | 71 | RG32UI = 43, |
| 72 | R32UI = 44, | 72 | R32UI = 44, |
| 73 | ASTC_2D_8X8 = 45, | ||
| 73 | 74 | ||
| 74 | MaxColorFormat, | 75 | MaxColorFormat, |
| 75 | 76 | ||
| 76 | // Depth formats | 77 | // Depth formats |
| 77 | Z32F = 45, | 78 | Z32F = 46, |
| 78 | Z16 = 46, | 79 | Z16 = 47, |
| 79 | 80 | ||
| 80 | MaxDepthFormat, | 81 | MaxDepthFormat, |
| 81 | 82 | ||
| 82 | // DepthStencil formats | 83 | // DepthStencil formats |
| 83 | Z24S8 = 47, | 84 | Z24S8 = 48, |
| 84 | S8Z24 = 48, | 85 | S8Z24 = 49, |
| 85 | Z32FS8 = 49, | 86 | Z32FS8 = 50, |
| 86 | 87 | ||
| 87 | MaxDepthStencilFormat, | 88 | MaxDepthStencilFormat, |
| 88 | 89 | ||
| @@ -90,7 +91,7 @@ struct SurfaceParams { | |||
| 90 | Invalid = 255, | 91 | Invalid = 255, |
| 91 | }; | 92 | }; |
| 92 | 93 | ||
| 93 | static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); | 94 | static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); |
| 94 | 95 | ||
| 95 | enum class ComponentType { | 96 | enum class ComponentType { |
| 96 | Invalid = 0, | 97 | Invalid = 0, |
| @@ -192,6 +193,7 @@ struct SurfaceParams { | |||
| 192 | 1, // RG8S | 193 | 1, // RG8S |
| 193 | 1, // RG32UI | 194 | 1, // RG32UI |
| 194 | 1, // R32UI | 195 | 1, // R32UI |
| 196 | 4, // ASTC_2D_8X8 | ||
| 195 | 1, // Z32F | 197 | 1, // Z32F |
| 196 | 1, // Z16 | 198 | 1, // Z16 |
| 197 | 1, // Z24S8 | 199 | 1, // Z24S8 |
| @@ -199,8 +201,8 @@ struct SurfaceParams { | |||
| 199 | 1, // Z32FS8 | 201 | 1, // Z32FS8 |
| 200 | }}; | 202 | }}; |
| 201 | 203 | ||
| 202 | ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); | 204 | ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); |
| 203 | return compression_factor_table[static_cast<size_t>(format)]; | 205 | return compression_factor_table[static_cast<std::size_t>(format)]; |
| 204 | } | 206 | } |
| 205 | 207 | ||
| 206 | static constexpr u32 GetFormatBpp(PixelFormat format) { | 208 | static constexpr u32 GetFormatBpp(PixelFormat format) { |
| @@ -253,6 +255,7 @@ struct SurfaceParams { | |||
| 253 | 16, // RG8S | 255 | 16, // RG8S |
| 254 | 64, // RG32UI | 256 | 64, // RG32UI |
| 255 | 32, // R32UI | 257 | 32, // R32UI |
| 258 | 16, // ASTC_2D_8X8 | ||
| 256 | 32, // Z32F | 259 | 32, // Z32F |
| 257 | 16, // Z16 | 260 | 16, // Z16 |
| 258 | 32, // Z24S8 | 261 | 32, // Z24S8 |
| @@ -260,8 +263,8 @@ struct SurfaceParams { | |||
| 260 | 64, // Z32FS8 | 263 | 64, // Z32FS8 |
| 261 | }}; | 264 | }}; |
| 262 | 265 | ||
| 263 | ASSERT(static_cast<size_t>(format) < bpp_table.size()); | 266 | ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); |
| 264 | return bpp_table[static_cast<size_t>(format)]; | 267 | return bpp_table[static_cast<std::size_t>(format)]; |
| 265 | } | 268 | } |
| 266 | 269 | ||
| 267 | u32 GetFormatBpp() const { | 270 | u32 GetFormatBpp() const { |
| @@ -316,6 +319,8 @@ struct SurfaceParams { | |||
| 316 | return PixelFormat::R11FG11FB10F; | 319 | return PixelFormat::R11FG11FB10F; |
| 317 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: | 320 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: |
| 318 | return PixelFormat::B5G6R5U; | 321 | return PixelFormat::B5G6R5U; |
| 322 | case Tegra::RenderTargetFormat::BGR5A1_UNORM: | ||
| 323 | return PixelFormat::A1B5G5R5U; | ||
| 319 | case Tegra::RenderTargetFormat::RGBA32_UINT: | 324 | case Tegra::RenderTargetFormat::RGBA32_UINT: |
| 320 | return PixelFormat::RGBA32UI; | 325 | return PixelFormat::RGBA32UI; |
| 321 | case Tegra::RenderTargetFormat::R8_UNORM: | 326 | case Tegra::RenderTargetFormat::R8_UNORM: |
| @@ -522,6 +527,8 @@ struct SurfaceParams { | |||
| 522 | return PixelFormat::BC6H_SF16; | 527 | return PixelFormat::BC6H_SF16; |
| 523 | case Tegra::Texture::TextureFormat::ASTC_2D_4X4: | 528 | case Tegra::Texture::TextureFormat::ASTC_2D_4X4: |
| 524 | return PixelFormat::ASTC_2D_4X4; | 529 | return PixelFormat::ASTC_2D_4X4; |
| 530 | case Tegra::Texture::TextureFormat::ASTC_2D_8X8: | ||
| 531 | return PixelFormat::ASTC_2D_8X8; | ||
| 525 | case Tegra::Texture::TextureFormat::R16_G16: | 532 | case Tegra::Texture::TextureFormat::R16_G16: |
| 526 | switch (component_type) { | 533 | switch (component_type) { |
| 527 | case Tegra::Texture::ComponentType::FLOAT: | 534 | case Tegra::Texture::ComponentType::FLOAT: |
| @@ -576,6 +583,7 @@ struct SurfaceParams { | |||
| 576 | case Tegra::RenderTargetFormat::RG16_UNORM: | 583 | case Tegra::RenderTargetFormat::RG16_UNORM: |
| 577 | case Tegra::RenderTargetFormat::R16_UNORM: | 584 | case Tegra::RenderTargetFormat::R16_UNORM: |
| 578 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: | 585 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: |
| 586 | case Tegra::RenderTargetFormat::BGR5A1_UNORM: | ||
| 579 | case Tegra::RenderTargetFormat::RG8_UNORM: | 587 | case Tegra::RenderTargetFormat::RG8_UNORM: |
| 580 | case Tegra::RenderTargetFormat::RGBA16_UNORM: | 588 | case Tegra::RenderTargetFormat::RGBA16_UNORM: |
| 581 | return ComponentType::UNorm; | 589 | return ComponentType::UNorm; |
| @@ -636,16 +644,18 @@ struct SurfaceParams { | |||
| 636 | } | 644 | } |
| 637 | 645 | ||
| 638 | static SurfaceType GetFormatType(PixelFormat pixel_format) { | 646 | static SurfaceType GetFormatType(PixelFormat pixel_format) { |
| 639 | if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) { | 647 | if (static_cast<std::size_t>(pixel_format) < |
| 648 | static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { | ||
| 640 | return SurfaceType::ColorTexture; | 649 | return SurfaceType::ColorTexture; |
| 641 | } | 650 | } |
| 642 | 651 | ||
| 643 | if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) { | 652 | if (static_cast<std::size_t>(pixel_format) < |
| 653 | static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) { | ||
| 644 | return SurfaceType::Depth; | 654 | return SurfaceType::Depth; |
| 645 | } | 655 | } |
| 646 | 656 | ||
| 647 | if (static_cast<size_t>(pixel_format) < | 657 | if (static_cast<std::size_t>(pixel_format) < |
| 648 | static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { | 658 | static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { |
| 649 | return SurfaceType::DepthStencil; | 659 | return SurfaceType::DepthStencil; |
| 650 | } | 660 | } |
| 651 | 661 | ||
| @@ -659,7 +669,7 @@ struct SurfaceParams { | |||
| 659 | MathUtil::Rectangle<u32> GetRect() const; | 669 | MathUtil::Rectangle<u32> GetRect() const; |
| 660 | 670 | ||
| 661 | /// Returns the size of this surface in bytes, adjusted for compression | 671 | /// Returns the size of this surface in bytes, adjusted for compression |
| 662 | size_t SizeInBytes() const { | 672 | std::size_t SizeInBytes() const { |
| 663 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; | 673 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; |
| 664 | ASSERT(width % compression_factor == 0); | 674 | ASSERT(width % compression_factor == 0); |
| 665 | ASSERT(height % compression_factor == 0); | 675 | ASSERT(height % compression_factor == 0); |
| @@ -671,7 +681,7 @@ struct SurfaceParams { | |||
| 671 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | 681 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); |
| 672 | 682 | ||
| 673 | /// Creates SurfaceParams from a framebuffer configuration | 683 | /// Creates SurfaceParams from a framebuffer configuration |
| 674 | static SurfaceParams CreateForFramebuffer(size_t index); | 684 | static SurfaceParams CreateForFramebuffer(std::size_t index); |
| 675 | 685 | ||
| 676 | /// Creates SurfaceParams for a depth buffer configuration | 686 | /// Creates SurfaceParams for a depth buffer configuration |
| 677 | static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | 687 | static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, |
| @@ -694,7 +704,7 @@ struct SurfaceParams { | |||
| 694 | u32 height; | 704 | u32 height; |
| 695 | u32 depth; | 705 | u32 depth; |
| 696 | u32 unaligned_height; | 706 | u32 unaligned_height; |
| 697 | size_t size_in_bytes; | 707 | std::size_t size_in_bytes; |
| 698 | SurfaceTarget target; | 708 | SurfaceTarget target; |
| 699 | }; | 709 | }; |
| 700 | 710 | ||
| @@ -711,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { | |||
| 711 | namespace std { | 721 | namespace std { |
| 712 | template <> | 722 | template <> |
| 713 | struct hash<SurfaceReserveKey> { | 723 | struct hash<SurfaceReserveKey> { |
| 714 | size_t operator()(const SurfaceReserveKey& k) const { | 724 | std::size_t operator()(const SurfaceReserveKey& k) const { |
| 715 | return k.Hash(); | 725 | return k.Hash(); |
| 716 | } | 726 | } |
| 717 | }; | 727 | }; |
| @@ -727,7 +737,7 @@ public: | |||
| 727 | return params.addr; | 737 | return params.addr; |
| 728 | } | 738 | } |
| 729 | 739 | ||
| 730 | size_t GetSizeInBytes() const { | 740 | std::size_t GetSizeInBytes() const { |
| 731 | return params.size_in_bytes; | 741 | return params.size_in_bytes; |
| 732 | } | 742 | } |
| 733 | 743 | ||
| @@ -775,7 +785,7 @@ public: | |||
| 775 | Surface GetDepthBufferSurface(bool preserve_contents); | 785 | Surface GetDepthBufferSurface(bool preserve_contents); |
| 776 | 786 | ||
| 777 | /// Get the color surface based on the framebuffer configuration and the specified render target | 787 | /// Get the color surface based on the framebuffer configuration and the specified render target |
| 778 | Surface GetColorBufferSurface(size_t index, bool preserve_contents); | 788 | Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); |
| 779 | 789 | ||
| 780 | /// Flushes the surface to Switch memory | 790 | /// Flushes the surface to Switch memory |
| 781 | void FlushSurface(const Surface& surface); | 791 | void FlushSurface(const Surface& surface); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 61080f5cc..894fe6eae 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -14,7 +14,7 @@ namespace OpenGL { | |||
| 14 | /// Gets the address for the specified shader stage program | 14 | /// Gets the address for the specified shader stage program |
| 15 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 15 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 16 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 16 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 17 | const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | 17 | const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; |
| 18 | return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + | 18 | return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + |
| 19 | shader_config.offset); | 19 | shader_config.offset); |
| 20 | } | 20 | } |
| @@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) { | |||
| 28 | 28 | ||
| 29 | /// Helper function to set shader uniform block bindings for a single shader stage | 29 | /// Helper function to set shader uniform block bindings for a single shader stage |
| 30 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | 30 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, |
| 31 | Maxwell::ShaderStage binding, size_t expected_size) { | 31 | Maxwell::ShaderStage binding, std::size_t expected_size) { |
| 32 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | 32 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); |
| 33 | if (ub_index == GL_INVALID_INDEX) { | 33 | if (ub_index == GL_INVALID_INDEX) { |
| 34 | return; | 34 | return; |
| @@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | |||
| 36 | 36 | ||
| 37 | GLint ub_size = 0; | 37 | GLint ub_size = 0; |
| 38 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | 38 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); |
| 39 | ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, | 39 | ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, |
| 40 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | 40 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); |
| 41 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | 41 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); |
| 42 | } | 42 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6e6febcbc..9bafe43a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -28,7 +28,7 @@ public: | |||
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | /// Gets the size of the shader in guest memory, required for cache management | 30 | /// Gets the size of the shader in guest memory, required for cache management |
| 31 | size_t GetSizeInBytes() const { | 31 | std::size_t GetSizeInBytes() const { |
| 32 | return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); | 32 | return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); |
| 33 | } | 33 | } |
| 34 | 34 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7a5321b9c..00cd05e62 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/shader_bytecode.h" | 14 | #include "video_core/engines/shader_bytecode.h" |
| 15 | #include "video_core/engines/shader_header.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 16 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 17 | 18 | ||
| @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler; | |||
| 26 | using Tegra::Shader::SubOp; | 27 | using Tegra::Shader::SubOp; |
| 27 | 28 | ||
| 28 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 29 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; |
| 29 | constexpr u32 PROGRAM_HEADER_SIZE = 0x50; | 30 | constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); |
| 30 | 31 | ||
| 31 | class DecompileFail : public std::runtime_error { | 32 | class DecompileFail : public std::runtime_error { |
| 32 | public: | 33 | public: |
| @@ -189,7 +190,7 @@ public: | |||
| 189 | 190 | ||
| 190 | private: | 191 | private: |
| 191 | void AppendIndentation() { | 192 | void AppendIndentation() { |
| 192 | shader_source.append(static_cast<size_t>(scope) * 4, ' '); | 193 | shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); |
| 193 | } | 194 | } |
| 194 | 195 | ||
| 195 | std::string shader_source; | 196 | std::string shader_source; |
| @@ -208,7 +209,7 @@ public: | |||
| 208 | UnsignedInteger, | 209 | UnsignedInteger, |
| 209 | }; | 210 | }; |
| 210 | 211 | ||
| 211 | GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | 212 | GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} |
| 212 | 213 | ||
| 213 | /// Gets the GLSL type string for a register | 214 | /// Gets the GLSL type string for a register |
| 214 | static std::string GetTypeString() { | 215 | static std::string GetTypeString() { |
| @@ -226,15 +227,23 @@ public: | |||
| 226 | } | 227 | } |
| 227 | 228 | ||
| 228 | /// Returns the index of the register | 229 | /// Returns the index of the register |
| 229 | size_t GetIndex() const { | 230 | std::size_t GetIndex() const { |
| 230 | return index; | 231 | return index; |
| 231 | } | 232 | } |
| 232 | 233 | ||
| 233 | private: | 234 | private: |
| 234 | const size_t index; | 235 | const std::size_t index; |
| 235 | const std::string& suffix; | 236 | const std::string& suffix; |
| 236 | }; | 237 | }; |
| 237 | 238 | ||
| 239 | enum class InternalFlag : u64 { | ||
| 240 | ZeroFlag = 0, | ||
| 241 | CarryFlag = 1, | ||
| 242 | OverflowFlag = 2, | ||
| 243 | NaNFlag = 3, | ||
| 244 | Amount | ||
| 245 | }; | ||
| 246 | |||
| 238 | /** | 247 | /** |
| 239 | * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state | 248 | * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state |
| 240 | * of all registers (e.g. whether they are currently being used as Floats or Integers), and | 249 | * of all registers (e.g. whether they are currently being used as Floats or Integers), and |
| @@ -328,13 +337,19 @@ public: | |||
| 328 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | 337 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, |
| 329 | const std::string& value, u64 dest_num_components, | 338 | const std::string& value, u64 dest_num_components, |
| 330 | u64 value_num_components, bool is_saturated = false, | 339 | u64 value_num_components, bool is_saturated = false, |
| 331 | u64 dest_elem = 0, Register::Size size = Register::Size::Word) { | 340 | u64 dest_elem = 0, Register::Size size = Register::Size::Word, |
| 341 | bool sets_cc = false) { | ||
| 332 | ASSERT_MSG(!is_saturated, "Unimplemented"); | 342 | ASSERT_MSG(!is_saturated, "Unimplemented"); |
| 333 | 343 | ||
| 334 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | 344 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; |
| 335 | 345 | ||
| 336 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', | 346 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', |
| 337 | dest_num_components, value_num_components, dest_elem); | 347 | dest_num_components, value_num_components, dest_elem); |
| 348 | |||
| 349 | if (sets_cc) { | ||
| 350 | const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; | ||
| 351 | SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); | ||
| 352 | } | ||
| 338 | } | 353 | } |
| 339 | 354 | ||
| 340 | /** | 355 | /** |
| @@ -351,6 +366,26 @@ public: | |||
| 351 | shader.AddLine(dest + " = " + src + ';'); | 366 | shader.AddLine(dest + " = " + src + ';'); |
| 352 | } | 367 | } |
| 353 | 368 | ||
| 369 | std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { | ||
| 370 | switch (cc) { | ||
| 371 | case Tegra::Shader::ControlCode::NEU: | ||
| 372 | return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; | ||
| 373 | default: | ||
| 374 | LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc)); | ||
| 375 | UNREACHABLE(); | ||
| 376 | return "false"; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | std::string GetInternalFlag(const InternalFlag ii) const { | ||
| 381 | const u32 code = static_cast<u32>(ii); | ||
| 382 | return "internalFlag_" + std::to_string(code) + suffix; | ||
| 383 | } | ||
| 384 | |||
| 385 | void SetInternalFlag(const InternalFlag ii, const std::string& value) const { | ||
| 386 | shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); | ||
| 387 | } | ||
| 388 | |||
| 354 | /** | 389 | /** |
| 355 | * Writes code that does a output attribute assignment to register operation. Output attributes | 390 | * Writes code that does a output attribute assignment to register operation. Output attributes |
| 356 | * are stored as floats, so this may require conversion. | 391 | * are stored as floats, so this may require conversion. |
| @@ -414,6 +449,12 @@ public: | |||
| 414 | } | 449 | } |
| 415 | declarations.AddNewLine(); | 450 | declarations.AddNewLine(); |
| 416 | 451 | ||
| 452 | for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { | ||
| 453 | const InternalFlag code = static_cast<InternalFlag>(ii); | ||
| 454 | declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); | ||
| 455 | } | ||
| 456 | declarations.AddNewLine(); | ||
| 457 | |||
| 417 | for (const auto element : declr_input_attribute) { | 458 | for (const auto element : declr_input_attribute) { |
| 418 | // TODO(bunnei): Use proper number of elements for these | 459 | // TODO(bunnei): Use proper number of elements for these |
| 419 | u32 idx = | 460 | u32 idx = |
| @@ -468,7 +509,7 @@ public: | |||
| 468 | /// necessary. | 509 | /// necessary. |
| 469 | std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, | 510 | std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, |
| 470 | bool is_array) { | 511 | bool is_array) { |
| 471 | const size_t offset = static_cast<size_t>(sampler.index.Value()); | 512 | const std::size_t offset = static_cast<std::size_t>(sampler.index.Value()); |
| 472 | 513 | ||
| 473 | // If this sampler has already been used, return the existing mapping. | 514 | // If this sampler has already been used, return the existing mapping. |
| 474 | const auto itr = | 515 | const auto itr = |
| @@ -481,7 +522,7 @@ public: | |||
| 481 | } | 522 | } |
| 482 | 523 | ||
| 483 | // Otherwise create a new mapping for this sampler | 524 | // Otherwise create a new mapping for this sampler |
| 484 | const size_t next_index = used_samplers.size(); | 525 | const std::size_t next_index = used_samplers.size(); |
| 485 | const SamplerEntry entry{stage, offset, next_index, type, is_array}; | 526 | const SamplerEntry entry{stage, offset, next_index, type, is_array}; |
| 486 | used_samplers.emplace_back(entry); | 527 | used_samplers.emplace_back(entry); |
| 487 | return entry.GetName(); | 528 | return entry.GetName(); |
| @@ -531,7 +572,7 @@ private: | |||
| 531 | void BuildRegisterList() { | 572 | void BuildRegisterList() { |
| 532 | regs.reserve(Register::NumRegisters); | 573 | regs.reserve(Register::NumRegisters); |
| 533 | 574 | ||
| 534 | for (size_t index = 0; index < Register::NumRegisters; ++index) { | 575 | for (std::size_t index = 0; index < Register::NumRegisters; ++index) { |
| 535 | regs.emplace_back(index, suffix); | 576 | regs.emplace_back(index, suffix); |
| 536 | } | 577 | } |
| 537 | } | 578 | } |
| @@ -674,7 +715,7 @@ public: | |||
| 674 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) | 715 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) |
| 675 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | 716 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), |
| 676 | stage(stage), suffix(suffix) { | 717 | stage(stage), suffix(suffix) { |
| 677 | 718 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | |
| 678 | Generate(suffix); | 719 | Generate(suffix); |
| 679 | } | 720 | } |
| 680 | 721 | ||
| @@ -688,23 +729,6 @@ public: | |||
| 688 | } | 729 | } |
| 689 | 730 | ||
| 690 | private: | 731 | private: |
| 691 | // Shader program header for a Fragment Shader. | ||
| 692 | struct FragmentHeader { | ||
| 693 | INSERT_PADDING_WORDS(5); | ||
| 694 | INSERT_PADDING_WORDS(13); | ||
| 695 | u32 enabled_color_outputs; | ||
| 696 | union { | ||
| 697 | BitField<0, 1, u32> writes_samplemask; | ||
| 698 | BitField<1, 1, u32> writes_depth; | ||
| 699 | }; | ||
| 700 | |||
| 701 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||
| 702 | const u32 bit = render_target * 4 + component; | ||
| 703 | return enabled_color_outputs & (1 << bit); | ||
| 704 | } | ||
| 705 | }; | ||
| 706 | static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); | ||
| 707 | |||
| 708 | /// Gets the Subroutine object corresponding to the specified address. | 732 | /// Gets the Subroutine object corresponding to the specified address. |
| 709 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | 733 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { |
| 710 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | 734 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); |
| @@ -862,7 +886,7 @@ private: | |||
| 862 | */ | 886 | */ |
| 863 | bool IsSchedInstruction(u32 offset) const { | 887 | bool IsSchedInstruction(u32 offset) const { |
| 864 | // sched instructions appear once every 4 instructions. | 888 | // sched instructions appear once every 4 instructions. |
| 865 | static constexpr size_t SchedPeriod = 4; | 889 | static constexpr std::size_t SchedPeriod = 4; |
| 866 | u32 absolute_offset = offset - main_offset; | 890 | u32 absolute_offset = offset - main_offset; |
| 867 | 891 | ||
| 868 | return (absolute_offset % SchedPeriod) == 0; | 892 | return (absolute_offset % SchedPeriod) == 0; |
| @@ -930,7 +954,7 @@ private: | |||
| 930 | std::string result; | 954 | std::string result; |
| 931 | result += '('; | 955 | result += '('; |
| 932 | 956 | ||
| 933 | for (size_t i = 0; i < shift_amounts.size(); ++i) { | 957 | for (std::size_t i = 0; i < shift_amounts.size(); ++i) { |
| 934 | if (i) | 958 | if (i) |
| 935 | result += '|'; | 959 | result += '|'; |
| 936 | result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + | 960 | result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + |
| @@ -954,9 +978,7 @@ private: | |||
| 954 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 978 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle |
| 955 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 979 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 |
| 956 | 980 | ||
| 957 | ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); | 981 | std::size_t written_components = 0; |
| 958 | |||
| 959 | size_t written_components = 0; | ||
| 960 | for (u32 component = 0; component < 4; ++component) { | 982 | for (u32 component = 0; component < 4; ++component) { |
| 961 | if (!instr.texs.IsComponentEnabled(component)) { | 983 | if (!instr.texs.IsComponentEnabled(component)) { |
| 962 | continue; | 984 | continue; |
| @@ -1010,10 +1032,8 @@ private: | |||
| 1010 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | 1032 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. |
| 1011 | void EmitFragmentOutputsWrite() { | 1033 | void EmitFragmentOutputsWrite() { |
| 1012 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | 1034 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); |
| 1013 | FragmentHeader header; | ||
| 1014 | std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); | ||
| 1015 | 1035 | ||
| 1016 | ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); | 1036 | ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); |
| 1017 | 1037 | ||
| 1018 | // Write the color outputs using the data in the shader registers, disabled | 1038 | // Write the color outputs using the data in the shader registers, disabled |
| 1019 | // rendertargets/components are skipped in the register assignment. | 1039 | // rendertargets/components are skipped in the register assignment. |
| @@ -1022,7 +1042,7 @@ private: | |||
| 1022 | ++render_target) { | 1042 | ++render_target) { |
| 1023 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 1043 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1024 | for (u32 component = 0; component < 4; ++component) { | 1044 | for (u32 component = 0; component < 4; ++component) { |
| 1025 | if (header.IsColorComponentOutputEnabled(render_target, component)) { | 1045 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1026 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | 1046 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, |
| 1027 | regs.GetRegisterAsFloat(current_reg))); | 1047 | regs.GetRegisterAsFloat(current_reg))); |
| 1028 | ++current_reg; | 1048 | ++current_reg; |
| @@ -1030,7 +1050,7 @@ private: | |||
| 1030 | } | 1050 | } |
| 1031 | } | 1051 | } |
| 1032 | 1052 | ||
| 1033 | if (header.writes_depth) { | 1053 | if (header.ps.omap.depth) { |
| 1034 | // The depth output is always 2 registers after the last color output, and current_reg | 1054 | // The depth output is always 2 registers after the last color output, and current_reg |
| 1035 | // already contains one past the last color register. | 1055 | // already contains one past the last color register. |
| 1036 | 1056 | ||
| @@ -1510,8 +1530,6 @@ private: | |||
| 1510 | case OpCode::Id::LEA_IMM: | 1530 | case OpCode::Id::LEA_IMM: |
| 1511 | case OpCode::Id::LEA_RZ: | 1531 | case OpCode::Id::LEA_RZ: |
| 1512 | case OpCode::Id::LEA_HI: { | 1532 | case OpCode::Id::LEA_HI: { |
| 1513 | std::string op_a; | ||
| 1514 | std::string op_b; | ||
| 1515 | std::string op_c; | 1533 | std::string op_c; |
| 1516 | 1534 | ||
| 1517 | switch (opcode->GetId()) { | 1535 | switch (opcode->GetId()) { |
| @@ -1642,7 +1660,8 @@ private: | |||
| 1642 | } | 1660 | } |
| 1643 | 1661 | ||
| 1644 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 1662 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, |
| 1645 | 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); | 1663 | 1, instr.alu.saturate_d, 0, instr.conversion.dest_size, |
| 1664 | instr.generates_cc.Value() != 0); | ||
| 1646 | break; | 1665 | break; |
| 1647 | } | 1666 | } |
| 1648 | case OpCode::Id::I2F_R: | 1667 | case OpCode::Id::I2F_R: |
| @@ -1781,8 +1800,8 @@ private: | |||
| 1781 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | 1800 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, |
| 1782 | Tegra::Shader::IpaSampleMode::Default}; | 1801 | Tegra::Shader::IpaSampleMode::Default}; |
| 1783 | 1802 | ||
| 1784 | u32 next_element = instr.attribute.fmt20.element; | 1803 | u64 next_element = instr.attribute.fmt20.element; |
| 1785 | u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); | 1804 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); |
| 1786 | 1805 | ||
| 1787 | const auto LoadNextElement = [&](u32 reg_offset) { | 1806 | const auto LoadNextElement = [&](u32 reg_offset) { |
| 1788 | regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, | 1807 | regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, |
| @@ -1846,8 +1865,8 @@ private: | |||
| 1846 | ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, | 1865 | ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, |
| 1847 | "Unaligned attribute loads are not supported"); | 1866 | "Unaligned attribute loads are not supported"); |
| 1848 | 1867 | ||
| 1849 | u32 next_element = instr.attribute.fmt20.element; | 1868 | u64 next_element = instr.attribute.fmt20.element; |
| 1850 | u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); | 1869 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); |
| 1851 | 1870 | ||
| 1852 | const auto StoreNextElement = [&](u32 reg_offset) { | 1871 | const auto StoreNextElement = [&](u32 reg_offset) { |
| 1853 | regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), | 1872 | regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), |
| @@ -1873,6 +1892,13 @@ private: | |||
| 1873 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | 1892 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; |
| 1874 | std::string coord; | 1893 | std::string coord; |
| 1875 | 1894 | ||
| 1895 | ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 1896 | "NODEP is not implemented"); | ||
| 1897 | ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 1898 | "AOFFI is not implemented"); | ||
| 1899 | ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||
| 1900 | "DC is not implemented"); | ||
| 1901 | |||
| 1876 | switch (texture_type) { | 1902 | switch (texture_type) { |
| 1877 | case Tegra::Shader::TextureType::Texture1D: { | 1903 | case Tegra::Shader::TextureType::Texture1D: { |
| 1878 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 1904 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); |
| @@ -1937,8 +1963,8 @@ private: | |||
| 1937 | UNREACHABLE(); | 1963 | UNREACHABLE(); |
| 1938 | } | 1964 | } |
| 1939 | } | 1965 | } |
| 1940 | size_t dest_elem{}; | 1966 | std::size_t dest_elem{}; |
| 1941 | for (size_t elem = 0; elem < 4; ++elem) { | 1967 | for (std::size_t elem = 0; elem < 4; ++elem) { |
| 1942 | if (!instr.tex.IsComponentEnabled(elem)) { | 1968 | if (!instr.tex.IsComponentEnabled(elem)) { |
| 1943 | // Skip disabled components | 1969 | // Skip disabled components |
| 1944 | continue; | 1970 | continue; |
| @@ -1955,6 +1981,11 @@ private: | |||
| 1955 | Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; | 1981 | Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; |
| 1956 | bool is_array{instr.texs.IsArrayTexture()}; | 1982 | bool is_array{instr.texs.IsArrayTexture()}; |
| 1957 | 1983 | ||
| 1984 | ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 1985 | "NODEP is not implemented"); | ||
| 1986 | ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||
| 1987 | "DC is not implemented"); | ||
| 1988 | |||
| 1958 | switch (texture_type) { | 1989 | switch (texture_type) { |
| 1959 | case Tegra::Shader::TextureType::Texture2D: { | 1990 | case Tegra::Shader::TextureType::Texture2D: { |
| 1960 | if (is_array) { | 1991 | if (is_array) { |
| @@ -1990,6 +2021,13 @@ private: | |||
| 1990 | std::string coord; | 2021 | std::string coord; |
| 1991 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | 2022 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; |
| 1992 | const bool is_array{instr.tlds.IsArrayTexture()}; | 2023 | const bool is_array{instr.tlds.IsArrayTexture()}; |
| 2024 | |||
| 2025 | ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2026 | "NODEP is not implemented"); | ||
| 2027 | ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2028 | "AOFFI is not implemented"); | ||
| 2029 | ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), | ||
| 2030 | "MZ is not implemented"); | ||
| 1993 | 2031 | ||
| 1994 | switch (texture_type) { | 2032 | switch (texture_type) { |
| 1995 | case Tegra::Shader::TextureType::Texture1D: { | 2033 | case Tegra::Shader::TextureType::Texture1D: { |
| @@ -2024,6 +2062,17 @@ private: | |||
| 2024 | ASSERT(instr.tld4.array == 0); | 2062 | ASSERT(instr.tld4.array == 0); |
| 2025 | std::string coord; | 2063 | std::string coord; |
| 2026 | 2064 | ||
| 2065 | ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2066 | "NODEP is not implemented"); | ||
| 2067 | ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2068 | "AOFFI is not implemented"); | ||
| 2069 | ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||
| 2070 | "DC is not implemented"); | ||
| 2071 | ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 2072 | "NDV is not implemented"); | ||
| 2073 | ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), | ||
| 2074 | "PTP is not implemented"); | ||
| 2075 | |||
| 2027 | switch (instr.tld4.texture_type) { | 2076 | switch (instr.tld4.texture_type) { |
| 2028 | case Tegra::Shader::TextureType::Texture2D: { | 2077 | case Tegra::Shader::TextureType::Texture2D: { |
| 2029 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | 2078 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); |
| @@ -2047,8 +2096,8 @@ private: | |||
| 2047 | const std::string texture = "textureGather(" + sampler + ", coords, " + | 2096 | const std::string texture = "textureGather(" + sampler + ", coords, " + |
| 2048 | std::to_string(instr.tld4.component) + ')'; | 2097 | std::to_string(instr.tld4.component) + ')'; |
| 2049 | 2098 | ||
| 2050 | size_t dest_elem{}; | 2099 | std::size_t dest_elem{}; |
| 2051 | for (size_t elem = 0; elem < 4; ++elem) { | 2100 | for (std::size_t elem = 0; elem < 4; ++elem) { |
| 2052 | if (!instr.tex.IsComponentEnabled(elem)) { | 2101 | if (!instr.tex.IsComponentEnabled(elem)) { |
| 2053 | // Skip disabled components | 2102 | // Skip disabled components |
| 2054 | continue; | 2103 | continue; |
| @@ -2061,6 +2110,13 @@ private: | |||
| 2061 | break; | 2110 | break; |
| 2062 | } | 2111 | } |
| 2063 | case OpCode::Id::TLD4S: { | 2112 | case OpCode::Id::TLD4S: { |
| 2113 | ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2114 | "NODEP is not implemented"); | ||
| 2115 | ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2116 | "AOFFI is not implemented"); | ||
| 2117 | ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||
| 2118 | "DC is not implemented"); | ||
| 2119 | |||
| 2064 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 2120 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); |
| 2065 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); | 2121 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); |
| 2066 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 2122 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| @@ -2073,6 +2129,9 @@ private: | |||
| 2073 | break; | 2129 | break; |
| 2074 | } | 2130 | } |
| 2075 | case OpCode::Id::TXQ: { | 2131 | case OpCode::Id::TXQ: { |
| 2132 | ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2133 | "NODEP is not implemented"); | ||
| 2134 | |||
| 2076 | // TODO: the new commits on the texture refactor, change the way samplers work. | 2135 | // TODO: the new commits on the texture refactor, change the way samplers work. |
| 2077 | // Sadly, not all texture instructions specify the type of texture their sampler | 2136 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 2078 | // uses. This must be fixed at a later instance. | 2137 | // uses. This must be fixed at a later instance. |
| @@ -2093,6 +2152,11 @@ private: | |||
| 2093 | break; | 2152 | break; |
| 2094 | } | 2153 | } |
| 2095 | case OpCode::Id::TMML: { | 2154 | case OpCode::Id::TMML: { |
| 2155 | ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2156 | "NODEP is not implemented"); | ||
| 2157 | ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 2158 | "NDV is not implemented"); | ||
| 2159 | |||
| 2096 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 2160 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); |
| 2097 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 2161 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); |
| 2098 | const bool is_array = instr.tmml.array != 0; | 2162 | const bool is_array = instr.tmml.array != 0; |
| @@ -2259,31 +2323,55 @@ private: | |||
| 2259 | break; | 2323 | break; |
| 2260 | } | 2324 | } |
| 2261 | case OpCode::Type::PredicateSetPredicate: { | 2325 | case OpCode::Type::PredicateSetPredicate: { |
| 2262 | const std::string op_a = | 2326 | switch (opcode->GetId()) { |
| 2263 | GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | 2327 | case OpCode::Id::PSETP: { |
| 2264 | const std::string op_b = | 2328 | const std::string op_a = |
| 2265 | GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | 2329 | GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); |
| 2330 | const std::string op_b = | ||
| 2331 | GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 2266 | 2332 | ||
| 2267 | // We can't use the constant predicate as destination. | 2333 | // We can't use the constant predicate as destination. |
| 2268 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 2334 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| 2269 | 2335 | ||
| 2270 | const std::string second_pred = | 2336 | const std::string second_pred = |
| 2271 | GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | 2337 | GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); |
| 2272 | 2338 | ||
| 2273 | const std::string combiner = GetPredicateCombiner(instr.psetp.op); | 2339 | const std::string combiner = GetPredicateCombiner(instr.psetp.op); |
| 2274 | 2340 | ||
| 2275 | const std::string predicate = | 2341 | const std::string predicate = |
| 2276 | '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | 2342 | '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; |
| 2277 | 2343 | ||
| 2278 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 2344 | // Set the primary predicate to the result of Predicate OP SecondPredicate |
| 2279 | SetPredicate(instr.psetp.pred3, | 2345 | SetPredicate(instr.psetp.pred3, |
| 2280 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | 2346 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); |
| 2281 | 2347 | ||
| 2282 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 2348 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |
| 2283 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 2349 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, |
| 2284 | // if enabled | 2350 | // if enabled |
| 2285 | SetPredicate(instr.psetp.pred0, | 2351 | SetPredicate(instr.psetp.pred0, |
| 2286 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | 2352 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); |
| 2353 | } | ||
| 2354 | break; | ||
| 2355 | } | ||
| 2356 | case OpCode::Id::CSETP: { | ||
| 2357 | const std::string pred = | ||
| 2358 | GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 2359 | const std::string combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 2360 | const std::string controlCode = regs.GetControlCode(instr.csetp.cc); | ||
| 2361 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 2362 | SetPredicate(instr.csetp.pred3, | ||
| 2363 | '(' + controlCode + ") " + combiner + " (" + pred + ')'); | ||
| 2364 | } | ||
| 2365 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 2366 | SetPredicate(instr.csetp.pred0, | ||
| 2367 | "!(" + controlCode + ") " + combiner + " (" + pred + ')'); | ||
| 2368 | } | ||
| 2369 | break; | ||
| 2370 | } | ||
| 2371 | default: { | ||
| 2372 | LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName()); | ||
| 2373 | UNREACHABLE(); | ||
| 2374 | } | ||
| 2287 | } | 2375 | } |
| 2288 | break; | 2376 | break; |
| 2289 | } | 2377 | } |
| @@ -2673,6 +2761,7 @@ private: | |||
| 2673 | private: | 2761 | private: |
| 2674 | const std::set<Subroutine>& subroutines; | 2762 | const std::set<Subroutine>& subroutines; |
| 2675 | const ProgramCode& program_code; | 2763 | const ProgramCode& program_code; |
| 2764 | Tegra::Shader::Header header; | ||
| 2676 | const u32 main_offset; | 2765 | const u32 main_offset; |
| 2677 | Maxwell3D::Regs::ShaderStage stage; | 2766 | Maxwell3D::Regs::ShaderStage stage; |
| 2678 | const std::string& suffix; | 2767 | const std::string& suffix; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index a43e2997b..d53b93ad5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL::GLShader { | 14 | namespace OpenGL::GLShader { |
| 15 | 15 | ||
| 16 | constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | 16 | constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; |
| 17 | using ProgramCode = std::vector<u64>; | 17 | using ProgramCode = std::vector<u64>; |
| 18 | 18 | ||
| 19 | class ConstBufferEntry { | 19 | class ConstBufferEntry { |
| @@ -51,7 +51,7 @@ public: | |||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | std::string GetName() const { | 53 | std::string GetName() const { |
| 54 | return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index); | 54 | return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | u32 GetHash() const { | 57 | u32 GetHash() const { |
| @@ -74,15 +74,15 @@ class SamplerEntry { | |||
| 74 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 74 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 75 | 75 | ||
| 76 | public: | 76 | public: |
| 77 | SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, | 77 | SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, |
| 78 | Tegra::Shader::TextureType type, bool is_array) | 78 | Tegra::Shader::TextureType type, bool is_array) |
| 79 | : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} | 79 | : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} |
| 80 | 80 | ||
| 81 | size_t GetOffset() const { | 81 | std::size_t GetOffset() const { |
| 82 | return offset; | 82 | return offset; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | size_t GetIndex() const { | 85 | std::size_t GetIndex() const { |
| 86 | return sampler_index; | 86 | return sampler_index; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| @@ -91,7 +91,7 @@ public: | |||
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | std::string GetName() const { | 93 | std::string GetName() const { |
| 94 | return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + | 94 | return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' + |
| 95 | std::to_string(sampler_index); | 95 | std::to_string(sampler_index); |
| 96 | } | 96 | } |
| 97 | 97 | ||
| @@ -133,7 +133,7 @@ public: | |||
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | static std::string GetArrayName(Maxwell::ShaderStage stage) { | 135 | static std::string GetArrayName(Maxwell::ShaderStage stage) { |
| 136 | return TextureSamplerNames[static_cast<size_t>(stage)]; | 136 | return TextureSamplerNames[static_cast<std::size_t>(stage)]; |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | private: | 139 | private: |
| @@ -143,9 +143,9 @@ private: | |||
| 143 | 143 | ||
| 144 | /// Offset in TSC memory from which to read the sampler object, as specified by the sampling | 144 | /// Offset in TSC memory from which to read the sampler object, as specified by the sampling |
| 145 | /// instruction. | 145 | /// instruction. |
| 146 | size_t offset; | 146 | std::size_t offset; |
| 147 | Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. | 147 | Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. |
| 148 | size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. | 148 | std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. |
| 149 | Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) | 149 | Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) |
| 150 | bool is_array; ///< Whether the texture is being sampled as an array texture or not. | 150 | bool is_array; ///< Whether the texture is being sampled as an array texture or not. |
| 151 | }; | 151 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 533e42caa..b86cd96e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | namespace OpenGL::GLShader { | 12 | namespace OpenGL::GLShader { |
| 13 | 13 | ||
| 14 | /// Number of OpenGL texture samplers that can be used in the fragment shader | 14 | /// Number of OpenGL texture samplers that can be used in the fragment shader |
| 15 | static constexpr size_t NumTextureSamplers = 32; | 15 | static constexpr std::size_t NumTextureSamplers = 32; |
| 16 | 16 | ||
| 17 | using Tegra::Engines::Maxwell3D; | 17 | using Tegra::Engines::Maxwell3D; |
| 18 | 18 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 6f70deb96..af99132ba 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -272,7 +272,7 @@ void OpenGLState::Apply() const { | |||
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | // Clip distance | 274 | // Clip distance |
| 275 | for (size_t i = 0; i < clip_distance.size(); ++i) { | 275 | for (std::size_t i = 0; i < clip_distance.size(); ++i) { |
| 276 | if (clip_distance[i] != cur_state.clip_distance[i]) { | 276 | if (clip_distance[i] != cur_state.clip_distance[i]) { |
| 277 | if (clip_distance[i]) { | 277 | if (clip_distance[i]) { |
| 278 | glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | 278 | glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index aadf68f16..664f3ca20 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a | |||
| 61 | mapped_size = size; | 61 | mapped_size = size; |
| 62 | 62 | ||
| 63 | if (alignment > 0) { | 63 | if (alignment > 0) { |
| 64 | buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); | 64 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | bool invalidate = false; | 67 | bool invalidate = false; |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 272294c62..20ba6d4f6 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_ | |||
| 46 | } | 46 | } |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | template <std::size_t N, std::size_t M> | ||
| 50 | struct alignas(64) SwizzleTable { | ||
| 51 | constexpr SwizzleTable() { | ||
| 52 | for (u32 y = 0; y < N; ++y) { | ||
| 53 | for (u32 x = 0; x < M; ++x) { | ||
| 54 | const u32 x2 = x * 16; | ||
| 55 | values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + | ||
| 56 | ((x2 % 32) / 16) * 32 + (y % 2) * 16); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | } | ||
| 60 | const std::array<u16, M>& operator[](std::size_t index) const { | ||
| 61 | return values[index]; | ||
| 62 | } | ||
| 63 | std::array<std::array<u16, M>, N> values{}; | ||
| 64 | }; | ||
| 65 | |||
| 66 | constexpr auto swizzle_table = SwizzleTable<8, 4>(); | ||
| 67 | |||
| 68 | void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data, | ||
| 69 | u8* unswizzled_data, bool unswizzle, u32 block_height) { | ||
| 70 | std::array<u8*, 2> data_ptrs; | ||
| 71 | const std::size_t stride{width * bytes_per_pixel}; | ||
| 72 | const std::size_t image_width_in_gobs{(stride + 63) / 64}; | ||
| 73 | const std::size_t copy_size{16}; | ||
| 74 | for (std::size_t y = 0; y < height; ++y) { | ||
| 75 | const std::size_t initial_gob = | ||
| 76 | (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + | ||
| 77 | (y % (8 * block_height) / 8) * 512; | ||
| 78 | const std::size_t pixel_base{y * width * bytes_per_pixel}; | ||
| 79 | const auto& table = swizzle_table[y % 8]; | ||
| 80 | for (std::size_t xb = 0; xb < stride; xb += copy_size) { | ||
| 81 | const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height}; | ||
| 82 | const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; | ||
| 83 | const std::size_t pixel_index{xb + pixel_base}; | ||
| 84 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||
| 85 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||
| 86 | std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 49 | u32 BytesPerPixel(TextureFormat format) { | 91 | u32 BytesPerPixel(TextureFormat format) { |
| 50 | switch (format) { | 92 | switch (format) { |
| 51 | case TextureFormat::DXT1: | 93 | case TextureFormat::DXT1: |
| @@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) { | |||
| 63 | case TextureFormat::R32_G32_B32: | 105 | case TextureFormat::R32_G32_B32: |
| 64 | return 12; | 106 | return 12; |
| 65 | case TextureFormat::ASTC_2D_4X4: | 107 | case TextureFormat::ASTC_2D_4X4: |
| 108 | case TextureFormat::ASTC_2D_8X8: | ||
| 66 | case TextureFormat::A8R8G8B8: | 109 | case TextureFormat::A8R8G8B8: |
| 67 | case TextureFormat::A2B10G10R10: | 110 | case TextureFormat::A2B10G10R10: |
| 68 | case TextureFormat::BF10GF11RF11: | 111 | case TextureFormat::BF10GF11RF11: |
| @@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) { | |||
| 91 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, | 134 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, |
| 92 | u32 height, u32 block_height) { | 135 | u32 height, u32 block_height) { |
| 93 | std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); | 136 | std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); |
| 94 | CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | 137 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { |
| 95 | Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | 138 | FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel, |
| 139 | Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||
| 140 | } else { | ||
| 141 | CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | ||
| 142 | Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||
| 143 | } | ||
| 96 | return unswizzled_data; | 144 | return unswizzled_data; |
| 97 | } | 145 | } |
| 98 | 146 | ||
| @@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 111 | case TextureFormat::BC6H_UF16: | 159 | case TextureFormat::BC6H_UF16: |
| 112 | case TextureFormat::BC6H_SF16: | 160 | case TextureFormat::BC6H_SF16: |
| 113 | case TextureFormat::ASTC_2D_4X4: | 161 | case TextureFormat::ASTC_2D_4X4: |
| 162 | case TextureFormat::ASTC_2D_8X8: | ||
| 114 | case TextureFormat::A8R8G8B8: | 163 | case TextureFormat::A8R8G8B8: |
| 115 | case TextureFormat::A2B10G10R10: | 164 | case TextureFormat::A2B10G10R10: |
| 116 | case TextureFormat::A1B5G5R5: | 165 | case TextureFormat::A1B5G5R5: |