diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/blit.frag | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/blit.vert | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/quad_array.comp | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/uint8.comp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 107 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 4 | ||||
| -rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 38 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 38 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.h | 14 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 89 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 4 |
15 files changed, 372 insertions, 75 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6a2cc8b8..dfb12cd2d 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1973,7 +1973,7 @@ private: | |||
| 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), | 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), |
| 1976 | INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"), | 1976 | INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), | 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d1ae4be6d..0389c2143 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -399,6 +399,7 @@ public: | |||
| 399 | DeclareConstantBuffers(); | 399 | DeclareConstantBuffers(); |
| 400 | DeclareGlobalMemory(); | 400 | DeclareGlobalMemory(); |
| 401 | DeclareSamplers(); | 401 | DeclareSamplers(); |
| 402 | DeclareImages(); | ||
| 402 | DeclarePhysicalAttributeReader(); | 403 | DeclarePhysicalAttributeReader(); |
| 403 | 404 | ||
| 404 | code.AddLine("void execute_{}() {{", suffix); | 405 | code.AddLine("void execute_{}() {{", suffix); |
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag new file mode 100644 index 000000000..a06ecd24a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.frag | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 frag_tex_coord; | ||
| 17 | |||
| 18 | layout (location = 0) out vec4 color; | ||
| 19 | |||
| 20 | layout (binding = 1) uniform sampler2D color_texture; | ||
| 21 | |||
| 22 | void main() { | ||
| 23 | color = texture(color_texture, frag_tex_coord); | ||
| 24 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert new file mode 100644 index 000000000..c64d9235a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.vert | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 vert_position; | ||
| 17 | layout (location = 1) in vec2 vert_tex_coord; | ||
| 18 | |||
| 19 | layout (location = 0) out vec2 frag_tex_coord; | ||
| 20 | |||
| 21 | layout (set = 0, binding = 0) uniform MatrixBlock { | ||
| 22 | mat4 modelview_matrix; | ||
| 23 | }; | ||
| 24 | |||
| 25 | void main() { | ||
| 26 | gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); | ||
| 27 | frag_tex_coord = vert_tex_coord; | ||
| 28 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp new file mode 100644 index 000000000..5a5703308 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (local_size_x = 1024) in; | ||
| 17 | |||
| 18 | layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||
| 19 | uint output_indexes[]; | ||
| 20 | }; | ||
| 21 | |||
| 22 | layout (push_constant) uniform PushConstants { | ||
| 23 | uint first; | ||
| 24 | }; | ||
| 25 | |||
| 26 | void main() { | ||
| 27 | uint primitive = gl_GlobalInvocationID.x; | ||
| 28 | if (primitive * 6 >= output_indexes.length()) { | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | |||
| 32 | const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||
| 33 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 34 | uint index = first + primitive * 4 + quad_map[vertex]; | ||
| 35 | output_indexes[primitive * 6 + vertex] = index; | ||
| 36 | } | ||
| 37 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp new file mode 100644 index 000000000..a320f3ae0 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/uint8.comp | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | #extension GL_EXT_shader_16bit_storage : require | ||
| 16 | #extension GL_EXT_shader_8bit_storage : require | ||
| 17 | |||
| 18 | layout (local_size_x = 1024) in; | ||
| 19 | |||
| 20 | layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { | ||
| 21 | uint8_t input_indexes[]; | ||
| 22 | }; | ||
| 23 | |||
| 24 | layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | ||
| 25 | uint16_t output_indexes[]; | ||
| 26 | }; | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | uint id = gl_GlobalInvocationID.x; | ||
| 30 | if (id < input_indexes.length()) { | ||
| 31 | output_indexes[id] = uint16_t(input_indexes[id]); | ||
| 32 | } | ||
| 33 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 6227bc70b..a8baf91de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -543,7 +543,7 @@ private: | |||
| 543 | } | 543 | } |
| 544 | 544 | ||
| 545 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { | 545 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { |
| 546 | if (!IsRenderTargetUsed(rt)) { | 546 | if (!specialization.enabled_rendertargets[rt]) { |
| 547 | continue; | 547 | continue; |
| 548 | } | 548 | } |
| 549 | 549 | ||
| @@ -1555,40 +1555,48 @@ private: | |||
| 1555 | 1555 | ||
| 1556 | Expression Texture(Operation operation) { | 1556 | Expression Texture(Operation operation) { |
| 1557 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | 1557 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1558 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1559 | 1558 | ||
| 1560 | const bool can_implicit = stage == ShaderType::Fragment; | 1559 | const bool can_implicit = stage == ShaderType::Fragment; |
| 1561 | const Id sampler = GetTextureSampler(operation); | 1560 | const Id sampler = GetTextureSampler(operation); |
| 1562 | const Id coords = GetCoordinates(operation, Type::Float); | 1561 | const Id coords = GetCoordinates(operation, Type::Float); |
| 1563 | 1562 | ||
| 1563 | std::vector<Id> operands; | ||
| 1564 | spv::ImageOperandsMask mask{}; | ||
| 1565 | if (meta.bias) { | ||
| 1566 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1567 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | if (!can_implicit) { | ||
| 1571 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1572 | operands.push_back(v_float_zero); | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | if (!meta.aoffi.empty()) { | ||
| 1576 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1577 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1578 | } | ||
| 1579 | |||
| 1564 | if (meta.depth_compare) { | 1580 | if (meta.depth_compare) { |
| 1565 | // Depth sampling | 1581 | // Depth sampling |
| 1566 | UNIMPLEMENTED_IF(meta.bias); | 1582 | UNIMPLEMENTED_IF(meta.bias); |
| 1567 | const Id dref = AsFloat(Visit(meta.depth_compare)); | 1583 | const Id dref = AsFloat(Visit(meta.depth_compare)); |
| 1568 | if (can_implicit) { | 1584 | if (can_implicit) { |
| 1569 | return {OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, {}), | 1585 | return { |
| 1570 | Type::Float}; | 1586 | OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1587 | Type::Float}; | ||
| 1571 | } else { | 1588 | } else { |
| 1572 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, | 1589 | return { |
| 1573 | spv::ImageOperandsMask::Lod, v_float_zero), | 1590 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1574 | Type::Float}; | 1591 | Type::Float}; |
| 1575 | } | 1592 | } |
| 1576 | } | 1593 | } |
| 1577 | 1594 | ||
| 1578 | std::vector<Id> operands; | ||
| 1579 | spv::ImageOperandsMask mask{}; | ||
| 1580 | if (meta.bias) { | ||
| 1581 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1582 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1583 | } | ||
| 1584 | |||
| 1585 | Id texture; | 1595 | Id texture; |
| 1586 | if (can_implicit) { | 1596 | if (can_implicit) { |
| 1587 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); | 1597 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); |
| 1588 | } else { | 1598 | } else { |
| 1589 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, | 1599 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); |
| 1590 | mask | spv::ImageOperandsMask::Lod, v_float_zero, | ||
| 1591 | operands); | ||
| 1592 | } | 1600 | } |
| 1593 | return GetTextureElement(operation, texture, Type::Float); | 1601 | return GetTextureElement(operation, texture, Type::Float); |
| 1594 | } | 1602 | } |
| @@ -1601,7 +1609,8 @@ private: | |||
| 1601 | const Id lod = AsFloat(Visit(meta.lod)); | 1609 | const Id lod = AsFloat(Visit(meta.lod)); |
| 1602 | 1610 | ||
| 1603 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; | 1611 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; |
| 1604 | std::vector<Id> operands; | 1612 | std::vector<Id> operands{lod}; |
| 1613 | |||
| 1605 | if (!meta.aoffi.empty()) { | 1614 | if (!meta.aoffi.empty()) { |
| 1606 | mask = mask | spv::ImageOperandsMask::Offset; | 1615 | mask = mask | spv::ImageOperandsMask::Offset; |
| 1607 | operands.push_back(GetOffsetCoordinates(operation)); | 1616 | operands.push_back(GetOffsetCoordinates(operation)); |
| @@ -1609,11 +1618,10 @@ private: | |||
| 1609 | 1618 | ||
| 1610 | if (meta.sampler.IsShadow()) { | 1619 | if (meta.sampler.IsShadow()) { |
| 1611 | const Id dref = AsFloat(Visit(meta.depth_compare)); | 1620 | const Id dref = AsFloat(Visit(meta.depth_compare)); |
| 1612 | return { | 1621 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), |
| 1613 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, lod, operands), | 1622 | Type::Float}; |
| 1614 | Type::Float}; | ||
| 1615 | } | 1623 | } |
| 1616 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, lod, operands); | 1624 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); |
| 1617 | return GetTextureElement(operation, texture, Type::Float); | 1625 | return GetTextureElement(operation, texture, Type::Float); |
| 1618 | } | 1626 | } |
| 1619 | 1627 | ||
| @@ -1722,7 +1730,7 @@ private: | |||
| 1722 | const std::vector grad = {dx, dy}; | 1730 | const std::vector grad = {dx, dy}; |
| 1723 | 1731 | ||
| 1724 | static constexpr auto mask = spv::ImageOperandsMask::Grad; | 1732 | static constexpr auto mask = spv::ImageOperandsMask::Grad; |
| 1725 | const Id texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, grad); | 1733 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); |
| 1726 | return GetTextureElement(operation, texture, Type::Float); | 1734 | return GetTextureElement(operation, texture, Type::Float); |
| 1727 | } | 1735 | } |
| 1728 | 1736 | ||
| @@ -1833,7 +1841,7 @@ private: | |||
| 1833 | } | 1841 | } |
| 1834 | 1842 | ||
| 1835 | void PreExit() { | 1843 | void PreExit() { |
| 1836 | if (stage == ShaderType::Vertex) { | 1844 | if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { |
| 1837 | const u32 position_index = out_indices.position.value(); | 1845 | const u32 position_index = out_indices.position.value(); |
| 1838 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); | 1846 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); |
| 1839 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); | 1847 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); |
| @@ -1860,12 +1868,18 @@ private: | |||
| 1860 | // rendertargets/components are skipped in the register assignment. | 1868 | // rendertargets/components are skipped in the register assignment. |
| 1861 | u32 current_reg = 0; | 1869 | u32 current_reg = 0; |
| 1862 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | 1870 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { |
| 1871 | if (!specialization.enabled_rendertargets[rt]) { | ||
| 1872 | // Skip rendertargets that are not enabled | ||
| 1873 | continue; | ||
| 1874 | } | ||
| 1863 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 1875 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1864 | for (u32 component = 0; component < 4; ++component) { | 1876 | for (u32 component = 0; component < 4; ++component) { |
| 1877 | const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component); | ||
| 1865 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | 1878 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { |
| 1866 | OpStore(AccessElement(t_out_float, frag_colors.at(rt), component), | 1879 | OpStore(pointer, SafeGetRegister(current_reg)); |
| 1867 | SafeGetRegister(current_reg)); | ||
| 1868 | ++current_reg; | 1880 | ++current_reg; |
| 1881 | } else { | ||
| 1882 | OpStore(pointer, component == 3 ? v_float_one : v_float_zero); | ||
| 1869 | } | 1883 | } |
| 1870 | } | 1884 | } |
| 1871 | } | 1885 | } |
| @@ -1995,15 +2009,6 @@ private: | |||
| 1995 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); | 2009 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); |
| 1996 | } | 2010 | } |
| 1997 | 2011 | ||
| 1998 | bool IsRenderTargetUsed(u32 rt) const { | ||
| 1999 | for (u32 component = 0; component < 4; ++component) { | ||
| 2000 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 2001 | return true; | ||
| 2002 | } | ||
| 2003 | } | ||
| 2004 | return false; | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | template <typename... Args> | 2012 | template <typename... Args> |
| 2008 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { | 2013 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { |
| 2009 | std::vector<Id> members; | 2014 | std::vector<Id> members; |
| @@ -2552,29 +2557,7 @@ public: | |||
| 2552 | } | 2557 | } |
| 2553 | 2558 | ||
| 2554 | Id operator()(const ExprCondCode& expr) { | 2559 | Id operator()(const ExprCondCode& expr) { |
| 2555 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 2560 | return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); |
| 2556 | Id target; | ||
| 2557 | |||
| 2558 | if (const auto pred = std::get_if<PredicateNode>(&*cc)) { | ||
| 2559 | const auto index = pred->GetIndex(); | ||
| 2560 | switch (index) { | ||
| 2561 | case Tegra::Shader::Pred::NeverExecute: | ||
| 2562 | target = decomp.v_false; | ||
| 2563 | break; | ||
| 2564 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 2565 | target = decomp.v_true; | ||
| 2566 | break; | ||
| 2567 | default: | ||
| 2568 | target = decomp.predicates.at(index); | ||
| 2569 | break; | ||
| 2570 | } | ||
| 2571 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | ||
| 2572 | target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); | ||
| 2573 | } else { | ||
| 2574 | UNREACHABLE(); | ||
| 2575 | } | ||
| 2576 | |||
| 2577 | return decomp.OpLoad(decomp.t_bool, target); | ||
| 2578 | } | 2561 | } |
| 2579 | 2562 | ||
| 2580 | Id operator()(const ExprVar& expr) { | 2563 | Id operator()(const ExprVar& expr) { |
| @@ -2589,7 +2572,7 @@ public: | |||
| 2589 | const Id target = decomp.Constant(decomp.t_uint, expr.value); | 2572 | const Id target = decomp.Constant(decomp.t_uint, expr.value); |
| 2590 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); | 2573 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); |
| 2591 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); | 2574 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); |
| 2592 | return decomp.OpLogicalEqual(decomp.t_uint, gpr, target); | 2575 | return decomp.OpIEqual(decomp.t_bool, gpr, target); |
| 2593 | } | 2576 | } |
| 2594 | 2577 | ||
| 2595 | Id Visit(const Expr& node) { | 2578 | Id Visit(const Expr& node) { |
| @@ -2659,11 +2642,11 @@ public: | |||
| 2659 | const Id loop_label = decomp.OpLabel(); | 2642 | const Id loop_label = decomp.OpLabel(); |
| 2660 | const Id endloop_label = decomp.OpLabel(); | 2643 | const Id endloop_label = decomp.OpLabel(); |
| 2661 | const Id loop_start_block = decomp.OpLabel(); | 2644 | const Id loop_start_block = decomp.OpLabel(); |
| 2662 | const Id loop_end_block = decomp.OpLabel(); | 2645 | const Id loop_continue_block = decomp.OpLabel(); |
| 2663 | current_loop_exit = endloop_label; | 2646 | current_loop_exit = endloop_label; |
| 2664 | decomp.OpBranch(loop_label); | 2647 | decomp.OpBranch(loop_label); |
| 2665 | decomp.AddLabel(loop_label); | 2648 | decomp.AddLabel(loop_label); |
| 2666 | decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone); | 2649 | decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); |
| 2667 | decomp.OpBranch(loop_start_block); | 2650 | decomp.OpBranch(loop_start_block); |
| 2668 | decomp.AddLabel(loop_start_block); | 2651 | decomp.AddLabel(loop_start_block); |
| 2669 | ASTNode current = ast.nodes.GetFirst(); | 2652 | ASTNode current = ast.nodes.GetFirst(); |
| @@ -2671,6 +2654,8 @@ public: | |||
| 2671 | Visit(current); | 2654 | Visit(current); |
| 2672 | current = current->GetNext(); | 2655 | current = current->GetNext(); |
| 2673 | } | 2656 | } |
| 2657 | decomp.OpBranch(loop_continue_block); | ||
| 2658 | decomp.AddLabel(loop_continue_block); | ||
| 2674 | ExprDecompiler expr_parser{decomp}; | 2659 | ExprDecompiler expr_parser{decomp}; |
| 2675 | const Id condition = expr_parser.Visit(ast.condition); | 2660 | const Id condition = expr_parser.Visit(ast.condition); |
| 2676 | decomp.OpBranchConditional(condition, loop_label, endloop_label); | 2661 | decomp.OpBranchConditional(condition, loop_label, endloop_label); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 2b01321b6..10794be1c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -94,6 +94,7 @@ struct Specialization final { | |||
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | 94 | Maxwell::PrimitiveTopology primitive_topology{}; |
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | ||
| 97 | 98 | ||
| 98 | // Tessellation specific | 99 | // Tessellation specific |
| 99 | struct { | 100 | struct { |
| @@ -101,6 +102,9 @@ struct Specialization final { | |||
| 101 | Maxwell::TessellationSpacing spacing{}; | 102 | Maxwell::TessellationSpacing spacing{}; |
| 102 | bool clockwise{}; | 103 | bool clockwise{}; |
| 103 | } tessellation; | 104 | } tessellation; |
| 105 | |||
| 106 | // Fragment specific | ||
| 107 | std::bitset<8> enabled_rendertargets; | ||
| 104 | }; | 108 | }; |
| 105 | // Old gcc versions don't consider this trivially copyable. | 109 | // Old gcc versions don't consider this trivially copyable. |
| 106 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 110 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 32facd6ba..0eeb75559 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 63 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 64 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 65 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | 66 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 67 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 69 | "Condition codes generation in I2F is not implemented"); | 68 | "Condition codes generation in I2F is not implemented"); |
| 70 | 69 | ||
| 71 | Node value = [&]() { | 70 | Node value = [&] { |
| 72 | switch (opcode->get().GetId()) { | 71 | switch (opcode->get().GetId()) { |
| 73 | case OpCode::Id::I2F_R: | 72 | case OpCode::Id::I2F_R: |
| 74 | return GetRegister(instr.gpr20); | 73 | return GetRegister(instr.gpr20); |
| @@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 81 | return Immediate(0); | 80 | return Immediate(0); |
| 82 | } | 81 | } |
| 83 | }(); | 82 | }(); |
| 83 | |||
| 84 | const bool input_signed = instr.conversion.is_input_signed; | 84 | const bool input_signed = instr.conversion.is_input_signed; |
| 85 | |||
| 86 | if (instr.conversion.src_size == Register::Size::Byte) { | ||
| 87 | const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; | ||
| 88 | if (offset > 0) { | ||
| 89 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 90 | std::move(value), Immediate(offset)); | ||
| 91 | } | ||
| 92 | } else { | ||
| 93 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 94 | } | ||
| 95 | |||
| 85 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | 96 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |
| 86 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | 97 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); |
| 87 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | 98 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 78e92f52e..c934d0719 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -22,7 +22,23 @@ using Tegra::Shader::Register; | |||
| 22 | 22 | ||
| 23 | namespace { | 23 | namespace { |
| 24 | 24 | ||
| 25 | u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { | 25 | u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { |
| 26 | switch (uniform_type) { | ||
| 27 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 28 | case Tegra::Shader::UniformType::Single: | ||
| 29 | return 1; | ||
| 30 | case Tegra::Shader::UniformType::Double: | ||
| 31 | return 2; | ||
| 32 | case Tegra::Shader::UniformType::Quad: | ||
| 33 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 34 | return 4; | ||
| 35 | default: | ||
| 36 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | ||
| 37 | return 1; | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 26 | switch (uniform_type) { | 42 | switch (uniform_type) { |
| 27 | case Tegra::Shader::UniformType::Single: | 43 | case Tegra::Shader::UniformType::Single: |
| 28 | return 1; | 44 | return 1; |
| @@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 170 | const auto [real_address_base, base_address, descriptor] = | 186 | const auto [real_address_base, base_address, descriptor] = |
| 171 | TrackGlobalMemory(bb, instr, false); | 187 | TrackGlobalMemory(bb, instr, false); |
| 172 | 188 | ||
| 173 | const u32 count = GetUniformTypeElementsCount(type); | 189 | const u32 count = GetLdgMemorySize(type); |
| 174 | if (!real_address_base || !base_address) { | 190 | if (!real_address_base || !base_address) { |
| 175 | // Tracking failed, load zeroes. | 191 | // Tracking failed, load zeroes. |
| 176 | for (u32 i = 0; i < count; ++i) { | 192 | for (u32 i = 0; i < count; ++i) { |
| @@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 181 | 197 | ||
| 182 | for (u32 i = 0; i < count; ++i) { | 198 | for (u32 i = 0; i < count; ++i) { |
| 183 | const Node it_offset = Immediate(i * 4); | 199 | const Node it_offset = Immediate(i * 4); |
| 184 | const Node real_address = | 200 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 185 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 201 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 186 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 202 | |
| 203 | if (type == Tegra::Shader::UniformType::UnsignedByte) { | ||
| 204 | // To handle unaligned loads get the byte used to dereferenced global memory | ||
| 205 | // and extract that byte from the loaded uint32. | ||
| 206 | Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); | ||
| 207 | byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); | ||
| 208 | |||
| 209 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), | ||
| 210 | Immediate(8)); | ||
| 211 | } | ||
| 187 | 212 | ||
| 188 | SetTemporary(bb, i, gmem); | 213 | SetTemporary(bb, i, gmem); |
| 189 | } | 214 | } |
| 215 | |||
| 190 | for (u32 i = 0; i < count; ++i) { | 216 | for (u32 i = 0; i < count; ++i) { |
| 191 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | 217 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 192 | } | 218 | } |
| @@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 276 | break; | 302 | break; |
| 277 | } | 303 | } |
| 278 | 304 | ||
| 279 | const u32 count = GetUniformTypeElementsCount(type); | 305 | const u32 count = GetStgMemorySize(type); |
| 280 | for (u32 i = 0; i < count; ++i) { | 306 | for (u32 i = 0; i < count; ++i) { |
| 281 | const Node it_offset = Immediate(i * 4); | 307 | const Node it_offset = Immediate(i * 4); |
| 282 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 308 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 994c05611..dff01a541 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 743 | // When lod is used always is in gpr20 | 743 | // When lod is used always is in gpr20 |
| 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 745 | 745 | ||
| 746 | // Fill empty entries from the guest sampler. | 746 | // Fill empty entries from the guest sampler |
| 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); | 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); |
| 748 | if (type_coord_count != entry_coord_count) { | 748 | if (type_coord_count != entry_coord_count) { |
| 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); | 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); |
| 750 | } | 750 | |
| 751 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { | 751 | // When the size is higher we insert zeroes |
| 752 | coords.push_back(GetRegister(Register::ZeroIndex)); | 752 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { |
| 753 | coords.push_back(GetRegister(Register::ZeroIndex)); | ||
| 754 | } | ||
| 755 | |||
| 756 | // Then we ensure the size matches the number of entries (dropping unused values) | ||
| 757 | coords.resize(entry_coord_count); | ||
| 753 | } | 758 | } |
| 754 | 759 | ||
| 755 | Node4 values; | 760 | Node4 values; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index a4f1edd9a..38b3a4ba8 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const { | |||
| 392 | } | 392 | } |
| 393 | } | 393 | } |
| 394 | 394 | ||
| 395 | u32 SurfaceParams::GetBlockSize() const { | ||
| 396 | const u32 x = 64U << block_width; | ||
| 397 | const u32 y = 8U << block_height; | ||
| 398 | const u32 z = 1U << block_depth; | ||
| 399 | return x * y * z; | ||
| 400 | } | ||
| 401 | |||
| 402 | std::pair<u32, u32> SurfaceParams::GetBlockXY() const { | ||
| 403 | const u32 x_pixels = 64U / GetBytesPerPixel(); | ||
| 404 | const u32 x = x_pixels << block_width; | ||
| 405 | const u32 y = 8U << block_height; | ||
| 406 | return {x, y}; | ||
| 407 | } | ||
| 408 | |||
| 409 | std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | ||
| 410 | const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 411 | const u32 block_size = GetBlockSize(); | ||
| 412 | const u32 block_index = offset / block_size; | ||
| 413 | const u32 gob_offset = offset % block_size; | ||
| 414 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize()); | ||
| 415 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | ||
| 416 | const u32 x_block_pixels = x_gob_pixels << block_width; | ||
| 417 | const u32 y_block_pixels = 8U << block_height; | ||
| 418 | const u32 z_block_pixels = 1U << block_depth; | ||
| 419 | const u32 x_blocks = div_ceil(width, x_block_pixels); | ||
| 420 | const u32 y_blocks = div_ceil(height, y_block_pixels); | ||
| 421 | const u32 z_blocks = div_ceil(depth, z_block_pixels); | ||
| 422 | const u32 base_x = block_index % x_blocks; | ||
| 423 | const u32 base_y = (block_index / x_blocks) % y_blocks; | ||
| 424 | const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; | ||
| 425 | u32 x = base_x * x_block_pixels; | ||
| 426 | u32 y = base_y * y_block_pixels; | ||
| 427 | u32 z = base_z * z_block_pixels; | ||
| 428 | z += gob_index >> block_height; | ||
| 429 | y += (gob_index * 8U) % y_block_pixels; | ||
| 430 | return {x, y, z}; | ||
| 431 | } | ||
| 432 | |||
| 395 | } // namespace VideoCommon | 433 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 129817ad3..992b5c022 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <utility> | ||
| 8 | |||
| 7 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 8 | #include "common/bit_util.h" | 10 | #include "common/bit_util.h" |
| 9 | #include "common/cityhash.h" | 11 | #include "common/cityhash.h" |
| @@ -136,6 +138,15 @@ public: | |||
| 136 | 138 | ||
| 137 | std::size_t GetConvertedMipmapSize(u32 level) const; | 139 | std::size_t GetConvertedMipmapSize(u32 level) const; |
| 138 | 140 | ||
| 141 | /// Returns the size of a single Tegra block of this texture in the guest memory layout. | ||
| 142 | u32 GetBlockSize() const; | ||
| 143 | |||
| 144 | /// Returns the maximum X and Y extents of a single block. | ||
| 145 | std::pair<u32, u32> GetBlockXY() const; | ||
| 146 | |||
| 147 | /// Returns the x, y, z coordinates that correspond to a memory offset. | ||
| 148 | std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; | ||
| 149 | |||
| 139 | /// Returns the size of a layer in bytes in guest memory. | 150 | /// Returns the size of a layer in bytes in guest memory. |
| 140 | std::size_t GetGuestLayerSize() const { | 151 | std::size_t GetGuestLayerSize() const { |
| 141 | return GetLayerSize(false, false); | 152 | return GetLayerSize(false, false); |
| @@ -269,7 +280,8 @@ private: | |||
| 269 | 280 | ||
| 270 | /// Returns the size of all mipmap levels and aligns as needed. | 281 | /// Returns the size of all mipmap levels and aligns as needed. |
| 271 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | 282 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { |
| 272 | return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); | 283 | return GetLayerSize(as_host_size, uncompressed) * |
| 284 | (layer_only ? 1U : (is_layered ? depth : 1U)); | ||
| 273 | } | 285 | } |
| 274 | 286 | ||
| 275 | /// Returns the size of a layer | 287 | /// Returns the size of a layer |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 02d2e9136..f4c015635 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -616,6 +616,86 @@ private: | |||
| 616 | } | 616 | } |
| 617 | 617 | ||
| 618 | /** | 618 | /** |
| 619 | * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D | ||
| 620 | * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of | ||
| 621 | * the HLE methods. | ||
| 622 | * | ||
| 623 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 624 | * @param params The parameters on the new surface. | ||
| 625 | * @param gpu_addr The starting address of the new surface. | ||
| 626 | * @param cache_addr The starting address of the new surface on physical memory. | ||
| 627 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 628 | * left blank. | ||
| 629 | */ | ||
| 630 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | ||
| 631 | const SurfaceParams& params, | ||
| 632 | const GPUVAddr gpu_addr, | ||
| 633 | const CacheAddr cache_addr, | ||
| 634 | bool preserve_contents) { | ||
| 635 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 636 | bool failed = false; | ||
| 637 | if (params.num_levels > 1) { | ||
| 638 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 639 | return std::nullopt; | ||
| 640 | } | ||
| 641 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 642 | bool modified = false; | ||
| 643 | for (auto& surface : overlaps) { | ||
| 644 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 645 | if (src_params.target != SurfaceTarget::Texture2D) { | ||
| 646 | failed = true; | ||
| 647 | break; | ||
| 648 | } | ||
| 649 | if (src_params.height != params.height) { | ||
| 650 | failed = true; | ||
| 651 | break; | ||
| 652 | } | ||
| 653 | if (src_params.block_depth != params.block_depth || | ||
| 654 | src_params.block_height != params.block_height) { | ||
| 655 | failed = true; | ||
| 656 | break; | ||
| 657 | } | ||
| 658 | const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); | ||
| 659 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); | ||
| 660 | modified |= surface->IsModified(); | ||
| 661 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||
| 662 | 1); | ||
| 663 | ImageCopy(surface, new_surface, copy_params); | ||
| 664 | } | ||
| 665 | if (failed) { | ||
| 666 | return std::nullopt; | ||
| 667 | } | ||
| 668 | for (const auto& surface : overlaps) { | ||
| 669 | Unregister(surface); | ||
| 670 | } | ||
| 671 | new_surface->MarkAsModified(modified, Tick()); | ||
| 672 | Register(new_surface); | ||
| 673 | auto view = new_surface->GetMainView(); | ||
| 674 | return {{std::move(new_surface), view}}; | ||
| 675 | } else { | ||
| 676 | for (const auto& surface : overlaps) { | ||
| 677 | if (!surface->MatchTarget(params.target)) { | ||
| 678 | if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { | ||
| 679 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 680 | return std::nullopt; | ||
| 681 | } | ||
| 682 | Unregister(surface); | ||
| 683 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 684 | } | ||
| 685 | return std::nullopt; | ||
| 686 | } | ||
| 687 | if (surface->GetCacheAddr() != cache_addr) { | ||
| 688 | continue; | ||
| 689 | } | ||
| 690 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||
| 691 | return {{surface, surface->GetMainView()}}; | ||
| 692 | } | ||
| 693 | } | ||
| 694 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 695 | } | ||
| 696 | } | ||
| 697 | |||
| 698 | /** | ||
| 619 | * Gets the starting address and parameters of a candidate surface and tries | 699 | * Gets the starting address and parameters of a candidate surface and tries |
| 620 | * to find a matching surface within the cache. This is done in 3 big steps: | 700 | * to find a matching surface within the cache. This is done in 3 big steps: |
| 621 | * | 701 | * |
| @@ -687,6 +767,15 @@ private: | |||
| 687 | } | 767 | } |
| 688 | } | 768 | } |
| 689 | 769 | ||
| 770 | // Check if it's a 3D texture | ||
| 771 | if (params.block_depth > 0) { | ||
| 772 | auto surface = | ||
| 773 | Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); | ||
| 774 | if (surface) { | ||
| 775 | return *surface; | ||
| 776 | } | ||
| 777 | } | ||
| 778 | |||
| 690 | // Split cases between 1 overlap or many. | 779 | // Split cases between 1 overlap or many. |
| 691 | if (overlaps.size() == 1) { | 780 | if (overlaps.size() == 1) { |
| 692 | TSurface current_surface = overlaps[0]; | 781 | TSurface current_surface = overlaps[0]; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index f1e3952bc..e5eac3f3b 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -12,6 +12,10 @@ namespace Tegra::Texture { | |||
| 12 | 12 | ||
// GOB size shift: log2 of the GOB size.
constexpr std::size_t GetGOBSizeShift() {
    return 9;
}

// GOBSize constant. Calculated as 64 bytes in x multiplied by 8 y coords; it represents
// a small rect of (64/bytes_per_pixel)X8.
// Derived from GetGOBSizeShift() so the two values can never disagree.
constexpr std::size_t GetGOBSize() {
    return std::size_t{1} << GetGOBSizeShift();
}