diff options
| author | 2022-11-09 17:58:10 +0100 | |
|---|---|---|
| committer | 2023-01-01 16:43:57 -0500 | |
| commit | aad0cbf024fb8077a9b375a093c60a7e2ab1db3d (patch) | |
| tree | 8c6a86c92ed8cedbafb5f34dd9f72283eaaf4342 | |
| parent | MacroHLE: Add Index Buffer size estimation. (diff) | |
| download | yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.gz yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.xz yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.zip | |
MacroHLE: Add HLE replacement for base vertex and base instance.
22 files changed, 265 insertions, 70 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 73b67f0af..e4802bf9e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 339 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | 339 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; |
| 340 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | 340 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); |
| 341 | } | 341 | } |
| 342 | case IR::Attribute::BaseInstance: | ||
| 343 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); | ||
| 344 | case IR::Attribute::BaseVertex: | ||
| 345 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); | ||
| 342 | case IR::Attribute::FrontFace: | 346 | case IR::Attribute::FrontFace: |
| 343 | return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | 347 | return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), |
| 344 | ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), | 348 | ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), |
| @@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { | |||
| 380 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | 384 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; |
| 381 | return ctx.OpISub(ctx.U32[1], index, base); | 385 | return ctx.OpISub(ctx.U32[1], index, base); |
| 382 | } | 386 | } |
| 387 | case IR::Attribute::BaseInstance: | ||
| 388 | return ctx.OpLoad(ctx.U32[1], ctx.base_instance); | ||
| 389 | case IR::Attribute::BaseVertex: | ||
| 390 | return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); | ||
| 383 | default: | 391 | default: |
| 384 | throw NotImplementedException("Read U32 attribute {}", attr); | 392 | throw NotImplementedException("Read U32 attribute {}", attr); |
| 385 | } | 393 | } |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 41dc6d031..563a5fc49 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1379 | if (loads[IR::Attribute::InstanceId]) { | 1379 | if (loads[IR::Attribute::InstanceId]) { |
| 1380 | if (profile.support_vertex_instance_id) { | 1380 | if (profile.support_vertex_instance_id) { |
| 1381 | instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); | 1381 | instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); |
| 1382 | if (loads[IR::Attribute::BaseInstance]) { | ||
| 1383 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||
| 1384 | } | ||
| 1382 | } else { | 1385 | } else { |
| 1383 | instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); | 1386 | instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); |
| 1384 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | 1387 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); |
| 1385 | } | 1388 | } |
| 1389 | } else if (loads[IR::Attribute::BaseInstance]) { | ||
| 1390 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||
| 1386 | } | 1391 | } |
| 1387 | if (loads[IR::Attribute::VertexId]) { | 1392 | if (loads[IR::Attribute::VertexId]) { |
| 1388 | if (profile.support_vertex_instance_id) { | 1393 | if (profile.support_vertex_instance_id) { |
| 1389 | vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); | 1394 | vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); |
| 1395 | if (loads[IR::Attribute::BaseVertex]) { | ||
| 1396 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||
| 1397 | } | ||
| 1390 | } else { | 1398 | } else { |
| 1391 | vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); | 1399 | vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); |
| 1392 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | 1400 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); |
| 1393 | } | 1401 | } |
| 1402 | } else if (loads[IR::Attribute::BaseVertex]) { | ||
| 1403 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||
| 1394 | } | 1404 | } |
| 1395 | if (loads[IR::Attribute::FrontFace]) { | 1405 | if (loads[IR::Attribute::FrontFace]) { |
| 1396 | front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); | 1406 | front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); |
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 402f2664f..b9b4455f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h | |||
| @@ -34,6 +34,11 @@ public: | |||
| 34 | 34 | ||
| 35 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | 35 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; |
| 36 | 36 | ||
| 37 | [[nodiscard]] virtual bool HasHLEMacroState() const = 0; | ||
| 38 | |||
| 39 | [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer( | ||
| 40 | u32 bank, u32 offset) = 0; | ||
| 41 | |||
| 37 | virtual void Dump(u64 hash) = 0; | 42 | virtual void Dump(u64 hash) = 0; |
| 38 | 43 | ||
| 39 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | 44 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { |
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7d3d882e4..73e189a89 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp | |||
| @@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) { | |||
| 446 | return "ViewportMask"; | 446 | return "ViewportMask"; |
| 447 | case Attribute::FrontFace: | 447 | case Attribute::FrontFace: |
| 448 | return "FrontFace"; | 448 | return "FrontFace"; |
| 449 | case Attribute::BaseInstance: | ||
| 450 | return "BaseInstance"; | ||
| 451 | case Attribute::BaseVertex: | ||
| 452 | return "BaseVertex"; | ||
| 449 | } | 453 | } |
| 450 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | 454 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); |
| 451 | } | 455 | } |
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 6ee3947b1..364d8a912 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h | |||
| @@ -219,6 +219,10 @@ enum class Attribute : u64 { | |||
| 219 | FixedFncTexture9Q = 231, | 219 | FixedFncTexture9Q = 231, |
| 220 | ViewportMask = 232, | 220 | ViewportMask = 232, |
| 221 | FrontFace = 255, | 221 | FrontFace = 255, |
| 222 | |||
| 223 | // Implementation attributes | ||
| 224 | BaseInstance = 256, | ||
| 225 | BaseVertex = 257, | ||
| 222 | }; | 226 | }; |
| 223 | 227 | ||
| 224 | constexpr size_t NUM_GENERICS = 32; | 228 | constexpr size_t NUM_GENERICS = 32; |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0cdac0eff..eb2e49a68 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | |||
| 294 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | 294 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); |
| 295 | } | 295 | } |
| 296 | 296 | ||
| 297 | U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { | ||
| 298 | return GetAttributeU32(attribute, Imm32(0)); | ||
| 299 | } | ||
| 300 | |||
| 301 | U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { | ||
| 302 | return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); | ||
| 303 | } | ||
| 304 | |||
| 297 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | 305 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { |
| 298 | Inst(Opcode::SetAttribute, attribute, value, vertex); | 306 | Inst(Opcode::SetAttribute, attribute, value, vertex); |
| 299 | } | 307 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2df992feb..7aaaa4ab0 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -74,6 +74,8 @@ public: | |||
| 74 | 74 | ||
| 75 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | 75 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); |
| 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); |
| 77 | [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); | ||
| 78 | [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); | ||
| 77 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | 79 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); |
| 78 | 80 | ||
| 79 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | 81 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 3adbd2b16..ac159d24b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 219 | } | 219 | } |
| 220 | Optimization::SsaRewritePass(program); | 220 | Optimization::SsaRewritePass(program); |
| 221 | 221 | ||
| 222 | Optimization::ConstantPropagationPass(program); | 222 | Optimization::ConstantPropagationPass(env, program); |
| 223 | 223 | ||
| 224 | Optimization::PositionPass(env, program); | 224 | Optimization::PositionPass(env, program); |
| 225 | 225 | ||
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 826f9a54a..ac10405f3 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <type_traits> | 7 | #include <type_traits> |
| 8 | 8 | ||
| 9 | #include "common/bit_cast.h" | 9 | #include "common/bit_cast.h" |
| 10 | #include "shader_recompiler/environment.h" | ||
| 10 | #include "shader_recompiler/exception.h" | 11 | #include "shader_recompiler/exception.h" |
| 11 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 12 | #include "shader_recompiler/frontend/ir/value.h" | 13 | #include "shader_recompiler/frontend/ir/value.h" |
| @@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | |||
| 515 | case IR::Attribute::PrimitiveId: | 516 | case IR::Attribute::PrimitiveId: |
| 516 | case IR::Attribute::InstanceId: | 517 | case IR::Attribute::InstanceId: |
| 517 | case IR::Attribute::VertexId: | 518 | case IR::Attribute::VertexId: |
| 519 | case IR::Attribute::BaseVertex: | ||
| 520 | case IR::Attribute::BaseInstance: | ||
| 518 | break; | 521 | break; |
| 519 | default: | 522 | default: |
| 520 | return; | 523 | return; |
| @@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 644 | } | 647 | } |
| 645 | } | 648 | } |
| 646 | 649 | ||
| 647 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | 650 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { |
| 651 | const IR::Value bank{inst.Arg(0)}; | ||
| 652 | const IR::Value offset{inst.Arg(1)}; | ||
| 653 | if (!bank.IsImmediate() || !offset.IsImmediate()) { | ||
| 654 | return; | ||
| 655 | } | ||
| 656 | const auto bank_value = bank.U32(); | ||
| 657 | const auto offset_value = offset.U32(); | ||
| 658 | auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); | ||
| 659 | if (!replacement) { | ||
| 660 | return; | ||
| 661 | } | ||
| 662 | const auto new_attribute = [replacement]() { | ||
| 663 | switch (*replacement) { | ||
| 664 | case ReplaceConstant::BaseInstance: | ||
| 665 | return IR::Attribute::BaseInstance; | ||
| 666 | case ReplaceConstant::BaseVertex: | ||
| 667 | return IR::Attribute::BaseVertex; | ||
| 668 | default: | ||
| 669 | throw NotImplementedException("Not implemented replacement variable {}", *replacement); | ||
| 670 | } | ||
| 671 | }(); | ||
| 672 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 673 | if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { | ||
| 674 | inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); | ||
| 675 | } else { | ||
| 676 | inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); | ||
| 677 | } | ||
| 678 | } | ||
| 679 | |||
| 680 | void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | ||
| 648 | switch (inst.GetOpcode()) { | 681 | switch (inst.GetOpcode()) { |
| 649 | case IR::Opcode::GetRegister: | 682 | case IR::Opcode::GetRegister: |
| 650 | return FoldGetRegister(inst); | 683 | return FoldGetRegister(inst); |
| @@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||
| 789 | IR::Opcode::CompositeInsertF16x4); | 822 | IR::Opcode::CompositeInsertF16x4); |
| 790 | case IR::Opcode::FSwizzleAdd: | 823 | case IR::Opcode::FSwizzleAdd: |
| 791 | return FoldFSwizzleAdd(block, inst); | 824 | return FoldFSwizzleAdd(block, inst); |
| 825 | case IR::Opcode::GetCbufF32: | ||
| 826 | case IR::Opcode::GetCbufU32: | ||
| 827 | if (env.HasHLEMacroState()) { | ||
| 828 | return FoldConstBuffer(env, block, inst); | ||
| 829 | } | ||
| 830 | break; | ||
| 792 | default: | 831 | default: |
| 793 | break; | 832 | break; |
| 794 | } | 833 | } |
| 795 | } | 834 | } |
| 796 | } // Anonymous namespace | 835 | } // Anonymous namespace |
| 797 | 836 | ||
| 798 | void ConstantPropagationPass(IR::Program& program) { | 837 | void ConstantPropagationPass(Environment& env, IR::Program& program) { |
| 799 | const auto end{program.post_order_blocks.rend()}; | 838 | const auto end{program.post_order_blocks.rend()}; |
| 800 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | 839 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { |
| 801 | IR::Block* const block{*it}; | 840 | IR::Block* const block{*it}; |
| 802 | for (IR::Inst& inst : block->Instructions()) { | 841 | for (IR::Inst& inst : block->Instructions()) { |
| 803 | ConstantPropagation(*block, inst); | 842 | ConstantPropagation(env, *block, inst); |
| 804 | } | 843 | } |
| 805 | } | 844 | } |
| 806 | } | 845 | } |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 11bfe801a..1f8f2ba95 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -13,7 +13,7 @@ struct HostTranslateInfo; | |||
| 13 | namespace Shader::Optimization { | 13 | namespace Shader::Optimization { |
| 14 | 14 | ||
| 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); | 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); |
| 16 | void ConstantPropagationPass(IR::Program& program); | 16 | void ConstantPropagationPass(Environment& env, IR::Program& program); |
| 17 | void DeadCodeEliminationPass(IR::Program& program); | 17 | void DeadCodeEliminationPass(IR::Program& program); |
| 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); |
| 19 | void IdentityRemovalPass(IR::Program& program); | 19 | void IdentityRemovalPass(IR::Program& program); |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d9c6e92db..ea0f48344 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -16,6 +16,11 @@ | |||
| 16 | 16 | ||
| 17 | namespace Shader { | 17 | namespace Shader { |
| 18 | 18 | ||
| 19 | enum class ReplaceConstant : u32 { | ||
| 20 | BaseInstance, | ||
| 21 | BaseVertex, | ||
| 22 | }; | ||
| 23 | |||
| 19 | enum class TextureType : u32 { | 24 | enum class TextureType : u32 { |
| 20 | Color1D, | 25 | Color1D, |
| 21 | ColorArray1D, | 26 | ColorArray1D, |
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h index 7b28a285f..18a9aaf50 100644 --- a/src/shader_recompiler/varying_state.h +++ b/src/shader_recompiler/varying_state.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | namespace Shader { | 11 | namespace Shader { |
| 12 | 12 | ||
| 13 | struct VaryingState { | 13 | struct VaryingState { |
| 14 | std::bitset<256> mask{}; | 14 | std::bitset<512> mask{}; |
| 15 | 15 | ||
| 16 | void Set(IR::Attribute attribute, bool state = true) { | 16 | void Set(IR::Attribute attribute, bool state = true) { |
| 17 | mask[static_cast<size_t>(attribute)] = state; | 17 | mask[static_cast<size_t>(attribute)] = state; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a0dd7400d..50d8a94b1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() { | |||
| 182 | size_t Maxwell3D::EstimateIndexBufferSize() { | 182 | size_t Maxwell3D::EstimateIndexBufferSize() { |
| 183 | GPUVAddr start_address = regs.index_buffer.StartAddress(); | 183 | GPUVAddr start_address = regs.index_buffer.StartAddress(); |
| 184 | GPUVAddr end_address = regs.index_buffer.EndAddress(); | 184 | GPUVAddr end_address = regs.index_buffer.EndAddress(); |
| 185 | return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address), | 185 | constexpr std::array<size_t, 4> max_sizes = { |
| 186 | static_cast<size_t>(end_address - start_address)); | 186 | std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), |
| 187 | std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; | ||
| 188 | const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); | ||
| 189 | return std::min<size_t>( | ||
| 190 | memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / | ||
| 191 | byte_size, | ||
| 192 | static_cast<size_t>(end_address - start_address)); | ||
| 187 | } | 193 | } |
| 188 | 194 | ||
| 189 | u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { | 195 | u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { |
| @@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { | |||
| 572 | return regs.reg_array[method]; | 578 | return regs.reg_array[method]; |
| 573 | } | 579 | } |
| 574 | 580 | ||
| 581 | void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { | ||
| 582 | const u64 key = (static_cast<u64>(bank) << 32) | offset; | ||
| 583 | replace_table.emplace(key, name); | ||
| 584 | } | ||
| 585 | |||
| 575 | } // namespace Tegra::Engines | 586 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cfe1e4883..397e88f67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -3020,6 +3020,23 @@ public: | |||
| 3020 | /// Store temporary hw register values, used by some calls to restore state after a operation | 3020 | /// Store temporary hw register values, used by some calls to restore state after a operation |
| 3021 | Regs shadow_state; | 3021 | Regs shadow_state; |
| 3022 | 3022 | ||
| 3023 | // None Engine | ||
| 3024 | enum class EngineHint : u32 { | ||
| 3025 | None = 0x0, | ||
| 3026 | OnHLEMacro = 0x1, | ||
| 3027 | }; | ||
| 3028 | |||
| 3029 | EngineHint engine_state{EngineHint::None}; | ||
| 3030 | |||
| 3031 | enum class HLEReplaceName : u32 { | ||
| 3032 | BaseVertex = 0x0, | ||
| 3033 | BaseInstance = 0x1, | ||
| 3034 | }; | ||
| 3035 | |||
| 3036 | void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); | ||
| 3037 | |||
| 3038 | std::unordered_map<u64, HLEReplaceName> replace_table; | ||
| 3039 | |||
| 3023 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); | 3040 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); |
| 3024 | static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); | 3041 | static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); |
| 3025 | 3042 | ||
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 93b6d42a4..638247e55 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -14,26 +14,29 @@ | |||
| 14 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 15 | 15 | ||
| 16 | namespace Tegra { | 16 | namespace Tegra { |
| 17 | |||
| 18 | using Maxwell = Engines::Maxwell3D; | ||
| 19 | |||
| 17 | namespace { | 20 | namespace { |
| 18 | 21 | ||
| 19 | bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | 22 | bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { |
| 20 | switch (topology) { | 23 | switch (topology) { |
| 21 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | 24 | case Maxwell::Regs::PrimitiveTopology::Points: |
| 22 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | 25 | case Maxwell::Regs::PrimitiveTopology::Lines: |
| 23 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: | 26 | case Maxwell::Regs::PrimitiveTopology::LineLoop: |
| 24 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | 27 | case Maxwell::Regs::PrimitiveTopology::LineStrip: |
| 25 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | 28 | case Maxwell::Regs::PrimitiveTopology::Triangles: |
| 26 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | 29 | case Maxwell::Regs::PrimitiveTopology::TriangleStrip: |
| 27 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | 30 | case Maxwell::Regs::PrimitiveTopology::TriangleFan: |
| 28 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | 31 | case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: |
| 29 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | 32 | case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: |
| 30 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | 33 | case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: |
| 31 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | 34 | case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: |
| 32 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: | 35 | case Maxwell::Regs::PrimitiveTopology::Patches: |
| 33 | return true; | 36 | return true; |
| 34 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: | 37 | case Maxwell::Regs::PrimitiveTopology::Quads: |
| 35 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: | 38 | case Maxwell::Regs::PrimitiveTopology::QuadStrip: |
| 36 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: | 39 | case Maxwell::Regs::PrimitiveTopology::Polygon: |
| 37 | default: | 40 | default: |
| 38 | return false; | 41 | return false; |
| 39 | } | 42 | } |
| @@ -82,8 +85,7 @@ public: | |||
| 82 | : HLEMacroImpl(maxwell3d_), extended(extended_) {} | 85 | : HLEMacroImpl(maxwell3d_), extended(extended_) {} |
| 83 | 86 | ||
| 84 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | 87 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
| 85 | auto topology = | 88 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |
| 86 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||
| 87 | if (!IsTopologySafe(topology)) { | 89 | if (!IsTopologySafe(topology)) { |
| 88 | Fallback(parameters); | 90 | Fallback(parameters); |
| 89 | return; | 91 | return; |
| @@ -99,18 +101,16 @@ public: | |||
| 99 | params.stride = 0; | 101 | params.stride = 0; |
| 100 | 102 | ||
| 101 | if (extended) { | 103 | if (extended) { |
| 102 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 104 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 103 | maxwell3d.CallMethod(0x8e4, parameters[4], true); | 105 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); |
| 104 | } | 106 | } |
| 105 | 107 | ||
| 106 | maxwell3d.draw_manager->DrawArrayIndirect(topology); | 108 | maxwell3d.draw_manager->DrawArrayIndirect(topology); |
| 107 | 109 | ||
| 108 | if (extended) { | 110 | if (extended) { |
| 109 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 111 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 110 | maxwell3d.CallMethod(0x8e4, 0, true); | 112 | maxwell3d.replace_table.clear(); |
| 111 | } | 113 | } |
| 112 | maxwell3d.regs.vertex_buffer.first = 0; | ||
| 113 | maxwell3d.regs.vertex_buffer.count = 0; | ||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | private: | 116 | private: |
| @@ -134,13 +134,18 @@ private: | |||
| 134 | 134 | ||
| 135 | const u32 base_instance = parameters[4]; | 135 | const u32 base_instance = parameters[4]; |
| 136 | if (extended) { | 136 | if (extended) { |
| 137 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 137 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 138 | maxwell3d.CallMethod(0x8e4, base_instance, true); | 138 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); |
| 139 | } | 139 | } |
| 140 | 140 | ||
| 141 | maxwell3d.draw_manager->DrawArray( | 141 | maxwell3d.draw_manager->DrawArray( |
| 142 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | 142 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), |
| 143 | vertex_first, vertex_count, base_instance, instance_count); | 143 | vertex_first, vertex_count, base_instance, instance_count); |
| 144 | |||
| 145 | if (extended) { | ||
| 146 | maxwell3d.engine_state = Maxwell::EngineHint::None; | ||
| 147 | maxwell3d.replace_table.clear(); | ||
| 148 | } | ||
| 144 | } | 149 | } |
| 145 | 150 | ||
| 146 | bool extended; | 151 | bool extended; |
| @@ -151,8 +156,7 @@ public: | |||
| 151 | explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | 156 | explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
| 152 | 157 | ||
| 153 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | 158 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
| 154 | auto topology = | 159 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |
| 155 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||
| 156 | if (!IsTopologySafe(topology)) { | 160 | if (!IsTopologySafe(topology)) { |
| 157 | Fallback(parameters); | 161 | Fallback(parameters); |
| 158 | return; | 162 | return; |
| @@ -164,16 +168,12 @@ public: | |||
| 164 | minimum_limit = std::max(parameters[3], minimum_limit); | 168 | minimum_limit = std::max(parameters[3], minimum_limit); |
| 165 | } | 169 | } |
| 166 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | 170 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
| 167 | const u32 base_size = std::max(minimum_limit, estimate); | 171 | const u32 base_size = std::max<u32>(minimum_limit, estimate); |
| 168 | const u32 element_base = parameters[4]; | ||
| 169 | const u32 base_instance = parameters[5]; | ||
| 170 | maxwell3d.regs.index_buffer.first = 0; | ||
| 171 | maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping | ||
| 172 | maxwell3d.regs.draw.topology.Assign(topology); | 172 | maxwell3d.regs.draw.topology.Assign(topology); |
| 173 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 173 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 174 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 174 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 175 | maxwell3d.CallMethod(0x8e4, element_base, true); | 175 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |
| 176 | maxwell3d.CallMethod(0x8e5, base_instance, true); | 176 | maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); |
| 177 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); | 177 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
| 178 | params.is_indexed = true; | 178 | params.is_indexed = true; |
| 179 | params.include_count = false; | 179 | params.include_count = false; |
| @@ -184,9 +184,8 @@ public: | |||
| 184 | params.stride = 0; | 184 | params.stride = 0; |
| 185 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 185 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 186 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | 186 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); |
| 187 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 187 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 188 | maxwell3d.CallMethod(0x8e4, 0x0, true); | 188 | maxwell3d.replace_table.clear(); |
| 189 | maxwell3d.CallMethod(0x8e5, 0x0, true); | ||
| 190 | } | 189 | } |
| 191 | 190 | ||
| 192 | private: | 191 | private: |
| @@ -197,18 +196,17 @@ private: | |||
| 197 | const u32 base_instance = parameters[5]; | 196 | const u32 base_instance = parameters[5]; |
| 198 | maxwell3d.regs.vertex_id_base = element_base; | 197 | maxwell3d.regs.vertex_id_base = element_base; |
| 199 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 198 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 200 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 199 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 201 | maxwell3d.CallMethod(0x8e4, element_base, true); | 200 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |
| 202 | maxwell3d.CallMethod(0x8e5, base_instance, true); | 201 | maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); |
| 203 | 202 | ||
| 204 | maxwell3d.draw_manager->DrawIndex( | 203 | maxwell3d.draw_manager->DrawIndex( |
| 205 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | 204 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), |
| 206 | parameters[3], parameters[1], element_base, base_instance, instance_count); | 205 | parameters[3], parameters[1], element_base, base_instance, instance_count); |
| 207 | 206 | ||
| 208 | maxwell3d.regs.vertex_id_base = 0x0; | 207 | maxwell3d.regs.vertex_id_base = 0x0; |
| 209 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 208 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 210 | maxwell3d.CallMethod(0x8e4, 0x0, true); | 209 | maxwell3d.replace_table.clear(); |
| 211 | maxwell3d.CallMethod(0x8e5, 0x0, true); | ||
| 212 | } | 210 | } |
| 213 | 211 | ||
| 214 | u32 minimum_limit{1 << 18}; | 212 | u32 minimum_limit{1 << 18}; |
| @@ -238,8 +236,7 @@ public: | |||
| 238 | : HLEMacroImpl(maxwell3d_) {} | 236 | : HLEMacroImpl(maxwell3d_) {} |
| 239 | 237 | ||
| 240 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | 238 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
| 241 | const auto topology = | 239 | const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); |
| 242 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); | ||
| 243 | if (!IsTopologySafe(topology)) { | 240 | if (!IsTopologySafe(topology)) { |
| 244 | Fallback(parameters); | 241 | Fallback(parameters); |
| 245 | return; | 242 | return; |
| @@ -277,9 +274,6 @@ public: | |||
| 277 | } | 274 | } |
| 278 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | 275 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
| 279 | const u32 base_size = std::max(minimum_limit, estimate); | 276 | const u32 base_size = std::max(minimum_limit, estimate); |
| 280 | |||
| 281 | maxwell3d.regs.index_buffer.first = 0; | ||
| 282 | maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size); | ||
| 283 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 277 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 284 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); | 278 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
| 285 | params.is_indexed = true; | 279 | params.is_indexed = true; |
| @@ -290,7 +284,12 @@ public: | |||
| 290 | params.max_draw_counts = draw_count; | 284 | params.max_draw_counts = draw_count; |
| 291 | params.stride = stride; | 285 | params.stride = stride; |
| 292 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 286 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 293 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); | 287 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 288 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||
| 289 | maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||
| 290 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||
| 291 | maxwell3d.engine_state = Maxwell::EngineHint::None; | ||
| 292 | maxwell3d.replace_table.clear(); | ||
| 294 | } | 293 | } |
| 295 | 294 | ||
| 296 | private: | 295 | private: |
| @@ -299,9 +298,8 @@ private: | |||
| 299 | // Clean everything. | 298 | // Clean everything. |
| 300 | // Clean everything. | 299 | // Clean everything. |
| 301 | maxwell3d.regs.vertex_id_base = 0x0; | 300 | maxwell3d.regs.vertex_id_base = 0x0; |
| 302 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 301 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 303 | maxwell3d.CallMethod(0x8e4, 0x0, true); | 302 | maxwell3d.replace_table.clear(); |
| 304 | maxwell3d.CallMethod(0x8e5, 0x0, true); | ||
| 305 | }); | 303 | }); |
| 306 | maxwell3d.RefreshParameters(); | 304 | maxwell3d.RefreshParameters(); |
| 307 | const u32 start_indirect = parameters[0]; | 305 | const u32 start_indirect = parameters[0]; |
| @@ -310,8 +308,7 @@ private: | |||
| 310 | // Nothing to do. | 308 | // Nothing to do. |
| 311 | return; | 309 | return; |
| 312 | } | 310 | } |
| 313 | const auto topology = | 311 | const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); |
| 314 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); | ||
| 315 | maxwell3d.regs.draw.topology.Assign(topology); | 312 | maxwell3d.regs.draw.topology.Assign(topology); |
| 316 | const u32 padding = parameters[3]; | 313 | const u32 padding = parameters[3]; |
| 317 | const std::size_t max_draws = parameters[4]; | 314 | const std::size_t max_draws = parameters[4]; |
| @@ -326,9 +323,9 @@ private: | |||
| 326 | const u32 base_vertex = parameters[base + 3]; | 323 | const u32 base_vertex = parameters[base + 3]; |
| 327 | const u32 base_instance = parameters[base + 4]; | 324 | const u32 base_instance = parameters[base + 4]; |
| 328 | maxwell3d.regs.vertex_id_base = base_vertex; | 325 | maxwell3d.regs.vertex_id_base = base_vertex; |
| 329 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 326 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 330 | maxwell3d.CallMethod(0x8e4, base_vertex, true); | 327 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |
| 331 | maxwell3d.CallMethod(0x8e5, base_instance, true); | 328 | maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); |
| 332 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 329 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 333 | maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], | 330 | maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], |
| 334 | base_vertex, base_instance, parameters[base + 1]); | 331 | base_vertex, base_instance, parameters[base + 1]); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8f6c51045..11e7d225e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { | |||
| 577 | return range_so_far; | 577 | return range_so_far; |
| 578 | } | 578 | } |
| 579 | 579 | ||
| 580 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | 580 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { |
| 581 | PTEKind base_kind = GetPageKind(gpu_addr); | 581 | PTEKind base_kind = GetPageKind(gpu_addr); |
| 582 | if (base_kind == PTEKind::INVALID) { | 582 | if (base_kind == PTEKind::INVALID) { |
| 583 | return 0; | 583 | return 0; |
| @@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | |||
| 596 | return true; | 596 | return true; |
| 597 | } | 597 | } |
| 598 | range_so_far += copy_amount; | 598 | range_so_far += copy_amount; |
| 599 | if (range_so_far >= max_size) { | ||
| 600 | result = true; | ||
| 601 | return true; | ||
| 602 | } | ||
| 599 | return false; | 603 | return false; |
| 600 | }; | 604 | }; |
| 601 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 605 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| @@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | |||
| 605 | return true; | 609 | return true; |
| 606 | } | 610 | } |
| 607 | range_so_far += copy_amount; | 611 | range_so_far += copy_amount; |
| 612 | if (range_so_far >= max_size) { | ||
| 613 | result = true; | ||
| 614 | return true; | ||
| 615 | } | ||
| 608 | return false; | 616 | return false; |
| 609 | }; | 617 | }; |
| 610 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 618 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 65f6e8134..ca22520d7 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -118,7 +118,8 @@ public: | |||
| 118 | 118 | ||
| 119 | PTEKind GetPageKind(GPUVAddr gpu_addr) const; | 119 | PTEKind GetPageKind(GPUVAddr gpu_addr) const; |
| 120 | 120 | ||
| 121 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; | 121 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, |
| 122 | size_t max_size = std::numeric_limits<size_t>::max()) const; | ||
| 122 | 123 | ||
| 123 | private: | 124 | private: |
| 124 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | 125 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index e62b36822..df229f41b 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 97 | smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); | 97 | smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); |
| 98 | alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); | 98 | alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); |
| 99 | alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); | 99 | alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); |
| 100 | app_stage.Assign(maxwell3d.engine_state); | ||
| 100 | 101 | ||
| 101 | for (size_t i = 0; i < regs.rt.size(); ++i) { | 102 | for (size_t i = 0; i < regs.rt.size(); ++i) { |
| 102 | color_formats[i] = static_cast<u8>(regs.rt[i].format); | 103 | color_formats[i] = static_cast<u8>(regs.rt[i].format); |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index ab79fb8f3..03bf64b57 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -197,6 +197,7 @@ struct FixedPipelineState { | |||
| 197 | BitField<14, 1, u32> smooth_lines; | 197 | BitField<14, 1, u32> smooth_lines; |
| 198 | BitField<15, 1, u32> alpha_to_coverage_enabled; | 198 | BitField<15, 1, u32> alpha_to_coverage_enabled; |
| 199 | BitField<16, 1, u32> alpha_to_one_enabled; | 199 | BitField<16, 1, u32> alpha_to_one_enabled; |
| 200 | BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage; | ||
| 200 | }; | 201 | }; |
| 201 | std::array<u8, Maxwell::NumRenderTargets> color_formats; | 202 | std::array<u8, Maxwell::NumRenderTargets> color_formats; |
| 202 | 203 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e7262420c..58b955821 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; | |||
| 54 | using VideoCommon::GenericEnvironment; | 54 | using VideoCommon::GenericEnvironment; |
| 55 | using VideoCommon::GraphicsEnvironment; | 55 | using VideoCommon::GraphicsEnvironment; |
| 56 | 56 | ||
| 57 | constexpr u32 CACHE_VERSION = 8; | 57 | constexpr u32 CACHE_VERSION = 9; |
| 58 | 58 | ||
| 59 | template <typename Container> | 59 | template <typename Container> |
| 60 | auto MakeSpan(Container& container) { | 60 | auto MakeSpan(Container& container) { |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 958810747..99d85bfb3 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp | |||
| @@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | |||
| 202 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | 202 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; |
| 203 | const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; | 203 | const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; |
| 204 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | 204 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; |
| 205 | const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())}; | ||
| 205 | 206 | ||
| 206 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | 207 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) |
| 207 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | 208 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) |
| 208 | .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), | 209 | .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), |
| 209 | sizeof(num_texture_pixel_formats)) | 210 | sizeof(num_texture_pixel_formats)) |
| 210 | .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | 211 | .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) |
| 212 | .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values), | ||
| 213 | sizeof(num_cbuf_replacement_values)) | ||
| 211 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | 214 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) |
| 212 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | 215 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) |
| 213 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | 216 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) |
| @@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | |||
| 229 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | 232 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) |
| 230 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | 233 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); |
| 231 | } | 234 | } |
| 235 | for (const auto& [key, type] : cbuf_replacements) { | ||
| 236 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 237 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 238 | } | ||
| 232 | if (stage == Shader::Stage::Compute) { | 239 | if (stage == Shader::Stage::Compute) { |
| 233 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | 240 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) |
| 234 | .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); | 241 | .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); |
| @@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | |||
| 318 | ASSERT(local_size <= std::numeric_limits<u32>::max()); | 325 | ASSERT(local_size <= std::numeric_limits<u32>::max()); |
| 319 | local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; | 326 | local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; |
| 320 | texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; | 327 | texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; |
| 328 | has_hle_engine_state = | ||
| 329 | maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; | ||
| 321 | } | 330 | } |
| 322 | 331 | ||
| 323 | u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | 332 | u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { |
| @@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | |||
| 331 | return value; | 340 | return value; |
| 332 | } | 341 | } |
| 333 | 342 | ||
| 343 | std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank, | ||
| 344 | u32 offset) { | ||
| 345 | if (!has_hle_engine_state) { | ||
| 346 | return std::nullopt; | ||
| 347 | } | ||
| 348 | const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||
| 349 | auto it = maxwell3d->replace_table.find(key); | ||
| 350 | if (it == maxwell3d->replace_table.end()) { | ||
| 351 | return std::nullopt; | ||
| 352 | } | ||
| 353 | const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { | ||
| 354 | switch (name) { | ||
| 355 | case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: | ||
| 356 | return Shader::ReplaceConstant::BaseVertex; | ||
| 357 | case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: | ||
| 358 | return Shader::ReplaceConstant::BaseInstance; | ||
| 359 | default: | ||
| 360 | UNREACHABLE(); | ||
| 361 | } | ||
| 362 | }(it->second); | ||
| 363 | cbuf_replacements.emplace(key, converted_value); | ||
| 364 | return converted_value; | ||
| 365 | } | ||
| 366 | |||
| 334 | Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | 367 | Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { |
| 335 | const auto& regs{maxwell3d->regs}; | 368 | const auto& regs{maxwell3d->regs}; |
| 336 | const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; | 369 | const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; |
| @@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | |||
| 409 | u64 num_texture_types{}; | 442 | u64 num_texture_types{}; |
| 410 | u64 num_texture_pixel_formats{}; | 443 | u64 num_texture_pixel_formats{}; |
| 411 | u64 num_cbuf_values{}; | 444 | u64 num_cbuf_values{}; |
| 445 | u64 num_cbuf_replacement_values{}; | ||
| 412 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | 446 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) |
| 413 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | 447 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) |
| 414 | .read(reinterpret_cast<char*>(&num_texture_pixel_formats), | 448 | .read(reinterpret_cast<char*>(&num_texture_pixel_formats), |
| 415 | sizeof(num_texture_pixel_formats)) | 449 | sizeof(num_texture_pixel_formats)) |
| 416 | .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | 450 | .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) |
| 451 | .read(reinterpret_cast<char*>(&num_cbuf_replacement_values), | ||
| 452 | sizeof(num_cbuf_replacement_values)) | ||
| 417 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | 453 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) |
| 418 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | 454 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) |
| 419 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | 455 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) |
| @@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | |||
| 444 | .read(reinterpret_cast<char*>(&value), sizeof(value)); | 480 | .read(reinterpret_cast<char*>(&value), sizeof(value)); |
| 445 | cbuf_values.emplace(key, value); | 481 | cbuf_values.emplace(key, value); |
| 446 | } | 482 | } |
| 483 | for (size_t i = 0; i < num_cbuf_replacement_values; ++i) { | ||
| 484 | u64 key; | ||
| 485 | Shader::ReplaceConstant value; | ||
| 486 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 487 | .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||
| 488 | cbuf_replacements.emplace(key, value); | ||
| 489 | } | ||
| 447 | if (stage == Shader::Stage::Compute) { | 490 | if (stage == Shader::Stage::Compute) { |
| 448 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | 491 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) |
| 449 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | 492 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); |
| @@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const { | |||
| 512 | return workgroup_size; | 555 | return workgroup_size; |
| 513 | } | 556 | } |
| 514 | 557 | ||
| 558 | std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank, | ||
| 559 | u32 offset) { | ||
| 560 | const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||
| 561 | auto it = cbuf_replacements.find(key); | ||
| 562 | if (it == cbuf_replacements.end()) { | ||
| 563 | return std::nullopt; | ||
| 564 | } | ||
| 565 | return it->second; | ||
| 566 | } | ||
| 567 | |||
| 515 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | 568 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, |
| 516 | const std::filesystem::path& filename, u32 cache_version) try { | 569 | const std::filesystem::path& filename, u32 cache_version) try { |
| 517 | std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); | 570 | std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); |
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 1342fab1e..d75987a52 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h | |||
| @@ -60,6 +60,10 @@ public: | |||
| 60 | 60 | ||
| 61 | void Serialize(std::ofstream& file) const; | 61 | void Serialize(std::ofstream& file) const; |
| 62 | 62 | ||
| 63 | bool HasHLEMacroState() const override { | ||
| 64 | return has_hle_engine_state; | ||
| 65 | } | ||
| 66 | |||
| 63 | protected: | 67 | protected: |
| 64 | std::optional<u64> TryFindSize(); | 68 | std::optional<u64> TryFindSize(); |
| 65 | 69 | ||
| @@ -73,6 +77,7 @@ protected: | |||
| 73 | std::unordered_map<u32, Shader::TextureType> texture_types; | 77 | std::unordered_map<u32, Shader::TextureType> texture_types; |
| 74 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | 78 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; |
| 75 | std::unordered_map<u64, u32> cbuf_values; | 79 | std::unordered_map<u64, u32> cbuf_values; |
| 80 | std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||
| 76 | 81 | ||
| 77 | u32 local_memory_size{}; | 82 | u32 local_memory_size{}; |
| 78 | u32 texture_bound{}; | 83 | u32 texture_bound{}; |
| @@ -89,6 +94,7 @@ protected: | |||
| 89 | u32 viewport_transform_state = 1; | 94 | u32 viewport_transform_state = 1; |
| 90 | 95 | ||
| 91 | bool has_unbound_instructions = false; | 96 | bool has_unbound_instructions = false; |
| 97 | bool has_hle_engine_state = false; | ||
| 92 | }; | 98 | }; |
| 93 | 99 | ||
| 94 | class GraphicsEnvironment final : public GenericEnvironment { | 100 | class GraphicsEnvironment final : public GenericEnvironment { |
| @@ -109,6 +115,8 @@ public: | |||
| 109 | 115 | ||
| 110 | u32 ReadViewportTransformState() override; | 116 | u32 ReadViewportTransformState() override; |
| 111 | 117 | ||
| 118 | std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override; | ||
| 119 | |||
| 112 | private: | 120 | private: |
| 113 | Tegra::Engines::Maxwell3D* maxwell3d{}; | 121 | Tegra::Engines::Maxwell3D* maxwell3d{}; |
| 114 | size_t stage_index{}; | 122 | size_t stage_index{}; |
| @@ -131,6 +139,11 @@ public: | |||
| 131 | 139 | ||
| 132 | u32 ReadViewportTransformState() override; | 140 | u32 ReadViewportTransformState() override; |
| 133 | 141 | ||
| 142 | std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer( | ||
| 143 | [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override { | ||
| 144 | return std::nullopt; | ||
| 145 | } | ||
| 146 | |||
| 134 | private: | 147 | private: |
| 135 | Tegra::Engines::KeplerCompute* kepler_compute{}; | 148 | Tegra::Engines::KeplerCompute* kepler_compute{}; |
| 136 | }; | 149 | }; |
| @@ -166,6 +179,13 @@ public: | |||
| 166 | 179 | ||
| 167 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; | 180 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; |
| 168 | 181 | ||
| 182 | [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, | ||
| 183 | u32 offset) override; | ||
| 184 | |||
| 185 | [[nodiscard]] bool HasHLEMacroState() const override { | ||
| 186 | return cbuf_replacements.size() != 0; | ||
| 187 | } | ||
| 188 | |||
| 169 | void Dump(u64 hash) override; | 189 | void Dump(u64 hash) override; |
| 170 | 190 | ||
| 171 | private: | 191 | private: |
| @@ -173,6 +193,7 @@ private: | |||
| 173 | std::unordered_map<u32, Shader::TextureType> texture_types; | 193 | std::unordered_map<u32, Shader::TextureType> texture_types; |
| 174 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | 194 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; |
| 175 | std::unordered_map<u64, u32> cbuf_values; | 195 | std::unordered_map<u64, u32> cbuf_values; |
| 196 | std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||
| 176 | std::array<u32, 3> workgroup_size{}; | 197 | std::array<u32, 3> workgroup_size{}; |
| 177 | u32 local_memory_size{}; | 198 | u32 local_memory_size{}; |
| 178 | u32 shared_memory_size{}; | 199 | u32 shared_memory_size{}; |