summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2022-11-09 17:58:10 +0100
committerGravatar Fernando Sahmkow2023-01-01 16:43:57 -0500
commitaad0cbf024fb8077a9b375a093c60a7e2ab1db3d (patch)
tree8c6a86c92ed8cedbafb5f34dd9f72283eaaf4342
parentMacroHLE: Add Index Buffer size estimation. (diff)
downloadyuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.gz
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.xz
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.zip
MacroHLE: Add HLE replacement for base vertex and base instance.
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp8
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp10
-rw-r--r--src/shader_recompiler/environment.h5
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.cpp4
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.h4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp8
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp2
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp45
-rw-r--r--src/shader_recompiler/ir_opt/passes.h2
-rw-r--r--src/shader_recompiler/shader_info.h5
-rw-r--r--src/shader_recompiler/varying_state.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp15
-rw-r--r--src/video_core/engines/maxwell_3d.h17
-rw-r--r--src/video_core/macro/macro_hle.cpp115
-rw-r--r--src/video_core/memory_manager.cpp10
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp1
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp2
-rw-r--r--src/video_core/shader_environment.cpp53
-rw-r--r--src/video_core/shader_environment.h21
22 files changed, 265 insertions, 70 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 73b67f0af..e4802bf9e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
339 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; 339 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
340 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); 340 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
341 } 341 }
342 case IR::Attribute::BaseInstance:
343 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
344 case IR::Attribute::BaseVertex:
345 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
342 case IR::Attribute::FrontFace: 346 case IR::Attribute::FrontFace:
343 return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), 347 return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
344 ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), 348 ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
@@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
380 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; 384 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
381 return ctx.OpISub(ctx.U32[1], index, base); 385 return ctx.OpISub(ctx.U32[1], index, base);
382 } 386 }
387 case IR::Attribute::BaseInstance:
388 return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
389 case IR::Attribute::BaseVertex:
390 return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
383 default: 391 default:
384 throw NotImplementedException("Read U32 attribute {}", attr); 392 throw NotImplementedException("Read U32 attribute {}", attr);
385 } 393 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 41dc6d031..563a5fc49 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1379 if (loads[IR::Attribute::InstanceId]) { 1379 if (loads[IR::Attribute::InstanceId]) {
1380 if (profile.support_vertex_instance_id) { 1380 if (profile.support_vertex_instance_id) {
1381 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); 1381 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
1382 if (loads[IR::Attribute::BaseInstance]) {
1383 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1384 }
1382 } else { 1385 } else {
1383 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); 1386 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
1384 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); 1387 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
1385 } 1388 }
1389 } else if (loads[IR::Attribute::BaseInstance]) {
1390 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
1386 } 1391 }
1387 if (loads[IR::Attribute::VertexId]) { 1392 if (loads[IR::Attribute::VertexId]) {
1388 if (profile.support_vertex_instance_id) { 1393 if (profile.support_vertex_instance_id) {
1389 vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); 1394 vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
1395 if (loads[IR::Attribute::BaseVertex]) {
1396 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1397 }
1390 } else { 1398 } else {
1391 vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); 1399 vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
1392 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); 1400 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1393 } 1401 }
1402 } else if (loads[IR::Attribute::BaseVertex]) {
1403 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1394 } 1404 }
1395 if (loads[IR::Attribute::FrontFace]) { 1405 if (loads[IR::Attribute::FrontFace]) {
1396 front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); 1406 front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 402f2664f..b9b4455f6 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -34,6 +34,11 @@ public:
34 34
35 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; 35 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
36 36
37 [[nodiscard]] virtual bool HasHLEMacroState() const = 0;
38
39 [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(
40 u32 bank, u32 offset) = 0;
41
37 virtual void Dump(u64 hash) = 0; 42 virtual void Dump(u64 hash) = 0;
38 43
39 [[nodiscard]] const ProgramHeader& SPH() const noexcept { 44 [[nodiscard]] const ProgramHeader& SPH() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 7d3d882e4..73e189a89 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) {
446 return "ViewportMask"; 446 return "ViewportMask";
447 case Attribute::FrontFace: 447 case Attribute::FrontFace:
448 return "FrontFace"; 448 return "FrontFace";
449 case Attribute::BaseInstance:
450 return "BaseInstance";
451 case Attribute::BaseVertex:
452 return "BaseVertex";
449 } 453 }
450 return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); 454 return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
451} 455}
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index 6ee3947b1..364d8a912 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -219,6 +219,10 @@ enum class Attribute : u64 {
219 FixedFncTexture9Q = 231, 219 FixedFncTexture9Q = 231,
220 ViewportMask = 232, 220 ViewportMask = 232,
221 FrontFace = 255, 221 FrontFace = 255,
222
223 // Implementation attributes
224 BaseInstance = 256,
225 BaseVertex = 257,
222}; 226};
223 227
224constexpr size_t NUM_GENERICS = 32; 228constexpr size_t NUM_GENERICS = 32;
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 0cdac0eff..eb2e49a68 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
294 return Inst<F32>(Opcode::GetAttribute, attribute, vertex); 294 return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
295} 295}
296 296
297U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
298 return GetAttributeU32(attribute, Imm32(0));
299}
300
301U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
302 return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
303}
304
297void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { 305void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
298 Inst(Opcode::SetAttribute, attribute, value, vertex); 306 Inst(Opcode::SetAttribute, attribute, value, vertex);
299} 307}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 2df992feb..7aaaa4ab0 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -74,6 +74,8 @@ public:
74 74
75 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); 75 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
76 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); 76 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
77 [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
78 [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
77 void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); 79 void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
78 80
79 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); 81 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 3adbd2b16..ac159d24b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
219 } 219 }
220 Optimization::SsaRewritePass(program); 220 Optimization::SsaRewritePass(program);
221 221
222 Optimization::ConstantPropagationPass(program); 222 Optimization::ConstantPropagationPass(env, program);
223 223
224 Optimization::PositionPass(env, program); 224 Optimization::PositionPass(env, program);
225 225
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 826f9a54a..ac10405f3 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -7,6 +7,7 @@
7#include <type_traits> 7#include <type_traits>
8 8
9#include "common/bit_cast.h" 9#include "common/bit_cast.h"
10#include "shader_recompiler/environment.h"
10#include "shader_recompiler/exception.h" 11#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
12#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
@@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
515 case IR::Attribute::PrimitiveId: 516 case IR::Attribute::PrimitiveId:
516 case IR::Attribute::InstanceId: 517 case IR::Attribute::InstanceId:
517 case IR::Attribute::VertexId: 518 case IR::Attribute::VertexId:
519 case IR::Attribute::BaseVertex:
520 case IR::Attribute::BaseInstance:
518 break; 521 break;
519 default: 522 default:
520 return; 523 return;
@@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
644 } 647 }
645} 648}
646 649
647void ConstantPropagation(IR::Block& block, IR::Inst& inst) { 650void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
651 const IR::Value bank{inst.Arg(0)};
652 const IR::Value offset{inst.Arg(1)};
653 if (!bank.IsImmediate() || !offset.IsImmediate()) {
654 return;
655 }
656 const auto bank_value = bank.U32();
657 const auto offset_value = offset.U32();
658 auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
659 if (!replacement) {
660 return;
661 }
662 const auto new_attribute = [replacement]() {
663 switch (*replacement) {
664 case ReplaceConstant::BaseInstance:
665 return IR::Attribute::BaseInstance;
666 case ReplaceConstant::BaseVertex:
667 return IR::Attribute::BaseVertex;
668 default:
669 throw NotImplementedException("Not implemented replacement variable {}", *replacement);
670 }
671 }();
672 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
673 if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
674 inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
675 } else {
676 inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
677 }
678}
679
680void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
648 switch (inst.GetOpcode()) { 681 switch (inst.GetOpcode()) {
649 case IR::Opcode::GetRegister: 682 case IR::Opcode::GetRegister:
650 return FoldGetRegister(inst); 683 return FoldGetRegister(inst);
@@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
789 IR::Opcode::CompositeInsertF16x4); 822 IR::Opcode::CompositeInsertF16x4);
790 case IR::Opcode::FSwizzleAdd: 823 case IR::Opcode::FSwizzleAdd:
791 return FoldFSwizzleAdd(block, inst); 824 return FoldFSwizzleAdd(block, inst);
825 case IR::Opcode::GetCbufF32:
826 case IR::Opcode::GetCbufU32:
827 if (env.HasHLEMacroState()) {
828 return FoldConstBuffer(env, block, inst);
829 }
830 break;
792 default: 831 default:
793 break; 832 break;
794 } 833 }
795} 834}
796} // Anonymous namespace 835} // Anonymous namespace
797 836
798void ConstantPropagationPass(IR::Program& program) { 837void ConstantPropagationPass(Environment& env, IR::Program& program) {
799 const auto end{program.post_order_blocks.rend()}; 838 const auto end{program.post_order_blocks.rend()};
800 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { 839 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
801 IR::Block* const block{*it}; 840 IR::Block* const block{*it};
802 for (IR::Inst& inst : block->Instructions()) { 841 for (IR::Inst& inst : block->Instructions()) {
803 ConstantPropagation(*block, inst); 842 ConstantPropagation(env, *block, inst);
804 } 843 }
805 } 844 }
806} 845}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 11bfe801a..1f8f2ba95 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,7 +13,7 @@ struct HostTranslateInfo;
13namespace Shader::Optimization { 13namespace Shader::Optimization {
14 14
15void CollectShaderInfoPass(Environment& env, IR::Program& program); 15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(IR::Program& program); 16void ConstantPropagationPass(Environment& env, IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program); 17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program); 18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index d9c6e92db..ea0f48344 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -16,6 +16,11 @@
16 16
17namespace Shader { 17namespace Shader {
18 18
19enum class ReplaceConstant : u32 {
20 BaseInstance,
21 BaseVertex,
22};
23
19enum class TextureType : u32 { 24enum class TextureType : u32 {
20 Color1D, 25 Color1D,
21 ColorArray1D, 26 ColorArray1D,
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
index 7b28a285f..18a9aaf50 100644
--- a/src/shader_recompiler/varying_state.h
+++ b/src/shader_recompiler/varying_state.h
@@ -11,7 +11,7 @@
11namespace Shader { 11namespace Shader {
12 12
13struct VaryingState { 13struct VaryingState {
14 std::bitset<256> mask{}; 14 std::bitset<512> mask{};
15 15
16 void Set(IR::Attribute attribute, bool state = true) { 16 void Set(IR::Attribute attribute, bool state = true) {
17 mask[static_cast<size_t>(attribute)] = state; 17 mask[static_cast<size_t>(attribute)] = state;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a0dd7400d..50d8a94b1 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
182size_t Maxwell3D::EstimateIndexBufferSize() { 182size_t Maxwell3D::EstimateIndexBufferSize() {
183 GPUVAddr start_address = regs.index_buffer.StartAddress(); 183 GPUVAddr start_address = regs.index_buffer.StartAddress();
184 GPUVAddr end_address = regs.index_buffer.EndAddress(); 184 GPUVAddr end_address = regs.index_buffer.EndAddress();
185 return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address), 185 constexpr std::array<size_t, 4> max_sizes = {
186 static_cast<size_t>(end_address - start_address)); 186 std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
187 std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
188 const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
189 return std::min<size_t>(
190 memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
191 byte_size,
192 static_cast<size_t>(end_address - start_address));
187} 193}
188 194
189u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { 195u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
@@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
572 return regs.reg_array[method]; 578 return regs.reg_array[method];
573} 579}
574 580
581void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
582 const u64 key = (static_cast<u64>(bank) << 32) | offset;
583 replace_table.emplace(key, name);
584}
585
575} // namespace Tegra::Engines 586} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cfe1e4883..397e88f67 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3020,6 +3020,23 @@ public:
3020 /// Store temporary hw register values, used by some calls to restore state after a operation 3020 /// Store temporary hw register values, used by some calls to restore state after a operation
3021 Regs shadow_state; 3021 Regs shadow_state;
3022 3022
3023 // None Engine
3024 enum class EngineHint : u32 {
3025 None = 0x0,
3026 OnHLEMacro = 0x1,
3027 };
3028
3029 EngineHint engine_state{EngineHint::None};
3030
3031 enum class HLEReplaceName : u32 {
3032 BaseVertex = 0x0,
3033 BaseInstance = 0x1,
3034 };
3035
3036 void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
3037
3038 std::unordered_map<u64, HLEReplaceName> replace_table;
3039
3023 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); 3040 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
3024 static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); 3041 static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
3025 3042
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 93b6d42a4..638247e55 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -14,26 +14,29 @@
14#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
15 15
16namespace Tegra { 16namespace Tegra {
17
18using Maxwell = Engines::Maxwell3D;
19
17namespace { 20namespace {
18 21
19bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { 22bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {
20 switch (topology) { 23 switch (topology) {
21 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: 24 case Maxwell::Regs::PrimitiveTopology::Points:
22 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: 25 case Maxwell::Regs::PrimitiveTopology::Lines:
23 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: 26 case Maxwell::Regs::PrimitiveTopology::LineLoop:
24 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: 27 case Maxwell::Regs::PrimitiveTopology::LineStrip:
25 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: 28 case Maxwell::Regs::PrimitiveTopology::Triangles:
26 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: 29 case Maxwell::Regs::PrimitiveTopology::TriangleStrip:
27 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: 30 case Maxwell::Regs::PrimitiveTopology::TriangleFan:
28 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: 31 case Maxwell::Regs::PrimitiveTopology::LinesAdjacency:
29 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: 32 case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency:
30 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: 33 case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency:
31 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: 34 case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency:
32 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: 35 case Maxwell::Regs::PrimitiveTopology::Patches:
33 return true; 36 return true;
34 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: 37 case Maxwell::Regs::PrimitiveTopology::Quads:
35 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: 38 case Maxwell::Regs::PrimitiveTopology::QuadStrip:
36 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: 39 case Maxwell::Regs::PrimitiveTopology::Polygon:
37 default: 40 default:
38 return false; 41 return false;
39 } 42 }
@@ -82,8 +85,7 @@ public:
82 : HLEMacroImpl(maxwell3d_), extended(extended_) {} 85 : HLEMacroImpl(maxwell3d_), extended(extended_) {}
83 86
84 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { 87 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
85 auto topology = 88 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
86 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
87 if (!IsTopologySafe(topology)) { 89 if (!IsTopologySafe(topology)) {
88 Fallback(parameters); 90 Fallback(parameters);
89 return; 91 return;
@@ -99,18 +101,16 @@ public:
99 params.stride = 0; 101 params.stride = 0;
100 102
101 if (extended) { 103 if (extended) {
102 maxwell3d.CallMethod(0x8e3, 0x640, true); 104 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
103 maxwell3d.CallMethod(0x8e4, parameters[4], true); 105 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
104 } 106 }
105 107
106 maxwell3d.draw_manager->DrawArrayIndirect(topology); 108 maxwell3d.draw_manager->DrawArrayIndirect(topology);
107 109
108 if (extended) { 110 if (extended) {
109 maxwell3d.CallMethod(0x8e3, 0x640, true); 111 maxwell3d.engine_state = Maxwell::EngineHint::None;
110 maxwell3d.CallMethod(0x8e4, 0, true); 112 maxwell3d.replace_table.clear();
111 } 113 }
112 maxwell3d.regs.vertex_buffer.first = 0;
113 maxwell3d.regs.vertex_buffer.count = 0;
114 } 114 }
115 115
116private: 116private:
@@ -134,13 +134,18 @@ private:
134 134
135 const u32 base_instance = parameters[4]; 135 const u32 base_instance = parameters[4];
136 if (extended) { 136 if (extended) {
137 maxwell3d.CallMethod(0x8e3, 0x640, true); 137 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
138 maxwell3d.CallMethod(0x8e4, base_instance, true); 138 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
139 } 139 }
140 140
141 maxwell3d.draw_manager->DrawArray( 141 maxwell3d.draw_manager->DrawArray(
142 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 142 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
143 vertex_first, vertex_count, base_instance, instance_count); 143 vertex_first, vertex_count, base_instance, instance_count);
144
145 if (extended) {
146 maxwell3d.engine_state = Maxwell::EngineHint::None;
147 maxwell3d.replace_table.clear();
148 }
144 } 149 }
145 150
146 bool extended; 151 bool extended;
@@ -151,8 +156,7 @@ public:
151 explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} 156 explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
152 157
153 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { 158 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
154 auto topology = 159 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
155 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
156 if (!IsTopologySafe(topology)) { 160 if (!IsTopologySafe(topology)) {
157 Fallback(parameters); 161 Fallback(parameters);
158 return; 162 return;
@@ -164,16 +168,12 @@ public:
164 minimum_limit = std::max(parameters[3], minimum_limit); 168 minimum_limit = std::max(parameters[3], minimum_limit);
165 } 169 }
166 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); 170 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
167 const u32 base_size = std::max(minimum_limit, estimate); 171 const u32 base_size = std::max<u32>(minimum_limit, estimate);
168 const u32 element_base = parameters[4];
169 const u32 base_instance = parameters[5];
170 maxwell3d.regs.index_buffer.first = 0;
171 maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping
172 maxwell3d.regs.draw.topology.Assign(topology); 172 maxwell3d.regs.draw.topology.Assign(topology);
173 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 173 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
174 maxwell3d.CallMethod(0x8e3, 0x640, true); 174 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
175 maxwell3d.CallMethod(0x8e4, element_base, true); 175 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
176 maxwell3d.CallMethod(0x8e5, base_instance, true); 176 maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
177 auto& params = maxwell3d.draw_manager->GetIndirectParams(); 177 auto& params = maxwell3d.draw_manager->GetIndirectParams();
178 params.is_indexed = true; 178 params.is_indexed = true;
179 params.include_count = false; 179 params.include_count = false;
@@ -184,9 +184,8 @@ public:
184 params.stride = 0; 184 params.stride = 0;
185 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 185 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
186 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); 186 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
187 maxwell3d.CallMethod(0x8e3, 0x640, true); 187 maxwell3d.engine_state = Maxwell::EngineHint::None;
188 maxwell3d.CallMethod(0x8e4, 0x0, true); 188 maxwell3d.replace_table.clear();
189 maxwell3d.CallMethod(0x8e5, 0x0, true);
190 } 189 }
191 190
192private: 191private:
@@ -197,18 +196,17 @@ private:
197 const u32 base_instance = parameters[5]; 196 const u32 base_instance = parameters[5];
198 maxwell3d.regs.vertex_id_base = element_base; 197 maxwell3d.regs.vertex_id_base = element_base;
199 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 198 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
200 maxwell3d.CallMethod(0x8e3, 0x640, true); 199 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
201 maxwell3d.CallMethod(0x8e4, element_base, true); 200 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
202 maxwell3d.CallMethod(0x8e5, base_instance, true); 201 maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
203 202
204 maxwell3d.draw_manager->DrawIndex( 203 maxwell3d.draw_manager->DrawIndex(
205 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 204 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
206 parameters[3], parameters[1], element_base, base_instance, instance_count); 205 parameters[3], parameters[1], element_base, base_instance, instance_count);
207 206
208 maxwell3d.regs.vertex_id_base = 0x0; 207 maxwell3d.regs.vertex_id_base = 0x0;
209 maxwell3d.CallMethod(0x8e3, 0x640, true); 208 maxwell3d.engine_state = Maxwell::EngineHint::None;
210 maxwell3d.CallMethod(0x8e4, 0x0, true); 209 maxwell3d.replace_table.clear();
211 maxwell3d.CallMethod(0x8e5, 0x0, true);
212 } 210 }
213 211
214 u32 minimum_limit{1 << 18}; 212 u32 minimum_limit{1 << 18};
@@ -238,8 +236,7 @@ public:
238 : HLEMacroImpl(maxwell3d_) {} 236 : HLEMacroImpl(maxwell3d_) {}
239 237
240 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { 238 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
241 const auto topology = 239 const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
242 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
243 if (!IsTopologySafe(topology)) { 240 if (!IsTopologySafe(topology)) {
244 Fallback(parameters); 241 Fallback(parameters);
245 return; 242 return;
@@ -277,9 +274,6 @@ public:
277 } 274 }
278 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); 275 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
279 const u32 base_size = std::max(minimum_limit, estimate); 276 const u32 base_size = std::max(minimum_limit, estimate);
280
281 maxwell3d.regs.index_buffer.first = 0;
282 maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size);
283 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 277 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
284 auto& params = maxwell3d.draw_manager->GetIndirectParams(); 278 auto& params = maxwell3d.draw_manager->GetIndirectParams();
285 params.is_indexed = true; 279 params.is_indexed = true;
@@ -290,7 +284,12 @@ public:
290 params.max_draw_counts = draw_count; 284 params.max_draw_counts = draw_count;
291 params.stride = stride; 285 params.stride = stride;
292 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 286 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
293 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); 287 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
288 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
289 maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
290 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
291 maxwell3d.engine_state = Maxwell::EngineHint::None;
292 maxwell3d.replace_table.clear();
294 } 293 }
295 294
296private: 295private:
@@ -299,9 +298,8 @@ private:
299 // Clean everything. 298 // Clean everything.
300 // Clean everything. 299 // Clean everything.
301 maxwell3d.regs.vertex_id_base = 0x0; 300 maxwell3d.regs.vertex_id_base = 0x0;
302 maxwell3d.CallMethod(0x8e3, 0x640, true); 301 maxwell3d.engine_state = Maxwell::EngineHint::None;
303 maxwell3d.CallMethod(0x8e4, 0x0, true); 302 maxwell3d.replace_table.clear();
304 maxwell3d.CallMethod(0x8e5, 0x0, true);
305 }); 303 });
306 maxwell3d.RefreshParameters(); 304 maxwell3d.RefreshParameters();
307 const u32 start_indirect = parameters[0]; 305 const u32 start_indirect = parameters[0];
@@ -310,8 +308,7 @@ private:
310 // Nothing to do. 308 // Nothing to do.
311 return; 309 return;
312 } 310 }
313 const auto topology = 311 const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
314 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
315 maxwell3d.regs.draw.topology.Assign(topology); 312 maxwell3d.regs.draw.topology.Assign(topology);
316 const u32 padding = parameters[3]; 313 const u32 padding = parameters[3];
317 const std::size_t max_draws = parameters[4]; 314 const std::size_t max_draws = parameters[4];
@@ -326,9 +323,9 @@ private:
326 const u32 base_vertex = parameters[base + 3]; 323 const u32 base_vertex = parameters[base + 3];
327 const u32 base_instance = parameters[base + 4]; 324 const u32 base_instance = parameters[base + 4];
328 maxwell3d.regs.vertex_id_base = base_vertex; 325 maxwell3d.regs.vertex_id_base = base_vertex;
329 maxwell3d.CallMethod(0x8e3, 0x640, true); 326 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
330 maxwell3d.CallMethod(0x8e4, base_vertex, true); 327 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
331 maxwell3d.CallMethod(0x8e5, base_instance, true); 328 maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
332 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 329 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
333 maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], 330 maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
334 base_vertex, base_instance, parameters[base + 1]); 331 base_vertex, base_instance, parameters[base + 1]);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8f6c51045..11e7d225e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
577 return range_so_far; 577 return range_so_far;
578} 578}
579 579
580size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { 580size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
581 PTEKind base_kind = GetPageKind(gpu_addr); 581 PTEKind base_kind = GetPageKind(gpu_addr);
582 if (base_kind == PTEKind::INVALID) { 582 if (base_kind == PTEKind::INVALID) {
583 return 0; 583 return 0;
@@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
596 return true; 596 return true;
597 } 597 }
598 range_so_far += copy_amount; 598 range_so_far += copy_amount;
599 if (range_so_far >= max_size) {
600 result = true;
601 return true;
602 }
599 return false; 603 return false;
600 }; 604 };
601 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 605 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
605 return true; 609 return true;
606 } 610 }
607 range_so_far += copy_amount; 611 range_so_far += copy_amount;
612 if (range_so_far >= max_size) {
613 result = true;
614 return true;
615 }
608 return false; 616 return false;
609 }; 617 };
610 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 618 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 65f6e8134..ca22520d7 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -118,7 +118,8 @@ public:
118 118
119 PTEKind GetPageKind(GPUVAddr gpu_addr) const; 119 PTEKind GetPageKind(GPUVAddr gpu_addr) const;
120 120
121 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; 121 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
122 size_t max_size = std::numeric_limits<size_t>::max()) const;
122 123
123private: 124private:
124 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> 125 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index e62b36822..df229f41b 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
97 smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); 97 smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
98 alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); 98 alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
99 alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); 99 alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
100 app_stage.Assign(maxwell3d.engine_state);
100 101
101 for (size_t i = 0; i < regs.rt.size(); ++i) { 102 for (size_t i = 0; i < regs.rt.size(); ++i) {
102 color_formats[i] = static_cast<u8>(regs.rt[i].format); 103 color_formats[i] = static_cast<u8>(regs.rt[i].format);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index ab79fb8f3..03bf64b57 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -197,6 +197,7 @@ struct FixedPipelineState {
197 BitField<14, 1, u32> smooth_lines; 197 BitField<14, 1, u32> smooth_lines;
198 BitField<15, 1, u32> alpha_to_coverage_enabled; 198 BitField<15, 1, u32> alpha_to_coverage_enabled;
199 BitField<16, 1, u32> alpha_to_one_enabled; 199 BitField<16, 1, u32> alpha_to_one_enabled;
200 BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
200 }; 201 };
201 std::array<u8, Maxwell::NumRenderTargets> color_formats; 202 std::array<u8, Maxwell::NumRenderTargets> color_formats;
202 203
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e7262420c..58b955821 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
54using VideoCommon::GenericEnvironment; 54using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment; 55using VideoCommon::GraphicsEnvironment;
56 56
57constexpr u32 CACHE_VERSION = 8; 57constexpr u32 CACHE_VERSION = 9;
58 58
59template <typename Container> 59template <typename Container>
60auto MakeSpan(Container& container) { 60auto MakeSpan(Container& container) {
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 958810747..99d85bfb3 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
202 const u64 num_texture_types{static_cast<u64>(texture_types.size())}; 202 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
203 const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; 203 const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
204 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; 204 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
205 const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
205 206
206 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) 207 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
207 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) 208 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
208 .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), 209 .write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
209 sizeof(num_texture_pixel_formats)) 210 sizeof(num_texture_pixel_formats))
210 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) 211 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
212 .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
213 sizeof(num_cbuf_replacement_values))
211 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) 214 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
212 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) 215 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
213 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) 216 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
@@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
229 file.write(reinterpret_cast<const char*>(&key), sizeof(key)) 232 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
230 .write(reinterpret_cast<const char*>(&type), sizeof(type)); 233 .write(reinterpret_cast<const char*>(&type), sizeof(type));
231 } 234 }
235 for (const auto& [key, type] : cbuf_replacements) {
236 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
237 .write(reinterpret_cast<const char*>(&type), sizeof(type));
238 }
232 if (stage == Shader::Stage::Compute) { 239 if (stage == Shader::Stage::Compute) {
233 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) 240 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
234 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); 241 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
@@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
318 ASSERT(local_size <= std::numeric_limits<u32>::max()); 325 ASSERT(local_size <= std::numeric_limits<u32>::max());
319 local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; 326 local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
320 texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; 327 texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
328 has_hle_engine_state =
329 maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
321} 330}
322 331
323u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { 332u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
@@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
331 return value; 340 return value;
332} 341}
333 342
343std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank,
344 u32 offset) {
345 if (!has_hle_engine_state) {
346 return std::nullopt;
347 }
348 const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
349 auto it = maxwell3d->replace_table.find(key);
350 if (it == maxwell3d->replace_table.end()) {
351 return std::nullopt;
352 }
353 const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
354 switch (name) {
355 case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
356 return Shader::ReplaceConstant::BaseVertex;
357 case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
358 return Shader::ReplaceConstant::BaseInstance;
359 default:
360 UNREACHABLE();
361 }
362 }(it->second);
363 cbuf_replacements.emplace(key, converted_value);
364 return converted_value;
365}
366
334Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { 367Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
335 const auto& regs{maxwell3d->regs}; 368 const auto& regs{maxwell3d->regs};
336 const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; 369 const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
@@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
409 u64 num_texture_types{}; 442 u64 num_texture_types{};
410 u64 num_texture_pixel_formats{}; 443 u64 num_texture_pixel_formats{};
411 u64 num_cbuf_values{}; 444 u64 num_cbuf_values{};
445 u64 num_cbuf_replacement_values{};
412 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) 446 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
413 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) 447 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
414 .read(reinterpret_cast<char*>(&num_texture_pixel_formats), 448 .read(reinterpret_cast<char*>(&num_texture_pixel_formats),
415 sizeof(num_texture_pixel_formats)) 449 sizeof(num_texture_pixel_formats))
416 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) 450 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
451 .read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
452 sizeof(num_cbuf_replacement_values))
417 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) 453 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
418 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) 454 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
419 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) 455 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
@@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
444 .read(reinterpret_cast<char*>(&value), sizeof(value)); 480 .read(reinterpret_cast<char*>(&value), sizeof(value));
445 cbuf_values.emplace(key, value); 481 cbuf_values.emplace(key, value);
446 } 482 }
483 for (size_t i = 0; i < num_cbuf_replacement_values; ++i) {
484 u64 key;
485 Shader::ReplaceConstant value;
486 file.read(reinterpret_cast<char*>(&key), sizeof(key))
487 .read(reinterpret_cast<char*>(&value), sizeof(value));
488 cbuf_replacements.emplace(key, value);
489 }
447 if (stage == Shader::Stage::Compute) { 490 if (stage == Shader::Stage::Compute) {
448 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) 491 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
449 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); 492 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
@@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
512 return workgroup_size; 555 return workgroup_size;
513} 556}
514 557
558std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank,
559 u32 offset) {
560 const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
561 auto it = cbuf_replacements.find(key);
562 if (it == cbuf_replacements.end()) {
563 return std::nullopt;
564 }
565 return it->second;
566}
567
515void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, 568void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
516 const std::filesystem::path& filename, u32 cache_version) try { 569 const std::filesystem::path& filename, u32 cache_version) try {
517 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); 570 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 1342fab1e..d75987a52 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -60,6 +60,10 @@ public:
60 60
61 void Serialize(std::ofstream& file) const; 61 void Serialize(std::ofstream& file) const;
62 62
63 bool HasHLEMacroState() const override {
64 return has_hle_engine_state;
65 }
66
63protected: 67protected:
64 std::optional<u64> TryFindSize(); 68 std::optional<u64> TryFindSize();
65 69
@@ -73,6 +77,7 @@ protected:
73 std::unordered_map<u32, Shader::TextureType> texture_types; 77 std::unordered_map<u32, Shader::TextureType> texture_types;
74 std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; 78 std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
75 std::unordered_map<u64, u32> cbuf_values; 79 std::unordered_map<u64, u32> cbuf_values;
80 std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
76 81
77 u32 local_memory_size{}; 82 u32 local_memory_size{};
78 u32 texture_bound{}; 83 u32 texture_bound{};
@@ -89,6 +94,7 @@ protected:
89 u32 viewport_transform_state = 1; 94 u32 viewport_transform_state = 1;
90 95
91 bool has_unbound_instructions = false; 96 bool has_unbound_instructions = false;
97 bool has_hle_engine_state = false;
92}; 98};
93 99
94class GraphicsEnvironment final : public GenericEnvironment { 100class GraphicsEnvironment final : public GenericEnvironment {
@@ -109,6 +115,8 @@ public:
109 115
110 u32 ReadViewportTransformState() override; 116 u32 ReadViewportTransformState() override;
111 117
118 std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override;
119
112private: 120private:
113 Tegra::Engines::Maxwell3D* maxwell3d{}; 121 Tegra::Engines::Maxwell3D* maxwell3d{};
114 size_t stage_index{}; 122 size_t stage_index{};
@@ -131,6 +139,11 @@ public:
131 139
132 u32 ReadViewportTransformState() override; 140 u32 ReadViewportTransformState() override;
133 141
142 std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(
143 [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override {
144 return std::nullopt;
145 }
146
134private: 147private:
135 Tegra::Engines::KeplerCompute* kepler_compute{}; 148 Tegra::Engines::KeplerCompute* kepler_compute{};
136}; 149};
@@ -166,6 +179,13 @@ public:
166 179
167 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; 180 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
168 181
182 [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank,
183 u32 offset) override;
184
185 [[nodiscard]] bool HasHLEMacroState() const override {
186 return cbuf_replacements.size() != 0;
187 }
188
169 void Dump(u64 hash) override; 189 void Dump(u64 hash) override;
170 190
171private: 191private:
@@ -173,6 +193,7 @@ private:
173 std::unordered_map<u32, Shader::TextureType> texture_types; 193 std::unordered_map<u32, Shader::TextureType> texture_types;
174 std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; 194 std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
175 std::unordered_map<u64, u32> cbuf_values; 195 std::unordered_map<u64, u32> cbuf_values;
196 std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
176 std::array<u32, 3> workgroup_size{}; 197 std::array<u32, 3> workgroup_size{};
177 u32 local_memory_size{}; 198 u32 local_memory_size{};
178 u32 shared_memory_size{}; 199 u32 shared_memory_size{};