summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando S2023-01-06 10:06:45 -0500
committerGravatar GitHub2023-01-06 10:06:45 -0500
commit8b251fc3f60330f1b1311af2d17cdb1ae9874683 (patch)
tree9c9eca32c58912170c07097d885fc08672cb40a8
parentMerge pull request #9561 from liamwhite/update-dynarmic (diff)
parentRun clang-format (diff)
downloadyuzu-8b251fc3f60330f1b1311af2d17cdb1ae9874683.tar.gz
yuzu-8b251fc3f60330f1b1311af2d17cdb1ae9874683.tar.xz
yuzu-8b251fc3f60330f1b1311af2d17cdb1ae9874683.zip
Merge pull request #9535 from bylaws/master
Port over several shader-compiler fixes from skyline
Diffstat (limited to '')
m---------externals/sirit0
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp8
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp58
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp45
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp114
-rw-r--r--src/shader_recompiler/host_translate_info.h3
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp13
-rw-r--r--src/shader_recompiler/ir_opt/passes.h2
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/shader_recompiler/shader_info.h12
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp3
17 files changed, 195 insertions, 91 deletions
diff --git a/externals/sirit b/externals/sirit
Subproject d7ad93a88864bda94e282e95028f90b5784e4d2 Subproject ab75463999f4f3291976b079d42d52ee91eebf3
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index db9c94ce8..0cd87a48f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
321 case IR::Attribute::PositionY: 321 case IR::Attribute::PositionY:
322 case IR::Attribute::PositionZ: 322 case IR::Attribute::PositionZ:
323 case IR::Attribute::PositionW: 323 case IR::Attribute::PositionW:
324 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, 324 return ctx.OpLoad(
325 ctx.Const(element))); 325 ctx.F32[1],
326 ctx.need_input_position_indirect
327 ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
328 ctx.Const(element))
329 : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
326 case IR::Attribute::InstanceId: 330 case IR::Attribute::InstanceId:
327 if (ctx.profile.support_vertex_instance_id) { 331 if (ctx.profile.support_vertex_instance_id) {
328 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); 332 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); 58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
59} 59}
60 60
61Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { 61Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
62 const Id thirty_two{ctx.Const(32u)}; 62 const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
63 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; 63 const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
64 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; 64 return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
65 return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
66} 65}
67} // Anonymous namespace 66} // Anonymous namespace
68 67
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
145Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 144Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
146 Id segmentation_mask) { 145 Id segmentation_mask) {
147 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; 146 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
148 const Id thread_id{GetThreadId(ctx)}; 147 const Id thread_id{EmitLaneId(ctx)};
149 if (ctx.profile.warp_size_potentially_larger_than_guest) {
150 const Id thirty_two{ctx.Const(32u)};
151 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
152 const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
153 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
154 index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
155 clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
156 }
157 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; 148 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
158 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; 149 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
159 150
160 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; 151 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
161 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; 152 Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
162 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 153 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
163 154
155 if (ctx.profile.warp_size_potentially_larger_than_guest) {
156 src_thread_id = AddPartitionBase(ctx, src_thread_id);
157 }
158
164 SetInBoundsFlag(inst, in_range); 159 SetInBoundsFlag(inst, in_range);
165 return SelectValue(ctx, in_range, value, src_thread_id); 160 return SelectValue(ctx, in_range, value, src_thread_id);
166} 161}
167 162
168Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 163Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
169 Id segmentation_mask) { 164 Id segmentation_mask) {
170 const Id thread_id{GetThreadId(ctx)}; 165 const Id thread_id{EmitLaneId(ctx)};
171 if (ctx.profile.warp_size_potentially_larger_than_guest) {
172 clamp = GetUpperClamp(ctx, thread_id, clamp);
173 }
174 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 166 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
175 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; 167 Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
176 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 168 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
177 169
170 if (ctx.profile.warp_size_potentially_larger_than_guest) {
171 src_thread_id = AddPartitionBase(ctx, src_thread_id);
172 }
173
178 SetInBoundsFlag(inst, in_range); 174 SetInBoundsFlag(inst, in_range);
179 return SelectValue(ctx, in_range, value, src_thread_id); 175 return SelectValue(ctx, in_range, value, src_thread_id);
180} 176}
181 177
182Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 178Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
183 Id segmentation_mask) { 179 Id segmentation_mask) {
184 const Id thread_id{GetThreadId(ctx)}; 180 const Id thread_id{EmitLaneId(ctx)};
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 clamp = GetUpperClamp(ctx, thread_id, clamp);
187 }
188 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 181 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
189 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; 182 Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
190 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 183 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
191 184
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 src_thread_id = AddPartitionBase(ctx, src_thread_id);
187 }
188
192 SetInBoundsFlag(inst, in_range); 189 SetInBoundsFlag(inst, in_range);
193 return SelectValue(ctx, in_range, value, src_thread_id); 190 return SelectValue(ctx, in_range, value, src_thread_id);
194} 191}
195 192
196Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 193Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
197 Id segmentation_mask) { 194 Id segmentation_mask) {
198 const Id thread_id{GetThreadId(ctx)}; 195 const Id thread_id{EmitLaneId(ctx)};
199 if (ctx.profile.warp_size_potentially_larger_than_guest) {
200 clamp = GetUpperClamp(ctx, thread_id, clamp);
201 }
202 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 196 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
203 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; 197 Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
204 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 198 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
205 199
200 if (ctx.profile.warp_size_potentially_larger_than_guest) {
201 src_thread_id = AddPartitionBase(ctx, src_thread_id);
202 }
203
206 SetInBoundsFlag(inst, in_range); 204 SetInBoundsFlag(inst, in_range);
207 return SelectValue(ctx, in_range, value, src_thread_id); 205 return SelectValue(ctx, in_range, value, src_thread_id);
208} 206}
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index ecb2db494..a0c155fdb 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
544 U16 = Name(TypeInt(16, false), "u16"); 544 U16 = Name(TypeInt(16, false), "u16");
545 S16 = Name(TypeInt(16, true), "s16"); 545 S16 = Name(TypeInt(16, true), "s16");
546 } 546 }
547 if (info.uses_int64) { 547 if (info.uses_int64 && profile.support_int64) {
548 AddCapability(spv::Capability::Int64); 548 AddCapability(spv::Capability::Int64);
549 U64 = Name(TypeInt(64, false), "u64"); 549 U64 = Name(TypeInt(64, false), "u64");
550 } 550 }
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
721 size_t label_index{0}; 721 size_t label_index{0};
722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) { 722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
723 AddLabel(labels[label_index]); 723 AddLabel(labels[label_index]);
724 const Id pointer{is_array 724 const Id pointer{[&]() {
725 ? OpAccessChain(input_f32, input_position, vertex, masked_index) 725 if (need_input_position_indirect) {
726 : OpAccessChain(input_f32, input_position, masked_index)}; 726 if (is_array)
727 return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
728 masked_index);
729 else
730 return OpAccessChain(input_f32, input_position, u32_zero_value,
731 masked_index);
732 } else {
733 if (is_array)
734 return OpAccessChain(input_f32, input_position, vertex, masked_index);
735 else
736 return OpAccessChain(input_f32, input_position, masked_index);
737 }
738 }()};
727 const Id result{OpLoad(F32[1], pointer)}; 739 const Id result{OpLoad(F32[1], pointer)};
728 OpReturnValue(result); 740 OpReturnValue(result);
729 ++label_index; 741 ++label_index;
@@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1367 Decorate(layer, spv::Decoration::Flat); 1379 Decorate(layer, spv::Decoration::Flat);
1368 } 1380 }
1369 if (loads.AnyComponent(IR::Attribute::PositionX)) { 1381 if (loads.AnyComponent(IR::Attribute::PositionX)) {
1370 const bool is_fragment{stage != Stage::Fragment}; 1382 const bool is_fragment{stage == Stage::Fragment};
1371 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; 1383 if (!is_fragment && profile.has_broken_spirv_position_input) {
1372 input_position = DefineInput(*this, F32[4], true, built_in); 1384 need_input_position_indirect = true;
1373 if (profile.support_geometry_shader_passthrough) { 1385
1374 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { 1386 const Id input_position_struct = TypeStruct(F32[4]);
1375 Decorate(input_position, spv::Decoration::PassthroughNV); 1387 input_position = DefineInput(*this, input_position_struct, true);
1388
1389 MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
1390 static_cast<unsigned>(spv::BuiltIn::Position));
1391 Decorate(input_position_struct, spv::Decoration::Block);
1392 } else {
1393 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
1394 : spv::BuiltIn::Position};
1395 input_position = DefineInput(*this, F32[4], true, built_in);
1396
1397 if (profile.support_geometry_shader_passthrough) {
1398 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
1399 Decorate(input_position, spv::Decoration::PassthroughNV);
1400 }
1376 } 1401 }
1377 } 1402 }
1378 } 1403 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 4414a5169..dbc5c55b9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -280,6 +280,7 @@ public:
280 Id write_global_func_u32x2{}; 280 Id write_global_func_u32x2{};
281 Id write_global_func_u32x4{}; 281 Id write_global_func_u32x4{};
282 282
283 bool need_input_position_indirect{};
283 Id input_position{}; 284 Id input_position{};
284 std::array<Id, 32> input_generics{}; 285 std::array<Id, 32> input_generics{};
285 286
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index ac159d24b..a3b99e24d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
171 } 171 }
172 return mapping; 172 return mapping;
173} 173}
174
175void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
176 const Shader::VaryingState& passthrough_mask,
177 bool passthrough_position,
178 std::optional<IR::Attribute> passthrough_layer_attr) {
179 for (u32 i = 0; i < program.output_vertices; i++) {
180 // Assign generics from input
181 for (u32 j = 0; j < 32; j++) {
182 if (!passthrough_mask.Generic(j)) {
183 continue;
184 }
185
186 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
187 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
188 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
189 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
190 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
191 }
192
193 if (passthrough_position) {
194 // Assign position from input
195 const IR::Attribute attr = IR::Attribute::PositionX;
196 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
197 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
198 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
199 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
200 }
201
202 if (passthrough_layer_attr) {
203 // Assign layer
204 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
205 ir.Imm32(0));
206 }
207
208 // Emit vertex
209 ir.EmitVertex(ir.Imm32(0));
210 }
211 ir.EndPrimitive(ir.Imm32(0));
212}
213
214u32 GetOutputTopologyVertices(OutputTopology output_topology) {
215 switch (output_topology) {
216 case OutputTopology::PointList:
217 return 1;
218 case OutputTopology::LineStrip:
219 return 2;
220 default:
221 return 3;
222 }
223}
224
225void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
226 for (IR::Block* const block : program.blocks) {
227 for (IR::Inst& inst : block->Instructions()) {
228 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
229 IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
230 EmitGeometryPassthrough(
231 ir, program, program.info.passthrough,
232 program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
233 }
234 }
235 }
236}
237
174} // Anonymous namespace 238} // Anonymous namespace
175 239
176IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 240IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -198,6 +262,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
198 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { 262 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
199 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; 263 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
200 } 264 }
265
266 if (!host_info.support_geometry_shader_passthrough) {
267 program.output_vertices = GetOutputTopologyVertices(program.output_topology);
268 LowerGeometryPassthrough(program, host_info);
269 }
201 } 270 }
202 break; 271 break;
203 } 272 }
@@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
223 292
224 Optimization::PositionPass(env, program); 293 Optimization::PositionPass(env, program);
225 294
226 Optimization::GlobalMemoryToStorageBufferPass(program); 295 Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
227 Optimization::TexturePass(env, program, host_info); 296 Optimization::TexturePass(env, program, host_info);
228 297
229 if (Settings::values.resolution_info.active) { 298 if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
342 IR::Program program; 411 IR::Program program;
343 program.stage = Stage::Geometry; 412 program.stage = Stage::Geometry;
344 program.output_topology = output_topology; 413 program.output_topology = output_topology;
345 switch (output_topology) { 414 program.output_vertices = GetOutputTopologyVertices(output_topology);
346 case OutputTopology::PointList:
347 program.output_vertices = 1;
348 break;
349 case OutputTopology::LineStrip:
350 program.output_vertices = 2;
351 break;
352 default:
353 program.output_vertices = 3;
354 break;
355 }
356 415
357 program.is_geometry_passthrough = false; 416 program.is_geometry_passthrough = false;
358 program.info.loads.mask = source_program.info.stores.mask; 417 program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
366 node.data.block = current_block; 425 node.data.block = current_block;
367 426
368 IR::IREmitter ir{*current_block}; 427 IR::IREmitter ir{*current_block};
369 for (u32 i = 0; i < program.output_vertices; i++) { 428 EmitGeometryPassthrough(ir, program, program.info.stores, true,
370 // Assign generics from input 429 source_program.info.emulated_layer);
371 for (u32 j = 0; j < 32; j++) {
372 if (!program.info.stores.Generic(j)) {
373 continue;
374 }
375
376 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
377 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
378 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
379 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
380 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
381 }
382
383 // Assign position from input
384 const IR::Attribute attr = IR::Attribute::PositionX;
385 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
386 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
387 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
388 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
389
390 // Assign layer
391 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
392 ir.Imm32(0));
393
394 // Emit vertex
395 ir.EmitVertex(ir.Imm32(0));
396 }
397 ir.EndPrimitive(ir.Imm32(0));
398 430
399 IR::Block* return_block{block_pool.Create(inst_pool)}; 431 IR::Block* return_block{block_pool.Create(inst_pool)};
400 IR::IREmitter{*return_block}.Epilogue(); 432 IR::IREmitter{*return_block}.Epilogue();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index d5d279554..55fc48768 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers 16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS 17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
18 u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
19 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
20 ///< passthrough shaders
18}; 21};
19 22
20} // namespace Shader 23} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
11#include "shader_recompiler/frontend/ir/breadth_first_search.h" 11#include "shader_recompiler/frontend/ir/breadth_first_search.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/host_translate_info.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
16namespace Shader::Optimization { 17namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
402} 403}
403 404
404/// Returns the offset in indices (not bytes) for an equivalent storage instruction 405/// Returns the offset in indices (not bytes) for an equivalent storage instruction
405IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { 406IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
406 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; 407 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
407 IR::U32 offset; 408 IR::U32 offset;
408 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { 409 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
415 } 416 }
416 // Subtract the least significant 32 bits from the guest offset. The result is the storage 417 // Subtract the least significant 32 bits from the guest offset. The result is the storage
417 // buffer offset in bytes. 418 // buffer offset in bytes.
418 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; 419 IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
420
421 // Align the offset base to match the host alignment requirements
422 low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
419 return ir.ISub(offset, low_cbuf); 423 return ir.ISub(offset, low_cbuf);
420} 424}
421 425
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
510} 514}
511} // Anonymous namespace 515} // Anonymous namespace
512 516
513void GlobalMemoryToStorageBufferPass(IR::Program& program) { 517void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
514 StorageInfo info; 518 StorageInfo info;
515 for (IR::Block* const block : program.post_order_blocks) { 519 for (IR::Block* const block : program.post_order_blocks) {
516 for (IR::Inst& inst : block->Instructions()) { 520 for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
534 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; 538 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
535 IR::Block* const block{storage_inst.block}; 539 IR::Block* const block{storage_inst.block};
536 IR::Inst* const inst{storage_inst.inst}; 540 IR::Inst* const inst{storage_inst.inst};
537 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; 541 const IR::U32 offset{
542 StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
538 Replace(*block, *inst, index, offset); 543 Replace(*block, *inst, index, offset);
539 } 544 }
540} 545}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -15,7 +15,7 @@ namespace Shader::Optimization {
15void CollectShaderInfoPass(Environment& env, IR::Program& program); 15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(Environment& env, IR::Program& program); 16void ConstantPropagationPass(Environment& env, IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program); 17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program); 18void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program); 20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program); 21void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b8841a536..253e0d0bd 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -55,6 +55,8 @@ struct Profile {
55 55
56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead 56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead
57 bool has_broken_spirv_clamp{}; 57 bool has_broken_spirv_clamp{};
58 /// The Position builtin needs to be wrapped in a struct when used as an input
59 bool has_broken_spirv_position_input{};
58 /// Offset image operands with an unsigned type do not work 60 /// Offset image operands with an unsigned type do not work
59 bool has_broken_unsigned_image_offsets{}; 61 bool has_broken_unsigned_image_offsets{};
60 /// Signed instructions with unsigned data types are misinterpreted 62 /// Signed instructions with unsigned data types are misinterpreted
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 44236b6b1..f93181e1e 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -65,6 +65,8 @@ enum class Interpolation {
65struct ConstantBufferDescriptor { 65struct ConstantBufferDescriptor {
66 u32 index; 66 u32 index;
67 u32 count; 67 u32 count;
68
69 auto operator<=>(const ConstantBufferDescriptor&) const = default;
68}; 70};
69 71
70struct StorageBufferDescriptor { 72struct StorageBufferDescriptor {
@@ -72,6 +74,8 @@ struct StorageBufferDescriptor {
72 u32 cbuf_offset; 74 u32 cbuf_offset;
73 u32 count; 75 u32 count;
74 bool is_written; 76 bool is_written;
77
78 auto operator<=>(const StorageBufferDescriptor&) const = default;
75}; 79};
76 80
77struct TextureBufferDescriptor { 81struct TextureBufferDescriptor {
@@ -84,6 +88,8 @@ struct TextureBufferDescriptor {
84 u32 secondary_shift_left; 88 u32 secondary_shift_left;
85 u32 count; 89 u32 count;
86 u32 size_shift; 90 u32 size_shift;
91
92 auto operator<=>(const TextureBufferDescriptor&) const = default;
87}; 93};
88using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; 94using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
89 95
@@ -95,6 +101,8 @@ struct ImageBufferDescriptor {
95 u32 cbuf_offset; 101 u32 cbuf_offset;
96 u32 count; 102 u32 count;
97 u32 size_shift; 103 u32 size_shift;
104
105 auto operator<=>(const ImageBufferDescriptor&) const = default;
98}; 106};
99using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; 107using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
100 108
@@ -110,6 +118,8 @@ struct TextureDescriptor {
110 u32 secondary_shift_left; 118 u32 secondary_shift_left;
111 u32 count; 119 u32 count;
112 u32 size_shift; 120 u32 size_shift;
121
122 auto operator<=>(const TextureDescriptor&) const = default;
113}; 123};
114using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; 124using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
115 125
@@ -122,6 +132,8 @@ struct ImageDescriptor {
122 u32 cbuf_offset; 132 u32 cbuf_offset;
123 u32 count; 133 u32 count;
124 u32 size_shift; 134 u32 size_shift;
135
136 auto operator<=>(const ImageDescriptor&) const = default;
125}; 137};
126using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; 138using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
127 139
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 06fd40851..627917ab6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1938 bool is_written) const { 1938 bool is_written) const {
1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); 1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); 1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
1941 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1941 const u32 alignment = runtime.GetStorageBufferAlignment();
1942
1943 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1944 const u32 aligned_size =
1945 Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
1946
1947 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1942 if (!cpu_addr || size == 0) { 1948 if (!cpu_addr || size == 0) {
1943 return NULL_BINDING; 1949 return NULL_BINDING;
1944 } 1950 }
1945 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1951
1952 const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
1946 const Binding binding{ 1953 const Binding binding{
1947 .cpu_addr = *cpu_addr, 1954 .cpu_addr = *cpu_addr,
1948 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), 1955 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
1949 .buffer_id = BufferId{}, 1956 .buffer_id = BufferId{},
1950 }; 1957 };
1951 return binding; 1958 return binding;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..bb1962073 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -160,6 +160,10 @@ public:
160 return device.CanReportMemoryUsage(); 160 return device.CanReportMemoryUsage();
161 } 161 }
162 162
163 u32 GetStorageBufferAlignment() const {
164 return static_cast<u32>(device.GetShaderStorageBufferAlignment());
165 }
166
163private: 167private:
164 static constexpr std::array PABO_LUT{ 168 static constexpr std::array PABO_LUT{
165 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 169 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 03b6314ff..7dd854e0f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
236 .needs_demote_reorder = device.IsAmd(), 236 .needs_demote_reorder = device.IsAmd(),
237 .support_snorm_render_buffer = false, 237 .support_snorm_render_buffer = false,
238 .support_viewport_index_layer = device.HasVertexViewportLayer(), 238 .support_viewport_index_layer = device.HasVertexViewportLayer(),
239 .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
240 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
239 } { 241 } {
240 if (use_asynchronous_shaders) { 242 if (use_asynchronous_shaders) {
241 workers = CreateWorkers(); 243 workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index b0153a502..1cfb4c2ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -330,6 +330,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
330 return device.CanReportMemoryUsage(); 330 return device.CanReportMemoryUsage();
331} 331}
332 332
333u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
334 return static_cast<u32>(device.GetStorageBufferAlignment());
335}
336
333void BufferCacheRuntime::Finish() { 337void BufferCacheRuntime::Finish() {
334 scheduler.Finish(); 338 scheduler.Finish();
335} 339}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..06539c733 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -73,6 +73,8 @@ public:
73 73
74 bool CanReportMemoryUsage() const; 74 bool CanReportMemoryUsage() const;
75 75
76 u32 GetStorageBufferAlignment() const;
77
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 78 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 79
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 80 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 67e5bc648..013b42cf8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -331,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
331 .need_declared_frag_colors = false, 331 .need_declared_frag_colors = false,
332 332
333 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, 333 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
334 .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
334 .has_broken_unsigned_image_offsets = false, 335 .has_broken_unsigned_image_offsets = false,
335 .has_broken_signed_operations = false, 336 .has_broken_signed_operations = false,
336 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, 337 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
@@ -343,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
343 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, 344 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
344 .support_snorm_render_buffer = true, 345 .support_snorm_render_buffer = true,
345 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), 346 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
347 .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
348 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
346 }; 349 };
347 350
348 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { 351 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {