diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/rescaling_pass.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3 | ||||
| -rw-r--r-- | src/yuzu_cmd/default_ini.h | 6 |
7 files changed, 113 insertions, 9 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 248ad3ced..b22725584 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 212 | } | 212 | } |
| 213 | Optimization::SsaRewritePass(program); | 213 | Optimization::SsaRewritePass(program); |
| 214 | 214 | ||
| 215 | Optimization::ConstantPropagationPass(program); | ||
| 216 | |||
| 215 | Optimization::GlobalMemoryToStorageBufferPass(program); | 217 | Optimization::GlobalMemoryToStorageBufferPass(program); |
| 216 | Optimization::TexturePass(env, program); | 218 | Optimization::TexturePass(env, program); |
| 217 | 219 | ||
| 218 | Optimization::ConstantPropagationPass(program); | ||
| 219 | |||
| 220 | if (Settings::values.resolution_info.active) { | 220 | if (Settings::values.resolution_info.active) { |
| 221 | Optimization::RescalingPass(program); | 221 | Optimization::RescalingPass(program); |
| 222 | } | 222 | } |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 38592afd0..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { | |||
| 334 | /// Tries to track the storage buffer address used by a global memory instruction | 334 | /// Tries to track the storage buffer address used by a global memory instruction |
| 335 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | 335 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { |
| 336 | const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { | 336 | const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { |
| 337 | if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { | 337 | if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && |
| 338 | inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { | ||
| 338 | return std::nullopt; | 339 | return std::nullopt; |
| 339 | } | 340 | } |
| 340 | const IR::Value index{inst->Arg(0)}; | 341 | const IR::Value index{inst->Arg(0)}; |
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp | |||
| @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s | |||
| 183 | } | 183 | } |
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, | ||
| 187 | size_t index) { | ||
| 188 | const IR::Value composite{inst.Arg(index)}; | ||
| 189 | if (composite.IsEmpty()) { | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 193 | const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; | ||
| 194 | const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; | ||
| 195 | switch (info.type) { | ||
| 196 | case TextureType::ColorArray2D: | ||
| 197 | case TextureType::Color2D: | ||
| 198 | inst.SetArg(index, ir.CompositeConstruct(x, y)); | ||
| 199 | break; | ||
| 200 | case TextureType::Color1D: | ||
| 201 | case TextureType::ColorArray1D: | ||
| 202 | case TextureType::Color3D: | ||
| 203 | case TextureType::ColorCube: | ||
| 204 | case TextureType::ColorArrayCube: | ||
| 205 | case TextureType::Buffer: | ||
| 206 | // Nothing to patch here | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 186 | void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { | 211 | void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { |
| 187 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | 212 | const auto info{inst.Flags<IR::TextureInstInfo>()}; |
| 188 | const IR::Value coord{inst.Arg(1)}; | 213 | const IR::Value coord{inst.Arg(1)}; |
| @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { | |||
| 220 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; | 245 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; |
| 221 | SubScaleCoord(ir, inst, is_scaled); | 246 | SubScaleCoord(ir, inst, is_scaled); |
| 222 | // Scale ImageFetch offset | 247 | // Scale ImageFetch offset |
| 223 | ScaleIntegerComposite(ir, inst, is_scaled, 2); | 248 | ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); |
| 224 | } | 249 | } |
| 225 | 250 | ||
| 226 | void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { | 251 | void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { |
| @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { | |||
| 242 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; | 267 | const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; |
| 243 | ScaleIntegerComposite(ir, inst, is_scaled, 1); | 268 | ScaleIntegerComposite(ir, inst, is_scaled, 1); |
| 244 | // Scale ImageFetch offset | 269 | // Scale ImageFetch offset |
| 245 | ScaleIntegerComposite(ir, inst, is_scaled, 2); | 270 | ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); |
| 246 | } | 271 | } |
| 247 | 272 | ||
| 248 | void PatchImageRead(IR::Block& block, IR::Inst& inst) { | 273 | void PatchImageRead(IR::Block& block, IR::Inst& inst) { |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ba19d1ca2..54a902f56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 10 | #include "video_core/dirty_flags.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| @@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 208 | return ProcessCBBind(4); | 209 | return ProcessCBBind(4); |
| 209 | case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): | 210 | case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): |
| 210 | return DrawArrays(); | 211 | return DrawArrays(); |
| 212 | case MAXWELL3D_REG_INDEX(small_index): | ||
| 213 | regs.index_array.count = regs.small_index.count; | ||
| 214 | regs.index_array.first = regs.small_index.first; | ||
| 215 | dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||
| 216 | return DrawArrays(); | ||
| 217 | case MAXWELL3D_REG_INDEX(topology_override): | ||
| 218 | use_topology_override = true; | ||
| 219 | return; | ||
| 211 | case MAXWELL3D_REG_INDEX(clear_buffers): | 220 | case MAXWELL3D_REG_INDEX(clear_buffers): |
| 212 | return ProcessClearBuffers(); | 221 | return ProcessClearBuffers(); |
| 213 | case MAXWELL3D_REG_INDEX(query.query_get): | 222 | case MAXWELL3D_REG_INDEX(query.query_get): |
| @@ -352,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { | |||
| 352 | } | 361 | } |
| 353 | } | 362 | } |
| 354 | 363 | ||
| 364 | void Maxwell3D::ProcessTopologyOverride() { | ||
| 365 | using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; | ||
| 366 | using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; | ||
| 367 | |||
| 368 | PrimitiveTopology topology{}; | ||
| 369 | |||
| 370 | switch (regs.topology_override) { | ||
| 371 | case PrimitiveTopologyOverride::None: | ||
| 372 | topology = regs.draw.topology; | ||
| 373 | break; | ||
| 374 | case PrimitiveTopologyOverride::Points: | ||
| 375 | topology = PrimitiveTopology::Points; | ||
| 376 | break; | ||
| 377 | case PrimitiveTopologyOverride::Lines: | ||
| 378 | topology = PrimitiveTopology::Lines; | ||
| 379 | break; | ||
| 380 | case PrimitiveTopologyOverride::LineStrip: | ||
| 381 | topology = PrimitiveTopology::LineStrip; | ||
| 382 | break; | ||
| 383 | default: | ||
| 384 | topology = static_cast<PrimitiveTopology>(regs.topology_override); | ||
| 385 | break; | ||
| 386 | } | ||
| 387 | |||
| 388 | if (use_topology_override) { | ||
| 389 | regs.draw.topology.Assign(topology); | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 355 | void Maxwell3D::FlushMMEInlineDraw() { | 393 | void Maxwell3D::FlushMMEInlineDraw() { |
| 356 | LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), | 394 | LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), |
| 357 | regs.vertex_buffer.count); | 395 | regs.vertex_buffer.count); |
| @@ -362,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() { | |||
| 362 | ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, | 400 | ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, |
| 363 | "Illegal combination of instancing parameters"); | 401 | "Illegal combination of instancing parameters"); |
| 364 | 402 | ||
| 403 | ProcessTopologyOverride(); | ||
| 404 | |||
| 365 | const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; | 405 | const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; |
| 366 | if (ShouldExecute()) { | 406 | if (ShouldExecute()) { |
| 367 | rasterizer->Draw(is_indexed, true); | 407 | rasterizer->Draw(is_indexed, true); |
| @@ -521,6 +561,8 @@ void Maxwell3D::DrawArrays() { | |||
| 521 | ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, | 561 | ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, |
| 522 | "Illegal combination of instancing parameters"); | 562 | "Illegal combination of instancing parameters"); |
| 523 | 563 | ||
| 564 | ProcessTopologyOverride(); | ||
| 565 | |||
| 524 | if (regs.draw.instance_next) { | 566 | if (regs.draw.instance_next) { |
| 525 | // Increment the current instance *before* drawing. | 567 | // Increment the current instance *before* drawing. |
| 526 | state.current_instance += 1; | 568 | state.current_instance += 1; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 38d9b6660..357a74c70 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -367,6 +367,22 @@ public: | |||
| 367 | Patches = 0xe, | 367 | Patches = 0xe, |
| 368 | }; | 368 | }; |
| 369 | 369 | ||
| 370 | // Constants as from NVC0_3D_UNK1970_D3D | ||
| 371 | // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598 | ||
| 372 | enum class PrimitiveTopologyOverride : u32 { | ||
| 373 | None = 0x0, | ||
| 374 | Points = 0x1, | ||
| 375 | Lines = 0x2, | ||
| 376 | LineStrip = 0x3, | ||
| 377 | Triangles = 0x4, | ||
| 378 | TriangleStrip = 0x5, | ||
| 379 | LinesAdjacency = 0xa, | ||
| 380 | LineStripAdjacency = 0xb, | ||
| 381 | TrianglesAdjacency = 0xc, | ||
| 382 | TriangleStripAdjacency = 0xd, | ||
| 383 | Patches = 0xe, | ||
| 384 | }; | ||
| 385 | |||
| 370 | enum class IndexFormat : u32 { | 386 | enum class IndexFormat : u32 { |
| 371 | UnsignedByte = 0x0, | 387 | UnsignedByte = 0x0, |
| 372 | UnsignedShort = 0x1, | 388 | UnsignedShort = 0x1, |
| @@ -1200,7 +1216,12 @@ public: | |||
| 1200 | } | 1216 | } |
| 1201 | } index_array; | 1217 | } index_array; |
| 1202 | 1218 | ||
| 1203 | INSERT_PADDING_WORDS_NOINIT(0x7); | 1219 | union { |
| 1220 | BitField<0, 16, u32> first; | ||
| 1221 | BitField<16, 16, u32> count; | ||
| 1222 | } small_index; | ||
| 1223 | |||
| 1224 | INSERT_PADDING_WORDS_NOINIT(0x6); | ||
| 1204 | 1225 | ||
| 1205 | INSERT_PADDING_WORDS_NOINIT(0x1F); | 1226 | INSERT_PADDING_WORDS_NOINIT(0x1F); |
| 1206 | 1227 | ||
| @@ -1244,7 +1265,11 @@ public: | |||
| 1244 | BitField<11, 1, u32> depth_clamp_disabled; | 1265 | BitField<11, 1, u32> depth_clamp_disabled; |
| 1245 | } view_volume_clip_control; | 1266 | } view_volume_clip_control; |
| 1246 | 1267 | ||
| 1247 | INSERT_PADDING_WORDS_NOINIT(0x1F); | 1268 | INSERT_PADDING_WORDS_NOINIT(0xC); |
| 1269 | |||
| 1270 | PrimitiveTopologyOverride topology_override; | ||
| 1271 | |||
| 1272 | INSERT_PADDING_WORDS_NOINIT(0x12); | ||
| 1248 | 1273 | ||
| 1249 | u32 depth_bounds_enable; | 1274 | u32 depth_bounds_enable; |
| 1250 | 1275 | ||
| @@ -1529,6 +1554,9 @@ private: | |||
| 1529 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. | 1554 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. |
| 1530 | void DrawArrays(); | 1555 | void DrawArrays(); |
| 1531 | 1556 | ||
| 1557 | /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) | ||
| 1558 | void ProcessTopologyOverride(); | ||
| 1559 | |||
| 1532 | // Handles a instance drawcall from MME | 1560 | // Handles a instance drawcall from MME |
| 1533 | void StepInstance(MMEDrawMode expected_mode, u32 count); | 1561 | void StepInstance(MMEDrawMode expected_mode, u32 count); |
| 1534 | 1562 | ||
| @@ -1556,6 +1584,7 @@ private: | |||
| 1556 | Upload::State upload_state; | 1584 | Upload::State upload_state; |
| 1557 | 1585 | ||
| 1558 | bool execute_on{true}; | 1586 | bool execute_on{true}; |
| 1587 | bool use_topology_override{false}; | ||
| 1559 | }; | 1588 | }; |
| 1560 | 1589 | ||
| 1561 | #define ASSERT_REG_POSITION(field_name, position) \ | 1590 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -1672,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585); | |||
| 1672 | ASSERT_REG_POSITION(primitive_restart, 0x591); | 1701 | ASSERT_REG_POSITION(primitive_restart, 0x591); |
| 1673 | ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); | 1702 | ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); |
| 1674 | ASSERT_REG_POSITION(index_array, 0x5F2); | 1703 | ASSERT_REG_POSITION(index_array, 0x5F2); |
| 1704 | ASSERT_REG_POSITION(small_index, 0x5F9); | ||
| 1675 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | 1705 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); |
| 1676 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1706 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
| 1677 | ASSERT_REG_POSITION(vp_point_size, 0x644); | 1707 | ASSERT_REG_POSITION(vp_point_size, 0x644); |
| @@ -1681,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648); | |||
| 1681 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); | 1711 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); |
| 1682 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); | 1712 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); |
| 1683 | ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); | 1713 | ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); |
| 1714 | ASSERT_REG_POSITION(topology_override, 0x65C); | ||
| 1684 | ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); | 1715 | ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); |
| 1685 | ASSERT_REG_POSITION(logic_op, 0x671); | 1716 | ASSERT_REG_POSITION(logic_op, 0x671); |
| 1686 | ASSERT_REG_POSITION(clear_buffers, 0x674); | 1717 | ASSERT_REG_POSITION(clear_buffers, 0x674); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0f62779de..ca6019a3a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im | |||
| 1067 | } | 1067 | } |
| 1068 | break; | 1068 | break; |
| 1069 | case PixelFormat::A8B8G8R8_UNORM: | 1069 | case PixelFormat::A8B8G8R8_UNORM: |
| 1070 | if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { | 1070 | if (src_view.format == PixelFormat::S8_UINT_D24_UNORM || |
| 1071 | src_view.format == PixelFormat::D24_UNORM_S8_UINT) { | ||
| 1071 | return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); | 1072 | return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); |
| 1072 | } | 1073 | } |
| 1073 | break; | 1074 | break; |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 3ac1440c9..34782c378 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -124,7 +124,11 @@ keyboard_enabled = | |||
| 124 | [Core] | 124 | [Core] |
| 125 | # Whether to use multi-core for CPU emulation | 125 | # Whether to use multi-core for CPU emulation |
| 126 | # 0: Disabled, 1 (default): Enabled | 126 | # 0: Disabled, 1 (default): Enabled |
| 127 | use_multi_core= | 127 | use_multi_core = |
| 128 | |||
| 129 | # Enable extended guest system memory layout (6GB DRAM) | ||
| 130 | # 0 (default): Disabled, 1: Enabled | ||
| 131 | use_extended_memory_layout = | ||
| 128 | 132 | ||
| 129 | [Cpu] | 133 | [Cpu] |
| 130 | # Adjusts various optimizations. | 134 | # Adjusts various optimizations. |