diff options
20 files changed, 128 insertions, 36 deletions
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 1311e66a9..123b3da7e 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp | |||
| @@ -39,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | using namespace Common::Literals; | 41 | using namespace Common::Literals; |
| 42 | constexpr u32 StackSize = 32_KiB; | 42 | constexpr u32 StackSize = 128_KiB; |
| 43 | 43 | ||
| 44 | } // namespace | 44 | } // namespace |
| 45 | 45 | ||
diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index 8e81c66a5..def888d15 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | #include "common/bit_cast.h" | 5 | #include "common/bit_cast.h" |
| 6 | #include "core/arm/nce/interpreter_visitor.h" | 6 | #include "core/arm/nce/interpreter_visitor.h" |
| 7 | 7 | ||
| 8 | #include <dynarmic/frontend/A64/decoder/a64.h> | ||
| 9 | |||
| 10 | namespace Core { | 8 | namespace Core { |
| 11 | 9 | ||
| 12 | template <u32 BitSize> | 10 | template <u32 BitSize> |
| @@ -249,6 +247,7 @@ bool InterpreterVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) { | |||
| 249 | return false; | 247 | return false; |
| 250 | } | 248 | } |
| 251 | 249 | ||
| 250 | // Size in bytes | ||
| 252 | const u64 size = 4 << opc.ZeroExtend(); | 251 | const u64 size = 4 << opc.ZeroExtend(); |
| 253 | const u64 offset = imm19.SignExtend<u64>() << 2; | 252 | const u64 offset = imm19.SignExtend<u64>() << 2; |
| 254 | const u64 address = this->GetPc() + offset; | 253 | const u64 address = this->GetPc() + offset; |
| @@ -530,7 +529,7 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, | |||
| 530 | } | 529 | } |
| 531 | case MemOp::Load: { | 530 | case MemOp::Load: { |
| 532 | u128 data{}; | 531 | u128 data{}; |
| 533 | m_memory.ReadBlock(address, &data, datasize); | 532 | m_memory.ReadBlock(address, &data, datasize / 8); |
| 534 | this->SetVec(Vt, data); | 533 | this->SetVec(Vt, data); |
| 535 | break; | 534 | break; |
| 536 | } | 535 | } |
diff --git a/src/core/arm/nce/visitor_base.h b/src/core/arm/nce/visitor_base.h index 8fb032912..6a2be3d9b 100644 --- a/src/core/arm/nce/visitor_base.h +++ b/src/core/arm/nce/visitor_base.h | |||
| @@ -4,9 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #pragma GCC diagnostic push | ||
| 8 | #pragma GCC diagnostic ignored "-Wshadow" | ||
| 9 | |||
| 7 | #include <dynarmic/frontend/A64/a64_types.h> | 10 | #include <dynarmic/frontend/A64/a64_types.h> |
| 11 | #include <dynarmic/frontend/A64/decoder/a64.h> | ||
| 8 | #include <dynarmic/frontend/imm.h> | 12 | #include <dynarmic/frontend/imm.h> |
| 9 | 13 | ||
| 14 | #pragma GCC diagnostic pop | ||
| 15 | |||
| 10 | namespace Core { | 16 | namespace Core { |
| 11 | 17 | ||
| 12 | class VisitorBase { | 18 | class VisitorBase { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e5a78a914..feca5105f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -74,6 +74,11 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 74 | case IR::Attribute::ClipDistance7: { | 74 | case IR::Attribute::ClipDistance7: { |
| 75 | const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; | 75 | const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; |
| 76 | const u32 index{static_cast<u32>(attr) - base}; | 76 | const u32 index{static_cast<u32>(attr) - base}; |
| 77 | if (index >= ctx.profile.max_user_clip_distances) { | ||
| 78 | LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index, | ||
| 79 | ctx.profile.max_user_clip_distances); | ||
| 80 | return std::nullopt; | ||
| 81 | } | ||
| 77 | const Id clip_num{ctx.Const(index)}; | 82 | const Id clip_num{ctx.Const(index)}; |
| 78 | return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); | 83 | return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); |
| 79 | } | 84 | } |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 3350f1f85..2abc21a17 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -1528,7 +1528,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { | |||
| 1528 | if (stage == Stage::Fragment) { | 1528 | if (stage == Stage::Fragment) { |
| 1529 | throw NotImplementedException("Storing ClipDistance in fragment stage"); | 1529 | throw NotImplementedException("Storing ClipDistance in fragment stage"); |
| 1530 | } | 1530 | } |
| 1531 | const Id type{TypeArray(F32[1], Const(8U))}; | 1531 | const Id type{TypeArray( |
| 1532 | F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))}; | ||
| 1532 | clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); | 1533 | clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); |
| 1533 | } | 1534 | } |
| 1534 | if (info.stores[IR::Attribute::Layer] && | 1535 | if (info.stores[IR::Attribute::Layer] && |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70292686f..cb82a326c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -913,7 +913,11 @@ void GatherInfoFromHeader(Environment& env, Info& info) { | |||
| 913 | } | 913 | } |
| 914 | for (size_t index = 0; index < 8; ++index) { | 914 | for (size_t index = 0; index < 8; ++index) { |
| 915 | const u16 mask{header.vtg.omap_systemc.clip_distances}; | 915 | const u16 mask{header.vtg.omap_systemc.clip_distances}; |
| 916 | info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); | 916 | const bool used{((mask >> index) & 1) != 0}; |
| 917 | info.stores.Set(IR::Attribute::ClipDistance0 + index, used); | ||
| 918 | if (used) { | ||
| 919 | info.used_clip_distances = static_cast<u32>(index) + 1; | ||
| 920 | } | ||
| 917 | } | 921 | } |
| 918 | info.stores.Set(IR::Attribute::PrimitiveId, | 922 | info.stores.Set(IR::Attribute::PrimitiveId, |
| 919 | header.vtg.omap_systemb.primitive_array_id != 0); | 923 | header.vtg.omap_systemb.primitive_array_id != 0); |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 66901a965..7578d41cc 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -87,6 +87,8 @@ struct Profile { | |||
| 87 | bool has_broken_robust{}; | 87 | bool has_broken_robust{}; |
| 88 | 88 | ||
| 89 | u64 min_ssbo_alignment{}; | 89 | u64 min_ssbo_alignment{}; |
| 90 | |||
| 91 | u32 max_user_clip_distances{}; | ||
| 90 | }; | 92 | }; |
| 91 | 93 | ||
| 92 | } // namespace Shader | 94 | } // namespace Shader |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index b4b4afd37..1419b8fe7 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -227,6 +227,8 @@ struct Info { | |||
| 227 | bool requires_layer_emulation{}; | 227 | bool requires_layer_emulation{}; |
| 228 | IR::Attribute emulated_layer{}; | 228 | IR::Attribute emulated_layer{}; |
| 229 | 229 | ||
| 230 | u32 used_clip_distances{}; | ||
| 231 | |||
| 230 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | 232 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |
| 231 | constant_buffer_descriptors; | 233 | constant_buffer_descriptors; |
| 232 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | 234 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a71866b75..b787b6994 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast | |||
| 58 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | 58 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 59 | } | 59 | } |
| 60 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | 60 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); |
| 61 | if (runtime.has_unified_vertex_buffers) { | ||
| 62 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||
| 63 | } | ||
| 61 | } | 64 | } |
| 62 | 65 | ||
| 63 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { | 66 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { |
| @@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, | |||
| 109 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, | 112 | : device{device_}, staging_buffer_pool{staging_buffer_pool_}, |
| 110 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 113 | has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 111 | use_assembly_shaders{device.UseAssemblyShaders()}, | 114 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 115 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||
| 112 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | 116 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { |
| 113 | GLint gl_max_attributes; | 117 | GLint gl_max_attributes; |
| 114 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | 118 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |
| @@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz | |||
| 210 | } | 214 | } |
| 211 | 215 | ||
| 212 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { | 216 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { |
| 213 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | 217 | if (has_unified_vertex_buffers) { |
| 214 | index_buffer_offset = offset; | 218 | buffer.MakeResident(GL_READ_ONLY); |
| 219 | glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, | ||
| 220 | static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); | ||
| 221 | } else { | ||
| 222 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | ||
| 223 | index_buffer_offset = offset; | ||
| 224 | } | ||
| 215 | } | 225 | } |
| 216 | 226 | ||
| 217 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, | 227 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, |
| @@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, | |||
| 219 | if (index >= max_attributes) { | 229 | if (index >= max_attributes) { |
| 220 | return; | 230 | return; |
| 221 | } | 231 | } |
| 222 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | 232 | if (has_unified_vertex_buffers) { |
| 223 | static_cast<GLsizei>(stride)); | 233 | buffer.MakeResident(GL_READ_ONLY); |
| 234 | glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); | ||
| 235 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, | ||
| 236 | buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); | ||
| 237 | } else { | ||
| 238 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 239 | static_cast<GLsizei>(stride)); | ||
| 240 | } | ||
| 224 | } | 241 | } |
| 225 | 242 | ||
| 226 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { | 243 | void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { |
| @@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi | |||
| 233 | [](u64 stride) { return static_cast<GLsizei>(stride); }); | 250 | [](u64 stride) { return static_cast<GLsizei>(stride); }); |
| 234 | const u32 count = | 251 | const u32 count = |
| 235 | std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index); | 252 | std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index); |
| 236 | glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), | 253 | if (has_unified_vertex_buffers) { |
| 237 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | 254 | for (u32 index = 0; index < count; ++index) { |
| 238 | buffer_strides.data()); | 255 | Buffer& buffer = *bindings.buffers[index]; |
| 256 | buffer.MakeResident(GL_READ_ONLY); | ||
| 257 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index, | ||
| 258 | buffer.HostGpuAddr() + bindings.offsets[index], | ||
| 259 | static_cast<GLsizeiptr>(bindings.sizes[index])); | ||
| 260 | } | ||
| 261 | static constexpr std::array<size_t, 32> ZEROS{}; | ||
| 262 | glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), | ||
| 263 | reinterpret_cast<const GLuint*>(ZEROS.data()), | ||
| 264 | reinterpret_cast<const GLintptr*>(ZEROS.data()), buffer_strides.data()); | ||
| 265 | } else { | ||
| 266 | glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), | ||
| 267 | reinterpret_cast<const GLintptr*>(bindings.offsets.data()), | ||
| 268 | buffer_strides.data()); | ||
| 269 | } | ||
| 239 | } | 270 | } |
| 240 | 271 | ||
| 241 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 272 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 71cd45d35..1e8708f59 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -209,6 +209,7 @@ private: | |||
| 209 | 209 | ||
| 210 | bool has_fast_buffer_sub_data = false; | 210 | bool has_fast_buffer_sub_data = false; |
| 211 | bool use_assembly_shaders = false; | 211 | bool use_assembly_shaders = false; |
| 212 | bool has_unified_vertex_buffers = false; | ||
| 212 | 213 | ||
| 213 | bool use_storage_buffers = false; | 214 | bool use_storage_buffers = false; |
| 214 | 215 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a6c93068f..993438a27 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); | 200 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); |
| 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 201 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; | 202 | has_derivative_control = GLAD_GL_ARB_derivative_control; |
| 203 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | ||
| 203 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | 204 | has_debugging_tool_attached = IsDebugToolAttached(extensions); |
| 204 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | 205 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |
| 205 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; | 206 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 96034ea4a..a5a6bbbba 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -72,6 +72,10 @@ public: | |||
| 72 | return has_texture_shadow_lod; | 72 | return has_texture_shadow_lod; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool HasVertexBufferUnifiedMemory() const { | ||
| 76 | return has_vertex_buffer_unified_memory; | ||
| 77 | } | ||
| 78 | |||
| 75 | bool HasASTC() const { | 79 | bool HasASTC() const { |
| 76 | return has_astc; | 80 | return has_astc; |
| 77 | } | 81 | } |
| @@ -211,6 +215,7 @@ private: | |||
| 211 | bool has_vertex_viewport_layer{}; | 215 | bool has_vertex_viewport_layer{}; |
| 212 | bool has_image_load_formatted{}; | 216 | bool has_image_load_formatted{}; |
| 213 | bool has_texture_shadow_lod{}; | 217 | bool has_texture_shadow_lod{}; |
| 218 | bool has_vertex_buffer_unified_memory{}; | ||
| 214 | bool has_astc{}; | 219 | bool has_astc{}; |
| 215 | bool has_variable_aoffi{}; | 220 | bool has_variable_aoffi{}; |
| 216 | bool has_component_indexing_bug{}; | 221 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 279e5a4e0..4832c03c5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -162,14 +162,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) { | |||
| 162 | SyncFramebufferSRGB(); | 162 | SyncFramebufferSRGB(); |
| 163 | } | 163 | } |
| 164 | if (regs.clear_surface.Z) { | 164 | if (regs.clear_surface.Z) { |
| 165 | ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); | 165 | if (regs.zeta_enable == 0) { |
| 166 | LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!"); | ||
| 167 | } | ||
| 166 | use_depth = true; | 168 | use_depth = true; |
| 167 | 169 | ||
| 168 | state_tracker.NotifyDepthMask(); | 170 | state_tracker.NotifyDepthMask(); |
| 169 | glDepthMask(GL_TRUE); | 171 | glDepthMask(GL_TRUE); |
| 170 | } | 172 | } |
| 171 | if (regs.clear_surface.S) { | 173 | if (regs.clear_surface.S) { |
| 172 | ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); | 174 | if (regs.zeta_enable == 0) { |
| 175 | LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!"); | ||
| 176 | } | ||
| 173 | use_stencil = true; | 177 | use_stencil = true; |
| 174 | } | 178 | } |
| 175 | 179 | ||
| @@ -1294,15 +1298,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum | |||
| 1294 | program->ConfigureTransformFeedback(); | 1298 | program->ConfigureTransformFeedback(); |
| 1295 | 1299 | ||
| 1296 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | 1300 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || |
| 1297 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || | 1301 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); |
| 1298 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); | ||
| 1299 | UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); | ||
| 1300 | 1302 | ||
| 1301 | // We may have to call BeginTransformFeedbackNV here since they seem to call different | 1303 | // We may have to call BeginTransformFeedbackNV here since they seem to call different |
| 1302 | // implementations on Nvidia's driver (the pointer is different) but we are using | 1304 | // implementations on Nvidia's driver (the pointer is different) but we are using |
| 1303 | // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB | 1305 | // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB |
| 1304 | // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. | 1306 | // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. |
| 1305 | glBeginTransformFeedback(GL_POINTS); | 1307 | glBeginTransformFeedback(primitive_mode); |
| 1306 | } | 1308 | } |
| 1307 | 1309 | ||
| 1308 | void RasterizerOpenGL::EndTransformFeedback() { | 1310 | void RasterizerOpenGL::EndTransformFeedback() { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 26f2d0ea7..b5999362a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -233,6 +233,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 233 | .ignore_nan_fp_comparisons = true, | 233 | .ignore_nan_fp_comparisons = true, |
| 234 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | 234 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), |
| 235 | .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), | 235 | .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), |
| 236 | .max_user_clip_distances = 8, | ||
| 236 | }, | 237 | }, |
| 237 | host_info{ | 238 | host_info{ |
| 238 | .support_float64 = true, | 239 | .support_float64 = true, |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7a4f0c5c1..2933718b6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | 168 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { |
| 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | 169 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); |
| 170 | } | 170 | } |
| 171 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 172 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 173 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 174 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 175 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 176 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 177 | &vertex_buffer_address); | ||
| 178 | } | ||
| 171 | } | 179 | } |
| 172 | 180 | ||
| 173 | RendererOpenGL::~RendererOpenGL() = default; | 181 | RendererOpenGL::~RendererOpenGL() = default; |
| @@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 667 | offsetof(ScreenRectVertex, tex_coord)); | 675 | offsetof(ScreenRectVertex, tex_coord)); |
| 668 | glVertexAttribBinding(PositionLocation, 0); | 676 | glVertexAttribBinding(PositionLocation, 0); |
| 669 | glVertexAttribBinding(TexCoordLocation, 0); | 677 | glVertexAttribBinding(TexCoordLocation, 0); |
| 670 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | 678 | if (device.HasVertexBufferUnifiedMemory()) { |
| 679 | glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); | ||
| 680 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, | ||
| 681 | sizeof(vertices)); | ||
| 682 | } else { | ||
| 683 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 684 | } | ||
| 671 | 685 | ||
| 672 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { | 686 | if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { |
| 673 | glBindSampler(0, present_sampler.handle); | 687 | glBindSampler(0, present_sampler.handle); |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5958f52f7..2267069e7 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -563,22 +563,27 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi | |||
| 563 | } | 563 | } |
| 564 | buffer_handles.push_back(handle); | 564 | buffer_handles.push_back(handle); |
| 565 | } | 565 | } |
| 566 | const u32 device_max = device.GetMaxVertexInputBindings(); | ||
| 567 | const u32 min_binding = std::min(bindings.min_index, device_max); | ||
| 568 | const u32 max_binding = std::min(bindings.max_index, device_max); | ||
| 569 | const u32 binding_count = max_binding - min_binding; | ||
| 570 | if (binding_count == 0) { | ||
| 571 | return; | ||
| 572 | } | ||
| 566 | if (device.IsExtExtendedDynamicStateSupported()) { | 573 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 567 | scheduler.Record([this, bindings_ = std::move(bindings), | 574 | scheduler.Record([bindings_ = std::move(bindings), |
| 568 | buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { | 575 | buffer_handles_ = std::move(buffer_handles), |
| 569 | cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, | 576 | binding_count](vk::CommandBuffer cmdbuf) { |
| 570 | std::min(bindings_.max_index - bindings_.min_index, | 577 | cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), |
| 571 | device.GetMaxVertexInputBindings()), | 578 | bindings_.offsets.data(), bindings_.sizes.data(), |
| 572 | buffer_handles_.data(), bindings_.offsets.data(), | 579 | bindings_.strides.data()); |
| 573 | bindings_.sizes.data(), bindings_.strides.data()); | ||
| 574 | }); | 580 | }); |
| 575 | } else { | 581 | } else { |
| 576 | scheduler.Record([this, bindings_ = std::move(bindings), | 582 | scheduler.Record([bindings_ = std::move(bindings), |
| 577 | buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { | 583 | buffer_handles_ = std::move(buffer_handles), |
| 578 | cmdbuf.BindVertexBuffers(bindings_.min_index, | 584 | binding_count](vk::CommandBuffer cmdbuf) { |
| 579 | std::min(bindings_.max_index - bindings_.min_index, | 585 | cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), |
| 580 | device.GetMaxVertexInputBindings()), | 586 | bindings_.offsets.data()); |
| 581 | buffer_handles_.data(), bindings_.offsets.data()); | ||
| 582 | }); | 587 | }); |
| 583 | } | 588 | } |
| 584 | } | 589 | } |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a13b2a72..fa63d6228 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -374,6 +374,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 374 | .has_broken_robust = | 374 | .has_broken_robust = |
| 375 | device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, | 375 | device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, |
| 376 | .min_ssbo_alignment = device.GetStorageBufferAlignment(), | 376 | .min_ssbo_alignment = device.GetStorageBufferAlignment(), |
| 377 | .max_user_clip_distances = device.GetMaxUserClipDistances(), | ||
| 377 | }; | 378 | }; |
| 378 | 379 | ||
| 379 | host_info = Shader::HostTranslateInfo{ | 380 | host_info = Shader::HostTranslateInfo{ |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 078777cdd..95954ade7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -289,12 +289,15 @@ public: | |||
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | if (has_multi_queries) { | 291 | if (has_multi_queries) { |
| 292 | size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used); | 292 | const size_t min_accumulation_limit = |
| 293 | std::min(first_accumulation_checkpoint, num_slots_used); | ||
| 294 | const size_t max_accumulation_limit = | ||
| 295 | std::max(last_accumulation_checkpoint, num_slots_used); | ||
| 296 | const size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used); | ||
| 293 | resolve_buffers.push_back(intermediary_buffer_index); | 297 | resolve_buffers.push_back(intermediary_buffer_index); |
| 294 | queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index], | 298 | queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index], |
| 295 | *buffers[resolve_buffer_index], num_slots_used, | 299 | *buffers[resolve_buffer_index], num_slots_used, |
| 296 | std::min(first_accumulation_checkpoint, num_slots_used), | 300 | min_accumulation_limit, max_accumulation_limit); |
| 297 | last_accumulation_checkpoint); | ||
| 298 | 301 | ||
| 299 | } else { | 302 | } else { |
| 300 | scheduler.RequestOutsideRenderPassOperationContext(); | 303 | scheduler.RequestOutsideRenderPassOperationContext(); |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 1fda0042d..a6fbca69e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -695,6 +695,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 695 | std::min(properties.properties.limits.maxVertexInputBindings, 16U); | 695 | std::min(properties.properties.limits.maxVertexInputBindings, 16U); |
| 696 | } | 696 | } |
| 697 | 697 | ||
| 698 | if (is_turnip) { | ||
| 699 | LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits"); | ||
| 700 | properties.properties.limits.maxVertexInputBindings = 32; | ||
| 701 | } | ||
| 702 | |||
| 698 | if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) { | 703 | if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) { |
| 699 | LOG_INFO(Render_Vulkan, | 704 | LOG_INFO(Render_Vulkan, |
| 700 | "Removing extendedDynamicState2 due to missing extendedDynamicState"); | 705 | "Removing extendedDynamicState2 due to missing extendedDynamicState"); |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4f3846345..701817086 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -665,6 +665,10 @@ public: | |||
| 665 | return properties.properties.limits.maxViewports; | 665 | return properties.properties.limits.maxViewports; |
| 666 | } | 666 | } |
| 667 | 667 | ||
| 668 | u32 GetMaxUserClipDistances() const { | ||
| 669 | return properties.properties.limits.maxClipDistances; | ||
| 670 | } | ||
| 671 | |||
| 668 | bool SupportsConditionalBarriers() const { | 672 | bool SupportsConditionalBarriers() const { |
| 669 | return supports_conditional_barriers; | 673 | return supports_conditional_barriers; |
| 670 | } | 674 | } |