diff options
| author | 2023-12-22 11:42:05 -0500 | |
|---|---|---|
| committer | 2023-12-22 11:42:05 -0500 | |
| commit | 91290b9be4e99a9890c6545e327f600484e39914 (patch) | |
| tree | 7a247dcb080ba01e97258c471c2b97f47ab87354 | |
| parent | Merge pull request #12435 from liamwhite/type-check (diff) | |
| parent | gl_rasterizer: Less spammy log for unimplemented resets (diff) | |
| download | yuzu-91290b9be4e99a9890c6545e327f600484e39914.tar.gz yuzu-91290b9be4e99a9890c6545e327f600484e39914.tar.xz yuzu-91290b9be4e99a9890c6545e327f600484e39914.zip | |
Merge pull request #12412 from ameerj/gl-query-prims
OpenGL: Add GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/query_cache.h | 39 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 |
6 files changed, 91 insertions, 45 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 592c28ba3..95ba4f76c 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
| @@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 586 | } | 586 | } |
| 587 | 587 | ||
| 588 | void Maxwell3D::ProcessCounterReset() { | 588 | void Maxwell3D::ProcessCounterReset() { |
| 589 | switch (regs.clear_report_value) { | 589 | const auto query_type = [clear_report = regs.clear_report_value]() { |
| 590 | case Regs::ClearReport::ZPassPixelCount: | 590 | switch (clear_report) { |
| 591 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | 591 | case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount: |
| 592 | break; | 592 | return VideoCommon::QueryType::ZPassPixelCount64; |
| 593 | default: | 593 | case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded: |
| 594 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | 594 | return VideoCommon::QueryType::StreamingPrimitivesSucceeded; |
| 595 | break; | 595 | case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated: |
| 596 | } | 596 | return VideoCommon::QueryType::PrimitivesGenerated; |
| 597 | case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut: | ||
| 598 | return VideoCommon::QueryType::VtgPrimitivesOut; | ||
| 599 | default: | ||
| 600 | LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report); | ||
| 601 | return VideoCommon::QueryType::Payload; | ||
| 602 | } | ||
| 603 | }(); | ||
| 604 | rasterizer->ResetCounter(query_type); | ||
| 597 | } | 605 | } |
| 598 | 606 | ||
| 599 | void Maxwell3D::ProcessSyncPoint() { | 607 | void Maxwell3D::ProcessSyncPoint() { |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 9fcaeeac7..a64404ce4 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
| @@ -28,8 +28,11 @@ | |||
| 28 | namespace VideoCore { | 28 | namespace VideoCore { |
| 29 | enum class QueryType { | 29 | enum class QueryType { |
| 30 | SamplesPassed, | 30 | SamplesPassed, |
| 31 | PrimitivesGenerated, | ||
| 32 | TfbPrimitivesWritten, | ||
| 33 | Count, | ||
| 31 | }; | 34 | }; |
| 32 | constexpr std::size_t NumQueryTypes = 1; | 35 | constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count); |
| 33 | } // namespace VideoCore | 36 | } // namespace VideoCore |
| 34 | 37 | ||
| 35 | namespace VideoCommon { | 38 | namespace VideoCommon { |
| @@ -44,15 +47,6 @@ public: | |||
| 44 | explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) | 47 | explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) |
| 45 | : cache{cache_}, type{type_} {} | 48 | : cache{cache_}, type{type_} {} |
| 46 | 49 | ||
| 47 | /// Updates the state of the stream, enabling or disabling as needed. | ||
| 48 | void Update(bool enabled) { | ||
| 49 | if (enabled) { | ||
| 50 | Enable(); | ||
| 51 | } else { | ||
| 52 | Disable(); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | /// Resets the stream to zero. It doesn't disable the query after resetting. | 50 | /// Resets the stream to zero. It doesn't disable the query after resetting. |
| 57 | void Reset() { | 51 | void Reset() { |
| 58 | if (current) { | 52 | if (current) { |
| @@ -80,7 +74,6 @@ public: | |||
| 80 | return current != nullptr; | 74 | return current != nullptr; |
| 81 | } | 75 | } |
| 82 | 76 | ||
| 83 | private: | ||
| 84 | /// Enables the stream. | 77 | /// Enables the stream. |
| 85 | void Enable() { | 78 | void Enable() { |
| 86 | if (current) { | 79 | if (current) { |
| @@ -97,6 +90,7 @@ private: | |||
| 97 | last = std::exchange(current, nullptr); | 90 | last = std::exchange(current, nullptr); |
| 98 | } | 91 | } |
| 99 | 92 | ||
| 93 | private: | ||
| 100 | QueryCache& cache; | 94 | QueryCache& cache; |
| 101 | const VideoCore::QueryType type; | 95 | const VideoCore::QueryType type; |
| 102 | 96 | ||
| @@ -112,8 +106,14 @@ public: | |||
| 112 | : rasterizer{rasterizer_}, | 106 | : rasterizer{rasterizer_}, |
| 113 | // Use reinterpret_cast instead of static_cast as workaround for | 107 | // Use reinterpret_cast instead of static_cast as workaround for |
| 114 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | 108 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |
| 115 | cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this), | 109 | cpu_memory{cpu_memory_}, streams{{ |
| 116 | VideoCore::QueryType::SamplesPassed}}} { | 110 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 111 | VideoCore::QueryType::SamplesPassed}}, | ||
| 112 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||
| 113 | VideoCore::QueryType::PrimitivesGenerated}}, | ||
| 114 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||
| 115 | VideoCore::QueryType::TfbPrimitivesWritten}}, | ||
| 116 | }} { | ||
| 117 | (void)slot_async_jobs.insert(); // Null value | 117 | (void)slot_async_jobs.insert(); // Null value |
| 118 | } | 118 | } |
| 119 | 119 | ||
| @@ -157,12 +157,11 @@ public: | |||
| 157 | AsyncFlushQuery(query, timestamp, lock); | 157 | AsyncFlushQuery(query, timestamp, lock); |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 160 | /// Enables all available GPU counters |
| 161 | void UpdateCounters() { | 161 | void EnableCounters() { |
| 162 | std::unique_lock lock{mutex}; | 162 | std::unique_lock lock{mutex}; |
| 163 | if (maxwell3d) { | 163 | for (auto& stream : streams) { |
| 164 | const auto& regs = maxwell3d->regs; | 164 | stream.Enable(); |
| 165 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable); | ||
| 166 | } | 165 | } |
| 167 | } | 166 | } |
| 168 | 167 | ||
| @@ -176,7 +175,7 @@ public: | |||
| 176 | void DisableStreams() { | 175 | void DisableStreams() { |
| 177 | std::unique_lock lock{mutex}; | 176 | std::unique_lock lock{mutex}; |
| 178 | for (auto& stream : streams) { | 177 | for (auto& stream : streams) { |
| 179 | stream.Update(false); | 178 | stream.Disable(); |
| 180 | } | 179 | } |
| 181 | } | 180 | } |
| 182 | 181 | ||
| @@ -353,7 +352,7 @@ private: | |||
| 353 | 352 | ||
| 354 | std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; | 353 | std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; |
| 355 | std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; | 354 | std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; |
| 356 | }; | 355 | }; // namespace VideoCommon |
| 357 | 356 | ||
| 358 | template <class QueryCache, class HostCounter> | 357 | template <class QueryCache, class HostCounter> |
| 359 | class HostCounterBase { | 358 | class HostCounterBase { |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ec142d48e..fef7360ed 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
| @@ -18,16 +18,27 @@ namespace OpenGL { | |||
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | 20 | ||
| 21 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; | ||
| 22 | |||
| 23 | constexpr GLenum GetTarget(VideoCore::QueryType type) { | 21 | constexpr GLenum GetTarget(VideoCore::QueryType type) { |
| 24 | return QueryTargets[static_cast<std::size_t>(type)]; | 22 | switch (type) { |
| 23 | case VideoCore::QueryType::SamplesPassed: | ||
| 24 | return GL_SAMPLES_PASSED; | ||
| 25 | case VideoCore::QueryType::PrimitivesGenerated: | ||
| 26 | return GL_PRIMITIVES_GENERATED; | ||
| 27 | case VideoCore::QueryType::TfbPrimitivesWritten: | ||
| 28 | return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN; | ||
| 29 | default: | ||
| 30 | break; | ||
| 31 | } | ||
| 32 | UNIMPLEMENTED_MSG("Query type {}", type); | ||
| 33 | return 0; | ||
| 25 | } | 34 | } |
| 26 | 35 | ||
| 27 | } // Anonymous namespace | 36 | } // Anonymous namespace |
| 28 | 37 | ||
| 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | 38 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) |
| 30 | : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | 39 | : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { |
| 40 | EnableCounters(); | ||
| 41 | } | ||
| 31 | 42 | ||
| 32 | QueryCache::~QueryCache() = default; | 43 | QueryCache::~QueryCache() = default; |
| 33 | 44 | ||
| @@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) { | |||
| 103 | auto& stream = cache->Stream(type); | 114 | auto& stream = cache->Stream(type); |
| 104 | const bool slice_counter = WaitPending() && stream.IsEnabled(); | 115 | const bool slice_counter = WaitPending() && stream.IsEnabled(); |
| 105 | if (slice_counter) { | 116 | if (slice_counter) { |
| 106 | stream.Update(false); | 117 | stream.Disable(); |
| 107 | } | 118 | } |
| 108 | 119 | ||
| 109 | auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); | 120 | auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); |
| 110 | 121 | ||
| 111 | if (slice_counter) { | 122 | if (slice_counter) { |
| 112 | stream.Update(true); | 123 | stream.Enable(); |
| 113 | } | 124 | } |
| 114 | 125 | ||
| 115 | return result; | 126 | return result; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4832c03c5..339950d2e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | |||
| 51 | void oglEnable(GLenum cap, bool state) { | 51 | void oglEnable(GLenum cap, bool state) { |
| 52 | (state ? glEnable : glDisable)(cap); | 52 | (state ? glEnable : glDisable)(cap); |
| 53 | } | 53 | } |
| 54 | |||
| 55 | std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) { | ||
| 56 | switch (type) { | ||
| 57 | case VideoCommon::QueryType::PrimitivesGenerated: | ||
| 58 | case VideoCommon::QueryType::VtgPrimitivesOut: | ||
| 59 | return VideoCore::QueryType::PrimitivesGenerated; | ||
| 60 | case VideoCommon::QueryType::ZPassPixelCount64: | ||
| 61 | return VideoCore::QueryType::SamplesPassed; | ||
| 62 | case VideoCommon::QueryType::StreamingPrimitivesSucceeded: | ||
| 63 | // case VideoCommon::QueryType::StreamingByteCount: | ||
| 64 | // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride | ||
| 65 | return VideoCore::QueryType::TfbPrimitivesWritten; | ||
| 66 | default: | ||
| 67 | return std::nullopt; | ||
| 68 | } | ||
| 69 | } | ||
| 54 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 55 | 71 | ||
| 56 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 72 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 216 | 232 | ||
| 217 | SCOPE_EXIT({ gpu.TickWork(); }); | 233 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 218 | gpu_memory->FlushCaching(); | 234 | gpu_memory->FlushCaching(); |
| 219 | query_cache.UpdateCounters(); | ||
| 220 | 235 | ||
| 221 | GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; | 236 | GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; |
| 222 | if (!pipeline) { | 237 | if (!pipeline) { |
| @@ -334,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() { | |||
| 334 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 349 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 335 | 350 | ||
| 336 | SCOPE_EXIT({ gpu.TickWork(); }); | 351 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 337 | query_cache.UpdateCounters(); | ||
| 338 | 352 | ||
| 339 | texture_cache.SynchronizeGraphicsDescriptors(); | 353 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 340 | texture_cache.UpdateRenderTargets(false); | 354 | texture_cache.UpdateRenderTargets(false); |
| @@ -401,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() { | |||
| 401 | } | 415 | } |
| 402 | 416 | ||
| 403 | void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | 417 | void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { |
| 404 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { | 418 | const auto query_cache_type = MaxwellToVideoCoreQuery(type); |
| 405 | query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); | 419 | if (!query_cache_type.has_value()) { |
| 420 | UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type); | ||
| 421 | return; | ||
| 406 | } | 422 | } |
| 423 | query_cache.ResetCounter(*query_cache_type); | ||
| 407 | } | 424 | } |
| 408 | 425 | ||
| 409 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | 426 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 410 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | 427 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 411 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { | 428 | const auto query_cache_type = MaxwellToVideoCoreQuery(type); |
| 412 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | 429 | if (!query_cache_type.has_value()) { |
| 413 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | 430 | return QueryFallback(gpu_addr, type, flags, payload, subreport); |
| 414 | } else { | ||
| 415 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); | ||
| 416 | } | ||
| 417 | return; | ||
| 418 | } | 431 | } |
| 432 | const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout); | ||
| 433 | const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt; | ||
| 434 | query_cache.Query(gpu_addr, *query_cache_type, timestamp); | ||
| 435 | } | ||
| 436 | |||
| 437 | void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||
| 438 | VideoCommon::QueryPropertiesFlags flags, u32 payload, | ||
| 439 | u32 subreport) { | ||
| 419 | if (type != VideoCommon::QueryType::Payload) { | 440 | if (type != VideoCommon::QueryType::Payload) { |
| 420 | payload = 1u; | 441 | payload = 1u; |
| 421 | } | 442 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ceffe1f1e..b79d7a70c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
| @@ -225,6 +225,9 @@ private: | |||
| 225 | /// End a transform feedback | 225 | /// End a transform feedback |
| 226 | void EndTransformFeedback(); | 226 | void EndTransformFeedback(); |
| 227 | 227 | ||
| 228 | void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||
| 229 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | ||
| 230 | |||
| 228 | Tegra::GPU& gpu; | 231 | Tegra::GPU& gpu; |
| 229 | 232 | ||
| 230 | const Device& device; | 233 | const Device& device; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 59829c88b..241fc34be 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
| @@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() { | |||
| 485 | } | 485 | } |
| 486 | 486 | ||
| 487 | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | 487 | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { |
| 488 | if (type != VideoCommon::QueryType::ZPassPixelCount64) { | ||
| 489 | LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); | ||
| 490 | return; | ||
| 491 | } | ||
| 488 | query_cache.CounterReset(type); | 492 | query_cache.CounterReset(type); |
| 489 | } | 493 | } |
| 490 | 494 | ||