summary | refs | log | tree | commit | diff
path: root/src/video_core/query_cache
diff options
context:
space:
mode:
author: liamwhite, 2024-01-22 10:55:39 -0500
committer: GitHub, 2024-01-22 10:55:39 -0500
commit: 8bd10473d60503c7acddc399604a51b9c9947541 (patch)
tree: f713f84942681321fca27ba028e31d6c74a09013 /src/video_core/query_cache
parent: Merge pull request #12747 from t895/homescreen-widget (diff)
parent: device_memory_manager: use unique_lock for update (diff)
download: yuzu-8bd10473d60503c7acddc399604a51b9c9947541.tar.gz
yuzu-8bd10473d60503c7acddc399604a51b9c9947541.tar.xz
yuzu-8bd10473d60503c7acddc399604a51b9c9947541.zip
Merge pull request #12579 from FernandoS27/smmu
Core: Implement Device Mapping & GPU SMMU
Diffstat (limited to 'src/video_core/query_cache')
-rw-r--r-- src/video_core/query_cache/query_base.h | 4
-rw-r--r-- src/video_core/query_cache/query_cache.h | 37
-rw-r--r-- src/video_core/query_cache/query_cache_base.h | 15
3 files changed, 27 insertions, 29 deletions
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 1d786b3a7..aca6a6447 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23 23
24class QueryBase { 24class QueryBase {
25public: 25public:
26 VAddr guest_address{}; 26 DAddr guest_address{};
27 QueryFlagBits flags{}; 27 QueryFlagBits flags{};
28 u64 value{}; 28 u64 value{};
29 29
@@ -32,7 +32,7 @@ protected:
32 QueryBase() = default; 32 QueryBase() = default;
33 33
34 // Parameterized constructor 34 // Parameterized constructor
35 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) 35 QueryBase(DAddr address, QueryFlagBits flags_, u64 value_)
36 : guest_address(address), flags(flags_), value{value_} {} 36 : guest_address(address), flags(flags_), value{value_} {}
37}; 37};
38 38
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 94f0c4466..08b779055 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -15,9 +15,9 @@
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/scope_exit.h" 16#include "common/scope_exit.h"
17#include "common/settings.h" 17#include "common/settings.h"
18#include "core/memory.h"
19#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h" 19#include "video_core/gpu.h"
20#include "video_core/host1x/gpu_device_memory_manager.h"
21#include "video_core/memory_manager.h" 21#include "video_core/memory_manager.h"
22#include "video_core/query_cache/bank_base.h" 22#include "video_core/query_cache/bank_base.h"
23#include "video_core/query_cache/query_base.h" 23#include "video_core/query_cache/query_base.h"
@@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
113 using RuntimeType = typename Traits::RuntimeType; 113 using RuntimeType = typename Traits::RuntimeType;
114 114
115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, 115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
116 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) 116 Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
117 Tegra::GPU& gpu_)
117 : owner{owner_}, rasterizer{rasterizer_}, 118 : owner{owner_}, rasterizer{rasterizer_},
118 cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { 119 device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
119 streamer_mask = 0; 120 streamer_mask = 0;
120 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { 121 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
121 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); 122 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -158,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
158 159
159 QueryCacheBase<Traits>* owner; 160 QueryCacheBase<Traits>* owner;
160 VideoCore::RasterizerInterface& rasterizer; 161 VideoCore::RasterizerInterface& rasterizer;
161 Core::Memory::Memory& cpu_memory; 162 Tegra::MaxwellDeviceMemoryManager& device_memory;
162 RuntimeType& runtime; 163 RuntimeType& runtime;
163 Tegra::GPU& gpu; 164 Tegra::GPU& gpu;
164 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; 165 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
@@ -171,10 +172,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
171template <typename Traits> 172template <typename Traits>
172QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, 173QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
173 VideoCore::RasterizerInterface& rasterizer_, 174 VideoCore::RasterizerInterface& rasterizer_,
174 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) 175 Tegra::MaxwellDeviceMemoryManager& device_memory_,
176 RuntimeType& runtime_)
175 : cached_queries{} { 177 : cached_queries{} {
176 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( 178 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
177 this, rasterizer_, cpu_memory_, runtime_, gpu_); 179 this, rasterizer_, device_memory_, runtime_, gpu_);
178} 180}
179 181
180template <typename Traits> 182template <typename Traits>
@@ -240,7 +242,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
240 if (!cpu_addr_opt) [[unlikely]] { 242 if (!cpu_addr_opt) [[unlikely]] {
241 return; 243 return;
242 } 244 }
243 VAddr cpu_addr = *cpu_addr_opt; 245 DAddr cpu_addr = *cpu_addr_opt;
244 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); 246 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
245 auto* query = streamer->GetQuery(new_query_id); 247 auto* query = streamer->GetQuery(new_query_id);
246 if (is_fence) { 248 if (is_fence) {
@@ -250,13 +252,12 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
250 query_location.stream_id.Assign(static_cast<u32>(streamer_id)); 252 query_location.stream_id.Assign(static_cast<u32>(streamer_id));
251 query_location.query_id.Assign(static_cast<u32>(new_query_id)); 253 query_location.query_id.Assign(static_cast<u32>(new_query_id));
252 const auto gen_caching_indexing = [](VAddr cur_addr) { 254 const auto gen_caching_indexing = [](VAddr cur_addr) {
253 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 255 return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS,
254 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 256 static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK));
255 }; 257 };
256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); 258 u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); 259 u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence; 260 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
259
260 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, 261 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
261 pointer, pointer_timestamp] { 262 pointer, pointer_timestamp] {
262 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { 263 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
@@ -323,8 +324,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
323template <typename Traits> 324template <typename Traits>
324void QueryCacheBase<Traits>::UnregisterPending() { 325void QueryCacheBase<Traits>::UnregisterPending() {
325 const auto gen_caching_indexing = [](VAddr cur_addr) { 326 const auto gen_caching_indexing = [](VAddr cur_addr) {
326 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 327 return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS,
327 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 328 static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK));
328 }; 329 };
329 std::scoped_lock lock(cache_mutex); 330 std::scoped_lock lock(cache_mutex);
330 for (QueryLocation loc : impl->pending_unregister) { 331 for (QueryLocation loc : impl->pending_unregister) {
@@ -388,7 +389,7 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
388 } 389 }
389 VAddr cpu_addr = *cpu_addr_opt; 390 VAddr cpu_addr = *cpu_addr_opt;
390 std::scoped_lock lock(cache_mutex); 391 std::scoped_lock lock(cache_mutex);
391 auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); 392 auto it1 = cached_queries.find(cpu_addr >> Core::DEVICE_PAGEBITS);
392 if (it1 == cached_queries.end()) { 393 if (it1 == cached_queries.end()) {
393 return VideoCommon::LookupData{ 394 return VideoCommon::LookupData{
394 .address = cpu_addr, 395 .address = cpu_addr,
@@ -396,10 +397,10 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
396 }; 397 };
397 } 398 }
398 auto& sub_container = it1->second; 399 auto& sub_container = it1->second;
399 auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); 400 auto it_current = sub_container.find(cpu_addr & Core::DEVICE_PAGEMASK);
400 401
401 if (it_current == sub_container.end()) { 402 if (it_current == sub_container.end()) {
402 auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); 403 auto it_current_2 = sub_container.find((cpu_addr & Core::DEVICE_PAGEMASK) + 4);
403 if (it_current_2 == sub_container.end()) { 404 if (it_current_2 == sub_container.end()) {
404 return VideoCommon::LookupData{ 405 return VideoCommon::LookupData{
405 .address = cpu_addr, 406 .address = cpu_addr,
@@ -559,7 +560,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
559 } 560 }
560 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && 561 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
561 False(query_base->flags & QueryFlagBits::IsGuestSynced)) { 562 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
562 auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); 563 auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
563 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { 564 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
564 std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); 565 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
565 return false; 566 return false;
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
index 07be421c6..c12fb75ef 100644
--- a/src/video_core/query_cache/query_cache_base.h
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -13,15 +13,11 @@
13#include "common/assert.h" 13#include "common/assert.h"
14#include "common/bit_field.h" 14#include "common/bit_field.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "core/memory.h"
17#include "video_core/control/channel_state_cache.h" 16#include "video_core/control/channel_state_cache.h"
17#include "video_core/host1x/gpu_device_memory_manager.h"
18#include "video_core/query_cache/query_base.h" 18#include "video_core/query_cache/query_base.h"
19#include "video_core/query_cache/types.h" 19#include "video_core/query_cache/types.h"
20 20
21namespace Core::Memory {
22class Memory;
23}
24
25namespace VideoCore { 21namespace VideoCore {
26class RasterizerInterface; 22class RasterizerInterface;
27} 23}
@@ -53,7 +49,8 @@ public:
53 }; 49 };
54 50
55 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, 51 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); 52 Tegra::MaxwellDeviceMemoryManager& device_memory_,
53 RuntimeType& runtime_);
57 54
58 ~QueryCacheBase(); 55 ~QueryCacheBase();
59 56
@@ -125,10 +122,10 @@ protected:
125 const u64 addr_begin = addr; 122 const u64 addr_begin = addr;
126 const u64 addr_end = addr_begin + size; 123 const u64 addr_end = addr_begin + size;
127 124
128 const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; 125 const u64 page_end = addr_end >> Core::DEVICE_PAGEBITS;
129 std::scoped_lock lock(cache_mutex); 126 std::scoped_lock lock(cache_mutex);
130 for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { 127 for (u64 page = addr_begin >> Core::DEVICE_PAGEBITS; page <= page_end; ++page) {
131 const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; 128 const u64 page_start = page << Core::DEVICE_PAGEBITS;
132 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { 129 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
133 const u64 cache_begin = page_start + query_location; 130 const u64 cache_begin = page_start + query_location;
134 const u64 cache_end = cache_begin + sizeof(u32); 131 const u64 cache_end = cache_begin + sizeof(u32);