diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/page_table.cpp | 2 | ||||
| -rw-r--r-- | src/common/page_table.h | 6 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 472 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 162 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 10 |
13 files changed, 481 insertions, 212 deletions
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp index 8eba1c3f1..69b7abc54 100644 --- a/src/common/page_table.cpp +++ b/src/common/page_table.cpp | |||
| @@ -16,6 +16,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 16 | 16 | ||
| 17 | pointers.resize(num_page_table_entries); | 17 | pointers.resize(num_page_table_entries); |
| 18 | attributes.resize(num_page_table_entries); | 18 | attributes.resize(num_page_table_entries); |
| 19 | backing_addr.resize(num_page_table_entries); | ||
| 19 | 20 | ||
| 20 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the | 21 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the |
| 21 | // vector size is subsequently decreased (via resize), the vector might not automatically | 22 | // vector size is subsequently decreased (via resize), the vector might not automatically |
| @@ -24,6 +25,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 24 | 25 | ||
| 25 | pointers.shrink_to_fit(); | 26 | pointers.shrink_to_fit(); |
| 26 | attributes.shrink_to_fit(); | 27 | attributes.shrink_to_fit(); |
| 28 | backing_addr.shrink_to_fit(); | ||
| 27 | } | 29 | } |
| 28 | 30 | ||
| 29 | } // namespace Common | 31 | } // namespace Common |
diff --git a/src/common/page_table.h b/src/common/page_table.h index 8339f2890..8b8ff0bb8 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h | |||
| @@ -21,6 +21,8 @@ enum class PageType : u8 { | |||
| 21 | RasterizerCachedMemory, | 21 | RasterizerCachedMemory, |
| 22 | /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. | 22 | /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. |
| 23 | Special, | 23 | Special, |
| 24 | /// Page is allocated for use. | ||
| 25 | Allocated, | ||
| 24 | }; | 26 | }; |
| 25 | 27 | ||
| 26 | struct SpecialRegion { | 28 | struct SpecialRegion { |
| @@ -66,7 +68,7 @@ struct PageTable { | |||
| 66 | * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is | 68 | * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is |
| 67 | * of type `Special`. | 69 | * of type `Special`. |
| 68 | */ | 70 | */ |
| 69 | boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions; | 71 | boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions; |
| 70 | 72 | ||
| 71 | /** | 73 | /** |
| 72 | * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then | 74 | * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then |
| @@ -74,6 +76,8 @@ struct PageTable { | |||
| 74 | */ | 76 | */ |
| 75 | std::vector<PageType> attributes; | 77 | std::vector<PageType> attributes; |
| 76 | 78 | ||
| 79 | std::vector<u64> backing_addr; | ||
| 80 | |||
| 77 | const std::size_t page_size_in_bits{}; | 81 | const std::size_t page_size_in_bits{}; |
| 78 | }; | 82 | }; |
| 79 | 83 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index b7964d66e..af62d33d2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -173,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 173 | return 0; | 173 | return 0; |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | auto& system_instance = Core::System::GetInstance(); | 176 | params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, |
| 177 | 177 | itr->second.size); | |
| 178 | // Remove this memory region from the rasterizer cache. | ||
| 179 | auto& gpu = system_instance.GPU(); | ||
| 180 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | ||
| 181 | ASSERT(cpu_addr); | ||
| 182 | gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size); | ||
| 183 | |||
| 184 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | ||
| 185 | |||
| 186 | buffer_mappings.erase(itr->second.offset); | 178 | buffer_mappings.erase(itr->second.offset); |
| 187 | 179 | ||
| 188 | std::memcpy(output.data(), ¶ms, output.size()); | 180 | std::memcpy(output.data(), ¶ms, output.size()); |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 27a36348c..6ab06518f 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/memory_manager.h" | ||
| 13 | 12 | ||
| 14 | namespace Tegra { | 13 | namespace Tegra { |
| 15 | 14 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0931b9626..e259bf46b 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -46,7 +46,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 46 | // contain a dirty surface that will have to be written back to memory. | 46 | // contain a dirty surface that will have to be written back to memory. |
| 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; |
| 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); |
| 49 | memory_manager.Write32(address, data); | 49 | memory_manager.Write<u32>(address, data); |
| 50 | 50 | ||
| 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
| 52 | 52 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c5d5be4ef..defcfbd3f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -307,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 307 | // Write the current query sequence to the sequence address. | 307 | // Write the current query sequence to the sequence address. |
| 308 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short | 308 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short |
| 309 | // query. | 309 | // query. |
| 310 | memory_manager.Write32(sequence_address, sequence); | 310 | memory_manager.Write<u32>(sequence_address, sequence); |
| 311 | } else { | 311 | } else { |
| 312 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | 312 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast |
| 313 | // GPU, this command may actually take a while to complete in real hardware due to GPU | 313 | // GPU, this command may actually take a while to complete in real hardware due to GPU |
| @@ -395,7 +395,7 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 395 | 395 | ||
| 396 | u8* ptr{memory_manager.GetPointer(address)}; | 396 | u8* ptr{memory_manager.GetPointer(address)}; |
| 397 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 397 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); |
| 398 | memory_manager.Write32(address, value); | 398 | memory_manager.Write<u32>(address, value); |
| 399 | 399 | ||
| 400 | dirty_flags.OnMemoryWrite(); | 400 | dirty_flags.OnMemoryWrite(); |
| 401 | 401 | ||
| @@ -447,7 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | |||
| 447 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | 447 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; |
| 448 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | 448 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { |
| 449 | 449 | ||
| 450 | const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)}; | 450 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)}; |
| 451 | 451 | ||
| 452 | Texture::FullTextureInfo tex_info{}; | 452 | Texture::FullTextureInfo tex_info{}; |
| 453 | // TODO(Subv): Use the shader to determine which textures are actually accessed. | 453 | // TODO(Subv): Use the shader to determine which textures are actually accessed. |
| @@ -482,7 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | |||
| 482 | 482 | ||
| 483 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | 483 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); |
| 484 | 484 | ||
| 485 | const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)}; | 485 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; |
| 486 | 486 | ||
| 487 | Texture::FullTextureInfo tex_info{}; | 487 | Texture::FullTextureInfo tex_info{}; |
| 488 | tex_info.index = static_cast<u32>(offset); | 488 | tex_info.index = static_cast<u32>(offset); |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 66c690494..267a03f2d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | 13 | #include "video_core/engines/maxwell_dma.h" |
| 14 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/memory_manager.h" | ||
| 15 | #include "video_core/renderer_base.h" | 16 | #include "video_core/renderer_base.h" |
| 16 | 17 | ||
| 17 | namespace Tegra { | 18 | namespace Tegra { |
| @@ -287,7 +288,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 287 | block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); | 288 | block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); |
| 288 | memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block)); | 289 | memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block)); |
| 289 | } else { | 290 | } else { |
| 290 | const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())}; | 291 | const u32 word{memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress())}; |
| 291 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | 292 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || |
| 292 | (op == GpuSemaphoreOperation::AcquireGequal && | 293 | (op == GpuSemaphoreOperation::AcquireGequal && |
| 293 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | 294 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || |
| @@ -314,11 +315,11 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 314 | } | 315 | } |
| 315 | 316 | ||
| 316 | void GPU::ProcessSemaphoreRelease() { | 317 | void GPU::ProcessSemaphoreRelease() { |
| 317 | memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release); | 318 | memory_manager->Write<u32>(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release); |
| 318 | } | 319 | } |
| 319 | 320 | ||
| 320 | void GPU::ProcessSemaphoreAcquire() { | 321 | void GPU::ProcessSemaphoreAcquire() { |
| 321 | const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress()); | 322 | const u32 word = memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress()); |
| 322 | const auto value = regs.semaphore_acquire; | 323 | const auto value = regs.semaphore_acquire; |
| 323 | if (word != value) { | 324 | if (word != value) { |
| 324 | regs.acquire_active = true; | 325 | regs.acquire_active = true; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a14b95c30..c1830ac8d 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvflinger/buffer_queue.h" | 10 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 11 | #include "video_core/dma_pusher.h" | 11 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/memory_manager.h" | ||
| 13 | 12 | ||
| 14 | using CacheAddr = std::uintptr_t; | 13 | using CacheAddr = std::uintptr_t; |
| 15 | inline CacheAddr ToCacheAddr(const void* host_ptr) { | 14 | inline CacheAddr ToCacheAddr(const void* host_ptr) { |
| @@ -124,6 +123,8 @@ enum class EngineID { | |||
| 124 | MAXWELL_DMA_COPY_A = 0xB0B5, | 123 | MAXWELL_DMA_COPY_A = 0xB0B5, |
| 125 | }; | 124 | }; |
| 126 | 125 | ||
| 126 | class MemoryManager; | ||
| 127 | |||
| 127 | class GPU { | 128 | class GPU { |
| 128 | public: | 129 | public: |
| 129 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); | 130 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); |
| @@ -244,9 +245,8 @@ protected: | |||
| 244 | private: | 245 | private: |
| 245 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 246 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| 246 | 247 | ||
| 247 | /// Mapping of command subchannels to their bound engine ids. | 248 | /// Mapping of command subchannels to their bound engine ids |
| 248 | std::array<EngineID, 8> bound_engines = {}; | 249 | std::array<EngineID, 8> bound_engines = {}; |
| 249 | |||
| 250 | /// 3D engine | 250 | /// 3D engine |
| 251 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | 251 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; |
| 252 | /// 2D engine | 252 | /// 2D engine |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8e8f36f28..4c7faa067 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,218 +5,422 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| 10 | #include "video_core/gpu.h" | ||
| 9 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 12 | #include "video_core/rasterizer_interface.h" | ||
| 13 | #include "video_core/renderer_base.h" | ||
| 10 | 14 | ||
| 11 | namespace Tegra { | 15 | namespace Tegra { |
| 12 | 16 | ||
| 13 | MemoryManager::MemoryManager() { | 17 | MemoryManager::MemoryManager() { |
| 14 | // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might | 18 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 15 | // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with | 19 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 16 | // Undertale using 0 for a render target. | 20 | Common::PageType::Unmapped); |
| 17 | PageSlot(0) = static_cast<u64>(PageStatus::Reserved); | 21 | page_table.Resize(address_space_width); |
| 22 | |||
| 23 | // Initialize the map with a single free region covering the entire managed space. | ||
| 24 | VirtualMemoryArea initial_vma; | ||
| 25 | initial_vma.size = address_space_end; | ||
| 26 | vma_map.emplace(initial_vma.base, initial_vma); | ||
| 27 | |||
| 28 | UpdatePageTableForVMA(initial_vma); | ||
| 18 | } | 29 | } |
| 19 | 30 | ||
| 20 | GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { | 31 | GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { |
| 21 | const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; | 32 | const GPUVAddr gpu_addr{ |
| 33 | FindFreeRegion(address_space_base, size, align, VirtualMemoryArea::Type::Unmapped)}; | ||
| 34 | AllocateMemory(gpu_addr, 0, size); | ||
| 35 | return gpu_addr; | ||
| 36 | } | ||
| 22 | 37 | ||
| 23 | ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); | 38 | GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { |
| 39 | AllocateMemory(gpu_addr, 0, size); | ||
| 40 | return gpu_addr; | ||
| 41 | } | ||
| 24 | 42 | ||
| 25 | for (u64 offset{}; offset < size; offset += PAGE_SIZE) { | 43 | GPUVAddr MemoryManager::MapBufferEx(GPUVAddr cpu_addr, u64 size) { |
| 26 | VAddr& slot{PageSlot(*gpu_addr + offset)}; | 44 | const GPUVAddr gpu_addr{ |
| 45 | FindFreeRegion(address_space_base, size, page_size, VirtualMemoryArea::Type::Unmapped)}; | ||
| 46 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), ((size + page_mask) & ~page_mask), | ||
| 47 | cpu_addr); | ||
| 48 | return gpu_addr; | ||
| 49 | } | ||
| 27 | 50 | ||
| 28 | ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); | 51 | GPUVAddr MemoryManager::MapBufferEx(GPUVAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { |
| 52 | ASSERT((gpu_addr & page_mask) == 0); | ||
| 29 | 53 | ||
| 30 | slot = static_cast<u64>(PageStatus::Allocated); | 54 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), ((size + page_mask) & ~page_mask), |
| 31 | } | 55 | cpu_addr); |
| 32 | 56 | ||
| 33 | return *gpu_addr; | 57 | return gpu_addr; |
| 34 | } | 58 | } |
| 35 | 59 | ||
| 36 | GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { | 60 | GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { |
| 37 | for (u64 offset{}; offset < size; offset += PAGE_SIZE) { | 61 | ASSERT((gpu_addr & page_mask) == 0); |
| 38 | VAddr& slot{PageSlot(gpu_addr + offset)}; | ||
| 39 | 62 | ||
| 40 | ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); | 63 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 64 | Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr, size); | ||
| 41 | 65 | ||
| 42 | slot = static_cast<u64>(PageStatus::Allocated); | 66 | UnmapRange(gpu_addr, ((size + page_mask) & ~page_mask)); |
| 43 | } | ||
| 44 | 67 | ||
| 45 | return gpu_addr; | 68 | return gpu_addr; |
| 46 | } | 69 | } |
| 47 | 70 | ||
| 48 | GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | 71 | GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size, u64 align, |
| 49 | const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)}; | 72 | VirtualMemoryArea::Type vma_type) { |
| 50 | 73 | ||
| 51 | ASSERT_MSG(gpu_addr, "unable to find available GPU memory"); | 74 | align = (align + page_mask) & ~page_mask; |
| 52 | 75 | ||
| 53 | for (u64 offset{}; offset < size; offset += PAGE_SIZE) { | 76 | // Find the first Free VMA. |
| 54 | VAddr& slot{PageSlot(*gpu_addr + offset)}; | 77 | const GPUVAddr base = region_start; |
| 78 | const VMAHandle vma_handle = std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) { | ||
| 79 | if (vma.second.type != vma_type) | ||
| 80 | return false; | ||
| 55 | 81 | ||
| 56 | ASSERT(slot == static_cast<u64>(PageStatus::Unmapped)); | 82 | const VAddr vma_end = vma.second.base + vma.second.size; |
| 83 | return vma_end > base && vma_end >= base + size; | ||
| 84 | }); | ||
| 57 | 85 | ||
| 58 | slot = cpu_addr + offset; | 86 | if (vma_handle == vma_map.end()) { |
| 87 | return {}; | ||
| 59 | } | 88 | } |
| 60 | 89 | ||
| 61 | const MappedRegion region{cpu_addr, *gpu_addr, size}; | 90 | return std::max(base, vma_handle->second.base); |
| 62 | mapped_regions.push_back(region); | ||
| 63 | |||
| 64 | return *gpu_addr; | ||
| 65 | } | 91 | } |
| 66 | 92 | ||
| 67 | GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { | 93 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { |
| 68 | ASSERT((gpu_addr & PAGE_MASK) == 0); | 94 | VAddr cpu_addr = page_table.backing_addr[gpu_addr >> page_bits]; |
| 69 | 95 | if (cpu_addr) { | |
| 70 | if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) { | 96 | return cpu_addr + (gpu_addr & page_mask); |
| 71 | // Page has been already mapped. In this case, we must find a new area of memory to use that | ||
| 72 | // is different than the specified one. Super Mario Odyssey hits this scenario when changing | ||
| 73 | // areas, but we do not want to overwrite the old pages. | ||
| 74 | // TODO(bunnei): We need to write a hardware test to confirm this behavior. | ||
| 75 | |||
| 76 | LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr); | ||
| 77 | |||
| 78 | const std::optional<GPUVAddr> new_gpu_addr{ | ||
| 79 | FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)}; | ||
| 80 | |||
| 81 | ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory"); | ||
| 82 | |||
| 83 | gpu_addr = *new_gpu_addr; | ||
| 84 | } | 97 | } |
| 85 | 98 | ||
| 86 | for (u64 offset{}; offset < size; offset += PAGE_SIZE) { | 99 | return {}; |
| 87 | VAddr& slot{PageSlot(gpu_addr + offset)}; | 100 | } |
| 88 | |||
| 89 | ASSERT(slot == static_cast<u64>(PageStatus::Allocated)); | ||
| 90 | 101 | ||
| 91 | slot = cpu_addr + offset; | 102 | template <typename T> |
| 103 | T MemoryManager::Read(GPUVAddr vaddr) { | ||
| 104 | const u8* page_pointer = page_table.pointers[vaddr >> page_bits]; | ||
| 105 | if (page_pointer) { | ||
| 106 | // NOTE: Avoid adding any extra logic to this fast-path block | ||
| 107 | T value; | ||
| 108 | std::memcpy(&value, &page_pointer[vaddr & page_mask], sizeof(T)); | ||
| 109 | return value; | ||
| 92 | } | 110 | } |
| 93 | 111 | ||
| 94 | const MappedRegion region{cpu_addr, gpu_addr, size}; | 112 | Common::PageType type = page_table.attributes[vaddr >> page_bits]; |
| 95 | mapped_regions.push_back(region); | 113 | switch (type) { |
| 96 | 114 | case Common::PageType::Unmapped: | |
| 97 | return gpu_addr; | 115 | LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); |
| 116 | return 0; | ||
| 117 | case Common::PageType::Memory: | ||
| 118 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); | ||
| 119 | break; | ||
| 120 | default: | ||
| 121 | UNREACHABLE(); | ||
| 122 | } | ||
| 123 | return {}; | ||
| 98 | } | 124 | } |
| 99 | 125 | ||
| 100 | GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | 126 | template <typename T> |
| 101 | ASSERT((gpu_addr & PAGE_MASK) == 0); | 127 | void MemoryManager::Write(GPUVAddr vaddr, T data) { |
| 128 | u8* page_pointer = page_table.pointers[vaddr >> page_bits]; | ||
| 129 | if (page_pointer) { | ||
| 130 | // NOTE: Avoid adding any extra logic to this fast-path block | ||
| 131 | std::memcpy(&page_pointer[vaddr & page_mask], &data, sizeof(T)); | ||
| 132 | return; | ||
| 133 | } | ||
| 102 | 134 | ||
| 103 | for (u64 offset{}; offset < size; offset += PAGE_SIZE) { | 135 | Common::PageType type = page_table.attributes[vaddr >> page_bits]; |
| 104 | VAddr& slot{PageSlot(gpu_addr + offset)}; | 136 | switch (type) { |
| 137 | case Common::PageType::Unmapped: | ||
| 138 | LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, | ||
| 139 | static_cast<u32>(data), vaddr); | ||
| 140 | return; | ||
| 141 | case Common::PageType::Memory: | ||
| 142 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); | ||
| 143 | break; | ||
| 144 | default: | ||
| 145 | UNREACHABLE(); | ||
| 146 | } | ||
| 147 | } | ||
| 105 | 148 | ||
| 106 | ASSERT(slot != static_cast<u64>(PageStatus::Allocated) && | 149 | template u8 MemoryManager::Read<u8>(GPUVAddr addr); |
| 107 | slot != static_cast<u64>(PageStatus::Unmapped)); | 150 | template u16 MemoryManager::Read<u16>(GPUVAddr addr); |
| 151 | template u32 MemoryManager::Read<u32>(GPUVAddr addr); | ||
| 152 | template u64 MemoryManager::Read<u64>(GPUVAddr addr); | ||
| 153 | template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data); | ||
| 154 | template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data); | ||
| 155 | template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data); | ||
| 156 | template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data); | ||
| 108 | 157 | ||
| 109 | slot = static_cast<u64>(PageStatus::Unmapped); | 158 | u8* MemoryManager::GetPointer(GPUVAddr addr) { |
| 159 | u8* page_pointer = page_table.pointers[addr >> page_bits]; | ||
| 160 | if (page_pointer) { | ||
| 161 | return page_pointer + (addr & page_mask); | ||
| 110 | } | 162 | } |
| 111 | 163 | ||
| 112 | // Delete the region mappings that are contained within the unmapped region | 164 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| 113 | mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(), | 165 | return {}; |
| 114 | [&](const MappedRegion& region) { | ||
| 115 | return region.gpu_addr <= gpu_addr && | ||
| 116 | region.gpu_addr + region.size < gpu_addr + size; | ||
| 117 | }), | ||
| 118 | mapped_regions.end()); | ||
| 119 | return gpu_addr; | ||
| 120 | } | 166 | } |
| 121 | 167 | ||
| 122 | GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const { | 168 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) { |
| 123 | for (const auto& region : mapped_regions) { | 169 | std::memcpy(dest_buffer, GetPointer(src_addr), size); |
| 124 | const GPUVAddr region_end{region.gpu_addr + region.size}; | 170 | } |
| 125 | if (region_start >= region.gpu_addr && region_start < region_end) { | 171 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { |
| 126 | return region_end; | 172 | std::memcpy(GetPointer(dest_addr), src_buffer, size); |
| 127 | } | 173 | } |
| 128 | } | 174 | |
| 129 | return {}; | 175 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { |
| 176 | std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); | ||
| 130 | } | 177 | } |
| 131 | 178 | ||
| 132 | std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, | 179 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 133 | PageStatus status) { | 180 | VAddr backing_addr) { |
| 134 | GPUVAddr gpu_addr{region_start}; | 181 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, |
| 135 | u64 free_space{}; | 182 | (base + size) * page_size); |
| 136 | align = (align + PAGE_MASK) & ~PAGE_MASK; | 183 | |
| 137 | 184 | VAddr end = base + size; | |
| 138 | while (gpu_addr + free_space < MAX_ADDRESS) { | 185 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| 139 | if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) { | 186 | base + page_table.pointers.size()); |
| 140 | free_space += PAGE_SIZE; | 187 | |
| 141 | if (free_space >= size) { | 188 | std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); |
| 142 | return gpu_addr; | 189 | |
| 143 | } | 190 | if (memory == nullptr) { |
| 144 | } else { | 191 | std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); |
| 145 | gpu_addr += free_space + PAGE_SIZE; | 192 | std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end, |
| 146 | free_space = 0; | 193 | backing_addr); |
| 147 | gpu_addr = Common::AlignUp(gpu_addr, align); | 194 | } else { |
| 195 | while (base != end) { | ||
| 196 | page_table.pointers[base] = memory; | ||
| 197 | page_table.backing_addr[base] = backing_addr; | ||
| 198 | |||
| 199 | base += 1; | ||
| 200 | memory += page_size; | ||
| 201 | backing_addr += page_size; | ||
| 148 | } | 202 | } |
| 149 | } | 203 | } |
| 204 | } | ||
| 150 | 205 | ||
| 151 | return {}; | 206 | void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) { |
| 207 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); | ||
| 208 | ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); | ||
| 209 | MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr); | ||
| 152 | } | 210 | } |
| 153 | 211 | ||
| 154 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { | 212 | void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) { |
| 155 | const VAddr base_addr{PageSlot(gpu_addr)}; | 213 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); |
| 214 | ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); | ||
| 215 | MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped); | ||
| 216 | } | ||
| 156 | 217 | ||
| 157 | if (base_addr == static_cast<u64>(PageStatus::Allocated) || | 218 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { |
| 158 | base_addr == static_cast<u64>(PageStatus::Unmapped) || | 219 | ASSERT(base + size == next.base); |
| 159 | base_addr == static_cast<u64>(PageStatus::Reserved)) { | 220 | if (type != next.type) { |
| 160 | return {}; | 221 | return {}; |
| 161 | } | 222 | } |
| 162 | 223 | if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) { | |
| 163 | return base_addr + (gpu_addr & PAGE_MASK); | 224 | return {}; |
| 225 | } | ||
| 226 | if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) { | ||
| 227 | return {}; | ||
| 228 | } | ||
| 229 | return true; | ||
| 164 | } | 230 | } |
| 165 | 231 | ||
| 166 | u8 MemoryManager::Read8(GPUVAddr addr) { | 232 | MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const { |
| 167 | return Memory::Read8(*GpuToCpuAddress(addr)); | 233 | if (target >= address_space_end) { |
| 234 | return vma_map.end(); | ||
| 235 | } else { | ||
| 236 | return std::prev(vma_map.upper_bound(target)); | ||
| 237 | } | ||
| 168 | } | 238 | } |
| 169 | 239 | ||
| 170 | u16 MemoryManager::Read16(GPUVAddr addr) { | 240 | MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset, |
| 171 | return Memory::Read16(*GpuToCpuAddress(addr)); | 241 | u64 size) { |
| 172 | } | ||
| 173 | 242 | ||
| 174 | u32 MemoryManager::Read32(GPUVAddr addr) { | 243 | // This is the appropriately sized VMA that will turn into our allocation. |
| 175 | return Memory::Read32(*GpuToCpuAddress(addr)); | 244 | VMAIter vma_handle = CarveVMA(target, size); |
| 176 | } | 245 | VirtualMemoryArea& final_vma = vma_handle->second; |
| 246 | ASSERT(final_vma.size == size); | ||
| 247 | |||
| 248 | final_vma.type = VirtualMemoryArea::Type::Allocated; | ||
| 249 | final_vma.offset = offset; | ||
| 250 | UpdatePageTableForVMA(final_vma); | ||
| 177 | 251 | ||
| 178 | u64 MemoryManager::Read64(GPUVAddr addr) { | 252 | return MergeAdjacent(vma_handle); |
| 179 | return Memory::Read64(*GpuToCpuAddress(addr)); | ||
| 180 | } | 253 | } |
| 181 | 254 | ||
| 182 | void MemoryManager::Write8(GPUVAddr addr, u8 data) { | 255 | MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size, |
| 183 | Memory::Write8(*GpuToCpuAddress(addr), data); | 256 | VAddr backing_addr) { |
| 257 | // This is the appropriately sized VMA that will turn into our allocation. | ||
| 258 | VMAIter vma_handle = CarveVMA(target, size); | ||
| 259 | VirtualMemoryArea& final_vma = vma_handle->second; | ||
| 260 | ASSERT(final_vma.size == size); | ||
| 261 | |||
| 262 | final_vma.type = VirtualMemoryArea::Type::Mapped; | ||
| 263 | final_vma.backing_memory = memory; | ||
| 264 | final_vma.backing_addr = backing_addr; | ||
| 265 | UpdatePageTableForVMA(final_vma); | ||
| 266 | |||
| 267 | return MergeAdjacent(vma_handle); | ||
| 184 | } | 268 | } |
| 185 | 269 | ||
| 186 | void MemoryManager::Write16(GPUVAddr addr, u16 data) { | 270 | MemoryManager::VMAIter MemoryManager::Unmap(VMAIter vma_handle) { |
| 187 | Memory::Write16(*GpuToCpuAddress(addr), data); | 271 | VirtualMemoryArea& vma = vma_handle->second; |
| 272 | vma.type = VirtualMemoryArea::Type::Allocated; | ||
| 273 | vma.offset = 0; | ||
| 274 | vma.backing_memory = nullptr; | ||
| 275 | |||
| 276 | UpdatePageTableForVMA(vma); | ||
| 277 | |||
| 278 | return MergeAdjacent(vma_handle); | ||
| 188 | } | 279 | } |
| 189 | 280 | ||
| 190 | void MemoryManager::Write32(GPUVAddr addr, u32 data) { | 281 | void MemoryManager::UnmapRange(GPUVAddr target, u64 size) { |
| 191 | Memory::Write32(*GpuToCpuAddress(addr), data); | 282 | VMAIter vma = CarveVMARange(target, size); |
| 283 | const VAddr target_end = target + size; | ||
| 284 | |||
| 285 | const VMAIter end = vma_map.end(); | ||
| 286 | // The comparison against the end of the range must be done using addresses since VMAs can be | ||
| 287 | // merged during this process, causing invalidation of the iterators. | ||
| 288 | while (vma != end && vma->second.base < target_end) { | ||
| 289 | vma = std::next(Unmap(vma)); | ||
| 290 | } | ||
| 291 | |||
| 292 | ASSERT(FindVMA(target)->second.size >= size); | ||
| 192 | } | 293 | } |
| 193 | 294 | ||
| 194 | void MemoryManager::Write64(GPUVAddr addr, u64 data) { | 295 | MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) { |
| 195 | Memory::Write64(*GpuToCpuAddress(addr), data); | 296 | // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given |
| 297 | // non-const access to its container. | ||
| 298 | return vma_map.erase(iter, iter); // Erases an empty range of elements | ||
| 196 | } | 299 | } |
| 197 | 300 | ||
| 198 | u8* MemoryManager::GetPointer(GPUVAddr addr) { | 301 | MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) { |
| 199 | return Memory::GetPointer(*GpuToCpuAddress(addr)); | 302 | ASSERT_MSG((size & Tegra::MemoryManager::page_mask) == 0, "non-page aligned size: 0x{:016X}", |
| 303 | size); | ||
| 304 | ASSERT_MSG((base & Tegra::MemoryManager::page_mask) == 0, "non-page aligned base: 0x{:016X}", | ||
| 305 | base); | ||
| 306 | |||
| 307 | VMAIter vma_handle = StripIterConstness(FindVMA(base)); | ||
| 308 | if (vma_handle == vma_map.end()) { | ||
| 309 | // Target address is outside the range managed by the kernel | ||
| 310 | return {}; | ||
| 311 | } | ||
| 312 | |||
| 313 | const VirtualMemoryArea& vma = vma_handle->second; | ||
| 314 | if (vma.type == VirtualMemoryArea::Type::Mapped) { | ||
| 315 | // Region is already allocated | ||
| 316 | return {}; | ||
| 317 | } | ||
| 318 | |||
| 319 | const VAddr start_in_vma = base - vma.base; | ||
| 320 | const VAddr end_in_vma = start_in_vma + size; | ||
| 321 | |||
| 322 | if (end_in_vma < vma.size) { | ||
| 323 | // Split VMA at the end of the allocated region | ||
| 324 | SplitVMA(vma_handle, end_in_vma); | ||
| 325 | } | ||
| 326 | if (start_in_vma != 0) { | ||
| 327 | // Split VMA at the start of the allocated region | ||
| 328 | vma_handle = SplitVMA(vma_handle, start_in_vma); | ||
| 329 | } | ||
| 330 | |||
| 331 | return vma_handle; | ||
| 200 | } | 332 | } |
| 201 | 333 | ||
| 202 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) { | 334 | MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) { |
| 203 | std::memcpy(dest_buffer, GetPointer(src_addr), size); | 335 | ASSERT_MSG((size & Tegra::MemoryManager::page_mask) == 0, "non-page aligned size: 0x{:016X}", |
| 336 | size); | ||
| 337 | ASSERT_MSG((target & Tegra::MemoryManager::page_mask) == 0, "non-page aligned base: 0x{:016X}", | ||
| 338 | target); | ||
| 339 | |||
| 340 | const VAddr target_end = target + size; | ||
| 341 | ASSERT(target_end >= target); | ||
| 342 | ASSERT(size > 0); | ||
| 343 | |||
| 344 | VMAIter begin_vma = StripIterConstness(FindVMA(target)); | ||
| 345 | const VMAIter i_end = vma_map.lower_bound(target_end); | ||
| 346 | if (std::any_of(begin_vma, i_end, [](const auto& entry) { | ||
| 347 | return entry.second.type == VirtualMemoryArea::Type::Unmapped; | ||
| 348 | })) { | ||
| 349 | return {}; | ||
| 350 | } | ||
| 351 | |||
| 352 | if (target != begin_vma->second.base) { | ||
| 353 | begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); | ||
| 354 | } | ||
| 355 | |||
| 356 | VMAIter end_vma = StripIterConstness(FindVMA(target_end)); | ||
| 357 | if (end_vma != vma_map.end() && target_end != end_vma->second.base) { | ||
| 358 | end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); | ||
| 359 | } | ||
| 360 | |||
| 361 | return begin_vma; | ||
| 204 | } | 362 | } |
| 205 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { | 363 | |
| 206 | std::memcpy(GetPointer(dest_addr), src_buffer, size); | 364 | MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { |
| 365 | VirtualMemoryArea& old_vma = vma_handle->second; | ||
| 366 | VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA | ||
| 367 | |||
| 368 | // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably | ||
| 369 | // a bug. This restriction might be removed later. | ||
| 370 | ASSERT(offset_in_vma < old_vma.size); | ||
| 371 | ASSERT(offset_in_vma > 0); | ||
| 372 | |||
| 373 | old_vma.size = offset_in_vma; | ||
| 374 | new_vma.base += offset_in_vma; | ||
| 375 | new_vma.size -= offset_in_vma; | ||
| 376 | |||
| 377 | switch (new_vma.type) { | ||
| 378 | case VirtualMemoryArea::Type::Unmapped: | ||
| 379 | break; | ||
| 380 | case VirtualMemoryArea::Type::Allocated: | ||
| 381 | new_vma.offset += offset_in_vma; | ||
| 382 | break; | ||
| 383 | case VirtualMemoryArea::Type::Mapped: | ||
| 384 | new_vma.backing_memory += offset_in_vma; | ||
| 385 | break; | ||
| 386 | } | ||
| 387 | |||
| 388 | ASSERT(old_vma.CanBeMergedWith(new_vma)); | ||
| 389 | |||
| 390 | return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma); | ||
| 207 | } | 391 | } |
| 208 | 392 | ||
| 209 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { | 393 | MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) { |
| 210 | std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); | 394 | const VMAIter next_vma = std::next(iter); |
| 395 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | ||
| 396 | iter->second.size += next_vma->second.size; | ||
| 397 | vma_map.erase(next_vma); | ||
| 398 | } | ||
| 399 | |||
| 400 | if (iter != vma_map.begin()) { | ||
| 401 | VMAIter prev_vma = std::prev(iter); | ||
| 402 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | ||
| 403 | prev_vma->second.size += iter->second.size; | ||
| 404 | vma_map.erase(iter); | ||
| 405 | iter = prev_vma; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | |||
| 409 | return iter; | ||
| 211 | } | 410 | } |
| 212 | 411 | ||
| 213 | VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { | 412 | void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { |
| 214 | auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]}; | 413 | switch (vma.type) { |
| 215 | if (!block) { | 414 | case VirtualMemoryArea::Type::Unmapped: |
| 216 | block = std::make_unique<PageBlock>(); | 415 | UnmapRegion(vma.base, vma.size); |
| 217 | block->fill(static_cast<VAddr>(PageStatus::Unmapped)); | 416 | break; |
| 417 | case VirtualMemoryArea::Type::Allocated: | ||
| 418 | MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr); | ||
| 419 | break; | ||
| 420 | case VirtualMemoryArea::Type::Mapped: | ||
| 421 | MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr); | ||
| 422 | break; | ||
| 218 | } | 423 | } |
| 219 | return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK]; | ||
| 220 | } | 424 | } |
| 221 | 425 | ||
| 222 | } // namespace Tegra | 426 | } // namespace Tegra |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index bb87fa24d..ac1b42936 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -1,79 +1,147 @@ | |||
| 1 | // Copyright 2018 yuzu emulator team | 1 | // Copyright 2018 yuzu emulator team |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <map> |
| 8 | #include <memory> | ||
| 9 | #include <optional> | 8 | #include <optional> |
| 10 | #include <vector> | ||
| 11 | 9 | ||
| 12 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/page_table.h" | ||
| 13 | 12 | ||
| 14 | namespace Tegra { | 13 | namespace Tegra { |
| 15 | 14 | ||
| 15 | /** | ||
| 16 | * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space | ||
| 17 | * with homogeneous attributes across its extents. In this particular implementation each VMA is | ||
| 18 | * also backed by a single host memory allocation. | ||
| 19 | */ | ||
| 20 | struct VirtualMemoryArea { | ||
| 21 | enum class Type : u8 { | ||
| 22 | Unmapped, | ||
| 23 | Allocated, | ||
| 24 | Mapped, | ||
| 25 | }; | ||
| 26 | |||
| 27 | /// Virtual base address of the region. | ||
| 28 | GPUVAddr base{}; | ||
| 29 | /// Size of the region. | ||
| 30 | u64 size{}; | ||
| 31 | /// Memory area mapping type. | ||
| 32 | Type type{Type::Unmapped}; | ||
| 33 | /// CPU memory mapped address corresponding to this memory area. | ||
| 34 | VAddr backing_addr{}; | ||
| 35 | /// Offset into the backing_memory the mapping starts from. | ||
| 36 | std::size_t offset{}; | ||
| 37 | /// Pointer backing this VMA. | ||
| 38 | u8* backing_memory{}; | ||
| 39 | |||
| 40 | /// Tests if this area can be merged to the right with `next`. | ||
| 41 | bool CanBeMergedWith(const VirtualMemoryArea& next) const; | ||
| 42 | }; | ||
| 43 | |||
| 16 | class MemoryManager final { | 44 | class MemoryManager final { |
| 17 | public: | 45 | public: |
| 18 | MemoryManager(); | 46 | MemoryManager(); |
| 19 | 47 | ||
| 20 | GPUVAddr AllocateSpace(u64 size, u64 align); | 48 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| 21 | GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); | 49 | GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); |
| 22 | GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); | 50 | GPUVAddr MapBufferEx(GPUVAddr cpu_addr, u64 size); |
| 23 | GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size); | 51 | GPUVAddr MapBufferEx(GPUVAddr cpu_addr, GPUVAddr gpu_addr, u64 size); |
| 24 | GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); | 52 | GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); |
| 25 | GPUVAddr GetRegionEnd(GPUVAddr region_start) const; | ||
| 26 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); | 53 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); |
| 27 | 54 | ||
| 28 | static constexpr u64 PAGE_BITS = 16; | 55 | template <typename T> |
| 29 | static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; | 56 | T Read(GPUVAddr vaddr); |
| 30 | static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; | ||
| 31 | |||
| 32 | u8 Read8(GPUVAddr addr); | ||
| 33 | u16 Read16(GPUVAddr addr); | ||
| 34 | u32 Read32(GPUVAddr addr); | ||
| 35 | u64 Read64(GPUVAddr addr); | ||
| 36 | 57 | ||
| 37 | void Write8(GPUVAddr addr, u8 data); | 58 | template <typename T> |
| 38 | void Write16(GPUVAddr addr, u16 data); | 59 | void Write(GPUVAddr vaddr, T data); |
| 39 | void Write32(GPUVAddr addr, u32 data); | ||
| 40 | void Write64(GPUVAddr addr, u64 data); | ||
| 41 | 60 | ||
| 42 | u8* GetPointer(GPUVAddr vaddr); | 61 | u8* GetPointer(GPUVAddr vaddr); |
| 43 | 62 | ||
| 44 | void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size); | 63 | void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size); |
| 45 | void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | 64 | void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); |
| 46 | void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size); | 65 | void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); |
| 47 | 66 | ||
| 48 | private: | 67 | private: |
| 49 | enum class PageStatus : u64 { | 68 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; |
| 50 | Unmapped = 0xFFFFFFFFFFFFFFFFULL, | 69 | using VMAHandle = VMAMap::const_iterator; |
| 51 | Allocated = 0xFFFFFFFFFFFFFFFEULL, | 70 | using VMAIter = VMAMap::iterator; |
| 52 | Reserved = 0xFFFFFFFFFFFFFFFDULL, | 71 | |
| 53 | }; | 72 | void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 54 | 73 | VAddr backing_addr = 0); | |
| 55 | std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, | 74 | void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr); |
| 56 | PageStatus status); | 75 | void UnmapRegion(GPUVAddr base, u64 size); |
| 57 | VAddr& PageSlot(GPUVAddr gpu_addr); | 76 | |
| 58 | 77 | /// Finds the VMA in which the given address is included in, or `vma_map.end()`. | |
| 59 | static constexpr u64 MAX_ADDRESS{0x10000000000ULL}; | 78 | VMAHandle FindVMA(GPUVAddr target) const; |
| 60 | static constexpr u64 PAGE_TABLE_BITS{10}; | 79 | |
| 61 | static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS}; | 80 | VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size); |
| 62 | static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1}; | 81 | |
| 63 | static constexpr u64 PAGE_BLOCK_BITS{14}; | 82 | /** |
| 64 | static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS}; | 83 | * Maps an unmanaged host memory pointer at a given address. |
| 65 | static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1}; | 84 | * |
| 66 | 85 | * @param target The guest address to start the mapping at. | |
| 67 | using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>; | 86 | * @param memory The memory to be mapped. |
| 68 | std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{}; | 87 | * @param size Size of the mapping. |
| 69 | 88 | * @param state MemoryState tag to attach to the VMA. | |
| 70 | struct MappedRegion { | 89 | */ |
| 71 | VAddr cpu_addr; | 90 | VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr); |
| 72 | GPUVAddr gpu_addr; | 91 | |
| 73 | u64 size; | 92 | /// Unmaps a range of addresses, splitting VMAs as necessary. |
| 74 | }; | 93 | void UnmapRange(GPUVAddr target, u64 size); |
| 94 | |||
| 95 | /// Converts a VMAHandle to a mutable VMAIter. | ||
| 96 | VMAIter StripIterConstness(const VMAHandle& iter); | ||
| 97 | |||
| 98 | /// Unmaps the given VMA. | ||
| 99 | VMAIter Unmap(VMAIter vma); | ||
| 100 | |||
| 101 | /** | ||
| 102 | * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing | ||
| 103 | * the appropriate error checking. | ||
| 104 | */ | ||
| 105 | VMAIter CarveVMA(GPUVAddr base, u64 size); | ||
| 106 | |||
| 107 | /** | ||
| 108 | * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each | ||
| 109 | * end of the range. | ||
| 110 | */ | ||
| 111 | VMAIter CarveVMARange(GPUVAddr base, u64 size); | ||
| 112 | |||
| 113 | /** | ||
| 114 | * Splits a VMA in two, at the specified offset. | ||
| 115 | * @returns the right side of the split, with the original iterator becoming the left side. | ||
| 116 | */ | ||
| 117 | VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma); | ||
| 118 | |||
| 119 | /** | ||
| 120 | * Checks for and merges the specified VMA with adjacent ones if possible. | ||
| 121 | * @returns the merged VMA or the original if no merging was possible. | ||
| 122 | */ | ||
| 123 | VMAIter MergeAdjacent(VMAIter vma); | ||
| 124 | |||
| 125 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | ||
| 126 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | ||
| 127 | |||
| 128 | GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size, u64 align, | ||
| 129 | VirtualMemoryArea::Type vma_type); | ||
| 75 | 130 | ||
| 76 | std::vector<MappedRegion> mapped_regions; | 131 | private: |
| 132 | static constexpr u64 page_bits{16}; | ||
| 133 | static constexpr u64 page_size{1 << page_bits}; | ||
| 134 | static constexpr u64 page_mask{page_size - 1}; | ||
| 135 | |||
| 136 | /// Address space in bits, this is fairly arbitrary but sufficiently large. | ||
| 137 | static constexpr u32 address_space_width = 39; | ||
| 138 | /// Start address for mapping, this is fairly arbitrary but must be non-zero. | ||
| 139 | static constexpr GPUVAddr address_space_base = 0x100000; | ||
| 140 | /// End of address space, based on address space in bits. | ||
| 141 | static constexpr GPUVAddr address_space_end = 1ULL << address_space_width; | ||
| 142 | |||
| 143 | Common::PageTable page_table{page_bits}; | ||
| 144 | VMAMap vma_map; | ||
| 77 | }; | 145 | }; |
| 78 | 146 | ||
| 79 | } // namespace Tegra | 147 | } // namespace Tegra |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 76e292e87..d7b86df38 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 12 | #include "video_core/memory_manager.h" | ||
| 13 | 12 | ||
| 14 | namespace VideoCore { | 13 | namespace VideoCore { |
| 15 | 14 | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index ac030cfc9..0fbfbad55 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -76,8 +76,8 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | |||
| 76 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; | 76 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; |
| 77 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | 77 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + |
| 78 | global_region.GetCbufOffset()}; | 78 | global_region.GetCbufOffset()}; |
| 79 | const auto actual_addr{memory_manager.Read64(addr)}; | 79 | const auto actual_addr{memory_manager.Read<u64>(addr)}; |
| 80 | const auto size{memory_manager.Read32(addr + 8)}; | 80 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 81 | 81 | ||
| 82 | // Look up global region in the cache based on address | 82 | // Look up global region in the cache based on address |
| 83 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | 83 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 1133fa1f9..b94446428 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -610,11 +610,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 610 | // check is necessary to prevent flushing from overwriting unmapped memory. | 610 | // check is necessary to prevent flushing from overwriting unmapped memory. |
| 611 | 611 | ||
| 612 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | 612 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |
| 613 | const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr}; | 613 | // const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr}; |
| 614 | if (cached_size_in_bytes > max_size) { | 614 | // if (cached_size_in_bytes > max_size) { |
| 615 | LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); | 615 | // LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, |
| 616 | cached_size_in_bytes = max_size; | 616 | // max_size); cached_size_in_bytes = max_size; |
| 617 | } | 617 | //} |
| 618 | 618 | ||
| 619 | cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr); | 619 | cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr); |
| 620 | } | 620 | } |