Merge pull request #2244 from bunnei/gpu-mem-refactor

video_core: Refactor to use MemoryManager interface for all memory access.
author: bunnei 2019-03-16 21:59:45 -0400
committer: GitHub 2019-03-16 21:59:45 -0400
commit: 2392e146b09c2a4b3bb557bb3a20c4afc7f75957 (patch)
tree: 1eafa50be7af78d74b4781fbe858277ac8ab1d35 /src/video_core/engines
parent: Merge pull request #2243 from bunnei/mem-simplify-cache (diff)
parent: video_core: Refactor to use MemoryManager interface for all memory access. (diff)
download: yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.gz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.xz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.zip
3 files changed, 29 insertions, 55 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index daefa43a6..0931b9626 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
-    const GPUVAddr address = regs.dest.Address();
-    const auto dest_address =
-        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-    ASSERT_MSG(dest_address, "Invalid GPU address");
    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
+    // We do this before actually writing the new data because the destination address might
-    // a dirty surface that will have to be written back to memory.
+    // contain a dirty surface that will have to be written back to memory.
-    system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
-                                                    sizeof(u32));
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write32(address, data);
-    Memory::Write32(*dest_address, data);
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
    state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 49979694e..c5d5be4ef 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }
 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
    // VAddr before writing.
-    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
-    ASSERT_MSG(address, "Invalid GPU address");
    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
            // Write the current query sequence to the sequence address.
            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
            // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write32(sequence_address, sequence);
        } else {
            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
            // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
            query_result.value = result;
            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
            query_result.timestamp = system.CoreTiming().GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
        }
        dirty_flags.OnMemoryWrite();
        break;
@@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
-    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
-    ASSERT_MSG(address, "Invalid GPU address");
-    u8* ptr{Memory::GetPointer(*address)};
+    u8* ptr{memory_manager.GetPointer(address)};
    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    std::memcpy(ptr, &value, sizeof(u32));
+    memory_manager.Write32(address, value);
    dirty_flags.OnMemoryWrite();
@@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_base_address = regs.tic.TICAddress();
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
-    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
-    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
    Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
-    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
-    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
    Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
    return tsc_entry;
 }
@@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
-        const auto address = memory_manager.GpuToCpuAddress(current_texture);
+        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
-        ASSERT_MSG(address, "Invalid GPU address");
-        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
        Texture::FullTextureInfo tex_info{};
        // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
-    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
    Texture::FullTextureInfo tex_info{};
    tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 415a6319a..a0ded4c25 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
    const GPUVAddr source = regs.src_address.Address();
    const GPUVAddr dest = regs.dst_address.Address();
-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
    // TODO(Subv): Perform more research and implement all features of this engine.
    ASSERT(regs.exec.enable_swizzle == 0);
    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
        // y_count).
        if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
            return;
        }
@@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
        // rectangle. There is no need to manually flush/invalidate the regions because
        // CopyBlock does that for us.
        for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = *source_cpu + line * regs.src_pitch;
+            const GPUVAddr source_line = source + line * regs.src_pitch;
-            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
        }
        return;
    }
@@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() {
    const std::size_t copy_size = regs.x_count * regs.y_count;
+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
-            ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
-            ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
    };
    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
                           copy_size * src_bytes_per_pixel);
        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
-                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                  regs.src_params.pos_y);
    } else {
        ASSERT(regs.dst_params.size_z == 1);
@@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
    }
 }
author	bunnei	2019-03-16 21:59:45 -0400
committer	GitHub	2019-03-16 21:59:45 -0400
commit	2392e146b09c2a4b3bb557bb3a20c4afc7f75957 (patch)
tree	1eafa50be7af78d74b4781fbe858277ac8ab1d35 /src/video_core/engines
parent	Merge pull request #2243 from bunnei/mem-simplify-cache (diff)
parent	video_core: Refactor to use MemoryManager interface for all memory access. (diff)
download	yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.gz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.xz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.zip