diff options
| author | 2019-03-16 21:59:45 -0400 | |
|---|---|---|
| committer | 2019-03-16 21:59:45 -0400 | |
| commit | 2392e146b09c2a4b3bb557bb3a20c4afc7f75957 (patch) | |
| tree | 1eafa50be7af78d74b4781fbe858277ac8ab1d35 /src/video_core/engines | |
| parent | Merge pull request #2243 from bunnei/mem-simplify-cache (diff) | |
| parent | video_core: Refactor to use MemoryManager interface for all memory access. (diff) | |
| download | yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.gz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.xz yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.zip | |
Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 28 |
3 files changed, 29 insertions, 55 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index daefa43a6..0931b9626 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); |
| 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); |
| 43 | 43 | ||
| 44 | const GPUVAddr address = regs.dest.Address(); | ||
| 45 | const auto dest_address = | ||
| 46 | memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32)); | ||
| 47 | ASSERT_MSG(dest_address, "Invalid GPU address"); | ||
| 48 | |||
| 49 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 44 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. |
| 50 | // We do this before actually writing the new data because the destination address might contain | 45 | // We do this before actually writing the new data because the destination address might |
| 51 | // a dirty surface that will have to be written back to memory. | 46 | // contain a dirty surface that will have to be written back to memory. |
| 52 | system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), | 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; |
| 53 | sizeof(u32)); | 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); |
| 49 | memory_manager.Write32(address, data); | ||
| 54 | 50 | ||
| 55 | Memory::Write32(*dest_address, data); | ||
| 56 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
| 57 | 52 | ||
| 58 | state.write_offset++; | 53 | state.write_offset++; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 49979694e..c5d5be4ef 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) { | |||
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | void Maxwell3D::ProcessQueryGet() { | 272 | void Maxwell3D::ProcessQueryGet() { |
| 273 | GPUVAddr sequence_address = regs.query.QueryAddress(); | 273 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 274 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application | 274 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application |
| 275 | // VAddr before writing. | 275 | // VAddr before writing. |
| 276 | const auto address = memory_manager.GpuToCpuAddress(sequence_address); | ||
| 277 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 278 | 276 | ||
| 279 | // TODO(Subv): Support the other query units. | 277 | // TODO(Subv): Support the other query units. |
| 280 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 278 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |
| @@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 309 | // Write the current query sequence to the sequence address. | 307 | // Write the current query sequence to the sequence address. |
| 310 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short | 308 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short |
| 311 | // query. | 309 | // query. |
| 312 | Memory::Write32(*address, sequence); | 310 | memory_manager.Write32(sequence_address, sequence); |
| 313 | } else { | 311 | } else { |
| 314 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | 312 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast |
| 315 | // GPU, this command may actually take a while to complete in real hardware due to GPU | 313 | // GPU, this command may actually take a while to complete in real hardware due to GPU |
| @@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 318 | query_result.value = result; | 316 | query_result.value = result; |
| 319 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming | 317 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming |
| 320 | query_result.timestamp = system.CoreTiming().GetTicks(); | 318 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 321 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 319 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 322 | } | 320 | } |
| 323 | dirty_flags.OnMemoryWrite(); | 321 | dirty_flags.OnMemoryWrite(); |
| 324 | break; | 322 | break; |
| @@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 393 | // Don't allow writing past the end of the buffer. | 391 | // Don't allow writing past the end of the buffer. |
| 394 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 392 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); |
| 395 | 393 | ||
| 396 | const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 394 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; |
| 397 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 398 | 395 | ||
| 399 | u8* ptr{Memory::GetPointer(*address)}; | 396 | u8* ptr{memory_manager.GetPointer(address)}; |
| 400 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 397 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); |
| 401 | std::memcpy(ptr, &value, sizeof(u32)); | 398 | memory_manager.Write32(address, value); |
| 402 | 399 | ||
| 403 | dirty_flags.OnMemoryWrite(); | 400 | dirty_flags.OnMemoryWrite(); |
| 404 | 401 | ||
| @@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 407 | } | 404 | } |
| 408 | 405 | ||
| 409 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 406 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| 410 | const GPUVAddr tic_base_address = regs.tic.TICAddress(); | 407 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; |
| 411 | |||
| 412 | const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); | ||
| 413 | const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); | ||
| 414 | ASSERT_MSG(tic_address_cpu, "Invalid GPU address"); | ||
| 415 | 408 | ||
| 416 | Texture::TICEntry tic_entry; | 409 | Texture::TICEntry tic_entry; |
| 417 | Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); | 410 | memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 418 | 411 | ||
| 419 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | 412 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || |
| 420 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | 413 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, |
| @@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 432 | } | 425 | } |
| 433 | 426 | ||
| 434 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | 427 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { |
| 435 | const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); | 428 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; |
| 436 | |||
| 437 | const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); | ||
| 438 | const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); | ||
| 439 | ASSERT_MSG(tsc_address_cpu, "Invalid GPU address"); | ||
| 440 | 429 | ||
| 441 | Texture::TSCEntry tsc_entry; | 430 | Texture::TSCEntry tsc_entry; |
| 442 | Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 431 | memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 443 | return tsc_entry; | 432 | return tsc_entry; |
| 444 | } | 433 | } |
| 445 | 434 | ||
| @@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | |||
| 458 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | 447 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; |
| 459 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | 448 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { |
| 460 | 449 | ||
| 461 | const auto address = memory_manager.GpuToCpuAddress(current_texture); | 450 | const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)}; |
| 462 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 463 | |||
| 464 | const Texture::TextureHandle tex_handle{Memory::Read32(*address)}; | ||
| 465 | 451 | ||
| 466 | Texture::FullTextureInfo tex_info{}; | 452 | Texture::FullTextureInfo tex_info{}; |
| 467 | // TODO(Subv): Use the shader to determine which textures are actually accessed. | 453 | // TODO(Subv): Use the shader to determine which textures are actually accessed. |
| @@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | |||
| 496 | 482 | ||
| 497 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | 483 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); |
| 498 | 484 | ||
| 499 | const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); | 485 | const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)}; |
| 500 | ASSERT_MSG(tex_address_cpu, "Invalid GPU address"); | ||
| 501 | |||
| 502 | const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; | ||
| 503 | 486 | ||
| 504 | Texture::FullTextureInfo tex_info{}; | 487 | Texture::FullTextureInfo tex_info{}; |
| 505 | tex_info.index = static_cast<u32>(offset); | 488 | tex_info.index = static_cast<u32>(offset); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 415a6319a..a0ded4c25 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() { | |||
| 43 | const GPUVAddr source = regs.src_address.Address(); | 43 | const GPUVAddr source = regs.src_address.Address(); |
| 44 | const GPUVAddr dest = regs.dst_address.Address(); | 44 | const GPUVAddr dest = regs.dst_address.Address(); |
| 45 | 45 | ||
| 46 | const auto source_cpu = memory_manager.GpuToCpuAddress(source); | ||
| 47 | const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); | ||
| 48 | ASSERT_MSG(source_cpu, "Invalid source GPU address"); | ||
| 49 | ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); | ||
| 50 | |||
| 51 | // TODO(Subv): Perform more research and implement all features of this engine. | 46 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 52 | ASSERT(regs.exec.enable_swizzle == 0); | 47 | ASSERT(regs.exec.enable_swizzle == 0); |
| 53 | ASSERT(regs.exec.query_mode == Regs::QueryMode::None); | 48 | ASSERT(regs.exec.query_mode == Regs::QueryMode::None); |
| @@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 70 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 65 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
| 71 | // y_count). | 66 | // y_count). |
| 72 | if (!regs.exec.enable_2d) { | 67 | if (!regs.exec.enable_2d) { |
| 73 | Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); | 68 | memory_manager.CopyBlock(dest, source, regs.x_count); |
| 74 | return; | 69 | return; |
| 75 | } | 70 | } |
| 76 | 71 | ||
| @@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 79 | // rectangle. There is no need to manually flush/invalidate the regions because | 74 | // rectangle. There is no need to manually flush/invalidate the regions because |
| 80 | // CopyBlock does that for us. | 75 | // CopyBlock does that for us. |
| 81 | for (u32 line = 0; line < regs.y_count; ++line) { | 76 | for (u32 line = 0; line < regs.y_count; ++line) { |
| 82 | const VAddr source_line = *source_cpu + line * regs.src_pitch; | 77 | const GPUVAddr source_line = source + line * regs.src_pitch; |
| 83 | const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; | 78 | const GPUVAddr dest_line = dest + line * regs.dst_pitch; |
| 84 | Memory::CopyBlock(dest_line, source_line, regs.x_count); | 79 | memory_manager.CopyBlock(dest_line, source_line, regs.x_count); |
| 85 | } | 80 | } |
| 86 | return; | 81 | return; |
| 87 | } | 82 | } |
| @@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() { | |||
| 90 | 85 | ||
| 91 | const std::size_t copy_size = regs.x_count * regs.y_count; | 86 | const std::size_t copy_size = regs.x_count * regs.y_count; |
| 92 | 87 | ||
| 88 | auto source_ptr{memory_manager.GetPointer(source)}; | ||
| 89 | auto dst_ptr{memory_manager.GetPointer(dest)}; | ||
| 90 | |||
| 93 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 91 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |
| 94 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 92 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated |
| 95 | // copying. | 93 | // copying. |
| 96 | Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( | 94 | rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); |
| 97 | ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size); | ||
| 98 | 95 | ||
| 99 | // We have to invalidate the destination region to evict any outdated surfaces from the | 96 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 100 | // cache. We do this before actually writing the new data because the destination address | 97 | // cache. We do this before actually writing the new data because the destination address |
| 101 | // might contain a dirty surface that will have to be written back to memory. | 98 | // might contain a dirty surface that will have to be written back to memory. |
| 102 | Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( | 99 | rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); |
| 103 | ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size); | ||
| 104 | }; | 100 | }; |
| 105 | 101 | ||
| 106 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 102 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| @@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() { | |||
| 113 | copy_size * src_bytes_per_pixel); | 109 | copy_size * src_bytes_per_pixel); |
| 114 | 110 | ||
| 115 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 111 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, |
| 116 | regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, | 112 | regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, |
| 117 | *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, | 113 | regs.src_params.BlockHeight(), regs.src_params.pos_x, |
| 118 | regs.src_params.pos_y); | 114 | regs.src_params.pos_y); |
| 119 | } else { | 115 | } else { |
| 120 | ASSERT(regs.dst_params.size_z == 1); | 116 | ASSERT(regs.dst_params.size_z == 1); |
| @@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 127 | 123 | ||
| 128 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 124 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 129 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 125 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, |
| 130 | src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); | 126 | src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); |
| 131 | } | 127 | } |
| 132 | } | 128 | } |
| 133 | 129 | ||