diff options
| author | 2019-02-24 00:15:35 -0500 | |
|---|---|---|
| committer | 2019-03-16 00:38:48 -0400 | |
| commit | 574e89d924b484b846f4eb522c5a62af9d63e801 (patch) | |
| tree | 24b2d6e21b9e3aff77f2242eb94ec49a2b8e67c9 /src | |
| parent | Merge pull request #2237 from bunnei/cache-host-addr (diff) | |
| download | yuzu-574e89d924b484b846f4eb522c5a62af9d63e801.tar.gz yuzu-574e89d924b484b846f4eb522c5a62af9d63e801.tar.xz yuzu-574e89d924b484b846f4eb522c5a62af9d63e801.zip | |
video_core: Refactor to use MemoryManager interface for all memory access.
# Conflicts:
# src/video_core/engines/kepler_memory.cpp
# src/video_core/engines/maxwell_3d.cpp
# src/video_core/morton.cpp
# src/video_core/morton.h
# src/video_core/renderer_opengl/gl_global_cache.cpp
# src/video_core/renderer_opengl/gl_global_cache.h
# src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
Diffstat (limited to '')
20 files changed, 196 insertions, 189 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index bff1a37ff..8b1bea1ae 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -55,12 +55,9 @@ bool DmaPusher::Step() { | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | // Push buffer non-empty, read a word | 57 | // Push buffer non-empty, read a word |
| 58 | const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); | ||
| 59 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 60 | |||
| 61 | command_headers.resize(command_list_header.size); | 58 | command_headers.resize(command_list_header.size); |
| 62 | 59 | gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), | |
| 63 | Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); | 60 | command_list_header.size * sizeof(u32)); |
| 64 | 61 | ||
| 65 | for (const CommandHeader& command_header : command_headers) { | 62 | for (const CommandHeader& command_header : command_headers) { |
| 66 | 63 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index daefa43a6..0931b9626 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); |
| 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); |
| 43 | 43 | ||
| 44 | const GPUVAddr address = regs.dest.Address(); | ||
| 45 | const auto dest_address = | ||
| 46 | memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32)); | ||
| 47 | ASSERT_MSG(dest_address, "Invalid GPU address"); | ||
| 48 | |||
| 49 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 44 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. |
| 50 | // We do this before actually writing the new data because the destination address might contain | 45 | // We do this before actually writing the new data because the destination address might |
| 51 | // a dirty surface that will have to be written back to memory. | 46 | // contain a dirty surface that will have to be written back to memory. |
| 52 | system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), | 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; |
| 53 | sizeof(u32)); | 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); |
| 49 | memory_manager.Write32(address, data); | ||
| 54 | 50 | ||
| 55 | Memory::Write32(*dest_address, data); | ||
| 56 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
| 57 | 52 | ||
| 58 | state.write_offset++; | 53 | state.write_offset++; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 49979694e..c5d5be4ef 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) { | |||
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | void Maxwell3D::ProcessQueryGet() { | 272 | void Maxwell3D::ProcessQueryGet() { |
| 273 | GPUVAddr sequence_address = regs.query.QueryAddress(); | 273 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 274 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application | 274 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application |
| 275 | // VAddr before writing. | 275 | // VAddr before writing. |
| 276 | const auto address = memory_manager.GpuToCpuAddress(sequence_address); | ||
| 277 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 278 | 276 | ||
| 279 | // TODO(Subv): Support the other query units. | 277 | // TODO(Subv): Support the other query units. |
| 280 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 278 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |
| @@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 309 | // Write the current query sequence to the sequence address. | 307 | // Write the current query sequence to the sequence address. |
| 310 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short | 308 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short |
| 311 | // query. | 309 | // query. |
| 312 | Memory::Write32(*address, sequence); | 310 | memory_manager.Write32(sequence_address, sequence); |
| 313 | } else { | 311 | } else { |
| 314 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | 312 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast |
| 315 | // GPU, this command may actually take a while to complete in real hardware due to GPU | 313 | // GPU, this command may actually take a while to complete in real hardware due to GPU |
| @@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 318 | query_result.value = result; | 316 | query_result.value = result; |
| 319 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming | 317 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming |
| 320 | query_result.timestamp = system.CoreTiming().GetTicks(); | 318 | query_result.timestamp = system.CoreTiming().GetTicks(); |
| 321 | Memory::WriteBlock(*address, &query_result, sizeof(query_result)); | 319 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); |
| 322 | } | 320 | } |
| 323 | dirty_flags.OnMemoryWrite(); | 321 | dirty_flags.OnMemoryWrite(); |
| 324 | break; | 322 | break; |
| @@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 393 | // Don't allow writing past the end of the buffer. | 391 | // Don't allow writing past the end of the buffer. |
| 394 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); | 392 | ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); |
| 395 | 393 | ||
| 396 | const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 394 | const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; |
| 397 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 398 | 395 | ||
| 399 | u8* ptr{Memory::GetPointer(*address)}; | 396 | u8* ptr{memory_manager.GetPointer(address)}; |
| 400 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | 397 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); |
| 401 | std::memcpy(ptr, &value, sizeof(u32)); | 398 | memory_manager.Write32(address, value); |
| 402 | 399 | ||
| 403 | dirty_flags.OnMemoryWrite(); | 400 | dirty_flags.OnMemoryWrite(); |
| 404 | 401 | ||
| @@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 407 | } | 404 | } |
| 408 | 405 | ||
| 409 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 406 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| 410 | const GPUVAddr tic_base_address = regs.tic.TICAddress(); | 407 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; |
| 411 | |||
| 412 | const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); | ||
| 413 | const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); | ||
| 414 | ASSERT_MSG(tic_address_cpu, "Invalid GPU address"); | ||
| 415 | 408 | ||
| 416 | Texture::TICEntry tic_entry; | 409 | Texture::TICEntry tic_entry; |
| 417 | Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); | 410 | memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 418 | 411 | ||
| 419 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | 412 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || |
| 420 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | 413 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, |
| @@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 432 | } | 425 | } |
| 433 | 426 | ||
| 434 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | 427 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { |
| 435 | const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); | 428 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; |
| 436 | |||
| 437 | const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); | ||
| 438 | const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); | ||
| 439 | ASSERT_MSG(tsc_address_cpu, "Invalid GPU address"); | ||
| 440 | 429 | ||
| 441 | Texture::TSCEntry tsc_entry; | 430 | Texture::TSCEntry tsc_entry; |
| 442 | Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 431 | memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 443 | return tsc_entry; | 432 | return tsc_entry; |
| 444 | } | 433 | } |
| 445 | 434 | ||
| @@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | |||
| 458 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | 447 | for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; |
| 459 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | 448 | current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { |
| 460 | 449 | ||
| 461 | const auto address = memory_manager.GpuToCpuAddress(current_texture); | 450 | const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)}; |
| 462 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 463 | |||
| 464 | const Texture::TextureHandle tex_handle{Memory::Read32(*address)}; | ||
| 465 | 451 | ||
| 466 | Texture::FullTextureInfo tex_info{}; | 452 | Texture::FullTextureInfo tex_info{}; |
| 467 | // TODO(Subv): Use the shader to determine which textures are actually accessed. | 453 | // TODO(Subv): Use the shader to determine which textures are actually accessed. |
| @@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | |||
| 496 | 482 | ||
| 497 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); | 483 | ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); |
| 498 | 484 | ||
| 499 | const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); | 485 | const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)}; |
| 500 | ASSERT_MSG(tex_address_cpu, "Invalid GPU address"); | ||
| 501 | |||
| 502 | const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; | ||
| 503 | 486 | ||
| 504 | Texture::FullTextureInfo tex_info{}; | 487 | Texture::FullTextureInfo tex_info{}; |
| 505 | tex_info.index = static_cast<u32>(offset); | 488 | tex_info.index = static_cast<u32>(offset); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 415a6319a..a0ded4c25 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() { | |||
| 43 | const GPUVAddr source = regs.src_address.Address(); | 43 | const GPUVAddr source = regs.src_address.Address(); |
| 44 | const GPUVAddr dest = regs.dst_address.Address(); | 44 | const GPUVAddr dest = regs.dst_address.Address(); |
| 45 | 45 | ||
| 46 | const auto source_cpu = memory_manager.GpuToCpuAddress(source); | ||
| 47 | const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); | ||
| 48 | ASSERT_MSG(source_cpu, "Invalid source GPU address"); | ||
| 49 | ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); | ||
| 50 | |||
| 51 | // TODO(Subv): Perform more research and implement all features of this engine. | 46 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 52 | ASSERT(regs.exec.enable_swizzle == 0); | 47 | ASSERT(regs.exec.enable_swizzle == 0); |
| 53 | ASSERT(regs.exec.query_mode == Regs::QueryMode::None); | 48 | ASSERT(regs.exec.query_mode == Regs::QueryMode::None); |
| @@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 70 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 65 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, |
| 71 | // y_count). | 66 | // y_count). |
| 72 | if (!regs.exec.enable_2d) { | 67 | if (!regs.exec.enable_2d) { |
| 73 | Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); | 68 | memory_manager.CopyBlock(dest, source, regs.x_count); |
| 74 | return; | 69 | return; |
| 75 | } | 70 | } |
| 76 | 71 | ||
| @@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() { | |||
| 79 | // rectangle. There is no need to manually flush/invalidate the regions because | 74 | // rectangle. There is no need to manually flush/invalidate the regions because |
| 80 | // CopyBlock does that for us. | 75 | // CopyBlock does that for us. |
| 81 | for (u32 line = 0; line < regs.y_count; ++line) { | 76 | for (u32 line = 0; line < regs.y_count; ++line) { |
| 82 | const VAddr source_line = *source_cpu + line * regs.src_pitch; | 77 | const GPUVAddr source_line = source + line * regs.src_pitch; |
| 83 | const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; | 78 | const GPUVAddr dest_line = dest + line * regs.dst_pitch; |
| 84 | Memory::CopyBlock(dest_line, source_line, regs.x_count); | 79 | memory_manager.CopyBlock(dest_line, source_line, regs.x_count); |
| 85 | } | 80 | } |
| 86 | return; | 81 | return; |
| 87 | } | 82 | } |
| @@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() { | |||
| 90 | 85 | ||
| 91 | const std::size_t copy_size = regs.x_count * regs.y_count; | 86 | const std::size_t copy_size = regs.x_count * regs.y_count; |
| 92 | 87 | ||
| 88 | auto source_ptr{memory_manager.GetPointer(source)}; | ||
| 89 | auto dst_ptr{memory_manager.GetPointer(dest)}; | ||
| 90 | |||
| 93 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 91 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |
| 94 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 92 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated |
| 95 | // copying. | 93 | // copying. |
| 96 | Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( | 94 | rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); |
| 97 | ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size); | ||
| 98 | 95 | ||
| 99 | // We have to invalidate the destination region to evict any outdated surfaces from the | 96 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 100 | // cache. We do this before actually writing the new data because the destination address | 97 | // cache. We do this before actually writing the new data because the destination address |
| 101 | // might contain a dirty surface that will have to be written back to memory. | 98 | // might contain a dirty surface that will have to be written back to memory. |
| 102 | Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( | 99 | rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); |
| 103 | ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size); | ||
| 104 | }; | 100 | }; |
| 105 | 101 | ||
| 106 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 102 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| @@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() { | |||
| 113 | copy_size * src_bytes_per_pixel); | 109 | copy_size * src_bytes_per_pixel); |
| 114 | 110 | ||
| 115 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 111 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, |
| 116 | regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, | 112 | regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, |
| 117 | *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, | 113 | regs.src_params.BlockHeight(), regs.src_params.pos_x, |
| 118 | regs.src_params.pos_y); | 114 | regs.src_params.pos_y); |
| 119 | } else { | 115 | } else { |
| 120 | ASSERT(regs.dst_params.size_z == 1); | 116 | ASSERT(regs.dst_params.size_z == 1); |
| @@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 127 | 123 | ||
| 128 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 124 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 129 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 125 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, |
| 130 | src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); | 126 | src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); |
| 131 | } | 127 | } |
| 132 | } | 128 | } |
| 133 | 129 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08abf8ac9..66c690494 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 274 | const auto op = | 274 | const auto op = |
| 275 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | 275 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); |
| 276 | if (op == GpuSemaphoreOperation::WriteLong) { | 276 | if (op == GpuSemaphoreOperation::WriteLong) { |
| 277 | auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 278 | struct Block { | 277 | struct Block { |
| 279 | u32 sequence; | 278 | u32 sequence; |
| 280 | u32 zeros = 0; | 279 | u32 zeros = 0; |
| @@ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 286 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 285 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of |
| 287 | // CoreTiming | 286 | // CoreTiming |
| 288 | block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); | 287 | block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); |
| 289 | Memory::WriteBlock(*address, &block, sizeof(block)); | 288 | memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block)); |
| 290 | } else { | 289 | } else { |
| 291 | const auto address = | 290 | const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())}; |
| 292 | memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 293 | const u32 word = Memory::Read32(*address); | ||
| 294 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | 291 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || |
| 295 | (op == GpuSemaphoreOperation::AcquireGequal && | 292 | (op == GpuSemaphoreOperation::AcquireGequal && |
| 296 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | 293 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || |
| @@ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 317 | } | 314 | } |
| 318 | 315 | ||
| 319 | void GPU::ProcessSemaphoreRelease() { | 316 | void GPU::ProcessSemaphoreRelease() { |
| 320 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | 317 | memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release); |
| 321 | Memory::Write32(*address, regs.semaphore_release); | ||
| 322 | } | 318 | } |
| 323 | 319 | ||
| 324 | void GPU::ProcessSemaphoreAcquire() { | 320 | void GPU::ProcessSemaphoreAcquire() { |
| 325 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | 321 | const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress()); |
| 326 | const u32 word = Memory::Read32(*address); | ||
| 327 | const auto value = regs.semaphore_acquire; | 322 | const auto value = regs.semaphore_acquire; |
| 328 | if (word != value) { | 323 | if (word != value) { |
| 329 | regs.acquire_active = true; | 324 | regs.acquire_active = true; |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 54abe5298..8e8f36f28 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/memory.h" | ||
| 8 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| 9 | 10 | ||
| 10 | namespace Tegra { | 11 | namespace Tegra { |
| @@ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { | |||
| 162 | return base_addr + (gpu_addr & PAGE_MASK); | 163 | return base_addr + (gpu_addr & PAGE_MASK); |
| 163 | } | 164 | } |
| 164 | 165 | ||
| 165 | std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { | 166 | u8 MemoryManager::Read8(GPUVAddr addr) { |
| 166 | std::vector<GPUVAddr> results; | 167 | return Memory::Read8(*GpuToCpuAddress(addr)); |
| 167 | for (const auto& region : mapped_regions) { | 168 | } |
| 168 | if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { | 169 | |
| 169 | const u64 offset{cpu_addr - region.cpu_addr}; | 170 | u16 MemoryManager::Read16(GPUVAddr addr) { |
| 170 | results.push_back(region.gpu_addr + offset); | 171 | return Memory::Read16(*GpuToCpuAddress(addr)); |
| 171 | } | 172 | } |
| 172 | } | 173 | |
| 173 | return results; | 174 | u32 MemoryManager::Read32(GPUVAddr addr) { |
| 175 | return Memory::Read32(*GpuToCpuAddress(addr)); | ||
| 176 | } | ||
| 177 | |||
| 178 | u64 MemoryManager::Read64(GPUVAddr addr) { | ||
| 179 | return Memory::Read64(*GpuToCpuAddress(addr)); | ||
| 180 | } | ||
| 181 | |||
| 182 | void MemoryManager::Write8(GPUVAddr addr, u8 data) { | ||
| 183 | Memory::Write8(*GpuToCpuAddress(addr), data); | ||
| 184 | } | ||
| 185 | |||
| 186 | void MemoryManager::Write16(GPUVAddr addr, u16 data) { | ||
| 187 | Memory::Write16(*GpuToCpuAddress(addr), data); | ||
| 188 | } | ||
| 189 | |||
| 190 | void MemoryManager::Write32(GPUVAddr addr, u32 data) { | ||
| 191 | Memory::Write32(*GpuToCpuAddress(addr), data); | ||
| 192 | } | ||
| 193 | |||
| 194 | void MemoryManager::Write64(GPUVAddr addr, u64 data) { | ||
| 195 | Memory::Write64(*GpuToCpuAddress(addr), data); | ||
| 196 | } | ||
| 197 | |||
| 198 | u8* MemoryManager::GetPointer(GPUVAddr addr) { | ||
| 199 | return Memory::GetPointer(*GpuToCpuAddress(addr)); | ||
| 200 | } | ||
| 201 | |||
| 202 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) { | ||
| 203 | std::memcpy(dest_buffer, GetPointer(src_addr), size); | ||
| 204 | } | ||
| 205 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { | ||
| 206 | std::memcpy(GetPointer(dest_addr), src_buffer, size); | ||
| 207 | } | ||
| 208 | |||
| 209 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { | ||
| 210 | std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); | ||
| 174 | } | 211 | } |
| 175 | 212 | ||
| 176 | VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { | 213 | VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index fb03497ca..425e2f31c 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -27,12 +27,27 @@ public: | |||
| 27 | GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); | 27 | GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); |
| 28 | GPUVAddr GetRegionEnd(GPUVAddr region_start) const; | 28 | GPUVAddr GetRegionEnd(GPUVAddr region_start) const; |
| 29 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); | 29 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); |
| 30 | std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; | ||
| 31 | 30 | ||
| 32 | static constexpr u64 PAGE_BITS = 16; | 31 | static constexpr u64 PAGE_BITS = 16; |
| 33 | static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; | 32 | static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; |
| 34 | static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; | 33 | static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; |
| 35 | 34 | ||
| 35 | u8 Read8(GPUVAddr addr); | ||
| 36 | u16 Read16(GPUVAddr addr); | ||
| 37 | u32 Read32(GPUVAddr addr); | ||
| 38 | u64 Read64(GPUVAddr addr); | ||
| 39 | |||
| 40 | void Write8(GPUVAddr addr, u8 data); | ||
| 41 | void Write16(GPUVAddr addr, u16 data); | ||
| 42 | void Write32(GPUVAddr addr, u32 data); | ||
| 43 | void Write64(GPUVAddr addr, u64 data); | ||
| 44 | |||
| 45 | u8* GetPointer(GPUVAddr vaddr); | ||
| 46 | |||
| 47 | void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size); | ||
| 48 | void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | ||
| 49 | void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size); | ||
| 50 | |||
| 36 | private: | 51 | private: |
| 37 | enum class PageStatus : u64 { | 52 | enum class PageStatus : u64 { |
| 38 | Unmapped = 0xFFFFFFFFFFFFFFFFULL, | 53 | Unmapped = 0xFFFFFFFFFFFFFFFFULL, |
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9692ce143..3e91cbc83 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/morton.h" | 9 | #include "video_core/morton.h" |
| 11 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 12 | #include "video_core/textures/decoders.h" | 11 | #include "video_core/textures/decoders.h" |
| @@ -16,12 +15,12 @@ namespace VideoCore { | |||
| 16 | using Surface::GetBytesPerPixel; | 15 | using Surface::GetBytesPerPixel; |
| 17 | using Surface::PixelFormat; | 16 | using Surface::PixelFormat; |
| 18 | 17 | ||
| 19 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr); | 18 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); |
| 20 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | 19 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; |
| 21 | 20 | ||
| 22 | template <bool morton_to_linear, PixelFormat format> | 21 | template <bool morton_to_linear, PixelFormat format> |
| 23 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | 22 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, |
| 24 | u32 tile_width_spacing, u8* buffer, VAddr addr) { | 23 | u32 tile_width_spacing, u8* buffer, u8* addr) { |
| 25 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | 24 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); |
| 26 | 25 | ||
| 27 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | 26 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| @@ -34,10 +33,10 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth | |||
| 34 | stride, height, depth, block_height, block_depth, | 33 | stride, height, depth, block_height, block_depth, |
| 35 | tile_width_spacing); | 34 | tile_width_spacing); |
| 36 | } else { | 35 | } else { |
| 37 | Tegra::Texture::CopySwizzledData( | 36 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, |
| 38 | (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, | 37 | (height + tile_size_y - 1) / tile_size_y, depth, |
| 39 | depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, | 38 | bytes_per_pixel, bytes_per_pixel, addr, buffer, false, |
| 40 | block_height, block_depth, tile_width_spacing); | 39 | block_height, block_depth, tile_width_spacing); |
| 41 | } | 40 | } |
| 42 | } | 41 | } |
| 43 | 42 | ||
| @@ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | |||
| 282 | 281 | ||
| 283 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | 282 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, |
| 284 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 283 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 285 | u8* buffer, VAddr addr) { | 284 | u8* buffer, u8* addr) { |
| 286 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, | 285 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, |
| 287 | tile_width_spacing, buffer, addr); | 286 | tile_width_spacing, buffer, addr); |
| 288 | } | 287 | } |
diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b565204b5..ee5b45555 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h | |||
| @@ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; | |||
| 13 | 13 | ||
| 14 | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | 14 | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, |
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, | 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, |
| 16 | u8* buffer, VAddr addr); | 16 | u8* buffer, u8* addr); |
| 17 | 17 | ||
| 18 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, | 18 | void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, |
| 19 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); | 19 | u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a4eea61a6..5048ed6ce 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -24,14 +24,12 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, | 24 | GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, |
| 25 | std::size_t alignment, bool cache) { | 25 | std::size_t alignment, bool cache) { |
| 26 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | 26 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
| 27 | const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; | ||
| 28 | ASSERT_MSG(cpu_addr, "Invalid GPU address"); | ||
| 29 | 27 | ||
| 30 | // Cache management is a big overhead, so only cache entries with a given size. | 28 | // Cache management is a big overhead, so only cache entries with a given size. |
| 31 | // TODO: Figure out which size is the best for given games. | 29 | // TODO: Figure out which size is the best for given games. |
| 32 | cache &= size >= 2048; | 30 | cache &= size >= 2048; |
| 33 | 31 | ||
| 34 | const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; | 32 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; |
| 35 | if (cache) { | 33 | if (cache) { |
| 36 | auto entry = TryGet(host_ptr); | 34 | auto entry = TryGet(host_ptr); |
| 37 | if (entry) { | 35 | if (entry) { |
| @@ -54,8 +52,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | |||
| 54 | buffer_offset += size; | 52 | buffer_offset += size; |
| 55 | 53 | ||
| 56 | if (cache) { | 54 | if (cache) { |
| 57 | auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, | 55 | auto entry = std::make_shared<CachedBufferEntry>( |
| 58 | alignment, host_ptr); | 56 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); |
| 59 | Register(entry); | 57 | Register(entry); |
| 60 | } | 58 | } |
| 61 | 59 | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index a2c509c24..c8dbcacbd 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/memory.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_global_cache.h" | 10 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -39,7 +38,7 @@ void CachedGlobalRegion::Reload(u32 size_) { | |||
| 39 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); | 38 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); |
| 40 | } | 39 | } |
| 41 | 40 | ||
| 42 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { |
| 43 | const auto search{reserve.find(addr)}; | 42 | const auto search{reserve.find(addr)}; |
| 44 | if (search == reserve.end()) { | 43 | if (search == reserve.end()) { |
| 45 | return {}; | 44 | return {}; |
| @@ -47,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 | |||
| 47 | return search->second; | 46 | return search->second; |
| 48 | } | 47 | } |
| 49 | 48 | ||
| 50 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { | 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, |
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | 50 | u8* host_ptr) { |
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | 52 | if (!region) { |
| 53 | // No reserved surface available, create a new one and reserve it | 53 | // No reserved surface available, create a new one and reserve it |
| 54 | region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); | 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |
| 55 | const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); | ||
| 56 | region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); | ||
| 55 | ReserveGlobalRegion(region); | 57 | ReserveGlobalRegion(region); |
| 56 | } | 58 | } |
| 57 | region->Reload(size); | 59 | region->Reload(size); |
| @@ -59,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si | |||
| 59 | } | 61 | } |
| 60 | 62 | ||
| 61 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | 63 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { |
| 62 | reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); | 64 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); |
| 63 | } | 65 | } |
| 64 | 66 | ||
| 65 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | 67 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) |
| @@ -70,23 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | |||
| 70 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | 72 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { |
| 71 | 73 | ||
| 72 | auto& gpu{Core::System::GetInstance().GPU()}; | 74 | auto& gpu{Core::System::GetInstance().GPU()}; |
| 73 | const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; | 75 | auto& memory_manager{gpu.MemoryManager()}; |
| 74 | const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( | 76 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; |
| 75 | cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); | 77 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + |
| 76 | ASSERT(cbuf_addr); | 78 | global_region.GetCbufOffset()}; |
| 77 | 79 | const auto actual_addr{memory_manager.Read64(addr)}; | |
| 78 | const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); | 80 | const auto size{memory_manager.Read32(addr + 8)}; |
| 79 | const auto size = Memory::Read32(*cbuf_addr + 8); | ||
| 80 | const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); | ||
| 81 | ASSERT(actual_addr); | ||
| 82 | 81 | ||
| 83 | // Look up global region in the cache based on address | 82 | // Look up global region in the cache based on address |
| 84 | const auto& host_ptr{Memory::GetPointer(*actual_addr)}; | 83 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; |
| 85 | GlobalRegion region{TryGet(host_ptr)}; | 84 | GlobalRegion region{TryGet(host_ptr)}; |
| 86 | 85 | ||
| 87 | if (!region) { | 86 | if (!region) { |
| 88 | // No global region found - create a new one | 87 | // No global region found - create a new one |
| 89 | region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); | 88 | region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); |
| 90 | Register(region); | 89 | Register(region); |
| 91 | } | 90 | } |
| 92 | 91 | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index e497a0619..a840491f7 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h | |||
| @@ -65,11 +65,11 @@ public: | |||
| 65 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | 65 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); |
| 66 | 66 | ||
| 67 | private: | 67 | private: |
| 68 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | 68 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; |
| 69 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); | 69 | GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr); |
| 70 | void ReserveGlobalRegion(GlobalRegion region); | 70 | void ReserveGlobalRegion(GlobalRegion region); |
| 71 | 71 | ||
| 72 | std::unordered_map<VAddr, GlobalRegion> reserve; | 72 | std::unordered_map<CacheAddr, GlobalRegion> reserve; |
| 73 | }; | 73 | }; |
| 74 | 74 | ||
| 75 | } // namespace OpenGL | 75 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index 77d5cedd2..75d816795 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp | |||
| @@ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size | |||
| 46 | auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); | 46 | auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); |
| 47 | 47 | ||
| 48 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | 48 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
| 49 | const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; | 49 | const u8* source{memory_manager.GetPointer(gpu_addr)}; |
| 50 | ASSERT_MSG(cpu_addr, "Invalid GPU address"); | ||
| 51 | |||
| 52 | const u8* source{Memory::GetPointer(*cpu_addr)}; | ||
| 53 | 50 | ||
| 54 | for (u32 primitive = 0; primitive < count / 4; ++primitive) { | 51 | for (u32 primitive = 0; primitive < count / 4; ++primitive) { |
| 55 | for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { | 52 | for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { |
| @@ -64,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size | |||
| 64 | return index_offset; | 61 | return index_offset; |
| 65 | } | 62 | } |
| 66 | 63 | ||
| 67 | } // namespace OpenGL \ No newline at end of file | 64 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 451de00e8..57329cd61 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -57,11 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { | |||
| 57 | 57 | ||
| 58 | void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { | 58 | void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { |
| 59 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | 59 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |
| 60 | const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)}; | ||
| 61 | 60 | ||
| 62 | addr = cpu_addr ? *cpu_addr : 0; | ||
| 63 | gpu_addr = gpu_addr_; | 61 | gpu_addr = gpu_addr_; |
| 64 | host_ptr = Memory::GetPointer(addr); | 62 | host_ptr = memory_manager.GetPointer(gpu_addr_); |
| 65 | size_in_bytes = SizeInBytesRaw(); | 63 | size_in_bytes = SizeInBytesRaw(); |
| 66 | 64 | ||
| 67 | if (IsPixelFormatASTC(pixel_format)) { | 65 | if (IsPixelFormatASTC(pixel_format)) { |
| @@ -447,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, | |||
| 447 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | 445 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 448 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | 446 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 449 | params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, | 447 | params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, |
| 450 | gl_buffer.data() + offset_gl, params.addr + offset); | 448 | gl_buffer.data() + offset_gl, params.host_ptr + offset); |
| 451 | offset += layer_size; | 449 | offset += layer_size; |
| 452 | offset_gl += gl_size; | 450 | offset_gl += gl_size; |
| 453 | } | 451 | } |
| @@ -456,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, | |||
| 456 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | 454 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 457 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | 455 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 458 | params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, | 456 | params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, |
| 459 | gl_buffer.data(), params.addr + offset); | 457 | gl_buffer.data(), params.host_ptr + offset); |
| 460 | } | 458 | } |
| 461 | } | 459 | } |
| 462 | 460 | ||
| @@ -514,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac | |||
| 514 | "reinterpretation but the texture is tiled."); | 512 | "reinterpretation but the texture is tiled."); |
| 515 | } | 513 | } |
| 516 | const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; | 514 | const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; |
| 517 | 515 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | |
| 518 | glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, | 516 | glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, |
| 519 | Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); | 517 | memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes)); |
| 520 | } | 518 | } |
| 521 | 519 | ||
| 522 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | 520 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); |
| @@ -604,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 604 | 602 | ||
| 605 | ApplyTextureDefaults(texture.handle, params.max_mip_level); | 603 | ApplyTextureDefaults(texture.handle, params.max_mip_level); |
| 606 | 604 | ||
| 607 | OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); | 605 | OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); |
| 608 | 606 | ||
| 609 | // Clamp size to mapped GPU memory region | 607 | // Clamp size to mapped GPU memory region |
| 610 | // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 | 608 | // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 |
| @@ -617,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 617 | LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); | 615 | LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); |
| 618 | cached_size_in_bytes = max_size; | 616 | cached_size_in_bytes = max_size; |
| 619 | } | 617 | } |
| 618 | |||
| 619 | cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr); | ||
| 620 | } | 620 | } |
| 621 | 621 | ||
| 622 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | 622 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); |
| @@ -925,7 +925,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { | |||
| 925 | } | 925 | } |
| 926 | 926 | ||
| 927 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { | 927 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { |
| 928 | if (params.addr == 0 || params.height * params.width == 0) { | 928 | if (params.gpu_addr == 0 || params.height * params.width == 0) { |
| 929 | return {}; | 929 | return {}; |
| 930 | } | 930 | } |
| 931 | 931 | ||
| @@ -979,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | |||
| 979 | const Surface& dst_surface) { | 979 | const Surface& dst_surface) { |
| 980 | const auto& init_params{src_surface->GetSurfaceParams()}; | 980 | const auto& init_params{src_surface->GetSurfaceParams()}; |
| 981 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | 981 | const auto& dst_params{dst_surface->GetSurfaceParams()}; |
| 982 | VAddr address = init_params.addr; | 982 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |
| 983 | const std::size_t layer_size = dst_params.LayerMemorySize(); | 983 | Tegra::GPUVAddr address{init_params.gpu_addr}; |
| 984 | const std::size_t layer_size{dst_params.LayerMemorySize()}; | ||
| 984 | for (u32 layer = 0; layer < dst_params.depth; layer++) { | 985 | for (u32 layer = 0; layer < dst_params.depth; layer++) { |
| 985 | for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { | 986 | for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { |
| 986 | const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); | 987 | const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; |
| 987 | const Surface& copy = TryGet(Memory::GetPointer(sub_address)); | 988 | const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; |
| 988 | if (!copy) | 989 | if (!copy) { |
| 989 | continue; | 990 | continue; |
| 991 | } | ||
| 990 | const auto& src_params{copy->GetSurfaceParams()}; | 992 | const auto& src_params{copy->GetSurfaceParams()}; |
| 991 | const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; | 993 | const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; |
| 992 | const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; | 994 | const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; |
| @@ -1242,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar | |||
| 1242 | return {}; | 1244 | return {}; |
| 1243 | } | 1245 | } |
| 1244 | 1246 | ||
| 1245 | static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { | 1247 | static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params, |
| 1246 | const std::size_t size = params.LayerMemorySize(); | 1248 | u32 mipmap) { |
| 1247 | VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); | 1249 | const std::size_t size{params.LayerMemorySize()}; |
| 1250 | Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; | ||
| 1248 | for (u32 i = 0; i < params.depth; i++) { | 1251 | for (u32 i = 0; i < params.depth; i++) { |
| 1249 | if (start == addr) { | 1252 | if (start == addr) { |
| 1250 | return {i}; | 1253 | return {i}; |
| @@ -1266,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | |||
| 1266 | src_params.height == dst_params.MipHeight(*level) && | 1269 | src_params.height == dst_params.MipHeight(*level) && |
| 1267 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { | 1270 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { |
| 1268 | const std::optional<u32> slot = | 1271 | const std::optional<u32> slot = |
| 1269 | TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); | 1272 | TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); |
| 1270 | if (slot.has_value()) { | 1273 | if (slot.has_value()) { |
| 1271 | glCopyImageSubData(render_surface->Texture().handle, | 1274 | glCopyImageSubData(render_surface->Texture().handle, |
| 1272 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | 1275 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index b3afad139..9366f47f2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -296,7 +296,6 @@ struct SurfaceParams { | |||
| 296 | bool is_array; | 296 | bool is_array; |
| 297 | bool srgb_conversion; | 297 | bool srgb_conversion; |
| 298 | // Parameters used for caching | 298 | // Parameters used for caching |
| 299 | VAddr addr; | ||
| 300 | u8* host_ptr; | 299 | u8* host_ptr; |
| 301 | Tegra::GPUVAddr gpu_addr; | 300 | Tegra::GPUVAddr gpu_addr; |
| 302 | std::size_t size_in_bytes; | 301 | std::size_t size_in_bytes; |
| @@ -349,7 +348,7 @@ public: | |||
| 349 | explicit CachedSurface(const SurfaceParams& params); | 348 | explicit CachedSurface(const SurfaceParams& params); |
| 350 | 349 | ||
| 351 | VAddr GetCpuAddr() const override { | 350 | VAddr GetCpuAddr() const override { |
| 352 | return params.addr; | 351 | return cpu_addr; |
| 353 | } | 352 | } |
| 354 | 353 | ||
| 355 | std::size_t GetSizeInBytes() const override { | 354 | std::size_t GetSizeInBytes() const override { |
| @@ -433,6 +432,7 @@ private: | |||
| 433 | std::size_t memory_size; | 432 | std::size_t memory_size; |
| 434 | bool reinterpreted = false; | 433 | bool reinterpreted = false; |
| 435 | bool must_reload = false; | 434 | bool must_reload = false; |
| 435 | VAddr cpu_addr{}; | ||
| 436 | }; | 436 | }; |
| 437 | 437 | ||
| 438 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | 438 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 60a04e146..1ed740877 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -32,13 +32,10 @@ struct UnspecializedShader { | |||
| 32 | namespace { | 32 | namespace { |
| 33 | 33 | ||
| 34 | /// Gets the address for the specified shader stage program | 34 | /// Gets the address for the specified shader stage program |
| 35 | VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 35 | Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 36 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 36 | const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; |
| 37 | const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; | 37 | const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; |
| 38 | const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + | 38 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; |
| 39 | shader_config.offset); | ||
| 40 | ASSERT_MSG(address, "Invalid GPU address"); | ||
| 41 | return *address; | ||
| 42 | } | 39 | } |
| 43 | 40 | ||
| 44 | /// Gets the shader program code from memory for the specified address | 41 | /// Gets the shader program code from memory for the specified address |
| @@ -214,11 +211,11 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 214 | 211 | ||
| 215 | } // namespace | 212 | } // namespace |
| 216 | 213 | ||
| 217 | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | 214 | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, |
| 218 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 215 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 219 | const PrecompiledPrograms& precompiled_programs, | 216 | const PrecompiledPrograms& precompiled_programs, |
| 220 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) | 217 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) |
| 221 | : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, | 218 | : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, |
| 222 | program_type{program_type}, disk_cache{disk_cache}, | 219 | program_type{program_type}, disk_cache{disk_cache}, |
| 223 | precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { | 220 | precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { |
| 224 | 221 | ||
| @@ -244,11 +241,11 @@ CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | |||
| 244 | disk_cache.SaveRaw(raw); | 241 | disk_cache.SaveRaw(raw); |
| 245 | } | 242 | } |
| 246 | 243 | ||
| 247 | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | 244 | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, |
| 248 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 245 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 249 | const PrecompiledPrograms& precompiled_programs, | 246 | const PrecompiledPrograms& precompiled_programs, |
| 250 | GLShader::ProgramResult result, u8* host_ptr) | 247 | GLShader::ProgramResult result, u8* host_ptr) |
| 251 | : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, | 248 | : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type}, |
| 252 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ | 249 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ |
| 253 | host_ptr} { | 250 | host_ptr} { |
| 254 | 251 | ||
| @@ -273,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | |||
| 273 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | 270 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |
| 274 | } | 271 | } |
| 275 | 272 | ||
| 276 | LabelGLObject(GL_PROGRAM, program->handle, guest_addr); | 273 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 277 | } | 274 | } |
| 278 | 275 | ||
| 279 | handle = program->handle; | 276 | handle = program->handle; |
| @@ -325,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind | |||
| 325 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | 322 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |
| 326 | } | 323 | } |
| 327 | 324 | ||
| 328 | LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); | 325 | LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); |
| 329 | 326 | ||
| 330 | return target_program->handle; | 327 | return target_program->handle; |
| 331 | }; | 328 | }; |
| @@ -488,31 +485,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 488 | return last_shaders[static_cast<u32>(program)]; | 485 | return last_shaders[static_cast<u32>(program)]; |
| 489 | } | 486 | } |
| 490 | 487 | ||
| 491 | const VAddr program_addr{GetShaderAddress(program)}; | 488 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; |
| 489 | const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; | ||
| 492 | 490 | ||
| 493 | // Look up shader in the cache based on address | 491 | // Look up shader in the cache based on address |
| 494 | const auto& host_ptr{Memory::GetPointer(program_addr)}; | 492 | const auto& host_ptr{memory_manager.GetPointer(program_addr)}; |
| 495 | Shader shader{TryGet(host_ptr)}; | 493 | Shader shader{TryGet(host_ptr)}; |
| 496 | 494 | ||
| 497 | if (!shader) { | 495 | if (!shader) { |
| 498 | // No shader found - create a new one | 496 | // No shader found - create a new one |
| 499 | const auto& host_ptr{Memory::GetPointer(program_addr)}; | ||
| 500 | ProgramCode program_code{GetShaderCode(host_ptr)}; | 497 | ProgramCode program_code{GetShaderCode(host_ptr)}; |
| 501 | ProgramCode program_code_b; | 498 | ProgramCode program_code_b; |
| 502 | if (program == Maxwell::ShaderProgram::VertexA) { | 499 | if (program == Maxwell::ShaderProgram::VertexA) { |
| 503 | program_code_b = GetShaderCode( | 500 | program_code_b = GetShaderCode( |
| 504 | Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | 501 | memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); |
| 505 | } | 502 | } |
| 506 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 503 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); |
| 507 | 504 | const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | |
| 508 | const auto found = precompiled_shaders.find(unique_identifier); | 505 | const auto found = precompiled_shaders.find(unique_identifier); |
| 509 | if (found != precompiled_shaders.end()) { | 506 | if (found != precompiled_shaders.end()) { |
| 510 | shader = | 507 | shader = |
| 511 | std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, | 508 | std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, |
| 512 | precompiled_programs, found->second, host_ptr); | 509 | precompiled_programs, found->second, host_ptr); |
| 513 | } else { | 510 | } else { |
| 514 | shader = std::make_shared<CachedShader>( | 511 | shader = std::make_shared<CachedShader>( |
| 515 | program_addr, unique_identifier, program, disk_cache, precompiled_programs, | 512 | cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, |
| 516 | std::move(program_code), std::move(program_code_b), host_ptr); | 513 | std::move(program_code), std::move(program_code_b), host_ptr); |
| 517 | } | 514 | } |
| 518 | Register(shader); | 515 | Register(shader); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 81fe716b4..fd1c85115 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | |||
| 39 | 39 | ||
| 40 | class CachedShader final : public RasterizerCacheObject { | 40 | class CachedShader final : public RasterizerCacheObject { |
| 41 | public: | 41 | public: |
| 42 | explicit CachedShader(VAddr guest_addr, u64 unique_identifier, | 42 | explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, |
| 43 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 43 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 44 | const PrecompiledPrograms& precompiled_programs, | 44 | const PrecompiledPrograms& precompiled_programs, |
| 45 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); | 45 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); |
| 46 | 46 | ||
| 47 | explicit CachedShader(VAddr guest_addr, u64 unique_identifier, | 47 | explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, |
| 48 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 48 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 49 | const PrecompiledPrograms& precompiled_programs, | 49 | const PrecompiledPrograms& precompiled_programs, |
| 50 | GLShader::ProgramResult result, u8* host_ptr); | 50 | GLShader::ProgramResult result, u8* host_ptr); |
| 51 | 51 | ||
| 52 | VAddr GetCpuAddr() const override { | 52 | VAddr GetCpuAddr() const override { |
| 53 | return guest_addr; | 53 | return cpu_addr; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | std::size_t GetSizeInBytes() const override { | 56 | std::size_t GetSizeInBytes() const override { |
| @@ -92,7 +92,7 @@ private: | |||
| 92 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | 92 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; |
| 93 | 93 | ||
| 94 | u8* host_ptr{}; | 94 | u8* host_ptr{}; |
| 95 | VAddr guest_addr{}; | 95 | VAddr cpu_addr{}; |
| 96 | u64 unique_identifier{}; | 96 | u64 unique_identifier{}; |
| 97 | Maxwell::ShaderProgram program_type{}; | 97 | Maxwell::ShaderProgram program_type{}; |
| 98 | ShaderDiskCacheOpenGL& disk_cache; | 98 | ShaderDiskCacheOpenGL& disk_cache; |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index cad7340f5..995d0e068 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/gpu.h" | 9 | #include "video_core/gpu.h" |
| 11 | #include "video_core/textures/decoders.h" | 10 | #include "video_core/textures/decoders.h" |
| 12 | #include "video_core/textures/texture.h" | 11 | #include "video_core/textures/texture.h" |
| @@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) { | |||
| 230 | } | 229 | } |
| 231 | } | 230 | } |
| 232 | 231 | ||
| 233 | void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | 232 | void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, |
| 234 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, | 233 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, |
| 235 | u32 block_depth, u32 width_spacing) { | 234 | u32 block_depth, u32 width_spacing) { |
| 236 | CopySwizzledData((width + tile_size_x - 1) / tile_size_x, | 235 | CopySwizzledData((width + tile_size_x - 1) / tile_size_x, |
| 237 | (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, | 236 | (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, |
| 238 | bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, | 237 | bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, |
| 239 | block_height, block_depth, width_spacing); | 238 | width_spacing); |
| 240 | } | 239 | } |
| 241 | 240 | ||
| 242 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | 241 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, |
| 243 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 242 | u32 width, u32 height, u32 depth, u32 block_height, |
| 244 | u32 block_height, u32 block_depth, u32 width_spacing) { | 243 | u32 block_depth, u32 width_spacing) { |
| 245 | std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); | 244 | std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); |
| 246 | UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, | 245 | UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, |
| 247 | width, height, depth, block_height, block_depth, width_spacing); | 246 | width, height, depth, block_height, block_depth, width_spacing); |
| @@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y | |||
| 249 | } | 248 | } |
| 250 | 249 | ||
| 251 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 252 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { |
| 253 | u32 block_height) { | ||
| 254 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 255 | gob_size_x}; | 253 | gob_size_x}; |
| 256 | for (u32 line = 0; line < subrect_height; ++line) { | 254 | for (u32 line = 0; line < subrect_height; ++line) { |
| @@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 262 | const u32 gob_address = | 260 | const u32 gob_address = |
| 263 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 261 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |
| 264 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 262 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; |
| 265 | const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 263 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 266 | const VAddr dest_addr = swizzled_data + swizzled_offset; | 264 | u8* dest_addr = swizzled_data + swizzled_offset; |
| 267 | 265 | ||
| 268 | Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); | 266 | std::memcpy(dest_addr, source_line, bytes_per_pixel); |
| 269 | } | 267 | } |
| 270 | } | 268 | } |
| 271 | } | 269 | } |
| 272 | 270 | ||
| 273 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 274 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 275 | u32 block_height, u32 offset_x, u32 offset_y) { | 273 | u32 offset_x, u32 offset_y) { |
| 276 | for (u32 line = 0; line < subrect_height; ++line) { | 274 | for (u32 line = 0; line < subrect_height; ++line) { |
| 277 | const u32 y2 = line + offset_y; | 275 | const u32 y2 = line + offset_y; |
| 278 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |
| @@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 282 | const u32 x2 = (x + offset_x) * bytes_per_pixel; | 280 | const u32 x2 = (x + offset_x) * bytes_per_pixel; |
| 283 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; | 281 | const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; |
| 284 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; | 282 | const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; |
| 285 | const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; | 283 | u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; |
| 286 | const VAddr source_addr = swizzled_data + swizzled_offset; | 284 | u8* source_addr = swizzled_data + swizzled_offset; |
| 287 | 285 | ||
| 288 | Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); | 286 | std::memcpy(dest_line, source_addr, bytes_per_pixel); |
| 289 | } | 287 | } |
| 290 | } | 288 | } |
| 291 | } | 289 | } |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 65df86890..e078fa274 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() { | |||
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /// Unswizzles a swizzled texture without changing its format. | 19 | /// Unswizzles a swizzled texture without changing its format. |
| 20 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | 20 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, |
| 21 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 21 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 22 | u32 block_height = TICEntry::DefaultBlockHeight, | 22 | u32 block_height = TICEntry::DefaultBlockHeight, |
| 23 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 23 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); |
| 24 | 24 | ||
| 25 | /// Unswizzles a swizzled texture without changing its format. | 25 | /// Unswizzles a swizzled texture without changing its format. |
| 26 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | 26 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, |
| 27 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 27 | u32 width, u32 height, u32 depth, |
| 28 | u32 block_height = TICEntry::DefaultBlockHeight, | 28 | u32 block_height = TICEntry::DefaultBlockHeight, |
| 29 | u32 block_depth = TICEntry::DefaultBlockHeight, | 29 | u32 block_depth = TICEntry::DefaultBlockHeight, |
| 30 | u32 width_spacing = 0); | 30 | u32 width_spacing = 0); |
| @@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 44 | 44 | ||
| 45 | /// Copies an untiled subrectangle into a tiled surface. | 45 | /// Copies an untiled subrectangle into a tiled surface. |
| 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 47 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); |
| 48 | u32 block_height); | ||
| 49 | 48 | ||
| 50 | /// Copies a tiled subrectangle into a linear surface. | 49 | /// Copies a tiled subrectangle into a linear surface. |
| 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 50 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 52 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 53 | u32 block_height, u32 offset_x, u32 offset_y); | 52 | u32 offset_x, u32 offset_y); |
| 54 | 53 | ||
| 55 | } // namespace Tegra::Texture | 54 | } // namespace Tegra::Texture |
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 71683da8e..29f01dfb2 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp | |||
| @@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() { | |||
| 383 | // TODO: Implement a good way to visualize alpha components! | 383 | // TODO: Implement a good way to visualize alpha components! |
| 384 | 384 | ||
| 385 | QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); | 385 | QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); |
| 386 | std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address); | ||
| 387 | 386 | ||
| 388 | // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. | 387 | // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. |
| 389 | // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. | 388 | // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. |
| 390 | auto unswizzled_data = Tegra::Texture::UnswizzleTexture( | 389 | auto unswizzled_data = Tegra::Texture::UnswizzleTexture( |
| 391 | *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, | 390 | gpu.MemoryManager().GetPointer(surface_address), 1, 1, |
| 392 | surface_height, 1U); | 391 | Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U); |
| 393 | 392 | ||
| 394 | auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, | 393 | auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, |
| 395 | surface_width, surface_height); | 394 | surface_width, surface_height); |