summary | refs | log | tree | commit | diff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author    bunnei  2019-03-16 21:59:45 -0400
committer GitHub  2019-03-16 21:59:45 -0400
commit2392e146b09c2a4b3bb557bb3a20c4afc7f75957 (patch)
tree1eafa50be7af78d74b4781fbe858277ac8ab1d35 /src/video_core/engines
parentMerge pull request #2243 from bunnei/mem-simplify-cache (diff)
parentvideo_core: Refactor to use MemoryManager interface for all memory access. (diff)
downloadyuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.gz
yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.tar.xz
yuzu-2392e146b09c2a4b3bb557bb3a20c4afc7f75957.zip
Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/kepler_memory.cpp15
-rw-r--r--src/video_core/engines/maxwell_3d.cpp41
-rw-r--r--src/video_core/engines/maxwell_dma.cpp28
3 files changed, 29 insertions(+), 55 deletions(-)
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index daefa43a6..0931b9626 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
43 43
44 const GPUVAddr address = regs.dest.Address();
45 const auto dest_address =
46 memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
47 ASSERT_MSG(dest_address, "Invalid GPU address");
48
49 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 44 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
50 // We do this before actually writing the new data because the destination address might contain 45 // We do this before actually writing the new data because the destination address might
51 // a dirty surface that will have to be written back to memory. 46 // contain a dirty surface that will have to be written back to memory.
52 system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), 47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
53 sizeof(u32)); 48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write32(address, data);
54 50
55 Memory::Write32(*dest_address, data);
56 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
57 52
58 state.write_offset++; 53 state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 49979694e..c5d5be4ef 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
270} 270}
271 271
272void Maxwell3D::ProcessQueryGet() { 272void Maxwell3D::ProcessQueryGet() {
273 GPUVAddr sequence_address = regs.query.QueryAddress(); 273 const GPUVAddr sequence_address{regs.query.QueryAddress()};
274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
275 // VAddr before writing. 275 // VAddr before writing.
276 const auto address = memory_manager.GpuToCpuAddress(sequence_address);
277 ASSERT_MSG(address, "Invalid GPU address");
278 276
279 // TODO(Subv): Support the other query units. 277 // TODO(Subv): Support the other query units.
280 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 278 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
309 // Write the current query sequence to the sequence address. 307 // Write the current query sequence to the sequence address.
310 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 308 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
311 // query. 309 // query.
312 Memory::Write32(*address, sequence); 310 memory_manager.Write32(sequence_address, sequence);
313 } else { 311 } else {
314 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast 312 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
315 // GPU, this command may actually take a while to complete in real hardware due to GPU 313 // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
318 query_result.value = result; 316 query_result.value = result;
319 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 317 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
320 query_result.timestamp = system.CoreTiming().GetTicks(); 318 query_result.timestamp = system.CoreTiming().GetTicks();
321 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 319 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
322 } 320 }
323 dirty_flags.OnMemoryWrite(); 321 dirty_flags.OnMemoryWrite();
324 break; 322 break;
@@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
393 // Don't allow writing past the end of the buffer. 391 // Don't allow writing past the end of the buffer.
394 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 392 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
395 393
396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 394 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
397 ASSERT_MSG(address, "Invalid GPU address");
398 395
399 u8* ptr{Memory::GetPointer(*address)}; 396 u8* ptr{memory_manager.GetPointer(address)};
400 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 397 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
401 std::memcpy(ptr, &value, sizeof(u32)); 398 memory_manager.Write32(address, value);
402 399
403 dirty_flags.OnMemoryWrite(); 400 dirty_flags.OnMemoryWrite();
404 401
@@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
407} 404}
408 405
409Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
410 const GPUVAddr tic_base_address = regs.tic.TICAddress(); 407 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
411
412 const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
413 const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
414 ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
415 408
416 Texture::TICEntry tic_entry; 409 Texture::TICEntry tic_entry;
417 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 410 memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
418 411
419 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 412 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
420 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 413 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
432} 425}
433 426
434Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 427Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
435 const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 428 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
436
437 const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
438 const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
439 ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
440 429
441 Texture::TSCEntry tsc_entry; 430 Texture::TSCEntry tsc_entry;
442 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 431 memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
443 return tsc_entry; 432 return tsc_entry;
444} 433}
445 434
@@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
458 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 447 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
459 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 448 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
460 449
461 const auto address = memory_manager.GpuToCpuAddress(current_texture); 450 const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
462 ASSERT_MSG(address, "Invalid GPU address");
463
464 const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
465 451
466 Texture::FullTextureInfo tex_info{}; 452 Texture::FullTextureInfo tex_info{};
467 // TODO(Subv): Use the shader to determine which textures are actually accessed. 453 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
496 482
497 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 483 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
498 484
499 const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 485 const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
500 ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
501
502 const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
503 486
504 Texture::FullTextureInfo tex_info{}; 487 Texture::FullTextureInfo tex_info{};
505 tex_info.index = static_cast<u32>(offset); 488 tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 415a6319a..a0ded4c25 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
43 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
44 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
45 45
46 const auto source_cpu = memory_manager.GpuToCpuAddress(source);
47 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
48 ASSERT_MSG(source_cpu, "Invalid source GPU address");
49 ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
50
51 // TODO(Subv): Perform more research and implement all features of this engine. 46 // TODO(Subv): Perform more research and implement all features of this engine.
52 ASSERT(regs.exec.enable_swizzle == 0); 47 ASSERT(regs.exec.enable_swizzle == 0);
53 ASSERT(regs.exec.query_mode == Regs::QueryMode::None); 48 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
70 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 65 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
71 // y_count). 66 // y_count).
72 if (!regs.exec.enable_2d) { 67 if (!regs.exec.enable_2d) {
73 Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); 68 memory_manager.CopyBlock(dest, source, regs.x_count);
74 return; 69 return;
75 } 70 }
76 71
@@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
79 // rectangle. There is no need to manually flush/invalidate the regions because 74 // rectangle. There is no need to manually flush/invalidate the regions because
80 // CopyBlock does that for us. 75 // CopyBlock does that for us.
81 for (u32 line = 0; line < regs.y_count; ++line) { 76 for (u32 line = 0; line < regs.y_count; ++line) {
82 const VAddr source_line = *source_cpu + line * regs.src_pitch; 77 const GPUVAddr source_line = source + line * regs.src_pitch;
83 const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; 78 const GPUVAddr dest_line = dest + line * regs.dst_pitch;
84 Memory::CopyBlock(dest_line, source_line, regs.x_count); 79 memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
85 } 80 }
86 return; 81 return;
87 } 82 }
@@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() {
90 85
91 const std::size_t copy_size = regs.x_count * regs.y_count; 86 const std::size_t copy_size = regs.x_count * regs.y_count;
92 87
88 auto source_ptr{memory_manager.GetPointer(source)};
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90
93 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 91 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
94 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 92 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
95 // copying. 93 // copying.
96 Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( 94 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
97 ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
98 95
99 // We have to invalidate the destination region to evict any outdated surfaces from the 96 // We have to invalidate the destination region to evict any outdated surfaces from the
100 // cache. We do this before actually writing the new data because the destination address 97 // cache. We do this before actually writing the new data because the destination address
101 // might contain a dirty surface that will have to be written back to memory. 98 // might contain a dirty surface that will have to be written back to memory.
102 Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( 99 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
103 ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
104 }; 100 };
105 101
106 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 102 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
113 copy_size * src_bytes_per_pixel); 109 copy_size * src_bytes_per_pixel);
114 110
115 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 111 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
116 regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, 112 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
117 *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, 113 regs.src_params.BlockHeight(), regs.src_params.pos_x,
118 regs.src_params.pos_y); 114 regs.src_params.pos_y);
119 } else { 115 } else {
120 ASSERT(regs.dst_params.size_z == 1); 116 ASSERT(regs.dst_params.size_z == 1);
@@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
127 123
128 // If the input is linear and the output is tiled, swizzle the input and copy it over. 124 // If the input is linear and the output is tiled, swizzle the input and copy it over.
129 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 125 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
130 src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); 126 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
131 } 127 }
132} 128}
133 129