summaryrefslogtreecommitdiff
path: root/src/core/hw/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw/gpu.cpp')
-rw-r--r--src/core/hw/gpu.cpp151
1 files changed, 86 insertions, 65 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..0e6b91e3a 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,34 +5,26 @@
5#include <cstring> 5#include <cstring>
6#include <numeric> 6#include <numeric>
7#include <type_traits> 7#include <type_traits>
8
9#include "common/color.h" 8#include "common/color.h"
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "common/logging/log.h" 10#include "common/logging/log.h"
12#include "common/microprofile.h" 11#include "common/microprofile.h"
13#include "common/vector_math.h" 12#include "common/vector_math.h"
14
15#include "core/settings.h"
16#include "core/memory.h"
17#include "core/core_timing.h" 13#include "core/core_timing.h"
18
19#include "core/hle/service/gsp_gpu.h" 14#include "core/hle/service/gsp_gpu.h"
20#include "core/hle/service/hid/hid.h" 15#include "core/hle/service/hid/hid.h"
21
22#include "core/hw/hw.h"
23#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
24 17#include "core/hw/hw.h"
18#include "core/memory.h"
19#include "core/settings.h"
25#include "core/tracer/recorder.h" 20#include "core/tracer/recorder.h"
26
27#include "video_core/command_processor.h" 21#include "video_core/command_processor.h"
22#include "video_core/debug_utils/debug_utils.h"
28#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
29#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
30#include "video_core/utils.h" 25#include "video_core/utils.h"
31#include "video_core/video_core.h" 26#include "video_core/video_core.h"
32 27
33#include "video_core/debug_utils/debug_utils.h"
34
35
36namespace GPU { 28namespace GPU {
37 29
38Regs g_regs; 30Regs g_regs;
@@ -49,7 +41,7 @@ static u64 frame_count;
49static bool last_skip_frame; 41static bool last_skip_frame;
50 42
51template <typename T> 43template <typename T>
52inline void Read(T &var, const u32 raw_addr) { 44inline void Read(T& var, const u32 raw_addr) {
53 u32 addr = raw_addr - HW::VADDR_GPU; 45 u32 addr = raw_addr - HW::VADDR_GPU;
54 u32 index = addr / 4; 46 u32 index = addr / 4;
55 47
@@ -105,8 +97,7 @@ inline void Write(u32 addr, const T data) {
105 97
106 // Memory fills are triggered once the fill value is written. 98 // Memory fills are triggered once the fill value is written.
107 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): 99 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
108 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): 100 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
109 {
110 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); 101 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
111 auto& config = g_regs.memory_fill_config[is_second_filler]; 102 auto& config = g_regs.memory_fill_config[is_second_filler];
112 103
@@ -125,7 +116,9 @@ inline void Write(u32 addr, const T data) {
125 // regions that were between surfaces or within the touching 116 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here. 117 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { 118 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); 119 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
120 config.GetEndAddress() -
121 config.GetStartAddress());
129 122
130 if (config.fill_24bit) { 123 if (config.fill_24bit) {
131 // fill with 24-bit values 124 // fill with 24-bit values
@@ -150,7 +143,8 @@ inline void Write(u32 addr, const T data) {
150 } 143 }
151 } 144 }
152 145
153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 146 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
147 config.GetEndAddress());
154 148
155 if (!is_second_filler) { 149 if (!is_second_filler) {
156 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); 150 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +161,15 @@ inline void Write(u32 addr, const T data) {
167 break; 161 break;
168 } 162 }
169 163
170 case GPU_REG_INDEX(display_transfer_config.trigger): 164 case GPU_REG_INDEX(display_transfer_config.trigger): {
171 {
172 MICROPROFILE_SCOPE(GPU_DisplayTransfer); 165 MICROPROFILE_SCOPE(GPU_DisplayTransfer);
173 166
174 const auto& config = g_regs.display_transfer_config; 167 const auto& config = g_regs.display_transfer_config;
175 if (config.trigger & 1) { 168 if (config.trigger & 1) {
176 169
177 if (Pica::g_debug_context) 170 if (Pica::g_debug_context)
178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 171 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
172 nullptr);
179 173
180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { 174 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 175 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +181,23 @@ inline void Write(u32 addr, const T data) {
187 u32 output_width = config.texture_copy.output_width * 16; 181 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16; 182 u32 output_gap = config.texture_copy.output_gap * 16;
189 183
190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); 184 size_t contiguous_input_size =
191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size)); 185 config.texture_copy.size / input_width * (input_width + input_gap);
186 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
187 static_cast<u32>(contiguous_input_size));
192 188
193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 189 size_t contiguous_output_size =
194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); 190 config.texture_copy.size / output_width * (output_width + output_gap);
191 Memory::RasterizerFlushAndInvalidateRegion(
192 config.GetPhysicalOutputAddress(),
193 static_cast<u32>(contiguous_output_size));
195 194
196 u32 remaining_size = config.texture_copy.size; 195 u32 remaining_size = config.texture_copy.size;
197 u32 remaining_input = input_width; 196 u32 remaining_input = input_width;
198 u32 remaining_output = output_width; 197 u32 remaining_output = output_width;
199 while (remaining_size > 0) { 198 while (remaining_size > 0) {
200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); 199 u32 copy_size =
200 std::min({remaining_input, remaining_output, remaining_size});
201 201
202 std::memcpy(dst_pointer, src_pointer, copy_size); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
203 src_pointer += copy_size; 203 src_pointer += copy_size;
@@ -217,10 +217,11 @@ inline void Write(u32 addr, const T data) {
217 } 217 }
218 } 218 }
219 219
220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 220 LOG_TRACE(
221 config.texture_copy.size, 221 HW_GPU,
222 config.GetPhysicalInputAddress(), input_width, input_gap, 222 "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
223 config.GetPhysicalOutputAddress(), output_width, output_gap, 223 config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
224 input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
224 config.flags); 225 config.flags);
225 226
226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 227 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +229,8 @@ inline void Write(u32 addr, const T data) {
228 } 229 }
229 230
230 if (config.scaling > config.ScaleXY) { 231 if (config.scaling > config.ScaleXY) {
231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 232 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
233 config.scaling.Value());
232 UNIMPLEMENTED(); 234 UNIMPLEMENTED();
233 break; 235 break;
234 } 236 }
@@ -245,11 +247,14 @@ inline void Write(u32 addr, const T data) {
245 u32 output_width = config.output_width >> horizontal_scale; 247 u32 output_width = config.output_width >> horizontal_scale;
246 u32 output_height = config.output_height >> vertical_scale; 248 u32 output_height = config.output_height >> vertical_scale;
247 249
248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 250 u32 input_size = config.input_width * config.input_height *
249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 251 GPU::Regs::BytesPerPixel(config.input_format);
252 u32 output_size =
253 output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
250 254
251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); 255 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); 256 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
257 output_size);
253 258
254 for (u32 y = 0; y < output_height; ++y) { 259 for (u32 y = 0; y < output_height; ++y) {
255 for (u32 x = 0; x < output_width; ++x) { 260 for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +283,14 @@ inline void Write(u32 addr, const T data) {
278 u32 coarse_y = y & ~7; 283 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel; 284 u32 stride = output_width * dst_bytes_per_pixel;
280 285
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 286 src_offset =
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 287 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
288 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
289 coarse_y * stride;
283 } else { 290 } else {
284 // Both input and output are linear 291 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 292 src_offset =
293 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 294 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 } 295 }
288 } else { 296 } else {
@@ -291,7 +299,9 @@ inline void Write(u32 addr, const T data) {
291 u32 coarse_y = input_y & ~7; 299 u32 coarse_y = input_y & ~7;
292 u32 stride = config.input_width * src_bytes_per_pixel; 300 u32 stride = config.input_width * src_bytes_per_pixel;
293 301
294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 302 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
303 src_bytes_per_pixel) +
304 coarse_y * stride;
295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 305 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
296 } else { 306 } else {
297 // Both input and output are tiled 307 // Both input and output are tiled
@@ -301,20 +311,27 @@ inline void Write(u32 addr, const T data) {
301 u32 in_coarse_y = input_y & ~7; 311 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel; 312 u32 in_stride = config.input_width * src_bytes_per_pixel;
303 313
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 314 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 315 src_bytes_per_pixel) +
316 in_coarse_y * in_stride;
317 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
318 out_coarse_y * out_stride;
306 } 319 }
307 } 320 }
308 321
309 const u8* src_pixel = src_pointer + src_offset; 322 const u8* src_pixel = src_pointer + src_offset;
310 src_color = DecodePixel(config.input_format, src_pixel); 323 src_color = DecodePixel(config.input_format, src_pixel);
311 if (config.scaling == config.ScaleX) { 324 if (config.scaling == config.ScaleX) {
312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 325 Math::Vec4<u8> pixel =
326 DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
313 src_color = ((src_color + pixel) / 2).Cast<u8>(); 327 src_color = ((src_color + pixel) / 2).Cast<u8>();
314 } else if (config.scaling == config.ScaleXY) { 328 } else if (config.scaling == config.ScaleXY) {
315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 329 Math::Vec4<u8> pixel1 = DecodePixel(
316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 330 config.input_format, src_pixel + 1 * src_bytes_per_pixel);
317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 331 Math::Vec4<u8> pixel2 = DecodePixel(
332 config.input_format, src_pixel + 2 * src_bytes_per_pixel);
333 Math::Vec4<u8> pixel3 = DecodePixel(
334 config.input_format, src_pixel + 3 * src_bytes_per_pixel);
318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 335 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
319 } 336 }
320 337
@@ -341,17 +358,20 @@ inline void Write(u32 addr, const T data) {
341 break; 358 break;
342 359
343 default: 360 default:
344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 361 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
362 config.output_format.Value());
345 break; 363 break;
346 } 364 }
347 } 365 }
348 } 366 }
349 367
350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 368 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 369 "0x%08x(%ux%u), dst format %x, flags 0x%08X",
352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 370 config.output_height * output_width *
353 config.GetPhysicalOutputAddress(), output_width, output_height, 371 GPU::Regs::BytesPerPixel(config.output_format),
354 config.output_format.Value(), config.flags); 372 config.GetPhysicalInputAddress(), config.input_width.Value(),
373 config.input_height.Value(), config.GetPhysicalOutputAddress(),
374 output_width, output_height, config.output_format.Value(), config.flags);
355 } 375 }
356 376
357 g_regs.display_transfer_config.trigger = 0; 377 g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +381,16 @@ inline void Write(u32 addr, const T data) {
361 } 381 }
362 382
363 // Seems like writing to this register triggers processing 383 // Seems like writing to this register triggers processing
364 case GPU_REG_INDEX(command_processor_config.trigger): 384 case GPU_REG_INDEX(command_processor_config.trigger): {
365 {
366 const auto& config = g_regs.command_processor_config; 385 const auto& config = g_regs.command_processor_config;
367 if (config.trigger & 1) 386 if (config.trigger & 1) {
368 {
369 MICROPROFILE_SCOPE(GPU_CmdlistProcessing); 387 MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
370 388
371 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); 389 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
372 390
373 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 391 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
374 Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); 392 Pica::g_debug_context->recorder->MemoryAccessed(
393 (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
375 } 394 }
376 395
377 Pica::CommandProcessor::ProcessCommandList(buffer, config.size); 396 Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +408,17 @@ inline void Write(u32 addr, const T data) {
389 // This is happening *after* handling the write to make sure we properly catch all memory reads. 408 // This is happening *after* handling the write to make sure we properly catch all memory reads.
390 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 409 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
391 // addr + GPU VBase - IO VBase + IO PBase 410 // addr + GPU VBase - IO VBase + IO PBase
392 Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); 411 Pica::g_debug_context->recorder->RegisterWritten<T>(
412 addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
393 } 413 }
394} 414}
395 415
396// Explicitly instantiate template functions because we aren't defining this in the header: 416// Explicitly instantiate template functions because we aren't defining this in the header:
397 417
398template void Read<u64>(u64 &var, const u32 addr); 418template void Read<u64>(u64& var, const u32 addr);
399template void Read<u32>(u32 &var, const u32 addr); 419template void Read<u32>(u32& var, const u32 addr);
400template void Read<u16>(u16 &var, const u32 addr); 420template void Read<u16>(u16& var, const u32 addr);
401template void Read<u8>(u8 &var, const u32 addr); 421template void Read<u8>(u8& var, const u32 addr);
402 422
403template void Write<u64>(u32 addr, const u64 data); 423template void Write<u64>(u32 addr, const u64 data);
404template void Write<u32>(u32 addr, const u32 data); 424template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +437,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
417 // - If frameskip == 0 (disabled), always swap buffers 437 // - If frameskip == 0 (disabled), always swap buffers
418 // - If frameskip == 1, swap buffers every other frame (starting from the first frame) 438 // - If frameskip == 1, swap buffers every other frame (starting from the first frame)
419 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) 439 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
420 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || 440 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
421 Settings::values.frame_skip == 0) { 441 last_skip_frame != g_skip_frame) ||
442 Settings::values.frame_skip == 0) {
422 VideoCore::g_renderer->SwapBuffers(); 443 VideoCore::g_renderer->SwapBuffers();
423 } 444 }
424 445
@@ -448,12 +469,12 @@ void Init() {
448 // .. or at least these are the ones used by system applets. 469 // .. or at least these are the ones used by system applets.
449 // There's probably a smarter way to come up with addresses 470 // There's probably a smarter way to come up with addresses
450 // like this which does not require hardcoding. 471 // like this which does not require hardcoding.
451 framebuffer_top.address_left1 = 0x181E6000; 472 framebuffer_top.address_left1 = 0x181E6000;
452 framebuffer_top.address_left2 = 0x1822C800; 473 framebuffer_top.address_left2 = 0x1822C800;
453 framebuffer_top.address_right1 = 0x18273000; 474 framebuffer_top.address_right1 = 0x18273000;
454 framebuffer_top.address_right2 = 0x182B9800; 475 framebuffer_top.address_right2 = 0x182B9800;
455 framebuffer_sub.address_left1 = 0x1848F000; 476 framebuffer_sub.address_left1 = 0x1848F000;
456 framebuffer_sub.address_left2 = 0x184C7800; 477 framebuffer_sub.address_left2 = 0x184C7800;
457 478
458 framebuffer_top.width.Assign(240); 479 framebuffer_top.width.Assign(240);
459 framebuffer_top.height.Assign(400); 480 framebuffer_top.height.Assign(400);