summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
m---------externals/boost0
-rw-r--r--src/citra/config.cpp1
-rw-r--r--src/citra/default_ini.h4
-rw-r--r--src/citra_qt/config.cpp2
-rw-r--r--src/citra_qt/configure_general.cpp2
-rw-r--r--src/citra_qt/configure_general.ui7
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/gsp_gpu.cpp31
-rw-r--r--src/core/hle/service/y2r_u.cpp10
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h16
-rw-r--r--src/core/settings.cpp2
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp4
-rw-r--r--src/video_core/pica.h2
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp839
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h76
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp699
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h209
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_state.h27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp128
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h44
-rw-r--r--src/video_core/swrasterizer.h6
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/video_core/video_core.h1
30 files changed, 1739 insertions, 941 deletions
diff --git a/externals/boost b/externals/boost
Subproject d81b9269900ae183d0dc98403eea4c971590a80 Subproject 2dcb9d979665b6aabb1635c617973e02914e60e
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 6b6617352..9e2ecd307 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,6 +65,7 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..1f1aa716b 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,6 +46,10 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index e363be38a..7dc61fe40 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -45,6 +45,7 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
@@ -129,6 +130,7 @@ void Config::SaveValues() {
129 qt_config->beginGroup("Renderer"); 130 qt_config->beginGroup("Renderer");
130 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 131 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
131 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 132 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
133 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
132 134
133 // Cast to double because Qt's written float values are not human-readable 135 // Cast to double because Qt's written float values are not human-readable
134 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 136 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
index a27d0d26c..62648e665 100644
--- a/src/citra_qt/configure_general.cpp
+++ b/src/citra_qt/configure_general.cpp
@@ -25,6 +25,7 @@ void ConfigureGeneral::setConfiguration() {
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value); 25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer); 26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit); 27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
28} 29}
29 30
30void ConfigureGeneral::applyConfiguration() { 31void ConfigureGeneral::applyConfiguration() {
@@ -33,5 +34,6 @@ void ConfigureGeneral::applyConfiguration() {
33 Settings::values.region_value = ui->region_combobox->currentIndex(); 34 Settings::values.region_value = ui->region_combobox->currentIndex();
34 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked(); 35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
35 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked(); 36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
36 Settings::Apply(); 38 Settings::Apply();
37} 39}
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 47184c5c6..5eb309793 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -128,6 +128,13 @@
128 </property> 128 </property>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
131 </layout> 138 </layout>
132 </item> 139 </item>
133 </layout> 140 </layout>
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..211fcf599 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/bit_field.h" 5#include "common/bit_field.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/profiler.h"
7 8
8#include "core/memory.h" 9#include "core/memory.h"
9#include "core/hle/kernel/event.h" 10#include "core/hle/kernel/event.h"
@@ -15,8 +16,6 @@
15 16
16#include "video_core/gpu_debugger.h" 17#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 19
21#include "gsp_gpu.h" 20#include "gsp_gpu.h"
22 21
@@ -291,8 +290,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 290 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 291 u32 process = cmd_buff[4];
293 292
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 293 // TODO(purpasmart96): Verify return header on HW
297 294
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 295 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -408,6 +405,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 405 g_interrupt_event->Signal();
409} 406}
410 407
408MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
409
411/// Executes the next GSP command 410/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 411static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 412 // Utility function to convert register ID to address
@@ -419,18 +418,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 418
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 419 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 420 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 421 {
423 command.dma_request.size); 422 MICROPROFILE_SCOPE(GPU_GSP_DMA);
423
424 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
425 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
426 command.dma_request.size);
427 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
428 command.dma_request.size);
424 429
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 430 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 431 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 432 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 433 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 434 break;
433 435 }
434 // TODO: This will need some rework in the future. (why?) 436 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 437 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 438 {
@@ -517,13 +519,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 519
518 case CommandId::CACHE_FLUSH: 520 case CommandId::CACHE_FLUSH:
519 { 521 {
520 for (auto& region : command.cache_flush.regions) { 522 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 523 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 524 break;
528 } 525 }
529 526
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..1672ad775 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -12,9 +12,6 @@
12#include "core/hle/service/y2r_u.h" 12#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 13#include "core/hw/y2r.h"
14 14
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 15////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 16// Namespace Y2R_U
20 17
@@ -262,13 +259,12 @@ static void SetAlpha(Service::Interface* self) {
262static void StartConversion(Service::Interface* self) { 259static void StartConversion(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 260 u32* cmd_buff = Kernel::GetCommandBuffer();
264 261
265 HW::Y2R::PerformConversion(conversion);
266
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 262 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
268 u32 total_output_size = conversion.input_lines * 263 u32 total_output_size = conversion.input_lines *
269 (conversion.dst.transfer_unit + conversion.dst.gap); 264 (conversion.dst.transfer_unit + conversion.dst.gap);
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 265 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 266
267 HW::Y2R::PerformConversion(conversion);
272 268
273 LOG_DEBUG(Service_Y2R, "called"); 269 LOG_DEBUG(Service_Y2R, "called");
274 completion_event->Signal(); 270 completion_event->Signal();
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..2fe856293 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size);
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
146 UNREACHABLE();
147 }
148
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addresses with attribute `PageType::Special`. 153 * This function should only be called for virtual addresses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
268 if (start == 0) {
269 return;
270 }
271
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
314 paddr += PAGE_SIZE;
315 }
316}
317
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 5af72b7a7..9caa3c3f5 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -148,4 +148,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
148 */ 148 */
149u8* GetPhysicalPointer(PAddr address); 149u8* GetPhysicalPointer(PAddr address);
150 150
151/**
152 * Adds the supplied value to the rasterizer resource cache counter of each
153 * page touching the region.
154 */
155void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
156
157/**
158 * Flushes any externally cached rasterizer resources touching the given region.
159 */
160void RasterizerFlushRegion(PAddr start, u32 size);
161
162/**
163 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
164 */
165void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
166
151} 167}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 1aa26fbd2..eaf5c8461 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -19,7 +19,7 @@ void Apply() {
19 19
20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
21 VideoCore::g_shader_jit_enabled = values.use_shader_jit; 21 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
22 22 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
23} 23}
24 24
25} // namespace 25} // namespace
diff --git a/src/core/settings.h b/src/core/settings.h
index 4933a516d..d620d8461 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -55,6 +55,7 @@ struct Values {
55 // Renderer 55 // Renderer
56 bool use_hw_renderer; 56 bool use_hw_renderer;
57 bool use_shader_jit; 57 bool use_shader_jit;
58 bool use_scaled_resolution;
58 59
59 float bg_red; 60 float bg_red;
60 float bg_green; 61 float bg_green;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c3a9c9598..1f058c4e2 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -47,8 +47,8 @@ void DebugContext::OnEvent(Event event, void* data) {
47 { 47 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 48 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 49
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 50 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 51 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 52
53 // TODO: Should stop the CPU thread here once we multithread emulation. 53 // TODO: Should stop the CPU thread here once we multithread emulation.
54 54
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4552ff81c..1810eca98 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -577,7 +577,7 @@ struct Regs {
577 } 577 }
578 } 578 }
579 579
580 struct { 580 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x3); 581 INSERT_PADDING_WORDS(0x3);
582 582
583 union { 583 union {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discarded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..ccd497de0 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -21,7 +21,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 21 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 22 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 23 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 24 }
27} 25}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ca9f45e2..da4121c35 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 36 stage.GetAlphaMultiplier() == 1);
37} 37}
38 38
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 40 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 41 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 42 texture_samplers[i].Create();
@@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() {
61 58
62 uniform_block_data.dirty = true; 59 uniform_block_data.dirty = true;
63 60
61 for (unsigned index = 0; index < lighting_luts.size(); index++) {
62 uniform_block_data.lut_dirty[index] = true;
63 }
64
64 // Set vertex attributes 65 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 66 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 67 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -81,70 +82,24 @@ void RasterizerOpenGL::InitObjects() {
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 84
84 SetShader(); 85 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 86 framebuffer.Create();
121 87
122 state.draw.framebuffer = framebuffer.handle; 88 // Allocate and bind lighting lut textures
89 for (size_t i = 0; i < lighting_luts.size(); ++i) {
90 lighting_luts[i].Create();
91 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
92 }
123 state.Apply(); 93 state.Apply();
124 94
125 glActiveTexture(GL_TEXTURE0); 95 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i); 96 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 97 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 98 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 99 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 100 }
140 state.Apply();
141
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146 101
147void RasterizerOpenGL::Reset() { 102 // Sync fixed function OpenGL state
148 SyncCullMode(); 103 SyncCullMode();
149 SyncDepthModifiers(); 104 SyncDepthModifiers();
150 SyncBlendEnabled(); 105 SyncBlendEnabled();
@@ -156,10 +111,10 @@ void RasterizerOpenGL::Reset() {
156 SyncColorWriteMask(); 111 SyncColorWriteMask();
157 SyncStencilWriteMask(); 112 SyncStencilWriteMask();
158 SyncDepthWriteMask(); 113 SyncDepthWriteMask();
114}
159 115
160 SetShader(); 116RasterizerOpenGL::~RasterizerOpenGL() {
161 117
162 res_cache.InvalidateAll();
163} 118}
164 119
165/** 120/**
@@ -196,47 +151,98 @@ void RasterizerOpenGL::DrawTriangles() {
196 if (vertex_batch.empty()) 151 if (vertex_batch.empty())
197 return; 152 return;
198 153
199 SyncFramebuffer(); 154 const auto& regs = Pica::g_state.regs;
200 SyncDrawState(); 155
156 // Sync and bind the framebuffer surfaces
157 CachedSurface* color_surface;
158 CachedSurface* depth_surface;
159 MathUtil::Rectangle<int> rect;
160 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
161
162 state.draw.draw_framebuffer = framebuffer.handle;
163 state.Apply();
164
165 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
166 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
167 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
168 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
169
170 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
171 return;
172 }
173
174 // Sync the viewport
175 // These registers hold half-width and half-height, so must be multiplied by 2
176 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
177 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
178
179 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
180 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
181 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
182
183 // Sync and bind the texture surfaces
184 const auto pica_textures = regs.GetTextures();
185 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
186 const auto& texture = pica_textures[texture_index];
187
188 if (texture.enabled) {
189 texture_samplers[texture_index].SyncWithConfig(texture.config);
190 CachedSurface* surface = res_cache.GetTextureSurface(texture);
191 if (surface != nullptr) {
192 state.texture_units[texture_index].texture_2d = surface->texture.handle;
193 } else {
194 // Can occur when texture addr is null or its memory is unmapped/invalid
195 state.texture_units[texture_index].texture_2d = 0;
196 }
197 } else {
198 state.texture_units[texture_index].texture_2d = 0;
199 }
200 }
201 201
202 if (state.draw.shader_dirty) { 202 // Sync and bind the shader
203 if (shader_dirty) {
203 SetShader(); 204 SetShader();
204 state.draw.shader_dirty = false; 205 shader_dirty = false;
205 } 206 }
206 207
207 for (unsigned index = 0; index < lighting_lut.size(); index++) { 208 // Sync the lighting luts
209 for (unsigned index = 0; index < lighting_luts.size(); index++) {
208 if (uniform_block_data.lut_dirty[index]) { 210 if (uniform_block_data.lut_dirty[index]) {
209 SyncLightingLUT(index); 211 SyncLightingLUT(index);
210 uniform_block_data.lut_dirty[index] = false; 212 uniform_block_data.lut_dirty[index] = false;
211 } 213 }
212 } 214 }
213 215
216 // Sync the uniform data
214 if (uniform_block_data.dirty) { 217 if (uniform_block_data.dirty) {
215 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 218 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
216 uniform_block_data.dirty = false; 219 uniform_block_data.dirty = false;
217 } 220 }
218 221
222 state.Apply();
223
224 // Draw the vertex batch
219 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 225 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
220 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 226 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
221 227
222 vertex_batch.clear(); 228 // Mark framebuffer surfaces as dirty
223 229 // TODO: Restrict invalidation area to the viewport
224 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 230 if (color_surface != nullptr) {
225 const auto& regs = Pica::g_state.regs; 231 color_surface->dirty = true;
226 232 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
227 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 233 }
228 * fb_color_texture.width * fb_color_texture.height; 234 if (depth_surface != nullptr) {
229 235 depth_surface->dirty = true;
230 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 236 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
231 * fb_depth_texture.width * fb_depth_texture.height; 237 }
232 238
233 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 239 vertex_batch.clear();
234 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
235}
236 240
237void RasterizerOpenGL::FlushFramebuffer() { 241 // Unbind textures for potential future use as framebuffer attachments
238 CommitColorBuffer(); 242 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
239 CommitDepthBuffer(); 243 state.texture_units[texture_index].texture_2d = 0;
244 }
245 state.Apply();
240} 246}
241 247
242void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 248void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -268,7 +274,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
268 // Alpha test 274 // Alpha test
269 case PICA_REG_INDEX(output_merger.alpha_test): 275 case PICA_REG_INDEX(output_merger.alpha_test):
270 SyncAlphaTest(); 276 SyncAlphaTest();
271 state.draw.shader_dirty = true; 277 shader_dirty = true;
272 break; 278 break;
273 279
274 // Sync GL stencil test + stencil write mask 280 // Sync GL stencil test + stencil write mask
@@ -334,7 +340,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
334 case PICA_REG_INDEX(tev_stage5.color_op): 340 case PICA_REG_INDEX(tev_stage5.color_op):
335 case PICA_REG_INDEX(tev_stage5.color_scale): 341 case PICA_REG_INDEX(tev_stage5.color_scale):
336 case PICA_REG_INDEX(tev_combiner_buffer_input): 342 case PICA_REG_INDEX(tev_combiner_buffer_input):
337 state.draw.shader_dirty = true; 343 shader_dirty = true;
338 break; 344 break;
339 case PICA_REG_INDEX(tev_stage0.const_r): 345 case PICA_REG_INDEX(tev_stage0.const_r):
340 SyncTevConstColor(0, regs.tev_stage0); 346 SyncTevConstColor(0, regs.tev_stage0);
@@ -521,41 +527,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
521 } 527 }
522} 528}
523 529
530void RasterizerOpenGL::FlushAll() {
531 res_cache.FlushAll();
532}
533
524void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 534void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
525 const auto& regs = Pica::g_state.regs; 535 res_cache.FlushRegion(addr, size, nullptr, false);
536}
526 537
527 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 538void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
528 * fb_color_texture.width * fb_color_texture.height; 539 res_cache.FlushRegion(addr, size, nullptr, true);
540}
529 541
530 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 542bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
531 * fb_depth_texture.width * fb_depth_texture.height; 543 using PixelFormat = CachedSurface::PixelFormat;
544 using SurfaceType = CachedSurface::SurfaceType;
532 545
533 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 546 if (config.is_texture_copy) {
534 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 547 // TODO(tfarley): Try to hardware accelerate this
535 CommitColorBuffer(); 548 return false;
549 }
536 550
537 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 551 CachedSurface src_params;
538 CommitDepthBuffer(); 552 src_params.addr = config.GetPhysicalInputAddress();
553 src_params.width = config.output_width;
554 src_params.height = config.output_height;
555 src_params.is_tiled = !config.input_linear;
556 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
557
558 CachedSurface dst_params;
559 dst_params.addr = config.GetPhysicalOutputAddress();
560 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
561 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
562 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
563 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
564
565 MathUtil::Rectangle<int> src_rect;
566 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
567
568 if (src_surface == nullptr) {
569 return false;
570 }
571
572 // Require destination surface to have same resolution scale as source to preserve scaling
573 dst_params.res_scale_width = src_surface->res_scale_width;
574 dst_params.res_scale_height = src_surface->res_scale_height;
575
576 MathUtil::Rectangle<int> dst_rect;
577 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
578
579 if (dst_surface == nullptr) {
580 return false;
581 }
582
583 // Don't accelerate if the src and dst surfaces are the same
584 if (src_surface == dst_surface) {
585 return false;
586 }
587
588 if (config.flip_vertically) {
589 std::swap(dst_rect.top, dst_rect.bottom);
590 }
591
592 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
593 return false;
594 }
595
596 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
597 dst_surface->dirty = true;
598 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
599 return true;
539} 600}
540 601
541void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 602bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
542 const auto& regs = Pica::g_state.regs; 603 using PixelFormat = CachedSurface::PixelFormat;
604 using SurfaceType = CachedSurface::SurfaceType;
605
606 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
607
608 if (dst_surface == nullptr) {
609 return false;
610 }
611
612 OpenGLState cur_state = OpenGLState::GetCurState();
613
614 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
543 615
544 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 616 GLuint old_fb = cur_state.draw.draw_framebuffer;
545 * fb_color_texture.width * fb_color_texture.height; 617 cur_state.draw.draw_framebuffer = framebuffer.handle;
618 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
619 cur_state.Apply();
546 620
547 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 621 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
548 * fb_depth_texture.width * fb_depth_texture.height; 622 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
623 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
549 624
550 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 625 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
551 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 626 return false;
552 ReloadColorBuffer(); 627 }
628
629 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
630
631 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
632 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
633 // Currently only handles formats that are multiples of the fill value size
634
635 if (config.fill_24bit) {
636 switch (dst_surface->pixel_format) {
637 case PixelFormat::RGB8:
638 color_values[0] = config.value_24bit_r / 255.0f;
639 color_values[1] = config.value_24bit_g / 255.0f;
640 color_values[2] = config.value_24bit_b / 255.0f;
641 break;
642 default:
643 return false;
644 }
645 } else if (config.fill_32bit) {
646 u32 value = config.value_32bit;
647
648 switch (dst_surface->pixel_format) {
649 case PixelFormat::RGBA8:
650 color_values[0] = (value >> 24) / 255.0f;
651 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
652 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
653 color_values[3] = (value & 0xFF) / 255.0f;
654 break;
655 default:
656 return false;
657 }
658 } else {
659 u16 value_16bit = config.value_16bit.Value();
660 Math::Vec4<u8> color;
661
662 switch (dst_surface->pixel_format) {
663 case PixelFormat::RGBA8:
664 color_values[0] = (value_16bit >> 8) / 255.0f;
665 color_values[1] = (value_16bit & 0xFF) / 255.0f;
666 color_values[2] = color_values[0];
667 color_values[3] = color_values[1];
668 break;
669 case PixelFormat::RGB5A1:
670 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
671 color_values[0] = color[0] / 31.0f;
672 color_values[1] = color[1] / 31.0f;
673 color_values[2] = color[2] / 31.0f;
674 color_values[3] = color[3];
675 break;
676 case PixelFormat::RGB565:
677 color = Color::DecodeRGB565((const u8*)&value_16bit);
678 color_values[0] = color[0] / 31.0f;
679 color_values[1] = color[1] / 63.0f;
680 color_values[2] = color[2] / 31.0f;
681 break;
682 case PixelFormat::RGBA4:
683 color = Color::DecodeRGBA4((const u8*)&value_16bit);
684 color_values[0] = color[0] / 15.0f;
685 color_values[1] = color[1] / 15.0f;
686 color_values[2] = color[2] / 15.0f;
687 color_values[3] = color[3] / 15.0f;
688 break;
689 case PixelFormat::IA8:
690 case PixelFormat::RG8:
691 color_values[0] = (value_16bit >> 8) / 255.0f;
692 color_values[1] = (value_16bit & 0xFF) / 255.0f;
693 break;
694 default:
695 return false;
696 }
697 }
698
699 cur_state.color_mask.red_enabled = true;
700 cur_state.color_mask.green_enabled = true;
701 cur_state.color_mask.blue_enabled = true;
702 cur_state.color_mask.alpha_enabled = true;
703 cur_state.Apply();
704 glClearBufferfv(GL_COLOR, 0, color_values);
705 } else if (dst_type == SurfaceType::Depth) {
706 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
707 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
708 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
709
710 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
711 return false;
712 }
713
714 GLfloat value_float;
715 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
716 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
717 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
718 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
719 }
720
721 cur_state.depth.write_mask = true;
722 cur_state.Apply();
723 glClearBufferfv(GL_DEPTH, 0, &value_float);
724 } else if (dst_type == SurfaceType::DepthStencil) {
725 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
726 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
727
728 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
729 return false;
730 }
731
732 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
733 GLint value_int = (config.value_32bit >> 24);
734
735 cur_state.depth.write_mask = true;
736 cur_state.stencil.write_mask = true;
737 cur_state.Apply();
738 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
739 }
553 740
554 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 741 cur_state.draw.draw_framebuffer = old_fb;
555 ReloadDepthBuffer(); 742 // TODO: Return scissor test to previous value when scissor test is implemented
743 cur_state.Apply();
556 744
557 // Notify cache of flush in case the region touches a cached resource 745 dst_surface->dirty = true;
558 res_cache.InvalidateInRange(addr, size); 746 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
747 return true;
748}
749
750bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
751 if (framebuffer_addr == 0) {
752 return false;
753 }
754
755 CachedSurface src_params;
756 src_params.addr = framebuffer_addr;
757 src_params.width = config.width;
758 src_params.height = config.height;
759 src_params.stride = pixel_stride;
760 src_params.is_tiled = false;
761 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
762
763 MathUtil::Rectangle<int> src_rect;
764 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
765
766 if (src_surface == nullptr) {
767 return false;
768 }
769
770 u32 scaled_width = src_surface->GetScaledWidth();
771 u32 scaled_height = src_surface->GetScaledHeight();
772
773 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
774 (float)src_rect.left / (float)scaled_width,
775 (float)src_rect.bottom / (float)scaled_height,
776 (float)src_rect.right / (float)scaled_width);
777
778 screen_info.display_texture = src_surface->texture.handle;
779
780 return true;
559} 781}
560 782
561void RasterizerOpenGL::SamplerInfo::Create() { 783void RasterizerOpenGL::SamplerInfo::Create() {
@@ -597,108 +819,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
597 } 819 }
598} 820}
599 821
600void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
601 GLint internal_format;
602
603 texture.format = format;
604 texture.width = width;
605 texture.height = height;
606
607 switch (format) {
608 case Pica::Regs::ColorFormat::RGBA8:
609 internal_format = GL_RGBA;
610 texture.gl_format = GL_RGBA;
611 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
612 break;
613
614 case Pica::Regs::ColorFormat::RGB8:
615 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
616 // specific OpenGL type used in this function using native-endian (that is, little-endian
617 // mostly everywhere) for words or half-words.
618 // TODO: check how those behave on big-endian processors.
619 internal_format = GL_RGB;
620 texture.gl_format = GL_BGR;
621 texture.gl_type = GL_UNSIGNED_BYTE;
622 break;
623
624 case Pica::Regs::ColorFormat::RGB5A1:
625 internal_format = GL_RGBA;
626 texture.gl_format = GL_RGBA;
627 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
628 break;
629
630 case Pica::Regs::ColorFormat::RGB565:
631 internal_format = GL_RGB;
632 texture.gl_format = GL_RGB;
633 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
634 break;
635
636 case Pica::Regs::ColorFormat::RGBA4:
637 internal_format = GL_RGBA;
638 texture.gl_format = GL_RGBA;
639 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
640 break;
641
642 default:
643 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
644 UNIMPLEMENTED();
645 break;
646 }
647
648 state.texture_units[0].texture_2d = texture.texture.handle;
649 state.Apply();
650
651 glActiveTexture(GL_TEXTURE0);
652 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
653 texture.gl_format, texture.gl_type, nullptr);
654
655 state.texture_units[0].texture_2d = 0;
656 state.Apply();
657}
658
659void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
660 GLint internal_format;
661
662 texture.format = format;
663 texture.width = width;
664 texture.height = height;
665
666 switch (format) {
667 case Pica::Regs::DepthFormat::D16:
668 internal_format = GL_DEPTH_COMPONENT16;
669 texture.gl_format = GL_DEPTH_COMPONENT;
670 texture.gl_type = GL_UNSIGNED_SHORT;
671 break;
672
673 case Pica::Regs::DepthFormat::D24:
674 internal_format = GL_DEPTH_COMPONENT24;
675 texture.gl_format = GL_DEPTH_COMPONENT;
676 texture.gl_type = GL_UNSIGNED_INT;
677 break;
678
679 case Pica::Regs::DepthFormat::D24S8:
680 internal_format = GL_DEPTH24_STENCIL8;
681 texture.gl_format = GL_DEPTH_STENCIL;
682 texture.gl_type = GL_UNSIGNED_INT_24_8;
683 break;
684
685 default:
686 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
687 UNIMPLEMENTED();
688 break;
689 }
690
691 state.texture_units[0].texture_2d = texture.texture.handle;
692 state.Apply();
693
694 glActiveTexture(GL_TEXTURE0);
695 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
696 texture.gl_format, texture.gl_type, nullptr);
697
698 state.texture_units[0].texture_2d = 0;
699 state.Apply();
700}
701
702void RasterizerOpenGL::SetShader() { 822void RasterizerOpenGL::SetShader() {
703 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 823 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
704 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 824 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -761,83 +881,6 @@ void RasterizerOpenGL::SetShader() {
761 } 881 }
762} 882}
763 883
764void RasterizerOpenGL::SyncFramebuffer() {
765 const auto& regs = Pica::g_state.regs;
766
767 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
768 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
769
770 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
771 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
772
773 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
774 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
775
776 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
777 fb_size_changed;
778
779 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
780 fb_size_changed;
781
782 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
783 color_fb_prop_changed;
784
785 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
786 depth_fb_prop_changed;
787
788 // Commit if framebuffer modified in any way
789 if (color_fb_modified)
790 CommitColorBuffer();
791
792 if (depth_fb_modified)
793 CommitDepthBuffer();
794
795 // Reconfigure framebuffer textures if any property has changed
796 if (color_fb_prop_changed) {
797 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
798 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
799 }
800
801 if (depth_fb_prop_changed) {
802 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
803 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
804
805 // Only attach depth buffer as stencil if it supports stencil
806 switch (new_fb_depth_format) {
807 case Pica::Regs::DepthFormat::D16:
808 case Pica::Regs::DepthFormat::D24:
809 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
810 break;
811
812 case Pica::Regs::DepthFormat::D24S8:
813 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
814 break;
815
816 default:
817 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
818 UNIMPLEMENTED();
819 break;
820 }
821 }
822
823 // Load buffer data again if fb modified in any way
824 if (color_fb_modified) {
825 cached_fb_color_addr = new_fb_color_addr;
826
827 ReloadColorBuffer();
828 }
829
830 if (depth_fb_modified) {
831 cached_fb_depth_addr = new_fb_depth_addr;
832
833 ReloadDepthBuffer();
834 }
835
836 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
837 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
838 "OpenGL rasterizer framebuffer setup failed, status %X", status);
839}
840
841void RasterizerOpenGL::SyncCullMode() { 884void RasterizerOpenGL::SyncCullMode() {
842 const auto& regs = Pica::g_state.regs; 885 const auto& regs = Pica::g_state.regs;
843 886
@@ -1034,229 +1077,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1034 uniform_block_data.dirty = true; 1077 uniform_block_data.dirty = true;
1035 } 1078 }
1036} 1079}
1037
1038void RasterizerOpenGL::SyncDrawState() {
1039 const auto& regs = Pica::g_state.regs;
1040
1041 // Sync the viewport
1042 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
1043 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
1044
1045 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1046 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1047 // TODO: Use floating-point viewports for accuracy if supported
1048 glViewport((GLsizei)regs.viewport_corner.x,
1049 (GLsizei)regs.viewport_corner.y,
1050 viewport_width, viewport_height);
1051
1052 // Sync bound texture(s), upload if not cached
1053 const auto pica_textures = regs.GetTextures();
1054 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1055 const auto& texture = pica_textures[texture_index];
1056
1057 if (texture.enabled) {
1058 texture_samplers[texture_index].SyncWithConfig(texture.config);
1059 res_cache.LoadAndBindTexture(state, texture_index, texture);
1060 } else {
1061 state.texture_units[texture_index].texture_2d = 0;
1062 }
1063 }
1064
1065 state.draw.uniform_buffer = uniform_buffer.handle;
1066 state.Apply();
1067}
1068
1069MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1070
1071void RasterizerOpenGL::ReloadColorBuffer() {
1072 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1073
1074 if (color_buffer == nullptr)
1075 return;
1076
1077 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1078
1079 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1080
1081 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1082
1083 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1084 for (int y = 0; y < fb_color_texture.height; ++y) {
1085 for (int x = 0; x < fb_color_texture.width; ++x) {
1086 const u32 coarse_y = y & ~7;
1087 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1088 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1089
1090 u8* pixel = color_buffer + dst_offset;
1091 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1092 }
1093 }
1094
1095 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1096 state.Apply();
1097
1098 glActiveTexture(GL_TEXTURE0);
1099 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1100 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1101
1102 state.texture_units[0].texture_2d = 0;
1103 state.Apply();
1104}
1105
1106void RasterizerOpenGL::ReloadDepthBuffer() {
1107 if (cached_fb_depth_addr == 0)
1108 return;
1109
1110 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1111 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1112
1113 if (depth_buffer == nullptr)
1114 return;
1115
1116 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1117
1118 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1119
1120 // OpenGL needs 4 bpp alignment for D24
1121 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1122
1123 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1124
1125 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1126
1127 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1128 for (int y = 0; y < fb_depth_texture.height; ++y) {
1129 for (int x = 0; x < fb_depth_texture.width; ++x) {
1130 const u32 coarse_y = y & ~7;
1131 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1132 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1133
1134 u8* pixel = depth_buffer + dst_offset;
1135 u32 depth_stencil = *(u32*)pixel;
1136 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1137 }
1138 }
1139 } else {
1140 for (int y = 0; y < fb_depth_texture.height; ++y) {
1141 for (int x = 0; x < fb_depth_texture.width; ++x) {
1142 const u32 coarse_y = y & ~7;
1143 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1144 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1145
1146 u8* pixel = depth_buffer + dst_offset;
1147 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1148 }
1149 }
1150 }
1151
1152 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1153 state.Apply();
1154
1155 glActiveTexture(GL_TEXTURE0);
1156 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1157 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1158 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1159 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1160 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1161 } else {
1162 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1163 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1164 }
1165
1166 state.texture_units[0].texture_2d = 0;
1167 state.Apply();
1168}
1169
1170Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1171MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1172
1173void RasterizerOpenGL::CommitColorBuffer() {
1174 if (cached_fb_color_addr != 0) {
1175 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1176
1177 if (color_buffer != nullptr) {
1178 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1179 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1180
1181 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1182
1183 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1184
1185 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1186 state.Apply();
1187
1188 glActiveTexture(GL_TEXTURE0);
1189 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1190
1191 state.texture_units[0].texture_2d = 0;
1192 state.Apply();
1193
1194 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1195 for (int y = 0; y < fb_color_texture.height; ++y) {
1196 for (int x = 0; x < fb_color_texture.width; ++x) {
1197 const u32 coarse_y = y & ~7;
1198 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1199 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1200
1201 u8* pixel = color_buffer + dst_offset;
1202 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1203 }
1204 }
1205 }
1206 }
1207}
1208
1209void RasterizerOpenGL::CommitDepthBuffer() {
1210 if (cached_fb_depth_addr != 0) {
1211 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1212 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1213
1214 if (depth_buffer != nullptr) {
1215 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1216 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1217
1218 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1219
1220 // OpenGL needs 4 bpp alignment for D24
1221 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1222
1223 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1224
1225 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1226 state.Apply();
1227
1228 glActiveTexture(GL_TEXTURE0);
1229 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1230
1231 state.texture_units[0].texture_2d = 0;
1232 state.Apply();
1233
1234 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1235
1236 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1237 for (int y = 0; y < fb_depth_texture.height; ++y) {
1238 for (int x = 0; x < fb_depth_texture.width; ++x) {
1239 const u32 coarse_y = y & ~7;
1240 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1241 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1242
1243 u8* pixel = depth_buffer + dst_offset;
1244 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1245 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1246 }
1247 }
1248 } else {
1249 for (int y = 0; y < fb_depth_texture.height; ++y) {
1250 for (int x = 0; x < fb_depth_texture.width; ++x) {
1251 const u32 coarse_y = y & ~7;
1252 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1253 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1254
1255 u8* pixel = depth_buffer + dst_offset;
1256 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1257 }
1258 }
1259 }
1260 }
1261 }
1262}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 390349a0c..5aa638985 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 19#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 21#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/renderer_opengl/renderer_opengl.h"
22#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
23 24
24/** 25/**
@@ -191,16 +192,17 @@ public:
191 RasterizerOpenGL(); 192 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 193 ~RasterizerOpenGL() override;
193 194
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 195 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 196 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 197 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 198 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 199 void NotifyPicaRegisterChanged(u32 id) override;
200 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 201 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 202 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
203 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
204 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
205 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 206
205 /// OpenGL shader generated for a given Pica register state 207 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 208 struct PicaShader {
@@ -210,26 +212,6 @@ public:
210 212
211private: 213private:
212 214
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 215 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 216 using TextureConfig = Pica::Regs::TextureConfig;
235 217
@@ -311,18 +293,9 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 293 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 294 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 295
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 296 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 297 void SetShader();
322 298
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 299 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 300 void SyncCullMode();
328 301
@@ -386,45 +359,15 @@ private:
386 /// Syncs the specified light's specular 1 color to match the PICA register 359 /// Syncs the specified light's specular 1 color to match the PICA register
387 void SyncLightSpecular1(int light_index); 360 void SyncLightSpecular1(int light_index);
388 361
389 /// Syncs the remaining OpenGL drawing state to match the current PICA state 362 OpenGLState state;
390 void SyncDrawState();
391
392 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
393 void ReloadColorBuffer();
394
395 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture
396 void ReloadDepthBuffer();
397
398 /**
399 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory
400 * Loads the OpenGL framebuffer textures into temporary buffers
401 * Then copies into the 3DS framebuffer using proper Morton order
402 */
403 void CommitColorBuffer();
404
405 /**
406 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
407 * Loads the OpenGL framebuffer textures into temporary buffers
408 * Then copies into the 3DS framebuffer using proper Morton order
409 */
410 void CommitDepthBuffer();
411 363
412 RasterizerCacheOpenGL res_cache; 364 RasterizerCacheOpenGL res_cache;
413 365
414 std::vector<HardwareVertex> vertex_batch; 366 std::vector<HardwareVertex> vertex_batch;
415 367
416 OpenGLState state;
417
418 PAddr cached_fb_color_addr;
419 PAddr cached_fb_depth_addr;
420
421 // Hardware rasterizer
422 std::array<SamplerInfo, 3> texture_samplers;
423 TextureInfo fb_color_texture;
424 DepthTextureInfo fb_depth_texture;
425
426 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 368 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
427 const PicaShader* current_shader = nullptr; 369 const PicaShader* current_shader = nullptr;
370 bool shader_dirty;
428 371
429 struct { 372 struct {
430 UniformData data; 373 UniformData data;
@@ -432,11 +375,12 @@ private:
432 bool dirty; 375 bool dirty;
433 } uniform_block_data; 376 } uniform_block_data;
434 377
378 std::array<SamplerInfo, 3> texture_samplers;
435 OGLVertexArray vertex_array; 379 OGLVertexArray vertex_array;
436 OGLBuffer vertex_buffer; 380 OGLBuffer vertex_buffer;
437 OGLBuffer uniform_buffer; 381 OGLBuffer uniform_buffer;
438 OGLFramebuffer framebuffer; 382 OGLFramebuffer framebuffer;
439 383
440 std::array<OGLTexture, 6> lighting_lut; 384 std::array<OGLTexture, 6> lighting_luts;
441 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 385 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
442}; 386};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..55c2fb283 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <unordered_set>
6 6
7#include "common/emu_window.h"
7#include "common/hash.h" 8#include "common/hash.h"
8#include "common/math_util.h" 9#include "common/math_util.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
@@ -12,71 +13,693 @@
12#include "core/memory.h" 13#include "core/memory.h"
13 14
14#include "video_core/debug_utils/debug_utils.h" 15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 17#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 18#include "video_core/renderer_opengl/pica_to_gl.h"
19#include "video_core/utils.h"
20#include "video_core/video_core.h"
21
22struct FormatTuple {
23 GLint internal_format;
24 GLenum format;
25 GLenum type;
26};
27
28static const std::array<FormatTuple, 5> fb_format_tuples = {{
29 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
30 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
31 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
32 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
33 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
34}};
35
36static const std::array<FormatTuple, 4> depth_format_tuples = {{
37 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
38 {},
39 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
40 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
41}};
42
43RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
44 transfer_framebuffers[0].Create();
45 transfer_framebuffers[1].Create();
46}
17 47
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 48RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 49 FlushAll();
20} 50}
21 51
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 52static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
53 using PixelFormat = CachedSurface::PixelFormat;
54
55 u8* data_ptrs[2];
56 u32 depth_stencil_shifts[2] = {24, 8};
23 57
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 58 if (morton_to_gl) {
25 const auto cached_texture = texture_cache.find(info.physical_address); 59 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
60 }
61
62 if (pixel_format == PixelFormat::D24S8) {
63 for (unsigned y = 0; y < height; ++y) {
64 for (unsigned x = 0; x < width; ++x) {
65 const u32 coarse_y = y & ~7;
66 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
67 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
68
69 data_ptrs[morton_to_gl] = morton_data + morton_offset;
70 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
26 71
27 if (cached_texture != texture_cache.end()) { 72 // Swap depth and stencil value ordering since 3DS does not match OpenGL
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 73 u32 depth_stencil;
29 state.Apply(); 74 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
75 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
76
77 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
78 }
79 }
30 } else { 80 } else {
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 81 for (unsigned y = 0; y < height; ++y) {
82 for (unsigned x = 0; x < width; ++x) {
83 const u32 coarse_y = y & ~7;
84 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
85 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
86
87 data_ptrs[morton_to_gl] = morton_data + morton_offset;
88 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
89
90 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
91 }
92 }
93 }
94}
95
96bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
97 using SurfaceType = CachedSurface::SurfaceType;
98
99 OpenGLState cur_state = OpenGLState::GetCurState();
100
101 // Make sure textures aren't bound to texture units, since they are about to be bound to framebuffer components
102 OpenGLState::ResetTexture(src_tex);
103 OpenGLState::ResetTexture(dst_tex);
104
105 // Keep track of previous framebuffer bindings
106 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
107 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
108 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
109 cur_state.Apply();
110
111 u32 buffers = 0;
112
113 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
114 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
115 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
116
117 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
118 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
119
120 buffers = GL_COLOR_BUFFER_BIT;
121 } else if (type == SurfaceType::Depth) {
122 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
128 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
129
130 buffers = GL_DEPTH_BUFFER_BIT;
131 } else if (type == SurfaceType::DepthStencil) {
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137
138 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
139 }
140
141 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
142 return false;
143 }
144
145 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
146 return false;
147 }
148
149 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
150 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
151 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
152
153 // Restore previous framebuffer bindings
154 cur_state.draw.read_framebuffer = old_fbs[0];
155 cur_state.draw.draw_framebuffer = old_fbs[1];
156 cur_state.Apply();
157
158 return true;
159}
160
161bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
162 using SurfaceType = CachedSurface::SurfaceType;
163
164 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
165 return false;
166 }
167
168 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
169}
170
171static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
172 // Allocate an uninitialized texture of appropriate size and format for the surface
173 using SurfaceType = CachedSurface::SurfaceType;
174
175 OpenGLState cur_state = OpenGLState::GetCurState();
176
177 // Keep track of previous texture bindings
178 GLuint old_tex = cur_state.texture_units[0].texture_2d;
179 cur_state.texture_units[0].texture_2d = texture;
180 cur_state.Apply();
181 glActiveTexture(GL_TEXTURE0);
182
183 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
184
185 FormatTuple tuple;
186 if (type == SurfaceType::Color) {
187 ASSERT((size_t)pixel_format < fb_format_tuples.size());
188 tuple = fb_format_tuples[(unsigned int)pixel_format];
189 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
190 size_t tuple_idx = (size_t)pixel_format - 14;
191 ASSERT(tuple_idx < depth_format_tuples.size());
192 tuple = depth_format_tuples[tuple_idx];
193 } else {
194 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
195 }
196
197 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
198 tuple.format, tuple.type, nullptr);
199
200 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
201 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
202 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
203 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
204
205 // Restore previous texture bindings
206 cur_state.texture_units[0].texture_2d = old_tex;
207 cur_state.Apply();
208}
209
210MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
211CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
212 using PixelFormat = CachedSurface::PixelFormat;
213 using SurfaceType = CachedSurface::SurfaceType;
214
215 if (params.addr == 0) {
216 return nullptr;
217 }
218
219 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
220
221 // Check for an exact match in existing surfaces
222 CachedSurface* best_exact_surface = nullptr;
223 float exact_surface_goodness = -1.f;
224
225 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
226 auto range = surface_cache.equal_range(surface_interval);
227 for (auto it = range.first; it != range.second; ++it) {
228 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
229 CachedSurface* surface = it2->get();
230
231 // Check if the request matches the surface exactly
232 if (params.addr == surface->addr &&
233 params.width == surface->width && params.height == surface->height &&
234 params.pixel_format == surface->pixel_format)
235 {
236 // Make sure optional param-matching criteria are fulfilled
237 bool tiling_match = (params.is_tiled == surface->is_tiled);
238 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
239 if (!match_res_scale || res_scale_match) {
240 // Prioritize same-tiling and highest resolution surfaces
241 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
242 if (match_goodness > exact_surface_goodness || surface->dirty) {
243 exact_surface_goodness = match_goodness;
244 best_exact_surface = surface;
245 }
246 }
247 }
248 }
249 }
250
251 // Return the best exact surface if found
252 if (best_exact_surface != nullptr) {
253 return best_exact_surface;
254 }
255
256 // No matching surfaces found, so create a new one
257 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
258 if (texture_src_data == nullptr) {
259 return nullptr;
260 }
261
262 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
263
264 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
265
266 new_surface->addr = params.addr;
267 new_surface->size = params_size;
268
269 new_surface->texture.Create();
270 new_surface->width = params.width;
271 new_surface->height = params.height;
272 new_surface->stride = params.stride;
273 new_surface->res_scale_width = params.res_scale_width;
274 new_surface->res_scale_height = params.res_scale_height;
275
276 new_surface->is_tiled = params.is_tiled;
277 new_surface->pixel_format = params.pixel_format;
278 new_surface->dirty = false;
279
280 if (!load_if_create) {
281 // Don't load any data; just allocate the surface's texture
282 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
283 } else {
284 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
285
286 Memory::RasterizerFlushRegion(params.addr, params_size);
287
288 // Load data from memory to the new surface
289 OpenGLState cur_state = OpenGLState::GetCurState();
290
291 GLuint old_tex = cur_state.texture_units[0].texture_2d;
292 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
293 cur_state.Apply();
294 glActiveTexture(GL_TEXTURE0);
295
296 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
297 if (!new_surface->is_tiled) {
298 // TODO: Ensure this will always be a color format, not a depth or other format
299 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
300 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
301
302 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
303 tuple.format, tuple.type, texture_src_data);
304 } else {
305 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
306 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
307 FormatTuple tuple;
308 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
309 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310 } else {
311 // Texture
312 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
313 }
314
315 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
316
317 Pica::DebugUtils::TextureInfo tex_info;
318 tex_info.width = params.width;
319 tex_info.height = params.height;
320 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
321 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
322 tex_info.physical_address = params.addr;
323
324 for (unsigned y = 0; y < params.height; ++y) {
325 for (unsigned x = 0; x < params.width; ++x) {
326 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
327 }
328 }
329
330 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
331 } else {
332 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
333 size_t tuple_idx = (size_t)params.pixel_format - 14;
334 ASSERT(tuple_idx < depth_format_tuples.size());
335 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
336
337 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
338
339 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
340 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
341
342 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
343
344 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
345
346 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
347
348 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
349
350 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
351 tuple.format, tuple.type, temp_fb_depth_buffer.data());
352 }
353 }
354 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
32 355
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 356 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
357 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
358 OGLTexture scaled_texture;
359 scaled_texture.Create();
34 360
35 new_texture->texture.Create(); 361 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 362 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
37 state.Apply(); 363 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 364 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
39 365
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 366 new_surface->texture.Release();
367 new_surface->texture.handle = scaled_texture.handle;
368 scaled_texture.handle = 0;
369 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
370 cur_state.Apply();
371 }
372
373 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
374 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
375 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
376 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
377
378 cur_state.texture_units[0].texture_2d = old_tex;
379 cur_state.Apply();
380 }
381
382 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
383 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
384 return new_surface.get();
385}
41 386
42 new_texture->width = info.width; 387CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
43 new_texture->height = info.height; 388 if (params.addr == 0) {
44 new_texture->size = info.stride * info.height; 389 return nullptr;
45 new_texture->addr = info.physical_address; 390 }
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); 391
392 u32 total_pixels = params.width * params.height;
393 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
394
395 // Attempt to find encompassing surfaces
396 CachedSurface* best_subrect_surface = nullptr;
397 float subrect_surface_goodness = -1.f;
47 398
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 399 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
400 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
401 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
402 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
403 CachedSurface* surface = it2->get();
49 404
50 for (int y = 0; y < info.height; ++y) { 405 // Check if the request is contained in the surface
51 for (int x = 0; x < info.width; ++x) { 406 if (params.addr >= surface->addr &&
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 407 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
408 params.pixel_format == surface->pixel_format)
409 {
410 // Make sure optional param-matching criteria are fulfilled
411 bool tiling_match = (params.is_tiled == surface->is_tiled);
412 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
413 if (!match_res_scale || res_scale_match) {
414 // Prioritize same-tiling and highest resolution surfaces
415 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
416 if (match_goodness > subrect_surface_goodness || surface->dirty) {
417 subrect_surface_goodness = match_goodness;
418 best_subrect_surface = surface;
419 }
420 }
53 } 421 }
54 } 422 }
423 }
424
425 // Return the best subrect surface if found
426 if (best_subrect_surface != nullptr) {
427 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
428
429 int x0, y0;
430
431 if (!params.is_tiled) {
432 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
433 x0 = begin_pixel_index % best_subrect_surface->width;
434 y0 = begin_pixel_index / best_subrect_surface->width;
435
436 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
437 } else {
438 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
439 u32 tiles_per_row = best_subrect_surface->width / 8;
440
441 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
442 x0 = begin_tile_index % tiles_per_row * 8;
443 y0 = begin_tile_index / tiles_per_row * 8;
444
445 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
446 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
447 }
55 448
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 449 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
450 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
451 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
452 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
57 453
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 454 return best_subrect_surface;
59 } 455 }
456
457 // No subrect found - create and return a new surface
458 if (!params.is_tiled) {
459 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
460 } else {
461 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
462 }
463
464 return GetSurface(params, match_res_scale, load_if_create);
465}
466
467CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
468 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
469
470 CachedSurface params;
471 params.addr = info.physical_address;
472 params.width = info.width;
473 params.height = info.height;
474 params.is_tiled = true;
475 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
476 return GetSurface(params, false, true);
60} 477}
61 478
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 479std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 480 const auto& regs = Pica::g_state.regs;
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 481
482 // Make sure that framebuffers don't overlap if both color and depth are being used
483 u32 fb_area = config.GetWidth() * config.GetHeight();
484 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
485 config.GetDepthBufferPhysicalAddress() != 0 &&
486 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
487 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
488 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
489 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
490
491 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
492 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
493 using_depth_fb = false;
494 }
495
496 // get color and depth surfaces
497 CachedSurface color_params;
498 CachedSurface depth_params;
499 color_params.width = depth_params.width = config.GetWidth();
500 color_params.height = depth_params.height = config.GetHeight();
501 color_params.is_tiled = depth_params.is_tiled = true;
502 if (VideoCore::g_scaled_resolution_enabled) {
503 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
504
505 // Assume same scaling factor for top and bottom screens
506 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
507 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
508 }
509
510 color_params.addr = config.GetColorBufferPhysicalAddress();
511 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
512
513 depth_params.addr = config.GetDepthBufferPhysicalAddress();
514 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
515
516 MathUtil::Rectangle<int> color_rect;
517 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
518
519 MathUtil::Rectangle<int> depth_rect;
520 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
521
522 // Sanity check to make sure found surfaces aren't the same
523 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
524 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
525 using_depth_fb = false;
526 depth_surface = nullptr;
527 }
528
529 MathUtil::Rectangle<int> rect;
65 530
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 531 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
67 const auto& info = *it->second; 532 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
533 if (color_rect.left != 0 || color_rect.top != 0) {
534 color_surface = GetSurface(color_params, true, true);
535 }
68 536
69 // Flush the texture only if the memory region intersects and a change is detected 537 if (depth_rect.left != 0 || depth_rect.top != 0) {
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 538 depth_surface = GetSurface(depth_params, true, true);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 539 }
72 540
73 it = texture_cache.erase(it); 541 if (!color_surface->is_tiled) {
542 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 543 } else {
75 ++it; 544 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 545 }
546 } else if (color_surface != nullptr) {
547 rect = color_rect;
548 } else if (depth_surface != nullptr) {
549 rect = depth_rect;
550 } else {
551 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 552 }
553
554 return std::make_tuple(color_surface, depth_surface, rect);
78} 555}
79 556
80void RasterizerCacheOpenGL::InvalidateAll() { 557CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 558 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
559 auto range = surface_cache.equal_range(surface_interval);
560 for (auto it = range.first; it != range.second; ++it) {
561 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
562 int bits_per_value = 0;
563 if (config.fill_24bit) {
564 bits_per_value = 24;
565 } else if (config.fill_32bit) {
566 bits_per_value = 32;
567 } else {
568 bits_per_value = 16;
569 }
570
571 CachedSurface* surface = it2->get();
572
573 if (surface->addr == config.GetStartAddress() &&
574 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
575 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
576 {
577 return surface;
578 }
579 }
580 }
581
582 return nullptr;
583}
584
585MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
586void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
587 using PixelFormat = CachedSurface::PixelFormat;
588 using SurfaceType = CachedSurface::SurfaceType;
589
590 if (!surface->dirty) {
591 return;
592 }
593
594 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
595
596 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
597 if (dst_buffer == nullptr) {
598 return;
599 }
600
601 OpenGLState cur_state = OpenGLState::GetCurState();
602 GLuint old_tex = cur_state.texture_units[0].texture_2d;
603
604 OGLTexture unscaled_tex;
605 GLuint texture_to_flush = surface->texture.handle;
606
607 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
608 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
609 unscaled_tex.Create();
610
611 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
612 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
613 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
614 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
615
616 texture_to_flush = unscaled_tex.handle;
617 }
618
619 cur_state.texture_units[0].texture_2d = texture_to_flush;
620 cur_state.Apply();
621 glActiveTexture(GL_TEXTURE0);
622
623 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
624 if (!surface->is_tiled) {
625 // TODO: Ensure this will always be a color format, not a depth or other format
626 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
627 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
628
629 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
630 } else {
631 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
632 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
633 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
634 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
635
636 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
637
638 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
639
640 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
641
642 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
643 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
644 } else {
645 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
646 size_t tuple_idx = (size_t)surface->pixel_format - 14;
647 ASSERT(tuple_idx < depth_format_tuples.size());
648 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
649
650 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
651
652 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
653 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
654
655 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
656
657 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
658
659 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
660
661 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
662
663 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
664 }
665 }
666 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
667
668 surface->dirty = false;
669
670 cur_state.texture_units[0].texture_2d = old_tex;
671 cur_state.Apply();
672}
673
674void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
675 if (size == 0) {
676 return;
677 }
678
679 // Gather up unique surfaces that touch the region
680 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
681
682 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
683 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
684 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
685 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
686 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
687 }
688
689 // Flush and invalidate surfaces
690 for (auto surface : touching_surfaces) {
691 FlushSurface(surface.get());
692 if (invalidate) {
693 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
694 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
695 }
696 }
697}
698
699void RasterizerCacheOpenGL::FlushAll() {
700 for (auto& surfaces : surface_cache) {
701 for (auto& surface : surfaces.second) {
702 FlushSurface(surface.get());
703 }
704 }
82} 705}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..893d51138 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -6,38 +6,211 @@
6 6
7#include <map> 7#include <map>
8#include <memory> 8#include <memory>
9#include <set>
10
11#include <boost/icl/interval_map.hpp>
12
13#include "common/math_util.h"
14
15#include "core/hw/gpu.h"
9 16
10#include "video_core/pica.h" 17#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
14 21
22struct CachedSurface;
23
24using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
25
26struct CachedSurface {
27 enum class PixelFormat {
28 // First 5 formats are shared between textures and color buffers
29 RGBA8 = 0,
30 RGB8 = 1,
31 RGB5A1 = 2,
32 RGB565 = 3,
33 RGBA4 = 4,
34
35 // Texture-only formats
36 IA8 = 5,
37 RG8 = 6,
38 I8 = 7,
39 A8 = 8,
40 IA4 = 9,
41 I4 = 10,
42 A4 = 11,
43 ETC1 = 12,
44 ETC1A4 = 13,
45
46 // Depth buffer-only formats
47 D16 = 14,
48 // gap
49 D24 = 16,
50 D24S8 = 17,
51
52 Invalid = 255,
53 };
54
55 enum class SurfaceType {
56 Color = 0,
57 Texture = 1,
58 Depth = 2,
59 DepthStencil = 3,
60 Invalid = 4,
61 };
62
63 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
64 static const std::array<unsigned int, 18> bpp_table = {
65 32, // RGBA8
66 24, // RGB8
67 16, // RGB5A1
68 16, // RGB565
69 16, // RGBA4
70 16, // IA8
71 16, // RG8
72 8, // I8
73 8, // A8
74 8, // IA4
75 4, // I4
76 4, // A4
77 4, // ETC1
78 8, // ETC1A4
79 16, // D16
80 0,
81 24, // D24
82 32, // D24S8
83 };
84
85 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
86 return bpp_table[(unsigned int)format];
87 }
88
89 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
90 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
91 }
92
93 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
94 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
95 }
96
97 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
98 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
99 }
100
101 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
102 switch (format) {
103 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
104 case GPU::Regs::PixelFormat::RGB565:
105 return PixelFormat::RGB565;
106 case GPU::Regs::PixelFormat::RGB5A1:
107 return PixelFormat::RGB5A1;
108 default:
109 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
110 }
111 }
112
113 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
114 SurfaceType a_type = GetFormatType(pixel_format_a);
115 SurfaceType b_type = GetFormatType(pixel_format_b);
116
117 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
118 return true;
119 }
120
121 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
122 return true;
123 }
124
125 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
126 return true;
127 }
128
129 return false;
130 }
131
132 static SurfaceType GetFormatType(PixelFormat pixel_format) {
133 if ((unsigned int)pixel_format < 5) {
134 return SurfaceType::Color;
135 }
136
137 if ((unsigned int)pixel_format < 14) {
138 return SurfaceType::Texture;
139 }
140
141 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
142 return SurfaceType::Depth;
143 }
144
145 if (pixel_format == PixelFormat::D24S8) {
146 return SurfaceType::DepthStencil;
147 }
148
149 return SurfaceType::Invalid;
150 }
151
152 u32 GetScaledWidth() const {
153 return (u32)(width * res_scale_width);
154 }
155
156 u32 GetScaledHeight() const {
157 return (u32)(height * res_scale_height);
158 }
159
160 PAddr addr;
161 u32 size;
162
163 PAddr min_valid;
164 PAddr max_valid;
165
166 OGLTexture texture;
167 u32 width;
168 u32 height;
169 u32 stride = 0;
170 float res_scale_width = 1.f;
171 float res_scale_height = 1.f;
172
173 bool is_tiled;
174 PixelFormat pixel_format;
175 bool dirty;
176};
177
15class RasterizerCacheOpenGL : NonCopyable { 178class RasterizerCacheOpenGL : NonCopyable {
16public: 179public:
180 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 181 ~RasterizerCacheOpenGL();
18 182
183 /// Blits one texture to another
184 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
185
186 /// Attempt to blit one surface's texture to another
187 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
188
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 189 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 190 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 191
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 192 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 193 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 194
26 /// Invalidate any cached resource intersecting the specified region. 195 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 196 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 197
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 198 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 199 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 200
32private: 201 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 202 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 203
35 GLuint width; 204 /// Write the surface back to memory
36 GLuint height; 205 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 206
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 207 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
208 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
209
210 /// Flush all cached resources tracked by this cache manager
211 void FlushAll();
212
213private:
214 SurfaceCache surface_cache;
215 OGLFramebuffer transfer_framebuffers[2];
43}; 216};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..f04bdd8c5 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
6#include "video_core/renderer_opengl/gl_state.h" 7#include "video_core/renderer_opengl/gl_state.h"
7 8
8OpenGLState OpenGLState::cur_state; 9OpenGLState OpenGLState::cur_state;
@@ -48,17 +49,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 49 texture_unit.sampler = 0;
49 } 50 }
50 51
51 for (auto& lut : lighting_lut) { 52 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 53 lut.texture_1d = 0;
53 } 54 }
54 55
55 draw.framebuffer = 0; 56 draw.read_framebuffer = 0;
57 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 58 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 59 draw.vertex_buffer = 0;
60 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 61 draw.shader_program = 0;
59} 62}
60 63
61void OpenGLState::Apply() { 64void OpenGLState::Apply() const {
62 // Culling 65 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 66 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 67 if (cull.enabled) {
@@ -175,16 +178,19 @@ void OpenGLState::Apply() {
175 } 178 }
176 179
177 // Lighting LUTs 180 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 181 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 182 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 183 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 184 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 185 }
183 } 186 }
184 187
185 // Framebuffer 188 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 189 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 190 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
191 }
192 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
193 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 194 }
189 195
190 // Vertex array 196 // Vertex array
@@ -210,45 +216,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 216 cur_state = *this;
211} 217}
212 218
213void OpenGLState::ResetTexture(GLuint id) { 219GLenum OpenGLState::CheckFBStatus(GLenum target) {
220 GLenum fb_status = glCheckFramebufferStatus(target);
221 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
222 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
223 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
224 }
225
226 return fb_status;
227}
228
229void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 230 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 231 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 232 unit.texture_2d = 0;
217 } 233 }
218 } 234 }
219} 235}
220 236
221void OpenGLState::ResetSampler(GLuint id) { 237void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 238 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 239 if (unit.sampler == handle) {
224 unit.sampler = 0; 240 unit.sampler = 0;
225 } 241 }
226 } 242 }
227} 243}
228 244
229void OpenGLState::ResetProgram(GLuint id) { 245void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 246 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 247 cur_state.draw.shader_program = 0;
232 } 248 }
233} 249}
234 250
235void OpenGLState::ResetBuffer(GLuint id) { 251void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 252 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 253 cur_state.draw.vertex_buffer = 0;
238 } 254 }
239 if (cur_state.draw.uniform_buffer == id) { 255 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 256 cur_state.draw.uniform_buffer = 0;
241 } 257 }
242} 258}
243 259
244void OpenGLState::ResetVertexArray(GLuint id) { 260void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 261 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 262 cur_state.draw.vertex_array = 0;
247 } 263 }
248} 264}
249 265
250void OpenGLState::ResetFramebuffer(GLuint id) { 266void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 267 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 268 cur_state.draw.read_framebuffer = 0;
269 }
270 if (cur_state.draw.draw_framebuffer == handle) {
271 cur_state.draw.draw_framebuffer = 0;
253 } 272 }
254} 273}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..0f72e9004 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8#include <memory>
8 9
9class OpenGLState { 10class OpenGLState {
10public: 11public:
@@ -63,15 +64,15 @@ public:
63 64
64 struct { 65 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 66 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 67 } lighting_luts[6];
67 68
68 struct { 69 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 70 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
71 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 72 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 73 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 74 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 75 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 76 } draw;
76 77
77 OpenGLState(); 78 OpenGLState();
@@ -82,14 +83,18 @@ public:
82 } 83 }
83 84
84 /// Apply this state as the current OpenGL state 85 /// Apply this state as the current OpenGL state
85 void Apply(); 86 void Apply() const;
86 87
87 static void ResetTexture(GLuint id); 88 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 89 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 90
90 static void ResetBuffer(GLuint id); 91 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 92 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 93 static void ResetSampler(GLuint handle);
94 static void ResetProgram(GLuint handle);
95 static void ResetBuffer(GLuint handle);
96 static void ResetVertexArray(GLuint handle);
97 static void ResetFramebuffer(GLuint handle);
93 98
94private: 99private:
95 static OpenGLState cur_state; 100 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f907593f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -107,7 +107,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 107 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 108 state.Apply();
109 109
110 for(int i : {0, 1}) { 110 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 112
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +117,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 117 LCD::Read(color_fill.raw, lcd_color_addr);
118 118
119 if (color_fill.is_enabled) { 119 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 121
122 // Resize the texture in case the framebuffer size has changed 122 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 123 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 124 screen_infos[i].texture.height = 1;
125 } else { 125 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 126 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 127 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 128 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 129 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 130 // This is expected to not happen very often and hence should not be a
131 // performance problem. 131 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 132 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 133 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 134 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 135
136 // Resize the texture in case the framebuffer size has changed 136 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 137 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 138 screen_infos[i].texture.height = framebuffer.height;
139 } 139 }
140 } 140 }
141 141
@@ -166,8 +166,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 166/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 167 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 168 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 169void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 170 ScreenInfo& screen_info) {
171 171
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 173 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +177,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 177 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 178 (int)framebuffer.height, (int)framebuffer.format);
179 179
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 180 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 181 size_t pixel_stride = framebuffer.stride / bpp;
184 182
@@ -189,24 +187,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 187 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 188 ASSERT(pixel_stride % 4 == 0);
191 189
192 state.texture_units[0].texture_2d = texture.handle; 190 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, pixel_stride, screen_info)) {
193 state.Apply(); 191 // Reset the screen info's display texture to its own permanent texture
192 screen_info.display_texture = screen_info.texture.resource.handle;
193 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 194
195 glActiveTexture(GL_TEXTURE0); 195 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 196
198 // Update existing texture 197 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 198
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 199 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
200 state.Apply();
207 201
208 state.texture_units[0].texture_2d = 0; 202 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 203 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
204
205 // Update existing texture
206 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
207 // differ from the LCD resolution.
208 // TODO: Applications could theoretically crash Citra here by specifying too large
209 // framebuffer sizes. We should make sure that this cannot happen.
210 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
211 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
212
213 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
214
215 state.texture_units[0].texture_2d = 0;
216 state.Apply();
217 }
210} 218}
211 219
212/** 220/**
@@ -216,7 +224,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 224 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 225void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 226 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 227 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 228 state.Apply();
221 229
222 glActiveTexture(GL_TEXTURE0); 230 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +232,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 232
225 // Update existing texture 233 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 234 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
235
236 state.texture_units[0].texture_2d = 0;
237 state.Apply();
227} 238}
228 239
229/** 240/**
@@ -233,20 +244,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 244 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 245
235 // Link shaders and get variable locations 246 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 247 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 248 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 249 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 250 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 251 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
252 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
253 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 254
242 // Generate VBO handle for drawing 255 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 256 vertex_buffer.Create();
244 257
245 // Generate VAO 258 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 259 vertex_array.Create();
247 260
248 state.draw.vertex_array = vertex_array_handle; 261 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 262 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 263 state.draw.uniform_buffer = 0;
251 state.Apply(); 264 state.Apply();
252 265
@@ -258,13 +271,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 271 glEnableVertexAttribArray(attrib_tex_coord);
259 272
260 // Allocate textures for each screen 273 // Allocate textures for each screen
261 for (auto& texture : textures) { 274 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 275 screen_info.texture.resource.Create();
263 276
264 // Allocation of storage is deferred until the first frame, when we 277 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 278 // know the framebuffer size.
266 279
267 state.texture_units[0].texture_2d = texture.handle; 280 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 281 state.Apply();
269 282
270 glActiveTexture(GL_TEXTURE0); 283 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +286,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 286 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 287 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 288 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
289
290 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 291 }
277 292
278 state.texture_units[0].texture_2d = 0; 293 state.texture_units[0].texture_2d = 0;
@@ -327,30 +342,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 342 UNIMPLEMENTED();
328 } 343 }
329 344
330 state.texture_units[0].texture_2d = texture.handle; 345 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 346 state.Apply();
332 347
333 glActiveTexture(GL_TEXTURE0); 348 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 349 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 350 texture.gl_format, texture.gl_type, nullptr);
351
352 state.texture_units[0].texture_2d = 0;
353 state.Apply();
336} 354}
337 355
338/** 356/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 357 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 358 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 359void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
360 auto& texcoords = screen_info.display_texcoords;
361
342 std::array<ScreenRectVertex, 4> vertices = {{ 362 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 363 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 364 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 365 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 366 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 367 }};
348 368
349 state.texture_units[0].texture_2d = texture.handle; 369 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 370 state.Apply();
351 371
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 372 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 373 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
374
375 state.texture_units[0].texture_2d = 0;
376 state.Apply();
354} 377}
355 378
356/** 379/**
@@ -362,9 +385,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 385 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 386 glClear(GL_COLOR_BUFFER_BIT);
364 387
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 388 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 389 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 390 (float)layout.height);
@@ -374,9 +394,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 394 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 395 glUniform1i(uniform_color_texture, 0);
376 396
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 397 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 398 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 399 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 400 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 401
382 m_current_frame++; 402 m_current_frame++;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..5ca5255ac 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -11,10 +11,28 @@
11#include "core/hw/gpu.h" 11#include "core/hw/gpu.h"
12 12
13#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 15#include "video_core/renderer_opengl/gl_state.h"
15 16
16class EmuWindow; 17class EmuWindow;
17 18
19/// Structure used for storing information about the textures for each 3DS screen
20struct TextureInfo {
21 OGLTexture resource;
22 GLsizei width;
23 GLsizei height;
24 GPU::Regs::PixelFormat format;
25 GLenum gl_format;
26 GLenum gl_type;
27};
28
29/// Structure used for storing information about the display target for each 3DS screen
30struct ScreenInfo {
31 GLuint display_texture;
32 MathUtil::Rectangle<float> display_texcoords;
33 TextureInfo texture;
34};
35
18class RendererOpenGL : public RendererBase { 36class RendererOpenGL : public RendererBase {
19public: 37public:
20 38
@@ -37,26 +55,16 @@ public:
37 void ShutDown() override; 55 void ShutDown() override;
38 56
39private: 57private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 58 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 59 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 60 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 61 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 62 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 63 void UpdateFramerate();
56 64
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 65 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 66 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 67 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 68 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 69 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 70 const TextureInfo& texture);
@@ -69,10 +77,10 @@ private:
69 OpenGLState state; 77 OpenGLState state;
70 78
71 // OpenGL object IDs 79 // OpenGL object IDs
72 GLuint vertex_array_handle; 80 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 81 OGLBuffer vertex_buffer;
74 GLuint program_id; 82 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 83 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 84 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 85 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 86 GLuint uniform_color_texture;
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..090f899bc 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -11,16 +11,14 @@
11namespace VideoCore { 11namespace VideoCore {
12 12
13class SWRasterizer : public RasterizerInterface { 13class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 14 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 15 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 16 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 17 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 18 void NotifyPicaRegisterChanged(u32 id) override {}
19 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 20 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 21 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 22};
25 23
26} 24}
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..855286173 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -25,6 +25,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 25
26std::atomic<bool> g_hw_renderer_enabled; 26std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 27std::atomic<bool> g_shader_jit_enabled;
28std::atomic<bool> g_scaled_resolution_enabled;
28 29
29/// Initialize the video core 30/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 31bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();