summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/settings.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/engines/maxwell_dma.cpp9
-rw-r--r--src/video_core/macro/macro.cpp32
-rw-r--r--src/video_core/macro/macro.h3
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/yuzu/configuration/configure_debug.cpp3
-rw-r--r--src/yuzu/configuration/configure_debug.ui13
9 files changed, 84 insertions, 10 deletions
diff --git a/src/common/settings.h b/src/common/settings.h
index 5b34169a8..e61d9cd7f 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -606,6 +606,7 @@ struct Values {
606 BasicSetting<bool> dump_exefs{false, "dump_exefs"}; 606 BasicSetting<bool> dump_exefs{false, "dump_exefs"};
607 BasicSetting<bool> dump_nso{false, "dump_nso"}; 607 BasicSetting<bool> dump_nso{false, "dump_nso"};
608 BasicSetting<bool> dump_shaders{false, "dump_shaders"}; 608 BasicSetting<bool> dump_shaders{false, "dump_shaders"};
609 BasicSetting<bool> dump_macros{false, "dump_macros"};
609 BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"}; 610 BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"};
610 BasicSetting<bool> reporting_services{false, "reporting_services"}; 611 BasicSetting<bool> reporting_services{false, "reporting_services"};
611 BasicSetting<bool> quest_flag{false, "quest_flag"}; 612 BasicSetting<bool> quest_flag{false, "quest_flag"};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d4652b167..7d0cb8fce 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -173,6 +173,8 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
173 case MAXWELL3D_REG_INDEX(shadow_ram_control): 173 case MAXWELL3D_REG_INDEX(shadow_ram_control):
174 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument); 174 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
175 return; 175 return;
176 case MAXWELL3D_REG_INDEX(macros.upload_address):
177 return macro_engine->ClearCode(regs.macros.upload_address);
176 case MAXWELL3D_REG_INDEX(macros.data): 178 case MAXWELL3D_REG_INDEX(macros.data):
177 return macro_engine->AddCode(regs.macros.upload_address, argument); 179 return macro_engine->AddCode(regs.macros.upload_address, argument);
178 case MAXWELL3D_REG_INDEX(macros.bind): 180 case MAXWELL3D_REG_INDEX(macros.bind):
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 76e8bc656..a7302f7c1 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -134,7 +134,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
134 134
135 // Deswizzle the input and copy it over. 135 // Deswizzle the input and copy it over.
136 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); 136 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
137 const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; 137 const u32 bytes_per_pixel =
138 regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
138 const Parameters& src_params = regs.src_params; 139 const Parameters& src_params = regs.src_params;
139 const u32 width = src_params.width; 140 const u32 width = src_params.width;
140 const u32 height = src_params.height; 141 const u32 height = src_params.height;
@@ -166,7 +167,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
166 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); 167 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
167 168
168 const auto& dst_params = regs.dst_params; 169 const auto& dst_params = regs.dst_params;
169 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; 170 const u32 bytes_per_pixel =
171 regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1;
170 const u32 width = dst_params.width; 172 const u32 width = dst_params.width;
171 const u32 height = dst_params.height; 173 const u32 height = dst_params.height;
172 const u32 depth = dst_params.depth; 174 const u32 depth = dst_params.depth;
@@ -210,7 +212,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
210} 212}
211 213
212void MaxwellDMA::FastCopyBlockLinearToPitch() { 214void MaxwellDMA::FastCopyBlockLinearToPitch() {
213 const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; 215 const u32 bytes_per_pixel =
216 regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
214 const size_t src_size = GOB_SIZE; 217 const size_t src_size = GOB_SIZE;
215 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 218 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
216 u32 pos_x = regs.src_params.origin.x; 219 u32 pos_x = regs.src_params.origin.x;
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index a033d03be..e7279efcd 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,11 +2,15 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <cstring> 4#include <cstring>
5#include <fstream>
5#include <optional> 6#include <optional>
7#include <span>
6 8
7#include <boost/container_hash/hash.hpp> 9#include <boost/container_hash/hash.hpp>
8 10
9#include "common/assert.h" 11#include "common/assert.h"
12#include "common/fs/fs.h"
13#include "common/fs/path_util.h"
10#include "common/settings.h" 14#include "common/settings.h"
11#include "video_core/macro/macro.h" 15#include "video_core/macro/macro.h"
12#include "video_core/macro/macro_hle.h" 16#include "video_core/macro/macro_hle.h"
@@ -15,6 +19,23 @@
15 19
16namespace Tegra { 20namespace Tegra {
17 21
22static void Dump(u64 hash, std::span<const u32> code) {
23 const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
24 const auto macro_dir{base_dir / "macros"};
25 if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) {
26 LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
27 return;
28 }
29 const auto name{macro_dir / fmt::format("{:016x}.macro", hash)};
30 std::fstream macro_file(name, std::ios::out | std::ios::binary);
31 if (!macro_file) {
32 LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
33 Common::FS::PathToUTF8String(name));
34 return;
35 }
36 macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
37}
38
18MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) 39MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
19 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} 40 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
20 41
@@ -24,6 +45,11 @@ void MacroEngine::AddCode(u32 method, u32 data) {
24 uploaded_macro_code[method].push_back(data); 45 uploaded_macro_code[method].push_back(data);
25} 46}
26 47
48void MacroEngine::ClearCode(u32 method) {
49 macro_cache.erase(method);
50 uploaded_macro_code.erase(method);
51}
52
27void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { 53void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
28 auto compiled_macro = macro_cache.find(method); 54 auto compiled_macro = macro_cache.find(method);
29 if (compiled_macro != macro_cache.end()) { 55 if (compiled_macro != macro_cache.end()) {
@@ -54,6 +80,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
54 if (!mid_method.has_value()) { 80 if (!mid_method.has_value()) {
55 cache_info.lle_program = Compile(macro_code->second); 81 cache_info.lle_program = Compile(macro_code->second);
56 cache_info.hash = boost::hash_value(macro_code->second); 82 cache_info.hash = boost::hash_value(macro_code->second);
83 if (Settings::values.dump_macros) {
84 Dump(cache_info.hash, macro_code->second);
85 }
57 } else { 86 } else {
58 const auto& macro_cached = uploaded_macro_code[mid_method.value()]; 87 const auto& macro_cached = uploaded_macro_code[mid_method.value()];
59 const auto rebased_method = method - mid_method.value(); 88 const auto rebased_method = method - mid_method.value();
@@ -63,6 +92,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
63 code.size() * sizeof(u32)); 92 code.size() * sizeof(u32));
64 cache_info.hash = boost::hash_value(code); 93 cache_info.hash = boost::hash_value(code);
65 cache_info.lle_program = Compile(code); 94 cache_info.lle_program = Compile(code);
95 if (Settings::values.dump_macros) {
96 Dump(cache_info.hash, code);
97 }
66 } 98 }
67 99
68 if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { 100 if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 7e12c16dc..07d97ba39 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -117,6 +117,9 @@ public:
117 // Store the uploaded macro code to compile them when they're called. 117 // Store the uploaded macro code to compile them when they're called.
118 void AddCode(u32 method, u32 data); 118 void AddCode(u32 method, u32 data);
119 119
120 // Clear the code associated with a method.
121 void ClearCode(u32 method);
122
120 // Compiles the macro if its not in the cache, and executes the compiled macro 123 // Compiles the macro if its not in the cache, and executes the compiled macro
121 void Execute(u32 method, const std::vector<u32>& parameters); 124 void Execute(u32 method, const std::vector<u32>& parameters);
122 125
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index dc2b490d4..dc5376501 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -23,7 +23,8 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255
23namespace Tegra { 23namespace Tegra {
24namespace { 24namespace {
25constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; 25constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
26constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; 26constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d;
27constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11;
27constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; 28constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
28constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; 29constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
29constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; 30constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
@@ -31,6 +32,7 @@ constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
31constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ 32constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
32 STATE, 33 STATE,
33 RESULT, 34 RESULT,
35 MAX_PARAMETER,
34 PARAMETERS, 36 PARAMETERS,
35 METHOD_ADDRESS, 37 METHOD_ADDRESS,
36 BRANCH_HOLDER, 38 BRANCH_HOLDER,
@@ -80,7 +82,7 @@ private:
80 u32 carry_flag{}; 82 u32 carry_flag{};
81 }; 83 };
82 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); 84 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
83 using ProgramType = void (*)(JITState*, const u32*); 85 using ProgramType = void (*)(JITState*, const u32*, const u32*);
84 86
85 struct OptimizerState { 87 struct OptimizerState {
86 bool can_skip_carry{}; 88 bool can_skip_carry{};
@@ -112,7 +114,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
112 JITState state{}; 114 JITState state{};
113 state.maxwell3d = &maxwell3d; 115 state.maxwell3d = &maxwell3d;
114 state.registers = {}; 116 state.registers = {};
115 program(&state, parameters.data()); 117 program(&state, parameters.data(), parameters.data() + parameters.size());
116} 118}
117 119
118void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { 120void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -488,6 +490,7 @@ void MacroJITx64Impl::Compile() {
488 // JIT state 490 // JIT state
489 mov(STATE, Common::X64::ABI_PARAM1); 491 mov(STATE, Common::X64::ABI_PARAM1);
490 mov(PARAMETERS, Common::X64::ABI_PARAM2); 492 mov(PARAMETERS, Common::X64::ABI_PARAM2);
493 mov(MAX_PARAMETER, Common::X64::ABI_PARAM3);
491 xor_(RESULT, RESULT); 494 xor_(RESULT, RESULT);
492 xor_(METHOD_ADDRESS, METHOD_ADDRESS); 495 xor_(METHOD_ADDRESS, METHOD_ADDRESS);
493 xor_(BRANCH_HOLDER, BRANCH_HOLDER); 496 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
@@ -598,7 +601,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
598 return true; 601 return true;
599} 602}
600 603
604static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
605 LOG_CRITICAL(HW_GPU,
606 "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
607 parameter, max_parameter - sizeof(u32));
608}
609
601Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { 610Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
611 Xbyak::Label parameter_ok{};
612 cmp(PARAMETERS, MAX_PARAMETER);
613 jb(parameter_ok, T_NEAR);
614 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
615 mov(Common::X64::ABI_PARAM1, PARAMETERS);
616 mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
617 Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
618 Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
619 L(parameter_ok);
602 mov(eax, dword[PARAMETERS]); 620 mov(eax, dword[PARAMETERS]);
603 add(PARAMETERS, sizeof(u32)); 621 add(PARAMETERS, sizeof(u32));
604 return eax; 622 return eax;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8ef79753f..159b71161 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -563,12 +563,11 @@ void RasterizerOpenGL::SyncViewport() {
563 flags[Dirty::FrontFace] = false; 563 flags[Dirty::FrontFace] = false;
564 564
565 GLenum mode = MaxwellToGL::FrontFace(regs.front_face); 565 GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
566 bool flip_faces = false; 566 bool flip_faces = true;
567 if (regs.screen_y_control.triangle_rast_flip != 0 && 567 if (regs.screen_y_control.triangle_rast_flip != 0) {
568 regs.viewport_transform[0].scale_y < 0.0f) {
569 flip_faces = !flip_faces; 568 flip_faces = !flip_faces;
570 } 569 }
571 if (regs.viewport_transform[0].scale_z < 0.0f) { 570 if (regs.viewport_transform[0].scale_y < 0.0f) {
572 flip_faces = !flip_faces; 571 flip_faces = !flip_faces;
573 } 572 }
574 if (flip_faces) { 573 if (flip_faces) {
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index bd50f7a68..d6e8b5ead 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -53,6 +53,8 @@ void ConfigureDebug::SetConfiguration() {
53 ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); 53 ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
54 ui->dump_shaders->setEnabled(runtime_lock); 54 ui->dump_shaders->setEnabled(runtime_lock);
55 ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue()); 55 ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue());
56 ui->dump_macros->setEnabled(runtime_lock);
57 ui->dump_macros->setChecked(Settings::values.dump_macros.GetValue());
56 ui->disable_macro_jit->setEnabled(runtime_lock); 58 ui->disable_macro_jit->setEnabled(runtime_lock);
57 ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); 59 ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
58 ui->disable_loop_safety_checks->setEnabled(runtime_lock); 60 ui->disable_loop_safety_checks->setEnabled(runtime_lock);
@@ -83,6 +85,7 @@ void ConfigureDebug::ApplyConfiguration() {
83 Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); 85 Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
84 Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); 86 Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
85 Settings::values.dump_shaders = ui->dump_shaders->isChecked(); 87 Settings::values.dump_shaders = ui->dump_shaders->isChecked();
88 Settings::values.dump_macros = ui->dump_macros->isChecked();
86 Settings::values.disable_shader_loop_safety_checks = 89 Settings::values.disable_shader_loop_safety_checks =
87 ui->disable_loop_safety_checks->isChecked(); 90 ui->disable_loop_safety_checks->isChecked();
88 Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); 91 Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index c1d90d588..863a3fd57 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -118,6 +118,19 @@
118 </property> 118 </property>
119 </widget> 119 </widget>
120 </item> 120 </item>
121 <item row="0" column="2">
122 <widget class="QCheckBox" name="dump_macros">
123 <property name="enabled">
124 <bool>true</bool>
125 </property>
126 <property name="toolTip">
127 <string>When checked, it will dump all the macro programs of the GPU</string>
128 </property>
129 <property name="text">
130 <string>Dump Maxwell Macros</string>
131 </property>
132 </widget>
133 </item>
121 <item row="0" column="1"> 134 <item row="0" column="1">
122 <widget class="QCheckBox" name="disable_macro_jit"> 135 <widget class="QCheckBox" name="disable_macro_jit">
123 <property name="enabled"> 136 <property name="enabled">