diff options
| -rw-r--r-- | CMakeModules/GenerateSCMRev.cmake | 4 | ||||
| -rw-r--r-- | src/common/CMakeLists.txt | 5 | ||||
| -rw-r--r-- | src/core/hle/kernel/process.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 2 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_hotkeys.cpp | 1 |
15 files changed, 104 insertions, 77 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 21e03ae98..fa7ae835f 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -5,6 +5,10 @@ function(get_timestamp _var) | |||
| 5 | endfunction() | 5 | endfunction() |
| 6 | 6 | ||
| 7 | list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules") | 7 | list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules") |
| 8 | |||
| 9 | # Find the package here with the known path so that the GetGit commands can find it as well | ||
| 10 | find_package(Git QUIET PATHS "${GIT_EXECUTABLE}") | ||
| 11 | |||
| 8 | # generate git/build information | 12 | # generate git/build information |
| 9 | include(GetGitRevisionDescription) | 13 | include(GetGitRevisionDescription) |
| 10 | get_git_head_revision(GIT_REF_SPEC GIT_REV) | 14 | get_git_head_revision(GIT_REF_SPEC GIT_REV) |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9b0c3db68..9afc6105d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -15,6 +15,10 @@ endif () | |||
| 15 | if (DEFINED ENV{DISPLAYVERSION}) | 15 | if (DEFINED ENV{DISPLAYVERSION}) |
| 16 | set(DISPLAY_VERSION $ENV{DISPLAYVERSION}) | 16 | set(DISPLAY_VERSION $ENV{DISPLAYVERSION}) |
| 17 | endif () | 17 | endif () |
| 18 | |||
| 19 | # Pass the path to git to the GenerateSCMRev.cmake as well | ||
| 20 | find_package(Git QUIET) | ||
| 21 | |||
| 18 | add_custom_command(OUTPUT scm_rev.cpp | 22 | add_custom_command(OUTPUT scm_rev.cpp |
| 19 | COMMAND ${CMAKE_COMMAND} | 23 | COMMAND ${CMAKE_COMMAND} |
| 20 | -DSRC_DIR="${CMAKE_SOURCE_DIR}" | 24 | -DSRC_DIR="${CMAKE_SOURCE_DIR}" |
| @@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 23 | -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}" | 27 | -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}" |
| 24 | -DBUILD_TAG="${BUILD_TAG}" | 28 | -DBUILD_TAG="${BUILD_TAG}" |
| 25 | -DBUILD_ID="${DISPLAY_VERSION}" | 29 | -DBUILD_ID="${DISPLAY_VERSION}" |
| 30 | -DGIT_EXECUTABLE="${GIT_EXECUTABLE}" | ||
| 26 | -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" | 31 | -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" |
| 27 | DEPENDS | 32 | DEPENDS |
| 28 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 12ea4ebe3..b9035a0be 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) { | |||
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { | 319 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { |
| 320 | code_memory_size += module_.memory.size(); | ||
| 321 | |||
| 320 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); | 322 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); |
| 321 | 323 | ||
| 322 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, | 324 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, |
| @@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { | |||
| 332 | MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); | 334 | MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); |
| 333 | MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); | 335 | MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); |
| 334 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); | 336 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); |
| 335 | |||
| 336 | code_memory_size += module_.memory.size(); | ||
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | Process::Process(Core::System& system) | 339 | Process::Process(Core::System& system) |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a35e7a195..16f95b77d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1271,8 +1271,6 @@ public: | |||
| 1271 | 1271 | ||
| 1272 | } dirty{}; | 1272 | } dirty{}; |
| 1273 | 1273 | ||
| 1274 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1275 | |||
| 1276 | /// Reads a register value located at the input method address | 1274 | /// Reads a register value located at the input method address |
| 1277 | u32 GetRegisterValue(u32 method) const; | 1275 | u32 GetRegisterValue(u32 method) const; |
| 1278 | 1276 | ||
| @@ -1367,6 +1365,8 @@ private: | |||
| 1367 | 1365 | ||
| 1368 | bool execute_on{true}; | 1366 | bool execute_on{true}; |
| 1369 | 1367 | ||
| 1368 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1369 | |||
| 1370 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1370 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1371 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1371 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1372 | 1372 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index de742d11c..a4acb3796 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR; | |||
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| 36 | 36 | ||
| 37 | // One UBO is always reserved for emulation values on staged shaders | ||
| 38 | constexpr u32 STAGE_RESERVED_UBOS = 1; | ||
| 39 | |||
| 40 | constexpr u32 STAGE_MAIN_OFFSET = 10; | 37 | constexpr u32 STAGE_MAIN_OFFSET = 10; |
| 41 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | 38 | constexpr u32 KERNEL_MAIN_OFFSET = 0; |
| 42 | 39 | ||
| @@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 243 | if (!code_b.empty()) { | 240 | if (!code_b.empty()) { |
| 244 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | 241 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); |
| 245 | } | 242 | } |
| 246 | const auto entries = GLShader::GetEntries(ir); | ||
| 247 | 243 | ||
| 248 | std::string source = fmt::format(R"(// {} | 244 | std::string source = fmt::format(R"(// {} |
| 249 | #version 430 core | 245 | #version 430 core |
| @@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 314 | 310 | ||
| 315 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 311 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, |
| 316 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 312 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) |
| 317 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, | 313 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, |
| 318 | device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, | 314 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, |
| 319 | shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { | 315 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, |
| 316 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | ||
| 320 | if (!params.precompiled_variants) { | 317 | if (!params.precompiled_variants) { |
| 321 | return; | 318 | return; |
| 322 | } | 319 | } |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 9770dda1c..ac99e6385 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -6,16 +6,20 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include <fmt/format.h> | 8 | #include <fmt/format.h> |
| 9 | |||
| 10 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 11 | 10 | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "common/scope_exit.h" | ||
| 15 | #include "video_core/renderer_opengl/utils.h" | 12 | #include "video_core/renderer_opengl/utils.h" |
| 16 | 13 | ||
| 17 | namespace OpenGL { | 14 | namespace OpenGL { |
| 18 | 15 | ||
| 16 | struct VertexArrayPushBuffer::Entry { | ||
| 17 | GLuint binding_index{}; | ||
| 18 | const GLuint* buffer{}; | ||
| 19 | GLintptr offset{}; | ||
| 20 | GLsizei stride{}; | ||
| 21 | }; | ||
| 22 | |||
| 19 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | 23 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; |
| 20 | 24 | ||
| 21 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | 25 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; |
| @@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() { | |||
| 47 | } | 51 | } |
| 48 | } | 52 | } |
| 49 | 53 | ||
| 54 | struct BindBuffersRangePushBuffer::Entry { | ||
| 55 | GLuint binding; | ||
| 56 | const GLuint* buffer; | ||
| 57 | GLintptr offset; | ||
| 58 | GLsizeiptr size; | ||
| 59 | }; | ||
| 60 | |||
| 50 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 61 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 51 | 62 | ||
| 52 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 63 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index d56153fe7..3ad7c02d4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -26,12 +26,7 @@ public: | |||
| 26 | void Bind(); | 26 | void Bind(); |
| 27 | 27 | ||
| 28 | private: | 28 | private: |
| 29 | struct Entry { | 29 | struct Entry; |
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | 30 | ||
| 36 | GLuint vao{}; | 31 | GLuint vao{}; |
| 37 | const GLuint* index_buffer{}; | 32 | const GLuint* index_buffer{}; |
| @@ -50,12 +45,7 @@ public: | |||
| 50 | void Bind(); | 45 | void Bind(); |
| 51 | 46 | ||
| 52 | private: | 47 | private: |
| 53 | struct Entry { | 48 | struct Entry; |
| 54 | GLuint binding; | ||
| 55 | const GLuint* buffer; | ||
| 56 | GLintptr offset; | ||
| 57 | GLsizeiptr size; | ||
| 58 | }; | ||
| 59 | 49 | ||
| 60 | GLenum target; | 50 | GLenum target; |
| 61 | std::vector<Entry> entries; | 51 | std::vector<Entry> entries; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 000e3616d..331808113 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt | |||
| 44 | return {}; | 44 | return {}; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | 47 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 48 | Tegra::Texture::TextureFilter filter) { | 48 | Tegra::Texture::TextureFilter filter) { |
| 49 | switch (wrap_mode) { | 49 | switch (wrap_mode) { |
| 50 | case Tegra::Texture::WrapMode::Wrap: | 50 | case Tegra::Texture::WrapMode::Wrap: |
| @@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | |||
| 56 | case Tegra::Texture::WrapMode::Border: | 56 | case Tegra::Texture::WrapMode::Border: |
| 57 | return vk::SamplerAddressMode::eClampToBorder; | 57 | return vk::SamplerAddressMode::eClampToBorder; |
| 58 | case Tegra::Texture::WrapMode::Clamp: | 58 | case Tegra::Texture::WrapMode::Clamp: |
| 59 | // TODO(Rodrigo): Emulate GL_CLAMP properly | 59 | if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { |
| 60 | // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this | ||
| 61 | // by sending an invalid enumeration. | ||
| 62 | return static_cast<vk::SamplerAddressMode>(0xcafe); | ||
| 63 | } | ||
| 64 | // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors | ||
| 60 | switch (filter) { | 65 | switch (filter) { |
| 61 | case Tegra::Texture::TextureFilter::Nearest: | 66 | case Tegra::Texture::TextureFilter::Nearest: |
| 62 | return vk::SamplerAddressMode::eClampToEdge; | 67 | return vk::SamplerAddressMode::eClampToEdge; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1534b738b..7e9678b7b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter); | |||
| 22 | 22 | ||
| 23 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); | 23 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); |
| 24 | 24 | ||
| 25 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | 25 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 26 | Tegra::Texture::TextureFilter filter); | 26 | Tegra::Texture::TextureFilter filter); |
| 27 | 27 | ||
| 28 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); | 28 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); |
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 1ce583f75..0a8ec8398 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp | |||
| @@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) | |||
| 46 | {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), | 46 | {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), |
| 47 | MaxwellToVK::Sampler::Filter(tsc.min_filter), | 47 | MaxwellToVK::Sampler::Filter(tsc.min_filter), |
| 48 | MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | 48 | MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), |
| 49 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), | 49 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), |
| 50 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), | 50 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), |
| 51 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), | 51 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), |
| 52 | has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, | 52 | has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, |
| 53 | MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), | 53 | MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), |
| 54 | tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), | 54 | tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index b427ac873..0229733b6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -65,7 +65,7 @@ struct BlockInfo { | |||
| 65 | 65 | ||
| 66 | struct CFGRebuildState { | 66 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) |
| 68 | : program_code{program_code}, start{start}, locker{locker} {} | 68 | : program_code{program_code}, locker{locker}, start{start} {} |
| 69 | 69 | ||
| 70 | const ProgramCode& program_code; | 70 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 71 | ConstBufferLocker& locker; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..8cc84e935 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| @@ -22,34 +23,39 @@ using Tegra::Shader::Register; | |||
| 22 | 23 | ||
| 23 | namespace { | 24 | namespace { |
| 24 | 25 | ||
| 25 | u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { | 26 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { |
| 27 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 28 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 29 | } | ||
| 30 | |||
| 31 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 26 | switch (uniform_type) { | 32 | switch (uniform_type) { |
| 27 | case Tegra::Shader::UniformType::UnsignedByte: | 33 | case Tegra::Shader::UniformType::UnsignedByte: |
| 28 | case Tegra::Shader::UniformType::Single: | 34 | return 0b11; |
| 29 | return 1; | 35 | case Tegra::Shader::UniformType::UnsignedShort: |
| 30 | case Tegra::Shader::UniformType::Double: | 36 | return 0b10; |
| 31 | return 2; | ||
| 32 | case Tegra::Shader::UniformType::Quad: | ||
| 33 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 34 | return 4; | ||
| 35 | default: | 37 | default: |
| 36 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | 38 | UNREACHABLE(); |
| 37 | return 1; | 39 | return 0; |
| 38 | } | 40 | } |
| 39 | } | 41 | } |
| 40 | 42 | ||
| 41 | u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { | 43 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { |
| 42 | switch (uniform_type) { | 44 | switch (uniform_type) { |
| 45 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 46 | return 8; | ||
| 47 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 48 | return 16; | ||
| 43 | case Tegra::Shader::UniformType::Single: | 49 | case Tegra::Shader::UniformType::Single: |
| 44 | return 1; | 50 | return 32; |
| 45 | case Tegra::Shader::UniformType::Double: | 51 | case Tegra::Shader::UniformType::Double: |
| 46 | return 2; | 52 | return 64; |
| 47 | case Tegra::Shader::UniformType::Quad: | 53 | case Tegra::Shader::UniformType::Quad: |
| 48 | case Tegra::Shader::UniformType::UnsignedQuad: | 54 | case Tegra::Shader::UniformType::UnsignedQuad: |
| 49 | return 4; | 55 | return 128; |
| 50 | default: | 56 | default: |
| 51 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | 57 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); |
| 52 | return 1; | 58 | return 32; |
| 53 | } | 59 | } |
| 54 | } | 60 | } |
| 55 | 61 | ||
| @@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 184 | }(); | 190 | }(); |
| 185 | 191 | ||
| 186 | const auto [real_address_base, base_address, descriptor] = | 192 | const auto [real_address_base, base_address, descriptor] = |
| 187 | TrackGlobalMemory(bb, instr, false); | 193 | TrackGlobalMemory(bb, instr, true, false); |
| 188 | 194 | ||
| 189 | const u32 count = GetLdgMemorySize(type); | 195 | const u32 size = GetMemorySize(type); |
| 196 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 190 | if (!real_address_base || !base_address) { | 197 | if (!real_address_base || !base_address) { |
| 191 | // Tracking failed, load zeroes. | 198 | // Tracking failed, load zeroes. |
| 192 | for (u32 i = 0; i < count; ++i) { | 199 | for (u32 i = 0; i < count; ++i) { |
| @@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 200 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 207 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 201 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 208 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 202 | 209 | ||
| 203 | if (type == Tegra::Shader::UniformType::UnsignedByte) { | 210 | // To handle unaligned loads get the bytes used to dereference global memory and extract |
| 204 | // To handle unaligned loads get the byte used to dereferenced global memory | 211 | // those bytes from the loaded u32. |
| 205 | // and extract that byte from the loaded uint32. | 212 | if (IsUnaligned(type)) { |
| 206 | Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); | 213 | Node mask = Immediate(GetUnalignedMask(type)); |
| 207 | byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); | 214 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); |
| 215 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 208 | 216 | ||
| 209 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), | 217 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), |
| 210 | Immediate(8)); | 218 | std::move(offset), Immediate(size)); |
| 211 | } | 219 | } |
| 212 | 220 | ||
| 213 | SetTemporary(bb, i, gmem); | 221 | SetTemporary(bb, i, gmem); |
| @@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 295 | } | 303 | } |
| 296 | }(); | 304 | }(); |
| 297 | 305 | ||
| 306 | // For unaligned reads we have to read memory too. | ||
| 307 | const bool is_read = IsUnaligned(type); | ||
| 298 | const auto [real_address_base, base_address, descriptor] = | 308 | const auto [real_address_base, base_address, descriptor] = |
| 299 | TrackGlobalMemory(bb, instr, true); | 309 | TrackGlobalMemory(bb, instr, is_read, true); |
| 300 | if (!real_address_base || !base_address) { | 310 | if (!real_address_base || !base_address) { |
| 301 | // Tracking failed, skip the store. | 311 | // Tracking failed, skip the store. |
| 302 | break; | 312 | break; |
| 303 | } | 313 | } |
| 304 | 314 | ||
| 305 | const u32 count = GetStgMemorySize(type); | 315 | const u32 size = GetMemorySize(type); |
| 316 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 306 | for (u32 i = 0; i < count; ++i) { | 317 | for (u32 i = 0; i < count; ++i) { |
| 307 | const Node it_offset = Immediate(i * 4); | 318 | const Node it_offset = Immediate(i * 4); |
| 308 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 319 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 309 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 320 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 310 | const Node value = GetRegister(instr.gpr0.Value() + i); | 321 | Node value = GetRegister(instr.gpr0.Value() + i); |
| 322 | |||
| 323 | if (IsUnaligned(type)) { | ||
| 324 | Node mask = Immediate(GetUnalignedMask(type)); | ||
| 325 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); | ||
| 326 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 327 | |||
| 328 | value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, | ||
| 329 | Immediate(size)); | ||
| 330 | } | ||
| 331 | |||
| 311 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | 332 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); |
| 312 | } | 333 | } |
| 313 | break; | 334 | break; |
| @@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 336 | 357 | ||
| 337 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | 358 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, |
| 338 | Instruction instr, | 359 | Instruction instr, |
| 339 | bool is_write) { | 360 | bool is_read, bool is_write) { |
| 340 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 361 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 341 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 362 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 342 | 363 | ||
| @@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& | |||
| 351 | const GlobalMemoryBase descriptor{index, offset}; | 372 | const GlobalMemoryBase descriptor{index, offset}; |
| 352 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 373 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 353 | auto& usage = entry->second; | 374 | auto& usage = entry->second; |
| 354 | if (is_write) { | 375 | usage.is_written |= is_write; |
| 355 | usage.is_written = true; | 376 | usage.is_read |= is_read; |
| 356 | } else { | ||
| 357 | usage.is_read = true; | ||
| 358 | } | ||
| 359 | 377 | ||
| 360 | const auto real_address = | 378 | const auto real_address = |
| 361 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | 379 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4b14cdf58..cd984f763 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | |||
| 794 | 794 | ||
| 795 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | 795 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, |
| 796 | bool is_tld4) { | 796 | bool is_tld4) { |
| 797 | const auto [coord_offsets, size, wrap_value, | 797 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; |
| 798 | diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { | 798 | const u32 size = is_tld4 ? 6 : 4; |
| 799 | if (is_tld4) { | 799 | const s32 wrap_value = is_tld4 ? 32 : 8; |
| 800 | return {{0, 8, 16}, 6, 32, 64}; | 800 | const s32 diff_value = is_tld4 ? 64 : 16; |
| 801 | } else { | ||
| 802 | return {{0, 4, 8}, 4, 8, 16}; | ||
| 803 | } | ||
| 804 | }(); | ||
| 805 | const u32 mask = (1U << size) - 1; | 801 | const u32 mask = (1U << size) - 1; |
| 806 | 802 | ||
| 807 | std::vector<Node> aoffi; | 803 | std::vector<Node> aoffi; |
| @@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor | |||
| 814 | LOG_WARNING(HW_GPU, | 810 | LOG_WARNING(HW_GPU, |
| 815 | "AOFFI constant folding failed, some hardware might have graphical issues"); | 811 | "AOFFI constant folding failed, some hardware might have graphical issues"); |
| 816 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | 812 | for (std::size_t coord = 0; coord < coord_count; ++coord) { |
| 817 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); | 813 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); |
| 818 | const Node condition = | 814 | const Node condition = |
| 819 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | 815 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); |
| 820 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | 816 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); |
| @@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor | |||
| 824 | } | 820 | } |
| 825 | 821 | ||
| 826 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | 822 | for (std::size_t coord = 0; coord < coord_count; ++coord) { |
| 827 | s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; | 823 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; |
| 828 | if (value >= wrap_value) { | 824 | if (value >= wrap_value) { |
| 829 | value -= diff_value; | 825 | value -= diff_value; |
| 830 | } | 826 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index aacd0a0da..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -394,7 +394,7 @@ private: | |||
| 394 | 394 | ||
| 395 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, | 395 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, |
| 396 | Tegra::Shader::Instruction instr, | 396 | Tegra::Shader::Instruction instr, |
| 397 | bool is_write); | 397 | bool is_read, bool is_write); |
| 398 | 398 | ||
| 399 | /// Register new amending code and obtain the reference id. | 399 | /// Register new amending code and obtain the reference id. |
| 400 | std::size_t DeclareAmend(Node new_amend); | 400 | std::size_t DeclareAmend(Node new_amend); |
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index 3ea0b8d67..fa9052136 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp | |||
| @@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) { | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | ui->hotkey_list->expandAll(); | 50 | ui->hotkey_list->expandAll(); |
| 51 | ui->hotkey_list->resizeColumnToContents(0); | ||
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | void ConfigureHotkeys::changeEvent(QEvent* event) { | 54 | void ConfigureHotkeys::changeEvent(QEvent* event) { |