diff options
| -rw-r--r-- | src/shader_recompiler/environment.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/shader_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader_environment.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/shader_environment.h | 6 |
7 files changed, 42 insertions, 26 deletions
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 26e8307c1..15285ab0a 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
| @@ -39,7 +39,7 @@ public: | |||
| 39 | [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank, | 39 | [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank, |
| 40 | u32 offset) = 0; | 40 | u32 offset) = 0; |
| 41 | 41 | ||
| 42 | virtual void Dump(u64 hash) = 0; | 42 | virtual void Dump(u64 pipeline_hash, u64 shader_hash) = 0; |
| 43 | 43 | ||
| 44 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | 44 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { |
| 45 | return sph; | 45 | return sph; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 618cb6354..2888e0238 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
| @@ -445,7 +445,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 445 | ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, | 445 | ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, |
| 446 | std::span<Shader::Environment* const> envs, bool use_shader_workers, | 446 | std::span<Shader::Environment* const> envs, bool use_shader_workers, |
| 447 | bool force_context_flush) try { | 447 | bool force_context_flush) try { |
| 448 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | 448 | auto hash = key.Hash(); |
| 449 | LOG_INFO(Render_OpenGL, "0x{:016x}", hash); | ||
| 449 | size_t env_index{}; | 450 | size_t env_index{}; |
| 450 | u32 total_storage_buffers{}; | 451 | u32 total_storage_buffers{}; |
| 451 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | 452 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| @@ -474,7 +475,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 474 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | 475 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); |
| 475 | 476 | ||
| 476 | if (Settings::values.dump_shaders) { | 477 | if (Settings::values.dump_shaders) { |
| 477 | env.Dump(key.unique_hashes[index]); | 478 | env.Dump(hash, key.unique_hashes[index]); |
| 478 | } | 479 | } |
| 479 | 480 | ||
| 480 | if (!uses_vertex_a || index != 1) { | 481 | if (!uses_vertex_a || index != 1) { |
| @@ -566,12 +567,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | |||
| 566 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | 567 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( |
| 567 | ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, | 568 | ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, |
| 568 | bool force_context_flush) try { | 569 | bool force_context_flush) try { |
| 569 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | 570 | auto hash = key.Hash(); |
| 571 | LOG_INFO(Render_OpenGL, "0x{:016x}", hash); | ||
| 570 | 572 | ||
| 571 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 573 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 572 | 574 | ||
| 573 | if (Settings::values.dump_shaders) { | 575 | if (Settings::values.dump_shaders) { |
| 574 | env.Dump(key.Hash()); | 576 | env.Dump(hash, key.unique_hash); |
| 575 | } | 577 | } |
| 576 | 578 | ||
| 577 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | 579 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 4f84d8497..c1314ca99 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
| @@ -584,7 +584,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 584 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, | 584 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, |
| 585 | std::span<Shader::Environment* const> envs, PipelineStatistics* statistics, | 585 | std::span<Shader::Environment* const> envs, PipelineStatistics* statistics, |
| 586 | bool build_in_parallel) try { | 586 | bool build_in_parallel) try { |
| 587 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 587 | auto hash = key.Hash(); |
| 588 | LOG_INFO(Render_Vulkan, "0x{:016x}", hash); | ||
| 588 | size_t env_index{0}; | 589 | size_t env_index{0}; |
| 589 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | 590 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 590 | const bool uses_vertex_a{key.unique_hashes[0] != 0}; | 591 | const bool uses_vertex_a{key.unique_hashes[0] != 0}; |
| @@ -611,7 +612,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 611 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | 612 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; |
| 612 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | 613 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); |
| 613 | if (Settings::values.dump_shaders) { | 614 | if (Settings::values.dump_shaders) { |
| 614 | env.Dump(key.unique_hashes[index]); | 615 | env.Dump(hash, key.unique_hashes[index]); |
| 615 | } | 616 | } |
| 616 | if (!uses_vertex_a || index != 1) { | 617 | if (!uses_vertex_a || index != 1) { |
| 617 | // Normal path | 618 | // Normal path |
| @@ -712,18 +713,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 712 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | 713 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 713 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, | 714 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 714 | PipelineStatistics* statistics, bool build_in_parallel) try { | 715 | PipelineStatistics* statistics, bool build_in_parallel) try { |
| 716 | auto hash = key.Hash(); | ||
| 715 | if (device.HasBrokenCompute()) { | 717 | if (device.HasBrokenCompute()) { |
| 716 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); | 718 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", hash); |
| 717 | return nullptr; | 719 | return nullptr; |
| 718 | } | 720 | } |
| 719 | 721 | ||
| 720 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 722 | LOG_INFO(Render_Vulkan, "0x{:016x}", hash); |
| 721 | 723 | ||
| 722 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 724 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 723 | 725 | ||
| 724 | // Dump it before error. | 726 | // Dump it before error. |
| 725 | if (Settings::values.dump_shaders) { | 727 | if (Settings::values.dump_shaders) { |
| 726 | env.Dump(key.Hash()); | 728 | env.Dump(hash, key.unique_hash); |
| 727 | } | 729 | } |
| 728 | 730 | ||
| 729 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | 731 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 01701201d..e81cd031b 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
| @@ -51,6 +51,11 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | |||
| 51 | } | 51 | } |
| 52 | const auto& shader_config{maxwell3d->regs.pipelines[index]}; | 52 | const auto& shader_config{maxwell3d->regs.pipelines[index]}; |
| 53 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)}; | 53 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)}; |
| 54 | if (program == Tegra::Engines::Maxwell3D::Regs::ShaderType::Pixel && | ||
| 55 | !maxwell3d->regs.rasterize_enable) { | ||
| 56 | unique_hashes[index] = 0; | ||
| 57 | continue; | ||
| 58 | } | ||
| 54 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | 59 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; |
| 55 | const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)}; | 60 | const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)}; |
| 56 | if (!cpu_shader_addr) { | 61 | if (!cpu_shader_addr) { |
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index de8e08002..a76896620 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
| @@ -70,7 +70,7 @@ public: | |||
| 70 | protected: | 70 | protected: |
| 71 | struct GraphicsEnvironments { | 71 | struct GraphicsEnvironments { |
| 72 | std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; | 72 | std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; |
| 73 | std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; | 73 | std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs{}; |
| 74 | 74 | ||
| 75 | std::span<Shader::Environment* const> Span() const noexcept { | 75 | std::span<Shader::Environment* const> Span() const noexcept { |
| 76 | return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); | 76 | return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index c7cb56243..4edbe5700 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
| @@ -102,7 +102,8 @@ static std::string_view StageToPrefix(Shader::Stage stage) { | |||
| 102 | } | 102 | } |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest, | 105 | static void DumpImpl(u64 pipeline_hash, u64 shader_hash, std::span<const u64> code, |
| 106 | [[maybe_unused]] u32 read_highest, [[maybe_unused]] u32 read_lowest, | ||
| 106 | u32 initial_offset, Shader::Stage stage) { | 107 | u32 initial_offset, Shader::Stage stage) { |
| 107 | const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; | 108 | const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; |
| 108 | const auto base_dir{shader_dir / "shaders"}; | 109 | const auto base_dir{shader_dir / "shaders"}; |
| @@ -111,13 +112,18 @@ static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowes | |||
| 111 | return; | 112 | return; |
| 112 | } | 113 | } |
| 113 | const auto prefix = StageToPrefix(stage); | 114 | const auto prefix = StageToPrefix(stage); |
| 114 | const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)}; | 115 | const auto name{base_dir / |
| 115 | const size_t real_size = read_highest - read_lowest + initial_offset; | 116 | fmt::format("{:016x}_{}_{:016x}.ash", pipeline_hash, prefix, shader_hash)}; |
| 116 | const size_t padding_needed = ((32 - (real_size % 32)) % 32); | ||
| 117 | std::fstream shader_file(name, std::ios::out | std::ios::binary); | 117 | std::fstream shader_file(name, std::ios::out | std::ios::binary); |
| 118 | ASSERT(initial_offset % sizeof(u64) == 0); | ||
| 118 | const size_t jump_index = initial_offset / sizeof(u64); | 119 | const size_t jump_index = initial_offset / sizeof(u64); |
| 119 | shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size); | 120 | const size_t code_size = code.size_bytes() - initial_offset; |
| 120 | for (size_t i = 0; i < padding_needed; i++) { | 121 | shader_file.write(reinterpret_cast<const char*>(&code[jump_index]), code_size); |
| 122 | |||
| 123 | // + 1 instruction, due to the fact that we skip the final self branch instruction in the code, | ||
| 124 | // but we need to consider it for padding, otherwise nvdisasm rages. | ||
| 125 | const size_t padding_needed = (32 - ((code_size + INST_SIZE) % 32)) % 32; | ||
| 126 | for (size_t i = 0; i < INST_SIZE + padding_needed; i++) { | ||
| 121 | shader_file.put(0); | 127 | shader_file.put(0); |
| 122 | } | 128 | } |
| 123 | } | 129 | } |
| @@ -197,8 +203,8 @@ u64 GenericEnvironment::CalculateHash() const { | |||
| 197 | return Common::CityHash64(data.get(), size); | 203 | return Common::CityHash64(data.get(), size); |
| 198 | } | 204 | } |
| 199 | 205 | ||
| 200 | void GenericEnvironment::Dump(u64 hash) { | 206 | void GenericEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) { |
| 201 | DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage); | 207 | DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage); |
| 202 | } | 208 | } |
| 203 | 209 | ||
| 204 | void GenericEnvironment::Serialize(std::ofstream& file) const { | 210 | void GenericEnvironment::Serialize(std::ofstream& file) const { |
| @@ -282,6 +288,7 @@ std::optional<u64> GenericEnvironment::TryFindSize() { | |||
| 282 | Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, | 288 | Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, |
| 283 | bool via_header_index, u32 raw) { | 289 | bool via_header_index, u32 raw) { |
| 284 | const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; | 290 | const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; |
| 291 | ASSERT(handle.first <= tic_limit); | ||
| 285 | const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; | 292 | const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; |
| 286 | Tegra::Texture::TICEntry entry; | 293 | Tegra::Texture::TICEntry entry; |
| 287 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | 294 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); |
| @@ -465,8 +472,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | |||
| 465 | .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) | 472 | .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) |
| 466 | .read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state)) | 473 | .read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state)) |
| 467 | .read(reinterpret_cast<char*>(&stage), sizeof(stage)); | 474 | .read(reinterpret_cast<char*>(&stage), sizeof(stage)); |
| 468 | code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); | 475 | code.resize(Common::DivCeil(code_size, sizeof(u64))); |
| 469 | file.read(reinterpret_cast<char*>(code.get()), code_size); | 476 | file.read(reinterpret_cast<char*>(code.data()), code_size); |
| 470 | for (size_t i = 0; i < num_texture_types; ++i) { | 477 | for (size_t i = 0; i < num_texture_types; ++i) { |
| 471 | u32 key; | 478 | u32 key; |
| 472 | Shader::TextureType type; | 479 | Shader::TextureType type; |
| @@ -509,8 +516,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | |||
| 509 | is_propietary_driver = texture_bound == 2; | 516 | is_propietary_driver = texture_bound == 2; |
| 510 | } | 517 | } |
| 511 | 518 | ||
| 512 | void FileEnvironment::Dump(u64 hash) { | 519 | void FileEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) { |
| 513 | DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage); | 520 | DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage); |
| 514 | } | 521 | } |
| 515 | 522 | ||
| 516 | u64 FileEnvironment::ReadInstruction(u32 address) { | 523 | u64 FileEnvironment::ReadInstruction(u32 address) { |
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index a0f61cbda..b90f3d44e 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
| @@ -58,7 +58,7 @@ public: | |||
| 58 | 58 | ||
| 59 | [[nodiscard]] u64 CalculateHash() const; | 59 | [[nodiscard]] u64 CalculateHash() const; |
| 60 | 60 | ||
| 61 | void Dump(u64 hash) override; | 61 | void Dump(u64 pipeline_hash, u64 shader_hash) override; |
| 62 | 62 | ||
| 63 | void Serialize(std::ofstream& file) const; | 63 | void Serialize(std::ofstream& file) const; |
| 64 | 64 | ||
| @@ -188,10 +188,10 @@ public: | |||
| 188 | return cbuf_replacements.size() != 0; | 188 | return cbuf_replacements.size() != 0; |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | void Dump(u64 hash) override; | 191 | void Dump(u64 pipeline_hash, u64 shader_hash) override; |
| 192 | 192 | ||
| 193 | private: | 193 | private: |
| 194 | std::unique_ptr<u64[]> code; | 194 | std::vector<u64> code; |
| 195 | std::unordered_map<u32, Shader::TextureType> texture_types; | 195 | std::unordered_map<u32, Shader::TextureType> texture_types; |
| 196 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | 196 | std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; |
| 197 | std::unordered_map<u64, u32> cbuf_values; | 197 | std::unordered_map<u64, u32> cbuf_values; |