-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp  352
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h    109
2 files changed, 460 insertions(+), 1 deletion(-)
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9bc027cbf..48e23d4cd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -2,16 +2,368 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstddef>
+#include <memory>
 #include <vector>
 
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader/compiler_settings.h"
 
 namespace Vulkan {
 
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
+    VideoCommon::Shader::CompileDepth::FullDecompile};
+
+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+/// Gets if the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    const std::size_t start_offset = is_compute ? 0 : 10;
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & mask) == self_jumping_branch) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        ++offset;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code;
+    });
+    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+                                   program_code.size() * sizeof(u64));
+    program_code.resize(CalculateProgramSize(program_code, is_compute));
+    return program_code;
+}
+
+constexpr std::size_t GetStageFromProgram(std::size_t program) {
+    return program == 0 ? 0 : program - 1;
+}
+
+constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
+    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+}
+
+ShaderType GetShaderType(Maxwell::ShaderProgram program) {
+    switch (program) {
+    case Maxwell::ShaderProgram::VertexB:
+        return ShaderType::Vertex;
+    case Maxwell::ShaderProgram::TesselationControl:
+        return ShaderType::TesselationControl;
+    case Maxwell::ShaderProgram::TesselationEval:
+        return ShaderType::TesselationEval;
+    case Maxwell::ShaderProgram::Geometry:
+        return ShaderType::Geometry;
+    case Maxwell::ShaderProgram::Fragment:
+        return ShaderType::Fragment;
+    default:
+        UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
+        return ShaderType::Vertex;
+    }
+}
+
+u32 FillDescriptorLayout(const ShaderEntries& entries,
+                         std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                         Maxwell::ShaderProgram program_type, u32 base_binding) {
+    const ShaderType stage = GetStageFromProgram(program_type);
+    const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
+
+    u32 binding = base_binding;
+    const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
+        for (std::size_t i = 0; i < num_entries; ++i) {
+            bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
+        }
+    };
+    AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+    AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+    AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+    AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+    AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
+    return binding;
+}
+
+} // Anonymous namespace
+
+CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
+                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
+                           ProgramCode program_code, u32 main_offset)
+    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
+      program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
+      shader_ir{this->program_code, main_offset, compiler_settings, locker},
+      entries{GenerateShaderEntries(shader_ir)} {}
+
+CachedShader::~CachedShader() = default;
+
+Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
+    Core::System& system, Tegra::Engines::ShaderType stage) {
+    if (stage == Tegra::Engines::ShaderType::Compute) {
+        return system.GPU().KeplerCompute();
+    } else {
+        return system.GPU().Maxwell3D();
+    }
+}
+
+VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                                 const VKDevice& device, VKScheduler& scheduler,
+                                 VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue)
+    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
+      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
+      renderpass_cache(device) {}
+
+VKPipelineCache::~VKPipelineCache() = default;
+
+std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+    const auto& gpu = system.GPU().Maxwell3D();
+    auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
+    if (!dirty) {
+        return last_shaders;
+    }
+    dirty = false;
+
+    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        const auto& shader_config = gpu.regs.shader_config[index];
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+
+        // Skip stages that are not enabled
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const GPUVAddr program_addr{GetShaderAddress(system, program)};
+        const auto host_ptr{memory_manager.GetPointer(program_addr)};
+        auto shader = TryGet(host_ptr);
+        if (!shader) {
+            // No shader found - create a new one
+            constexpr u32 stage_offset = 10;
+            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+
+            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+            ASSERT(cpu_addr);
+
+            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
+                                                    host_ptr, std::move(code), stage_offset);
+            Register(shader);
+        }
+        shaders[index] = std::move(shader);
+    }
+    return last_shaders = shaders;
+}
+
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    if (last_graphics_pipeline && last_graphics_key == key) {
+        return *last_graphics_pipeline;
+    }
+    last_graphics_key = key;
+
+    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+    auto& entry = pair->second;
+    if (is_cache_miss) {
+        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+        const auto [program, bindings] = DecompileShaders(key);
+        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
+                                                     update_descriptor_queue, renderpass_cache, key,
+                                                     bindings, program);
+    }
+    return *(last_graphics_pipeline = entry.get());
+}
+
+VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
+    auto& entry = pair->second;
+    if (!is_cache_miss) {
+        return *entry;
+    }
+    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto program_addr = key.shader;
+    const auto host_ptr = memory_manager.GetPointer(program_addr);
+
+    auto shader = TryGet(host_ptr);
+    if (!shader) {
+        // No shader found - create a new one
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        ASSERT(cpu_addr);
+
+        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        constexpr u32 kernel_main_offset = 0;
+        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
+                                                program_addr, *cpu_addr, host_ptr, std::move(code),
+                                                kernel_main_offset);
+        Register(shader);
+    }
+
+    Specialization specialization;
+    specialization.workgroup_size = key.workgroup_size;
+    specialization.shared_memory_size = key.shared_memory_size;
+
+    const SPIRVShader spirv_shader{
+        Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
+        shader->GetEntries()};
+    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
+                                                update_descriptor_queue, spirv_shader);
+    return *entry;
+}
+
+void VKPipelineCache::Unregister(const Shader& shader) {
+    bool finished = false;
+    const auto Finish = [&] {
+        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
+        // flush.
+        if (finished) {
+            return;
+        }
+        finished = true;
+        scheduler.Finish();
+    };
+
+    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
+    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
+        auto& entry = it->first;
+        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
+            entry.shaders.end()) {
+            ++it;
+            continue;
+        }
+        Finish();
+        it = graphics_cache.erase(it);
+    }
+    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
+        auto& entry = it->first;
+        if (entry.shader != invalidated_addr) {
+            ++it;
+            continue;
+        }
+        Finish();
+        it = compute_cache.erase(it);
+    }
+
+    RasterizerCache::Unregister(shader);
+}
+
+std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
+VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
+    const auto& fixed_state = key.fixed_state;
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto& gpu = system.GPU().Maxwell3D();
+
+    Specialization specialization;
+    specialization.primitive_topology = fixed_state.input_assembly.topology;
+    if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+        ASSERT(fixed_state.input_assembly.point_size != 0.0f);
+        specialization.point_size = fixed_state.input_assembly.point_size;
+    }
+    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+        specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
+    }
+    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+    specialization.tessellation.primitive = fixed_state.tessellation.primitive;
+    specialization.tessellation.spacing = fixed_state.tessellation.spacing;
+    specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
+    for (const auto& rt : key.renderpass_params.color_attachments) {
+        specialization.enabled_rendertargets.set(rt.index);
+    }
+
+    SPIRVProgram program;
+    std::vector<vk::DescriptorSetLayoutBinding> bindings;
+
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
+
+        // Skip stages that are not enabled
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+
+        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        const auto shader = TryGet(host_ptr);
+        ASSERT(shader);
+
+        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
+        const auto program_type = GetShaderType(program_enum);
+        const auto& entries = shader->GetEntries();
+        program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
+                          entries};
+
+        if (program_enum == Maxwell::ShaderProgram::VertexA) {
+            // VertexB was combined with VertexA, so we skip the VertexB iteration
+            ++index;
+        }
+
+        const u32 old_binding = specialization.base_binding;
+        specialization.base_binding =
+            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
+        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+    }
+    return {std::move(program), std::move(bindings)};
+}
+
 void FillDescriptorUpdateTemplateEntries(
     const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
     std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
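Note on the lookup structure above: GetGraphicsPipeline is a two-level scheme. It first checks a one-entry "last used" slot, then falls back to an unordered_map keyed by the full pipeline state, compiling only on a miss. A minimal standalone sketch of that pattern follows (TwoLevelCache and compile are hypothetical names, not part of the commit; Key must be hashable, copyable, and equality-comparable, and compile must return std::unique_ptr<Pipeline>):

#include <memory>
#include <unordered_map>

template <typename Key, typename Pipeline>
class TwoLevelCache {
public:
    template <typename Compile>
    Pipeline& Get(const Key& key, Compile&& compile) {
        if (last_pipeline && last_key == key) {
            return *last_pipeline; // cheap path: same pipeline as the last draw
        }
        last_key = key;
        // try_emplace default-constructs the mapped unique_ptr on a miss and
        // leaves an existing entry untouched on a hit.
        const auto [it, is_miss] = cache.try_emplace(key);
        if (is_miss) {
            it->second = compile(key); // expensive path: decompile and build
        }
        last_pipeline = it->second.get();
        return *last_pipeline;
    }

private:
    Key last_key{};
    Pipeline* last_pipeline = nullptr;
    std::unordered_map<Key, std::unique_ptr<Pipeline>> cache;
};

The single-entry fast path is worthwhile because consecutive draws tend to reuse the same pipeline state, so most lookups skip both hashing and map traversal entirely.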
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e49ed135d..8678fc9c3 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,23 +6,49 @@
 
 #include <array>
 #include <cstddef>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <boost/functional/hash.hpp>
 
 #include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/const_buffer_locker.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
 
 namespace Vulkan {
 
+class RasterizerVulkan;
+class VKComputePipeline;
+class VKDescriptorPool;
 class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
 
+class CachedShader;
+using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
+using ProgramCode = std::vector<u64>;
+
 struct GraphicsPipelineCacheKey {
     FixedPipelineState fixed_state;
     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
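Both cache keys end up as std::unordered_map keys, which is why this header pulls in <boost/functional/hash.hpp>: each key type gets a std::hash specialization (the hunk below covers ComputePipelineCacheKey) that folds its fields into a seed with boost::hash_combine. A minimal sketch of that pattern with a hypothetical key type, not the commit's actual fields:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <boost/functional/hash.hpp>

struct ExampleCacheKey {
    std::uint64_t shader_addr;   // stand-in for a GPUVAddr
    std::uint32_t shared_memory; // stand-in for fixed pipeline state
    bool operator==(const ExampleCacheKey& rhs) const {
        return shader_addr == rhs.shader_addr && shared_memory == rhs.shared_memory;
    }
};

namespace std {
template <>
struct hash<ExampleCacheKey> {
    std::size_t operator()(const ExampleCacheKey& key) const noexcept {
        // Fold each field into the seed; equal keys hash equally because
        // operator== compares exactly the fields combined here.
        std::size_t seed = 0;
        boost::hash_combine(seed, key.shader_addr);
        boost::hash_combine(seed, key.shared_memory);
        return seed;
    }
};
} // namespace std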
@@ -84,7 +110,88 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class VKDevice;
+class CachedShader final : public RasterizerCacheObject {
+public:
+    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+    ~CachedShader();
+
+    GPUVAddr GetGpuAddr() const {
+        return gpu_addr;
+    }
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
+    }
+
+    std::size_t GetSizeInBytes() const override {
+        return program_code.size() * sizeof(u64);
+    }
+
+    VideoCommon::Shader::ShaderIR& GetIR() {
+        return shader_ir;
+    }
+
+    const VideoCommon::Shader::ShaderIR& GetIR() const {
+        return shader_ir;
+    }
+
+    const ShaderEntries& GetEntries() const {
+        return entries;
+    }
+
+private:
+    static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
+                                                                 Tegra::Engines::ShaderType stage);
+
+    GPUVAddr gpu_addr{};
+    VAddr cpu_addr{};
+    ProgramCode program_code;
+    VideoCommon::Shader::ConstBufferLocker locker;
+    VideoCommon::Shader::ShaderIR shader_ir;
+    ShaderEntries entries;
+};
+
+class VKPipelineCache final : public RasterizerCache<Shader> {
+public:
+    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                             const VKDevice& device, VKScheduler& scheduler,
+                             VKDescriptorPool& descriptor_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue);
+    ~VKPipelineCache();
+
+    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+
+    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+
+    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+
+protected:
+    void Unregister(const Shader& shader) override;
+
+    void FlushObjectInner(const Shader& object) override {}
+
+private:
+    std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
+        const GraphicsPipelineCacheKey& key);
+
+    Core::System& system;
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    VKDescriptorPool& descriptor_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+
+    VKRenderPassCache renderpass_cache;
+
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+
+    GraphicsPipelineCacheKey last_graphics_key;
+    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+
+    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
+        graphics_cache;
+    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
+};
 
 void FillDescriptorUpdateTemplateEntries(
     const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
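For reference, the shader-size recovery that GetShaderCode depends on can be read in isolation. The following is a simplified, self-contained restatement of CalculateProgramSize from the .cpp diff above (same constants and logic; the ProgramSize name and std::vector-based signature are illustrative only): walk 64-bit words, skip the scheduler word occupying every fourth slot, and stop at the self-jumping BRA or a zero word, counting the terminator itself.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

std::size_t ProgramSize(const std::vector<std::uint64_t>& code, bool is_compute) {
    // Graphics programs carry a 10-word header before the first instruction.
    const std::size_t start = is_compute ? 0 : 10;
    constexpr std::uint64_t self_jumping_branch = 0xE2400FFFFF07000FULL;
    constexpr std::uint64_t mask = 0xFFFFFFFFFF7FFFFFULL;
    std::size_t offset = start;
    while (offset < code.size()) {
        // Every fourth slot relative to the start is a sched word; its bit
        // pattern can collide with the terminator encodings, so skip it.
        const bool is_sched = ((offset - start) % 4) == 0;
        if (!is_sched) {
            const std::uint64_t insn = code[offset];
            if ((insn & mask) == self_jumping_branch || insn == 0) {
                break;
            }
        }
        ++offset;
    }
    // Include the terminating instruction in the reported size.
    return std::min(offset + 1, code.size());
}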