diff options
22 files changed, 447 insertions, 232 deletions
diff --git a/externals/sirit b/externals/sirit | |||
| Subproject a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167c | Subproject eefca56afd49379bdebc97ded8b480839f93088 | ||
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index d9beaa3a4..212e442f4 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp | |||
| @@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) { | |||
| 24 | const std::size_t index{ResourceTypeToIndex(resource)}; | 24 | const std::size_t index{ResourceTypeToIndex(resource)}; |
| 25 | 25 | ||
| 26 | s64 new_value = current[index] + amount; | 26 | s64 new_value = current[index] + amount; |
| 27 | while (new_value > limit[index] && available[index] + amount <= limit[index]) { | 27 | if (new_value > limit[index] && available[index] + amount <= limit[index]) { |
| 28 | // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout | 28 | // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout |
| 29 | new_value = current[index] + amount; | 29 | new_value = current[index] + amount; |
| 30 | |||
| 31 | if (timeout >= 0) { | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | 30 | } |
| 35 | 31 | ||
| 36 | if (new_value <= limit[index]) { | 32 | if (new_value <= limit[index]) { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b827b112f..79fc9bbea 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -598,6 +598,7 @@ public: | |||
| 598 | BitField<4, 3, u32> block_height; | 598 | BitField<4, 3, u32> block_height; |
| 599 | BitField<8, 3, u32> block_depth; | 599 | BitField<8, 3, u32> block_depth; |
| 600 | BitField<12, 1, InvMemoryLayout> type; | 600 | BitField<12, 1, InvMemoryLayout> type; |
| 601 | BitField<16, 1, u32> is_3d; | ||
| 601 | } memory_layout; | 602 | } memory_layout; |
| 602 | union { | 603 | union { |
| 603 | BitField<0, 16, u32> layers; | 604 | BitField<0, 16, u32> layers; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a14641b97..890fc6c63 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -214,7 +214,8 @@ Device::Device() | |||
| 214 | has_precise_bug = TestPreciseBug(); | 214 | has_precise_bug = TestPreciseBug(); |
| 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 216 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 216 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 217 | GLAD_GL_NV_compute_program5; | 217 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 218 | GLAD_GL_NV_transform_feedback2; | ||
| 218 | 219 | ||
| 219 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 220 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 220 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 221 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 909ca9e0e..a48cee1e5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -94,6 +94,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 94 | return buffer.size; | 94 | return buffer.size; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | /// Translates hardware transform feedback indices | ||
| 98 | /// @param location Hardware location | ||
| 99 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | ||
| 100 | /// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt | ||
| 101 | std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | ||
| 102 | const u8 index = location / 4; | ||
| 103 | if (index >= 8 && index <= 39) { | ||
| 104 | return {GL_GENERIC_ATTRIB_NV, index - 8}; | ||
| 105 | } | ||
| 106 | if (index >= 48 && index <= 55) { | ||
| 107 | return {GL_TEXTURE_COORD_NV, index - 48}; | ||
| 108 | } | ||
| 109 | switch (index) { | ||
| 110 | case 7: | ||
| 111 | return {GL_POSITION, 0}; | ||
| 112 | case 40: | ||
| 113 | return {GL_PRIMARY_COLOR_NV, 0}; | ||
| 114 | case 41: | ||
| 115 | return {GL_SECONDARY_COLOR_NV, 0}; | ||
| 116 | case 42: | ||
| 117 | return {GL_BACK_PRIMARY_COLOR_NV, 0}; | ||
| 118 | case 43: | ||
| 119 | return {GL_BACK_SECONDARY_COLOR_NV, 0}; | ||
| 120 | } | ||
| 121 | UNIMPLEMENTED_MSG("index={}", static_cast<int>(index)); | ||
| 122 | return {GL_POSITION, 0}; | ||
| 123 | } | ||
| 124 | |||
| 97 | void oglEnable(GLenum cap, bool state) { | 125 | void oglEnable(GLenum cap, bool state) { |
| 98 | (state ? glEnable : glDisable)(cap); | 126 | (state ? glEnable : glDisable)(cap); |
| 99 | } | 127 | } |
| @@ -1548,12 +1576,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1548 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | 1576 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); |
| 1549 | } | 1577 | } |
| 1550 | 1578 | ||
| 1579 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1580 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal | ||
| 1581 | // when this is required. | ||
| 1582 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1583 | |||
| 1584 | static constexpr std::size_t STRIDE = 3; | ||
| 1585 | std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; | ||
| 1586 | std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; | ||
| 1587 | |||
| 1588 | GLint* cursor = attribs.data(); | ||
| 1589 | GLint* current_stream = streams.data(); | ||
| 1590 | |||
| 1591 | for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { | ||
| 1592 | const auto& layout = regs.tfb_layouts[feedback]; | ||
| 1593 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); | ||
| 1594 | if (layout.varying_count == 0) { | ||
| 1595 | continue; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | *current_stream = static_cast<GLint>(feedback); | ||
| 1599 | if (current_stream != streams.data()) { | ||
| 1600 | // When stepping one stream, push the expected token | ||
| 1601 | cursor[0] = GL_NEXT_BUFFER_NV; | ||
| 1602 | cursor[1] = 0; | ||
| 1603 | cursor[2] = 0; | ||
| 1604 | cursor += STRIDE; | ||
| 1605 | } | ||
| 1606 | ++current_stream; | ||
| 1607 | |||
| 1608 | const auto& locations = regs.tfb_varying_locs[feedback]; | ||
| 1609 | std::optional<u8> current_index; | ||
| 1610 | for (u32 offset = 0; offset < layout.varying_count; ++offset) { | ||
| 1611 | const u8 location = locations[offset]; | ||
| 1612 | const u8 index = location / 4; | ||
| 1613 | |||
| 1614 | if (current_index == index) { | ||
| 1615 | // Increase number of components of the previous attachment | ||
| 1616 | ++cursor[-2]; | ||
| 1617 | continue; | ||
| 1618 | } | ||
| 1619 | current_index = index; | ||
| 1620 | |||
| 1621 | std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); | ||
| 1622 | cursor[1] = 1; | ||
| 1623 | cursor += STRIDE; | ||
| 1624 | } | ||
| 1625 | } | ||
| 1626 | |||
| 1627 | const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); | ||
| 1628 | const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); | ||
| 1629 | glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), | ||
| 1630 | GL_INTERLEAVED_ATTRIBS); | ||
| 1631 | } | ||
| 1632 | |||
| 1551 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | 1633 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { |
| 1552 | const auto& regs = system.GPU().Maxwell3D().regs; | 1634 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 1553 | if (regs.tfb_enabled == 0) { | 1635 | if (regs.tfb_enabled == 0) { |
| 1554 | return; | 1636 | return; |
| 1555 | } | 1637 | } |
| 1556 | 1638 | ||
| 1639 | if (device.UseAssemblyShaders()) { | ||
| 1640 | SyncTransformFeedback(); | ||
| 1641 | } | ||
| 1642 | |||
| 1557 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 1643 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 1558 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 1644 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 1559 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 1645 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| @@ -1580,6 +1666,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
| 1580 | static_cast<GLsizeiptr>(size)); | 1666 | static_cast<GLsizeiptr>(size)); |
| 1581 | } | 1667 | } |
| 1582 | 1668 | ||
| 1669 | // We may have to call BeginTransformFeedbackNV here since they seem to call different | ||
| 1670 | // implementations on Nvidia's driver (the pointer is different) but we are using | ||
| 1671 | // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB | ||
| 1672 | // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. | ||
| 1583 | glBeginTransformFeedback(GL_POINTS); | 1673 | glBeginTransformFeedback(GL_POINTS); |
| 1584 | } | 1674 | } |
| 1585 | 1675 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 208a4485b..4f082592f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -201,6 +201,10 @@ private: | |||
| 201 | /// Syncs the framebuffer sRGB state to match the guest state | 201 | /// Syncs the framebuffer sRGB state to match the guest state |
| 202 | void SyncFramebufferSRGB(); | 202 | void SyncFramebufferSRGB(); |
| 203 | 203 | ||
| 204 | /// Syncs transform feedback state to match guest state | ||
| 205 | /// @note Only valid on assembly shaders | ||
| 206 | void SyncTransformFeedback(); | ||
| 207 | |||
| 204 | /// Begin a transform feedback | 208 | /// Begin a transform feedback |
| 205 | void BeginTransformFeedback(GLenum primitive_mode); | 209 | void BeginTransformFeedback(GLenum primitive_mode); |
| 206 | 210 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 57db5a08b..61505879b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param | |||
| 263 | target = GetTextureTarget(params.target); | 263 | target = GetTextureTarget(params.target); |
| 264 | texture = CreateTexture(params, target, internal_format, texture_buffer); | 264 | texture = CreateTexture(params, target, internal_format, texture_buffer); |
| 265 | DecorateSurfaceName(); | 265 | DecorateSurfaceName(); |
| 266 | main_view = CreateViewInner( | 266 | |
| 267 | ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), | 267 | u32 num_layers = 1; |
| 268 | true); | 268 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { |
| 269 | num_layers = params.depth; | ||
| 270 | } | ||
| 271 | |||
| 272 | main_view = | ||
| 273 | CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); | ||
| 269 | } | 274 | } |
| 270 | 275 | ||
| 271 | CachedSurface::~CachedSurface() = default; | 276 | CachedSurface::~CachedSurface() = default; |
| @@ -413,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p | |||
| 413 | 418 | ||
| 414 | CachedSurfaceView::~CachedSurfaceView() = default; | 419 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 415 | 420 | ||
| 416 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | 421 | void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { |
| 417 | ASSERT(params.num_levels == 1); | 422 | ASSERT(params.num_levels == 1); |
| 418 | 423 | ||
| 424 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 425 | if (params.num_layers > 1) { | ||
| 426 | ASSERT(params.base_layer == 0); | ||
| 427 | glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); | ||
| 428 | } else { | ||
| 429 | glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, | ||
| 430 | params.base_level, params.base_layer); | ||
| 431 | } | ||
| 432 | return; | ||
| 433 | } | ||
| 434 | |||
| 419 | if (params.num_layers > 1) { | 435 | if (params.num_layers > 1) { |
| 420 | // Layered framebuffer attachments | ||
| 421 | UNIMPLEMENTED_IF(params.base_layer != 0); | 436 | UNIMPLEMENTED_IF(params.base_layer != 0); |
| 422 | 437 | glFramebufferTexture(fb_target, attachment, GetTexture(), 0); | |
| 423 | switch (params.target) { | ||
| 424 | case SurfaceTarget::Texture2DArray: | ||
| 425 | glFramebufferTexture(target, attachment, GetTexture(), 0); | ||
| 426 | break; | ||
| 427 | default: | ||
| 428 | UNIMPLEMENTED(); | ||
| 429 | } | ||
| 430 | return; | 438 | return; |
| 431 | } | 439 | } |
| 432 | 440 | ||
| @@ -434,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | |||
| 434 | const GLuint texture = surface.GetTexture(); | 442 | const GLuint texture = surface.GetTexture(); |
| 435 | switch (surface.GetSurfaceParams().target) { | 443 | switch (surface.GetSurfaceParams().target) { |
| 436 | case SurfaceTarget::Texture1D: | 444 | case SurfaceTarget::Texture1D: |
| 437 | glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); | 445 | glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); |
| 438 | break; | 446 | break; |
| 439 | case SurfaceTarget::Texture2D: | 447 | case SurfaceTarget::Texture2D: |
| 440 | glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); | 448 | glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); |
| 441 | break; | 449 | break; |
| 442 | case SurfaceTarget::Texture1DArray: | 450 | case SurfaceTarget::Texture1DArray: |
| 443 | case SurfaceTarget::Texture2DArray: | 451 | case SurfaceTarget::Texture2DArray: |
| 444 | case SurfaceTarget::TextureCubemap: | 452 | case SurfaceTarget::TextureCubemap: |
| 445 | case SurfaceTarget::TextureCubeArray: | 453 | case SurfaceTarget::TextureCubeArray: |
| 446 | glFramebufferTextureLayer(target, attachment, texture, params.base_level, | 454 | glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, |
| 447 | params.base_layer); | 455 | params.base_layer); |
| 448 | break; | 456 | break; |
| 449 | default: | 457 | default: |
| @@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { | |||
| 500 | OGLTextureView texture_view; | 508 | OGLTextureView texture_view; |
| 501 | texture_view.Create(); | 509 | texture_view.Create(); |
| 502 | 510 | ||
| 503 | glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, | 511 | if (target == GL_TEXTURE_3D) { |
| 504 | params.num_levels, params.base_layer, params.num_layers); | 512 | glTextureView(texture_view.handle, target, surface.texture.handle, format, |
| 513 | params.base_level, params.num_levels, 0, 1); | ||
| 514 | } else { | ||
| 515 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 516 | params.base_level, params.num_levels, params.base_layer, params.num_layers); | ||
| 517 | } | ||
| 505 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); | 518 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); |
| 506 | 519 | ||
| 507 | return texture_view; | 520 | return texture_view; |
| @@ -544,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 544 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 557 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 545 | const auto& src_params{src_view->GetSurfaceParams()}; | 558 | const auto& src_params{src_view->GetSurfaceParams()}; |
| 546 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 559 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 547 | UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); | 560 | UNIMPLEMENTED_IF(src_params.depth != 1); |
| 548 | UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); | 561 | UNIMPLEMENTED_IF(dst_params.depth != 1); |
| 549 | 562 | ||
| 550 | state_tracker.NotifyScissor0(); | 563 | state_tracker.NotifyScissor0(); |
| 551 | state_tracker.NotifyFramebuffer(); | 564 | state_tracker.NotifyFramebuffer(); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8a2ac8603..bfc4ddf5d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -80,8 +80,10 @@ public: | |||
| 80 | explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); | 80 | explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); |
| 81 | ~CachedSurfaceView(); | 81 | ~CachedSurfaceView(); |
| 82 | 82 | ||
| 83 | /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER | 83 | /// @brief Attaches this texture view to the currently bound fb_target framebuffer |
| 84 | void Attach(GLenum attachment, GLenum target) const; | 84 | /// @param attachment Attachment to bind textures to |
| 85 | /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) | ||
| 86 | void Attach(GLenum attachment, GLenum fb_target) const; | ||
| 85 | 87 | ||
| 86 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, | 88 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, |
| 87 | Tegra::Texture::SwizzleSource y_source, | 89 | Tegra::Texture::SwizzleSource y_source, |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e7952924a..6214fcbc3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -751,11 +751,9 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 751 | } | 751 | } |
| 752 | 752 | ||
| 753 | bool RendererOpenGL::Init() { | 753 | bool RendererOpenGL::Init() { |
| 754 | if (GLAD_GL_KHR_debug) { | 754 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 755 | glEnable(GL_DEBUG_OUTPUT); | 755 | glEnable(GL_DEBUG_OUTPUT); |
| 756 | if (Settings::values.renderer_debug) { | 756 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); |
| 757 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 758 | } | ||
| 759 | glDebugMessageCallback(DebugHandler, nullptr); | 757 | glDebugMessageCallback(DebugHandler, nullptr); |
| 760 | } | 758 | } |
| 761 | 759 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e1b46277..281bf9ac3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | |||
| 53 | }; | 53 | }; |
| 54 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | 54 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); |
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | 55 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); |
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); | 56 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); |
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | 57 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); |
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); |
| 59 | 60 | ||
| 60 | VkDescriptorSetLayoutCreateInfo ci; | 61 | VkDescriptorSetLayoutCreateInfo ci; |
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 890fd52cf..9259b618d 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { | |||
| 42 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, | 42 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, |
| 43 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, | 43 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, |
| 44 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, | 44 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, |
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, | ||
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; | 46 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; |
| 46 | 47 | ||
| 47 | VkDescriptorPoolCreateInfo ci; | 48 | VkDescriptorPoolCreateInfo ci; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 150d86b62..ea66e621e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -46,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | |||
| 46 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | 46 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; |
| 47 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | 47 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; |
| 48 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | 48 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; |
| 49 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | ||
| 49 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 50 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |
| 50 | 51 | ||
| 51 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 52 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| @@ -105,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, | |||
| 105 | u32 binding = base_binding; | 106 | u32 binding = base_binding; |
| 106 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | 107 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); |
| 107 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | 108 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); |
| 108 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); | 109 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); |
| 109 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | 110 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); |
| 111 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 110 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | 112 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); |
| 111 | return binding; | 113 | return binding; |
| 112 | } | 114 | } |
| @@ -381,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 | |||
| 381 | return; | 383 | return; |
| 382 | } | 384 | } |
| 383 | 385 | ||
| 384 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { | 386 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || |
| 385 | // Nvidia has a bug where updating multiple uniform texels at once causes the driver to | 387 | descriptor_type == STORAGE_TEXEL_BUFFER) { |
| 386 | // crash. | 388 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. |
| 389 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | ||
| 387 | for (u32 i = 0; i < count; ++i) { | 390 | for (u32 i = 0; i < count; ++i) { |
| 388 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); | 391 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); |
| 389 | entry.dstBinding = binding + i; | 392 | entry.dstBinding = binding + i; |
| 390 | entry.dstArrayElement = 0; | 393 | entry.dstArrayElement = 0; |
| 391 | entry.descriptorCount = 1; | 394 | entry.descriptorCount = 1; |
| 392 | entry.descriptorType = descriptor_type; | 395 | entry.descriptorType = descriptor_type; |
| 393 | entry.offset = offset + i * entry_size; | 396 | entry.offset = static_cast<std::size_t>(offset + i * entry_size); |
| 394 | entry.stride = entry_size; | 397 | entry.stride = entry_size; |
| 395 | } | 398 | } |
| 396 | } else if (count > 0) { | 399 | } else if (count > 0) { |
| @@ -411,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries( | |||
| 411 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | 414 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { |
| 412 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); | 415 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); |
| 413 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); | 416 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); |
| 414 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); | 417 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); |
| 415 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); | 418 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); |
| 419 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); | ||
| 416 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); | 420 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); |
| 417 | } | 421 | } |
| 418 | 422 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ffea9ee36..3170c41f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -469,8 +469,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 469 | const auto& entries = pipeline.GetEntries(); | 469 | const auto& entries = pipeline.GetEntries(); |
| 470 | SetupComputeConstBuffers(entries); | 470 | SetupComputeConstBuffers(entries); |
| 471 | SetupComputeGlobalBuffers(entries); | 471 | SetupComputeGlobalBuffers(entries); |
| 472 | SetupComputeTexelBuffers(entries); | 472 | SetupComputeUniformTexels(entries); |
| 473 | SetupComputeTextures(entries); | 473 | SetupComputeTextures(entries); |
| 474 | SetupComputeStorageTexels(entries); | ||
| 474 | SetupComputeImages(entries); | 475 | SetupComputeImages(entries); |
| 475 | 476 | ||
| 476 | buffer_cache.Unmap(); | 477 | buffer_cache.Unmap(); |
| @@ -716,7 +717,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( | |||
| 716 | if (!view) { | 717 | if (!view) { |
| 717 | return false; | 718 | return false; |
| 718 | } | 719 | } |
| 719 | key.views.push_back(view->GetHandle()); | 720 | key.views.push_back(view->GetAttachment()); |
| 720 | key.width = std::min(key.width, view->GetWidth()); | 721 | key.width = std::min(key.width, view->GetWidth()); |
| 721 | key.height = std::min(key.height, view->GetHeight()); | 722 | key.height = std::min(key.height, view->GetHeight()); |
| 722 | key.layers = std::min(key.layers, view->GetNumLayers()); | 723 | key.layers = std::min(key.layers, view->GetNumLayers()); |
| @@ -788,8 +789,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 788 | const auto& entries = shader->GetEntries(); | 789 | const auto& entries = shader->GetEntries(); |
| 789 | SetupGraphicsConstBuffers(entries, stage); | 790 | SetupGraphicsConstBuffers(entries, stage); |
| 790 | SetupGraphicsGlobalBuffers(entries, stage); | 791 | SetupGraphicsGlobalBuffers(entries, stage); |
| 791 | SetupGraphicsTexelBuffers(entries, stage); | 792 | SetupGraphicsUniformTexels(entries, stage); |
| 792 | SetupGraphicsTextures(entries, stage); | 793 | SetupGraphicsTextures(entries, stage); |
| 794 | SetupGraphicsStorageTexels(entries, stage); | ||
| 793 | SetupGraphicsImages(entries, stage); | 795 | SetupGraphicsImages(entries, stage); |
| 794 | } | 796 | } |
| 795 | texture_cache.GuardSamplers(false); | 797 | texture_cache.GuardSamplers(false); |
| @@ -984,12 +986,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, | |||
| 984 | } | 986 | } |
| 985 | } | 987 | } |
| 986 | 988 | ||
| 987 | void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { | 989 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { |
| 988 | MICROPROFILE_SCOPE(Vulkan_Textures); | 990 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 989 | const auto& gpu = system.GPU().Maxwell3D(); | 991 | const auto& gpu = system.GPU().Maxwell3D(); |
| 990 | for (const auto& entry : entries.texel_buffers) { | 992 | for (const auto& entry : entries.uniform_texels) { |
| 991 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | 993 | const auto image = GetTextureInfo(gpu, entry, stage).tic; |
| 992 | SetupTexelBuffer(image, entry); | 994 | SetupUniformTexels(image, entry); |
| 993 | } | 995 | } |
| 994 | } | 996 | } |
| 995 | 997 | ||
| @@ -1004,6 +1006,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: | |||
| 1004 | } | 1006 | } |
| 1005 | } | 1007 | } |
| 1006 | 1008 | ||
| 1009 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { | ||
| 1010 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1011 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 1012 | for (const auto& entry : entries.storage_texels) { | ||
| 1013 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | ||
| 1014 | SetupStorageTexel(image, entry); | ||
| 1015 | } | ||
| 1016 | } | ||
| 1017 | |||
| 1007 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | 1018 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { |
| 1008 | MICROPROFILE_SCOPE(Vulkan_Images); | 1019 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1009 | const auto& gpu = system.GPU().Maxwell3D(); | 1020 | const auto& gpu = system.GPU().Maxwell3D(); |
| @@ -1036,12 +1047,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | |||
| 1036 | } | 1047 | } |
| 1037 | } | 1048 | } |
| 1038 | 1049 | ||
| 1039 | void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { | 1050 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1040 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1051 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1041 | const auto& gpu = system.GPU().KeplerCompute(); | 1052 | const auto& gpu = system.GPU().KeplerCompute(); |
| 1042 | for (const auto& entry : entries.texel_buffers) { | 1053 | for (const auto& entry : entries.uniform_texels) { |
| 1043 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | 1054 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; |
| 1044 | SetupTexelBuffer(image, entry); | 1055 | SetupUniformTexels(image, entry); |
| 1045 | } | 1056 | } |
| 1046 | } | 1057 | } |
| 1047 | 1058 | ||
| @@ -1056,6 +1067,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 1056 | } | 1067 | } |
| 1057 | } | 1068 | } |
| 1058 | 1069 | ||
| 1070 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 1071 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1072 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 1073 | for (const auto& entry : entries.storage_texels) { | ||
| 1074 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 1075 | SetupStorageTexel(image, entry); | ||
| 1076 | } | ||
| 1077 | } | ||
| 1078 | |||
| 1059 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 1079 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1060 | MICROPROFILE_SCOPE(Vulkan_Images); | 1080 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1061 | const auto& gpu = system.GPU().KeplerCompute(); | 1081 | const auto& gpu = system.GPU().KeplerCompute(); |
| @@ -1105,8 +1125,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1105 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 1125 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| 1106 | } | 1126 | } |
| 1107 | 1127 | ||
| 1108 | void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, | 1128 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, |
| 1109 | const TexelBufferEntry& entry) { | 1129 | const UniformTexelEntry& entry) { |
| 1110 | const auto view = texture_cache.GetTextureSurface(tic, entry); | 1130 | const auto view = texture_cache.GetTextureSurface(tic, entry); |
| 1111 | ASSERT(view->IsBufferView()); | 1131 | ASSERT(view->IsBufferView()); |
| 1112 | 1132 | ||
| @@ -1118,8 +1138,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1118 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 1138 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); |
| 1119 | ASSERT(!view->IsBufferView()); | 1139 | ASSERT(!view->IsBufferView()); |
| 1120 | 1140 | ||
| 1121 | const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, | 1141 | const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, |
| 1122 | texture.tic.z_source, texture.tic.w_source); | 1142 | texture.tic.z_source, texture.tic.w_source); |
| 1123 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | 1143 | const auto sampler = sampler_cache.GetSampler(texture.tsc); |
| 1124 | update_descriptor_queue.AddSampledImage(sampler, image_view); | 1144 | update_descriptor_queue.AddSampledImage(sampler, image_view); |
| 1125 | 1145 | ||
| @@ -1128,6 +1148,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1128 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | 1148 | sampled_views.push_back(ImageView{std::move(view), image_layout}); |
| 1129 | } | 1149 | } |
| 1130 | 1150 | ||
| 1151 | void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, | ||
| 1152 | const StorageTexelEntry& entry) { | ||
| 1153 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1154 | ASSERT(view->IsBufferView()); | ||
| 1155 | |||
| 1156 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1157 | } | ||
| 1158 | |||
| 1131 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | 1159 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { |
| 1132 | auto view = texture_cache.GetImageSurface(tic, entry); | 1160 | auto view = texture_cache.GetImageSurface(tic, entry); |
| 1133 | 1161 | ||
| @@ -1137,7 +1165,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima | |||
| 1137 | 1165 | ||
| 1138 | UNIMPLEMENTED_IF(tic.IsBuffer()); | 1166 | UNIMPLEMENTED_IF(tic.IsBuffer()); |
| 1139 | 1167 | ||
| 1140 | const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | 1168 | const VkImageView image_view = |
| 1169 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1141 | update_descriptor_queue.AddImage(image_view); | 1170 | update_descriptor_queue.AddImage(image_view); |
| 1142 | 1171 | ||
| 1143 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1172 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ef77c3622..c8c187606 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -193,12 +193,15 @@ private: | |||
| 193 | /// Setup global buffers in the graphics pipeline. | 193 | /// Setup global buffers in the graphics pipeline. |
| 194 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | 194 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); |
| 195 | 195 | ||
| 196 | /// Setup texel buffers in the graphics pipeline. | 196 | /// Setup uniform texels in the graphics pipeline. |
| 197 | void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); | 197 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |
| 198 | 198 | ||
| 199 | /// Setup textures in the graphics pipeline. | 199 | /// Setup textures in the graphics pipeline. |
| 200 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | 200 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); |
| 201 | 201 | ||
| 202 | /// Setup storage texels in the graphics pipeline. | ||
| 203 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 204 | |||
| 202 | /// Setup images in the graphics pipeline. | 205 | /// Setup images in the graphics pipeline. |
| 203 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | 206 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |
| 204 | 207 | ||
| @@ -209,11 +212,14 @@ private: | |||
| 209 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | 212 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); |
| 210 | 213 | ||
| 211 | /// Setup texel buffers in the compute pipeline. | 214 | /// Setup texel buffers in the compute pipeline. |
| 212 | void SetupComputeTexelBuffers(const ShaderEntries& entries); | 215 | void SetupComputeUniformTexels(const ShaderEntries& entries); |
| 213 | 216 | ||
| 214 | /// Setup textures in the compute pipeline. | 217 | /// Setup textures in the compute pipeline. |
| 215 | void SetupComputeTextures(const ShaderEntries& entries); | 218 | void SetupComputeTextures(const ShaderEntries& entries); |
| 216 | 219 | ||
| 220 | /// Setup storage texels in the compute pipeline. | ||
| 221 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 222 | |||
| 217 | /// Setup images in the compute pipeline. | 223 | /// Setup images in the compute pipeline. |
| 218 | void SetupComputeImages(const ShaderEntries& entries); | 224 | void SetupComputeImages(const ShaderEntries& entries); |
| 219 | 225 | ||
| @@ -222,10 +228,12 @@ private: | |||
| 222 | 228 | ||
| 223 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | 229 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); |
| 224 | 230 | ||
| 225 | void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); | 231 | void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); |
| 226 | 232 | ||
| 227 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | 233 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); |
| 228 | 234 | ||
| 235 | void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); | ||
| 236 | |||
| 229 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | 237 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 230 | 238 | ||
| 231 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 239 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a13e8baa7..97429cc59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -400,8 +400,9 @@ private: | |||
| 400 | u32 binding = specialization.base_binding; | 400 | u32 binding = specialization.base_binding; |
| 401 | binding = DeclareConstantBuffers(binding); | 401 | binding = DeclareConstantBuffers(binding); |
| 402 | binding = DeclareGlobalBuffers(binding); | 402 | binding = DeclareGlobalBuffers(binding); |
| 403 | binding = DeclareTexelBuffers(binding); | 403 | binding = DeclareUniformTexels(binding); |
| 404 | binding = DeclareSamplers(binding); | 404 | binding = DeclareSamplers(binding); |
| 405 | binding = DeclareStorageTexels(binding); | ||
| 405 | binding = DeclareImages(binding); | 406 | binding = DeclareImages(binding); |
| 406 | 407 | ||
| 407 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); | 408 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); |
| @@ -889,7 +890,7 @@ private: | |||
| 889 | return binding; | 890 | return binding; |
| 890 | } | 891 | } |
| 891 | 892 | ||
| 892 | u32 DeclareTexelBuffers(u32 binding) { | 893 | u32 DeclareUniformTexels(u32 binding) { |
| 893 | for (const auto& sampler : ir.GetSamplers()) { | 894 | for (const auto& sampler : ir.GetSamplers()) { |
| 894 | if (!sampler.is_buffer) { | 895 | if (!sampler.is_buffer) { |
| 895 | continue; | 896 | continue; |
| @@ -910,7 +911,7 @@ private: | |||
| 910 | Decorate(id, spv::Decoration::Binding, binding++); | 911 | Decorate(id, spv::Decoration::Binding, binding++); |
| 911 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | 912 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); |
| 912 | 913 | ||
| 913 | texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id}); | 914 | uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); |
| 914 | } | 915 | } |
| 915 | return binding; | 916 | return binding; |
| 916 | } | 917 | } |
| @@ -945,31 +946,48 @@ private: | |||
| 945 | return binding; | 946 | return binding; |
| 946 | } | 947 | } |
| 947 | 948 | ||
| 948 | u32 DeclareImages(u32 binding) { | 949 | u32 DeclareStorageTexels(u32 binding) { |
| 949 | for (const auto& image : ir.GetImages()) { | 950 | for (const auto& image : ir.GetImages()) { |
| 950 | const auto [dim, arrayed] = GetImageDim(image); | 951 | if (image.type != Tegra::Shader::ImageType::TextureBuffer) { |
| 951 | constexpr int depth = 0; | 952 | continue; |
| 952 | constexpr bool ms = false; | ||
| 953 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 954 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 955 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 956 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 957 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 958 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 959 | |||
| 960 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 961 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 962 | if (image.is_read && !image.is_written) { | ||
| 963 | Decorate(id, spv::Decoration::NonWritable); | ||
| 964 | } else if (image.is_written && !image.is_read) { | ||
| 965 | Decorate(id, spv::Decoration::NonReadable); | ||
| 966 | } | 953 | } |
| 954 | DeclareImage(image, binding); | ||
| 955 | } | ||
| 956 | return binding; | ||
| 957 | } | ||
| 967 | 958 | ||
| 968 | images.emplace(image.index, StorageImage{image_type, id}); | 959 | u32 DeclareImages(u32 binding) { |
| 960 | for (const auto& image : ir.GetImages()) { | ||
| 961 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 962 | continue; | ||
| 963 | } | ||
| 964 | DeclareImage(image, binding); | ||
| 969 | } | 965 | } |
| 970 | return binding; | 966 | return binding; |
| 971 | } | 967 | } |
| 972 | 968 | ||
| 969 | void DeclareImage(const Image& image, u32& binding) { | ||
| 970 | const auto [dim, arrayed] = GetImageDim(image); | ||
| 971 | constexpr int depth = 0; | ||
| 972 | constexpr bool ms = false; | ||
| 973 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 974 | const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; | ||
| 975 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 976 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 977 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 978 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 979 | |||
| 980 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 981 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 982 | if (image.is_read && !image.is_written) { | ||
| 983 | Decorate(id, spv::Decoration::NonWritable); | ||
| 984 | } else if (image.is_written && !image.is_read) { | ||
| 985 | Decorate(id, spv::Decoration::NonReadable); | ||
| 986 | } | ||
| 987 | |||
| 988 | images.emplace(image.index, StorageImage{image_type, id}); | ||
| 989 | } | ||
| 990 | |||
| 973 | bool IsRenderTargetEnabled(u32 rt) const { | 991 | bool IsRenderTargetEnabled(u32 rt) const { |
| 974 | for (u32 component = 0; component < 4; ++component) { | 992 | for (u32 component = 0; component < 4; ++component) { |
| 975 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | 993 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { |
| @@ -1256,7 +1274,7 @@ private: | |||
| 1256 | } else { | 1274 | } else { |
| 1257 | UNREACHABLE_MSG("Unmanaged offset node type"); | 1275 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 1258 | } | 1276 | } |
| 1259 | pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index, | 1277 | pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, |
| 1260 | buffer_element); | 1278 | buffer_element); |
| 1261 | } | 1279 | } |
| 1262 | return {OpLoad(t_float, pointer), Type::Float}; | 1280 | return {OpLoad(t_float, pointer), Type::Float}; |
| @@ -1611,7 +1629,7 @@ private: | |||
| 1611 | 1629 | ||
| 1612 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); | 1630 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); |
| 1613 | const Id carry = OpCompositeExtract(t_uint, result, 1); | 1631 | const Id carry = OpCompositeExtract(t_uint, result, 1); |
| 1614 | return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; | 1632 | return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; |
| 1615 | } | 1633 | } |
| 1616 | 1634 | ||
| 1617 | Expression LogicalAssign(Operation operation) { | 1635 | Expression LogicalAssign(Operation operation) { |
| @@ -1674,7 +1692,7 @@ private: | |||
| 1674 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | 1692 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1675 | const u32 index = meta.sampler.index; | 1693 | const u32 index = meta.sampler.index; |
| 1676 | if (meta.sampler.is_buffer) { | 1694 | if (meta.sampler.is_buffer) { |
| 1677 | const auto& entry = texel_buffers.at(index); | 1695 | const auto& entry = uniform_texels.at(index); |
| 1678 | return OpLoad(entry.image_type, entry.image); | 1696 | return OpLoad(entry.image_type, entry.image); |
| 1679 | } else { | 1697 | } else { |
| 1680 | const auto& entry = sampled_images.at(index); | 1698 | const auto& entry = sampled_images.at(index); |
| @@ -1951,39 +1969,20 @@ private: | |||
| 1951 | return {}; | 1969 | return {}; |
| 1952 | } | 1970 | } |
| 1953 | 1971 | ||
| 1954 | Expression AtomicImageAdd(Operation operation) { | 1972 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> |
| 1955 | UNIMPLEMENTED(); | 1973 | Expression AtomicImage(Operation operation) { |
| 1956 | return {}; | 1974 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1957 | } | 1975 | ASSERT(meta.values.size() == 1); |
| 1958 | |||
| 1959 | Expression AtomicImageMin(Operation operation) { | ||
| 1960 | UNIMPLEMENTED(); | ||
| 1961 | return {}; | ||
| 1962 | } | ||
| 1963 | |||
| 1964 | Expression AtomicImageMax(Operation operation) { | ||
| 1965 | UNIMPLEMENTED(); | ||
| 1966 | return {}; | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | Expression AtomicImageAnd(Operation operation) { | ||
| 1970 | UNIMPLEMENTED(); | ||
| 1971 | return {}; | ||
| 1972 | } | ||
| 1973 | |||
| 1974 | Expression AtomicImageOr(Operation operation) { | ||
| 1975 | UNIMPLEMENTED(); | ||
| 1976 | return {}; | ||
| 1977 | } | ||
| 1978 | 1976 | ||
| 1979 | Expression AtomicImageXor(Operation operation) { | 1977 | const Id coordinate = GetCoordinates(operation, Type::Int); |
| 1980 | UNIMPLEMENTED(); | 1978 | const Id image = images.at(meta.image.index).image; |
| 1981 | return {}; | 1979 | const Id sample = v_uint_zero; |
| 1982 | } | 1980 | const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); |
| 1983 | 1981 | ||
| 1984 | Expression AtomicImageExchange(Operation operation) { | 1982 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 1985 | UNIMPLEMENTED(); | 1983 | const Id semantics = v_uint_zero; |
| 1986 | return {}; | 1984 | const Id value = AsUint(Visit(meta.values[0])); |
| 1985 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 1987 | } | 1986 | } |
| 1988 | 1987 | ||
| 1989 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | 1988 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> |
| @@ -1998,7 +1997,7 @@ private: | |||
| 1998 | return {v_float_zero, Type::Float}; | 1997 | return {v_float_zero, Type::Float}; |
| 1999 | } | 1998 | } |
| 2000 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | 1999 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 2001 | const Id semantics = Constant(t_uint, 0); | 2000 | const Id semantics = v_uint_zero; |
| 2002 | const Id value = AsUint(Visit(operation[1])); | 2001 | const Id value = AsUint(Visit(operation[1])); |
| 2003 | 2002 | ||
| 2004 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | 2003 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; |
| @@ -2622,11 +2621,11 @@ private: | |||
| 2622 | 2621 | ||
| 2623 | &SPIRVDecompiler::ImageLoad, | 2622 | &SPIRVDecompiler::ImageLoad, |
| 2624 | &SPIRVDecompiler::ImageStore, | 2623 | &SPIRVDecompiler::ImageStore, |
| 2625 | &SPIRVDecompiler::AtomicImageAdd, | 2624 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, |
| 2626 | &SPIRVDecompiler::AtomicImageAnd, | 2625 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, |
| 2627 | &SPIRVDecompiler::AtomicImageOr, | 2626 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, |
| 2628 | &SPIRVDecompiler::AtomicImageXor, | 2627 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, |
| 2629 | &SPIRVDecompiler::AtomicImageExchange, | 2628 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, |
| 2630 | 2629 | ||
| 2631 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | 2630 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, |
| 2632 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | 2631 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, |
| @@ -2768,8 +2767,11 @@ private: | |||
| 2768 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 2767 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); |
| 2769 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | 2768 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); |
| 2770 | 2769 | ||
| 2770 | const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); | ||
| 2771 | |||
| 2771 | const Id v_float_zero = Constant(t_float, 0.0f); | 2772 | const Id v_float_zero = Constant(t_float, 0.0f); |
| 2772 | const Id v_float_one = Constant(t_float, 1.0f); | 2773 | const Id v_float_one = Constant(t_float, 1.0f); |
| 2774 | const Id v_uint_zero = Constant(t_uint, 0); | ||
| 2773 | 2775 | ||
| 2774 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) | 2776 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) |
| 2775 | const Id v_varying_default = | 2777 | const Id v_varying_default = |
| @@ -2794,15 +2796,16 @@ private: | |||
| 2794 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; | 2796 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2795 | std::map<u32, Id> constant_buffers; | 2797 | std::map<u32, Id> constant_buffers; |
| 2796 | std::map<GlobalMemoryBase, Id> global_buffers; | 2798 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2797 | std::map<u32, TexelBuffer> texel_buffers; | 2799 | std::map<u32, TexelBuffer> uniform_texels; |
| 2798 | std::map<u32, SampledImage> sampled_images; | 2800 | std::map<u32, SampledImage> sampled_images; |
| 2801 | std::map<u32, TexelBuffer> storage_texels; | ||
| 2799 | std::map<u32, StorageImage> images; | 2802 | std::map<u32, StorageImage> images; |
| 2800 | 2803 | ||
| 2804 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2801 | Id instance_index{}; | 2805 | Id instance_index{}; |
| 2802 | Id vertex_index{}; | 2806 | Id vertex_index{}; |
| 2803 | Id base_instance{}; | 2807 | Id base_instance{}; |
| 2804 | Id base_vertex{}; | 2808 | Id base_vertex{}; |
| 2805 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2806 | Id frag_depth{}; | 2809 | Id frag_depth{}; |
| 2807 | Id frag_coord{}; | 2810 | Id frag_coord{}; |
| 2808 | Id front_facing{}; | 2811 | Id front_facing{}; |
| @@ -3058,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3058 | } | 3061 | } |
| 3059 | for (const auto& sampler : ir.GetSamplers()) { | 3062 | for (const auto& sampler : ir.GetSamplers()) { |
| 3060 | if (sampler.is_buffer) { | 3063 | if (sampler.is_buffer) { |
| 3061 | entries.texel_buffers.emplace_back(sampler); | 3064 | entries.uniform_texels.emplace_back(sampler); |
| 3062 | } else { | 3065 | } else { |
| 3063 | entries.samplers.emplace_back(sampler); | 3066 | entries.samplers.emplace_back(sampler); |
| 3064 | } | 3067 | } |
| 3065 | } | 3068 | } |
| 3066 | for (const auto& image : ir.GetImages()) { | 3069 | for (const auto& image : ir.GetImages()) { |
| 3067 | entries.images.emplace_back(image); | 3070 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { |
| 3071 | entries.storage_texels.emplace_back(image); | ||
| 3072 | } else { | ||
| 3073 | entries.images.emplace_back(image); | ||
| 3074 | } | ||
| 3068 | } | 3075 | } |
| 3069 | for (const auto& attribute : ir.GetInputAttributes()) { | 3076 | for (const auto& attribute : ir.GetInputAttributes()) { |
| 3070 | if (IsGenericAttribute(attribute)) { | 3077 | if (IsGenericAttribute(attribute)) { |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index b7af26388..2b0e90396 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -21,8 +21,9 @@ class VKDevice; | |||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 24 | using TexelBufferEntry = VideoCommon::Shader::Sampler; | 24 | using UniformTexelEntry = VideoCommon::Shader::Sampler; |
| 25 | using SamplerEntry = VideoCommon::Shader::Sampler; | 25 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 26 | using StorageTexelEntry = VideoCommon::Shader::Image; | ||
| 26 | using ImageEntry = VideoCommon::Shader::Image; | 27 | using ImageEntry = VideoCommon::Shader::Image; |
| 27 | 28 | ||
| 28 | constexpr u32 DESCRIPTOR_SET = 0; | 29 | constexpr u32 DESCRIPTOR_SET = 0; |
| @@ -66,13 +67,15 @@ private: | |||
| 66 | struct ShaderEntries { | 67 | struct ShaderEntries { |
| 67 | u32 NumBindings() const { | 68 | u32 NumBindings() const { |
| 68 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + | 69 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + |
| 69 | texel_buffers.size() + samplers.size() + images.size()); | 70 | uniform_texels.size() + samplers.size() + storage_texels.size() + |
| 71 | images.size()); | ||
| 70 | } | 72 | } |
| 71 | 73 | ||
| 72 | std::vector<ConstBufferEntry> const_buffers; | 74 | std::vector<ConstBufferEntry> const_buffers; |
| 73 | std::vector<GlobalBufferEntry> global_buffers; | 75 | std::vector<GlobalBufferEntry> global_buffers; |
| 74 | std::vector<TexelBufferEntry> texel_buffers; | 76 | std::vector<UniformTexelEntry> uniform_texels; |
| 75 | std::vector<SamplerEntry> samplers; | 77 | std::vector<SamplerEntry> samplers; |
| 78 | std::vector<StorageTexelEntry> storage_texels; | ||
| 76 | std::vector<ImageEntry> images; | 79 | std::vector<ImageEntry> images; |
| 77 | std::set<u32> attributes; | 80 | std::set<u32> attributes; |
| 78 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 81 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 2f1d5021d..430031665 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, | |||
| 100 | ci.pNext = nullptr; | 100 | ci.pNext = nullptr; |
| 101 | ci.flags = 0; | 101 | ci.flags = 0; |
| 102 | ci.size = static_cast<VkDeviceSize>(host_memory_size); | 102 | ci.size = static_cast<VkDeviceSize>(host_memory_size); |
| 103 | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | 103 | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| 104 | VK_BUFFER_USAGE_TRANSFER_DST_BIT; | 104 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; |
| 105 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | 105 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 106 | ci.queueFamilyIndexCount = 0; | 106 | ci.queueFamilyIndexCount = 0; |
| 107 | ci.pQueueFamilyIndices = nullptr; | 107 | ci.pQueueFamilyIndices = nullptr; |
| @@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP | |||
| 167 | ci.extent = {params.width, params.height, 1}; | 167 | ci.extent = {params.width, params.height, 1}; |
| 168 | break; | 168 | break; |
| 169 | case SurfaceTarget::Texture3D: | 169 | case SurfaceTarget::Texture3D: |
| 170 | ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; | ||
| 170 | ci.extent = {params.width, params.height, params.depth}; | 171 | ci.extent = {params.width, params.height, params.depth}; |
| 171 | break; | 172 | break; |
| 172 | case SurfaceTarget::TextureBuffer: | 173 | case SurfaceTarget::TextureBuffer: |
| @@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP | |||
| 176 | return ci; | 177 | return ci; |
| 177 | } | 178 | } |
| 178 | 179 | ||
| 180 | u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, | ||
| 181 | Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { | ||
| 182 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 183 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 184 | } | ||
| 185 | |||
| 179 | } // Anonymous namespace | 186 | } // Anonymous namespace |
| 180 | 187 | ||
| 181 | CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | 188 | CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, |
| @@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | |||
| 203 | } | 210 | } |
| 204 | 211 | ||
| 205 | // TODO(Rodrigo): Move this to a virtual function. | 212 | // TODO(Rodrigo): Move this to a virtual function. |
| 206 | main_view = CreateViewInner( | 213 | u32 num_layers = 1; |
| 207 | ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), | 214 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { |
| 208 | true); | 215 | num_layers = params.depth; |
| 216 | } | ||
| 217 | main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); | ||
| 209 | } | 218 | } |
| 210 | 219 | ||
| 211 | CachedSurface::~CachedSurface() = default; | 220 | CachedSurface::~CachedSurface() = default; |
| @@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() { | |||
| 253 | } | 262 | } |
| 254 | 263 | ||
| 255 | View CachedSurface::CreateView(const ViewParams& params) { | 264 | View CachedSurface::CreateView(const ViewParams& params) { |
| 256 | return CreateViewInner(params, false); | ||
| 257 | } | ||
| 258 | |||
| 259 | View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { | ||
| 260 | // TODO(Rodrigo): Add name decorations | 265 | // TODO(Rodrigo): Add name decorations |
| 261 | return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); | 266 | return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params); |
| 262 | } | 267 | } |
| 263 | 268 | ||
| 264 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | 269 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { |
| @@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | |||
| 342 | } | 347 | } |
| 343 | 348 | ||
| 344 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | 349 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, |
| 345 | const ViewParams& params, bool is_proxy) | 350 | const ViewParams& params) |
| 346 | : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, | 351 | : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, |
| 347 | image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, | 352 | image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, |
| 348 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, | 353 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, |
| 349 | base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, | 354 | base_level{params.base_level}, num_levels{params.num_levels}, |
| 350 | num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) | 355 | image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { |
| 351 | : VK_IMAGE_VIEW_TYPE_1D} {} | 356 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { |
| 357 | base_layer = 0; | ||
| 358 | num_layers = 1; | ||
| 359 | base_slice = params.base_layer; | ||
| 360 | num_slices = params.num_layers; | ||
| 361 | } else { | ||
| 362 | base_layer = params.base_layer; | ||
| 363 | num_layers = params.num_layers; | ||
| 364 | } | ||
| 365 | } | ||
| 352 | 366 | ||
| 353 | CachedSurfaceView::~CachedSurfaceView() = default; | 367 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 354 | 368 | ||
| 355 | VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, | 369 | VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, |
| 356 | SwizzleSource z_source, SwizzleSource w_source) { | 370 | SwizzleSource z_source, SwizzleSource w_source) { |
| 357 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 371 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 358 | if (last_image_view && last_swizzle == new_swizzle) { | 372 | if (last_image_view && last_swizzle == new_swizzle) { |
| 359 | return last_image_view; | 373 | return last_image_view; |
| @@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 399 | }); | 413 | }); |
| 400 | } | 414 | } |
| 401 | 415 | ||
| 416 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | ||
| 417 | ASSERT(base_slice == 0); | ||
| 418 | ASSERT(num_slices == params.depth); | ||
| 419 | } | ||
| 420 | |||
| 402 | VkImageViewCreateInfo ci; | 421 | VkImageViewCreateInfo ci; |
| 403 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; | 422 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; |
| 404 | ci.pNext = nullptr; | 423 | ci.pNext = nullptr; |
| @@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 417 | return last_image_view = *image_view; | 436 | return last_image_view = *image_view; |
| 418 | } | 437 | } |
| 419 | 438 | ||
| 439 | VkImageView CachedSurfaceView::GetAttachment() { | ||
| 440 | if (render_target) { | ||
| 441 | return *render_target; | ||
| 442 | } | ||
| 443 | |||
| 444 | VkImageViewCreateInfo ci; | ||
| 445 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; | ||
| 446 | ci.pNext = nullptr; | ||
| 447 | ci.flags = 0; | ||
| 448 | ci.image = surface.GetImageHandle(); | ||
| 449 | ci.format = surface.GetImage().GetFormat(); | ||
| 450 | ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 451 | VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; | ||
| 452 | ci.subresourceRange.aspectMask = aspect_mask; | ||
| 453 | ci.subresourceRange.baseMipLevel = base_level; | ||
| 454 | ci.subresourceRange.levelCount = num_levels; | ||
| 455 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | ||
| 456 | ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; | ||
| 457 | ci.subresourceRange.baseArrayLayer = base_slice; | ||
| 458 | ci.subresourceRange.layerCount = num_slices; | ||
| 459 | } else { | ||
| 460 | ci.viewType = image_view_type; | ||
| 461 | ci.subresourceRange.baseArrayLayer = base_layer; | ||
| 462 | ci.subresourceRange.layerCount = num_layers; | ||
| 463 | } | ||
| 464 | render_target = device.GetLogical().CreateImageView(ci); | ||
| 465 | return *render_target; | ||
| 466 | } | ||
| 467 | |||
| 420 | VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 468 | VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 421 | const VKDevice& device, VKResourceManager& resource_manager, | 469 | const VKDevice& device, VKResourceManager& resource_manager, |
| 422 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | 470 | VKMemoryManager& memory_manager, VKScheduler& scheduler, |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f211ccb1e..807e26c8a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -91,7 +91,6 @@ protected: | |||
| 91 | void DecorateSurfaceName(); | 91 | void DecorateSurfaceName(); |
| 92 | 92 | ||
| 93 | View CreateView(const ViewParams& params) override; | 93 | View CreateView(const ViewParams& params) override; |
| 94 | View CreateViewInner(const ViewParams& params, bool is_proxy); | ||
| 95 | 94 | ||
| 96 | private: | 95 | private: |
| 97 | void UploadBuffer(const std::vector<u8>& staging_buffer); | 96 | void UploadBuffer(const std::vector<u8>& staging_buffer); |
| @@ -120,23 +119,20 @@ private: | |||
| 120 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 119 | class CachedSurfaceView final : public VideoCommon::ViewBase { |
| 121 | public: | 120 | public: |
| 122 | explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | 121 | explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, |
| 123 | const ViewParams& params, bool is_proxy); | 122 | const ViewParams& params); |
| 124 | ~CachedSurfaceView(); | 123 | ~CachedSurfaceView(); |
| 125 | 124 | ||
| 126 | VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, | 125 | VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, |
| 127 | Tegra::Texture::SwizzleSource y_source, | 126 | Tegra::Texture::SwizzleSource y_source, |
| 128 | Tegra::Texture::SwizzleSource z_source, | 127 | Tegra::Texture::SwizzleSource z_source, |
| 129 | Tegra::Texture::SwizzleSource w_source); | 128 | Tegra::Texture::SwizzleSource w_source); |
| 129 | |||
| 130 | VkImageView GetAttachment(); | ||
| 130 | 131 | ||
| 131 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | 132 | bool IsSameSurface(const CachedSurfaceView& rhs) const { |
| 132 | return &surface == &rhs.surface; | 133 | return &surface == &rhs.surface; |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | VkImageView GetHandle() { | ||
| 136 | return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, | ||
| 137 | Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetWidth() const { | 136 | u32 GetWidth() const { |
| 141 | return params.GetMipWidth(base_level); | 137 | return params.GetMipWidth(base_level); |
| 142 | } | 138 | } |
| @@ -180,14 +176,6 @@ public: | |||
| 180 | } | 176 | } |
| 181 | 177 | ||
| 182 | private: | 178 | private: |
| 183 | static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 184 | Tegra::Texture::SwizzleSource y_source, | ||
| 185 | Tegra::Texture::SwizzleSource z_source, | ||
| 186 | Tegra::Texture::SwizzleSource w_source) { | ||
| 187 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 188 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 189 | } | ||
| 190 | |||
| 191 | // Store a copy of these values to avoid double dereference when reading them | 179 | // Store a copy of these values to avoid double dereference when reading them |
| 192 | const SurfaceParams params; | 180 | const SurfaceParams params; |
| 193 | const VkImage image; | 181 | const VkImage image; |
| @@ -196,15 +184,18 @@ private: | |||
| 196 | 184 | ||
| 197 | const VKDevice& device; | 185 | const VKDevice& device; |
| 198 | CachedSurface& surface; | 186 | CachedSurface& surface; |
| 199 | const u32 base_layer; | ||
| 200 | const u32 num_layers; | ||
| 201 | const u32 base_level; | 187 | const u32 base_level; |
| 202 | const u32 num_levels; | 188 | const u32 num_levels; |
| 203 | const VkImageViewType image_view_type; | 189 | const VkImageViewType image_view_type; |
| 190 | u32 base_layer = 0; | ||
| 191 | u32 num_layers = 0; | ||
| 192 | u32 base_slice = 0; | ||
| 193 | u32 num_slices = 0; | ||
| 204 | 194 | ||
| 205 | VkImageView last_image_view = nullptr; | 195 | VkImageView last_image_view = nullptr; |
| 206 | u32 last_swizzle = 0; | 196 | u32 last_swizzle = 0; |
| 207 | 197 | ||
| 198 | vk::ImageView render_target; | ||
| 208 | std::unordered_map<u32, vk::ImageView> view_cache; | 199 | std::unordered_map<u32, vk::ImageView> view_cache; |
| 209 | }; | 200 | }; |
| 210 | 201 | ||
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 715f39d0d..94d3a6ae5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 248 | 248 | ||
| 249 | // Use an extra temporal buffer | 249 | // Use an extra temporal buffer |
| 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); | 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); |
| 251 | // Special case for 3D Texture Segments | ||
| 252 | const bool must_read_current_data = | ||
| 253 | params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; | ||
| 254 | tmp_buffer.resize(guest_memory_size); | 251 | tmp_buffer.resize(guest_memory_size); |
| 255 | host_ptr = tmp_buffer.data(); | 252 | host_ptr = tmp_buffer.data(); |
| 256 | if (must_read_current_data) { | 253 | |
| 254 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 255 | // Special case for 3D texture segments | ||
| 257 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | 256 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 258 | } | 257 | } |
| 259 | 258 | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 79e10ffbb..173f2edba 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -217,8 +217,8 @@ public: | |||
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | bool IsProtected() const { | 219 | bool IsProtected() const { |
| 220 | // Only 3D Slices are to be protected | 220 | // Only 3D slices are to be protected |
| 221 | return is_target && params.block_depth > 0; | 221 | return is_target && params.target == SurfaceTarget::Texture3D; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | bool IsRenderTarget() const { | 224 | bool IsRenderTarget() const { |
| @@ -250,6 +250,11 @@ public: | |||
| 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); |
| 251 | } | 251 | } |
| 252 | 252 | ||
| 253 | TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { | ||
| 254 | return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, | ||
| 255 | base_level, num_levels)); | ||
| 256 | } | ||
| 257 | |||
| 253 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, | 258 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, |
| 254 | const GPUVAddr view_addr, | 259 | const GPUVAddr view_addr, |
| 255 | const std::size_t candidate_size, const u32 mipmap, | 260 | const std::size_t candidate_size, const u32 mipmap, |
| @@ -272,8 +277,8 @@ public: | |||
| 272 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | 277 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, |
| 273 | const std::size_t candidate_size) { | 278 | const std::size_t candidate_size) { |
| 274 | if (params.target == SurfaceTarget::Texture3D || | 279 | if (params.target == SurfaceTarget::Texture3D || |
| 275 | (params.num_levels == 1 && !params.is_layered) || | 280 | view_params.target == SurfaceTarget::Texture3D || |
| 276 | view_params.target == SurfaceTarget::Texture3D) { | 281 | (params.num_levels == 1 && !params.is_layered)) { |
| 277 | return {}; | 282 | return {}; |
| 278 | } | 283 | } |
| 279 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; | 284 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 884fabffe..0b2b2b8c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz | |||
| 215 | params.num_levels = 1; | 215 | params.num_levels = 1; |
| 216 | params.emulated_levels = 1; | 216 | params.emulated_levels = 1; |
| 217 | 217 | ||
| 218 | const bool is_layered = config.layers > 1 && params.block_depth == 0; | 218 | if (config.memory_layout.is_3d != 0) { |
| 219 | params.is_layered = is_layered; | 219 | params.depth = config.layers.Value(); |
| 220 | params.depth = is_layered ? config.layers.Value() : 1; | 220 | params.is_layered = false; |
| 221 | params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | 221 | params.target = SurfaceTarget::Texture3D; |
| 222 | } else if (config.layers > 1) { | ||
| 223 | params.depth = config.layers.Value(); | ||
| 224 | params.is_layered = true; | ||
| 225 | params.target = SurfaceTarget::Texture2DArray; | ||
| 226 | } else { | ||
| 227 | params.depth = 1; | ||
| 228 | params.is_layered = false; | ||
| 229 | params.target = SurfaceTarget::Texture2D; | ||
| 230 | } | ||
| 222 | return params; | 231 | return params; |
| 223 | } | 232 | } |
| 224 | 233 | ||
| @@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
| 237 | params.width = config.width; | 246 | params.width = config.width; |
| 238 | params.height = config.height; | 247 | params.height = config.height; |
| 239 | params.pitch = config.pitch; | 248 | params.pitch = config.pitch; |
| 240 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | 249 | // TODO(Rodrigo): Try to guess texture arrays from parameters |
| 241 | params.target = SurfaceTarget::Texture2D; | 250 | params.target = SurfaceTarget::Texture2D; |
| 242 | params.depth = 1; | 251 | params.depth = 1; |
| 243 | params.num_levels = 1; | 252 | params.num_levels = 1; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6f63217a2..b543fc8c0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -298,15 +298,13 @@ public: | |||
| 298 | const GPUVAddr src_gpu_addr = src_config.Address(); | 298 | const GPUVAddr src_gpu_addr = src_config.Address(); |
| 299 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | 299 | const GPUVAddr dst_gpu_addr = dst_config.Address(); |
| 300 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | 300 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); |
| 301 | const std::optional<VAddr> dst_cpu_addr = | 301 | |
| 302 | system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); | 302 | const auto& memory_manager = system.GPU().MemoryManager(); |
| 303 | const std::optional<VAddr> src_cpu_addr = | 303 | const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); |
| 304 | system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); | 304 | const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); |
| 305 | std::pair<TSurface, TView> dst_surface = | 305 | std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); |
| 306 | GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | 306 | TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; |
| 307 | std::pair<TSurface, TView> src_surface = | 307 | ImageBlit(src_surface, dst_surface.second, copy_config); |
| 308 | GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); | ||
| 309 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | ||
| 310 | dst_surface.first->MarkAsModified(true, Tick()); | 308 | dst_surface.first->MarkAsModified(true, Tick()); |
| 311 | } | 309 | } |
| 312 | 310 | ||
| @@ -508,12 +506,12 @@ private: | |||
| 508 | return RecycleStrategy::Flush; | 506 | return RecycleStrategy::Flush; |
| 509 | } | 507 | } |
| 510 | // 3D Textures decision | 508 | // 3D Textures decision |
| 511 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | 509 | if (params.target == SurfaceTarget::Texture3D) { |
| 512 | return RecycleStrategy::Flush; | 510 | return RecycleStrategy::Flush; |
| 513 | } | 511 | } |
| 514 | for (const auto& s : overlaps) { | 512 | for (const auto& s : overlaps) { |
| 515 | const auto& s_params = s->GetSurfaceParams(); | 513 | const auto& s_params = s->GetSurfaceParams(); |
| 516 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { | 514 | if (s_params.target == SurfaceTarget::Texture3D) { |
| 517 | return RecycleStrategy::Flush; | 515 | return RecycleStrategy::Flush; |
| 518 | } | 516 | } |
| 519 | } | 517 | } |
| @@ -731,51 +729,9 @@ private: | |||
| 731 | */ | 729 | */ |
| 732 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, | 730 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 733 | const SurfaceParams& params, | 731 | const SurfaceParams& params, |
| 734 | const GPUVAddr gpu_addr, | 732 | GPUVAddr gpu_addr, VAddr cpu_addr, |
| 735 | const VAddr cpu_addr, | ||
| 736 | bool preserve_contents) { | 733 | bool preserve_contents) { |
| 737 | if (params.target == SurfaceTarget::Texture3D) { | 734 | if (params.target != SurfaceTarget::Texture3D) { |
| 738 | bool failed = false; | ||
| 739 | if (params.num_levels > 1) { | ||
| 740 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 741 | return std::nullopt; | ||
| 742 | } | ||
| 743 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 744 | bool modified = false; | ||
| 745 | for (auto& surface : overlaps) { | ||
| 746 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 747 | if (src_params.target != SurfaceTarget::Texture2D) { | ||
| 748 | failed = true; | ||
| 749 | break; | ||
| 750 | } | ||
| 751 | if (src_params.height != params.height) { | ||
| 752 | failed = true; | ||
| 753 | break; | ||
| 754 | } | ||
| 755 | if (src_params.block_depth != params.block_depth || | ||
| 756 | src_params.block_height != params.block_height) { | ||
| 757 | failed = true; | ||
| 758 | break; | ||
| 759 | } | ||
| 760 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 761 | const auto offsets = params.GetBlockOffsetXYZ(offset); | ||
| 762 | const auto z = std::get<2>(offsets); | ||
| 763 | modified |= surface->IsModified(); | ||
| 764 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||
| 765 | 1); | ||
| 766 | ImageCopy(surface, new_surface, copy_params); | ||
| 767 | } | ||
| 768 | if (failed) { | ||
| 769 | return std::nullopt; | ||
| 770 | } | ||
| 771 | for (const auto& surface : overlaps) { | ||
| 772 | Unregister(surface); | ||
| 773 | } | ||
| 774 | new_surface->MarkAsModified(modified, Tick()); | ||
| 775 | Register(new_surface); | ||
| 776 | auto view = new_surface->GetMainView(); | ||
| 777 | return {{std::move(new_surface), view}}; | ||
| 778 | } else { | ||
| 779 | for (const auto& surface : overlaps) { | 735 | for (const auto& surface : overlaps) { |
| 780 | if (!surface->MatchTarget(params.target)) { | 736 | if (!surface->MatchTarget(params.target)) { |
| 781 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | 737 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| @@ -791,11 +747,60 @@ private: | |||
| 791 | continue; | 747 | continue; |
| 792 | } | 748 | } |
| 793 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | 749 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { |
| 794 | return {{surface, surface->GetMainView()}}; | 750 | return std::make_pair(surface, surface->GetMainView()); |
| 795 | } | 751 | } |
| 796 | } | 752 | } |
| 797 | return InitializeSurface(gpu_addr, params, preserve_contents); | 753 | return InitializeSurface(gpu_addr, params, preserve_contents); |
| 798 | } | 754 | } |
| 755 | |||
| 756 | if (params.num_levels > 1) { | ||
| 757 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 758 | return std::nullopt; | ||
| 759 | } | ||
| 760 | |||
| 761 | if (overlaps.size() == 1) { | ||
| 762 | const auto& surface = overlaps[0]; | ||
| 763 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | ||
| 764 | // Don't attempt to render to textures with more than one level for now | ||
| 765 | // The texture has to be to the right or the sample address if we want to render to it | ||
| 766 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | ||
| 767 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | ||
| 768 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 769 | if (slice < overlap_params.depth) { | ||
| 770 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | ||
| 771 | return std::make_pair(std::move(surface), std::move(view)); | ||
| 772 | } | ||
| 773 | } | ||
| 774 | } | ||
| 775 | |||
| 776 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 777 | bool modified = false; | ||
| 778 | |||
| 779 | for (auto& surface : overlaps) { | ||
| 780 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 781 | if (src_params.target != SurfaceTarget::Texture2D || | ||
| 782 | src_params.height != params.height || | ||
| 783 | src_params.block_depth != params.block_depth || | ||
| 784 | src_params.block_height != params.block_height) { | ||
| 785 | return std::nullopt; | ||
| 786 | } | ||
| 787 | modified |= surface->IsModified(); | ||
| 788 | |||
| 789 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 790 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 791 | const u32 width = params.width; | ||
| 792 | const u32 height = params.height; | ||
| 793 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 794 | ImageCopy(surface, new_surface, copy_params); | ||
| 795 | } | ||
| 796 | for (const auto& surface : overlaps) { | ||
| 797 | Unregister(surface); | ||
| 798 | } | ||
| 799 | new_surface->MarkAsModified(modified, Tick()); | ||
| 800 | Register(new_surface); | ||
| 801 | |||
| 802 | TView view = new_surface->GetMainView(); | ||
| 803 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 799 | } | 804 | } |
| 800 | 805 | ||
| 801 | /** | 806 | /** |
| @@ -873,7 +878,7 @@ private: | |||
| 873 | } | 878 | } |
| 874 | } | 879 | } |
| 875 | 880 | ||
| 876 | // Check if it's a 3D texture | 881 | // Manage 3D textures |
| 877 | if (params.block_depth > 0) { | 882 | if (params.block_depth > 0) { |
| 878 | auto surface = | 883 | auto surface = |
| 879 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 884 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); |