Diffstat (limited to 'src')
 -rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp     |   6
 -rw-r--r--  src/video_core/engines/fermi_2d.cpp                     |  62
 -rw-r--r--  src/video_core/engines/fermi_2d.h                       |  29
 -rw-r--r--  src/video_core/engines/shader_bytecode.h                |   2
 -rw-r--r--  src/video_core/rasterizer_interface.h                   |   4
 -rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp        |  12
 -rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h          |   4
 -rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp  | 205
 -rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h    |  10
 -rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp |  97
 -rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp     |   4
 -rw-r--r--  src/video_core/shader/decode/conversion.cpp             |   6
 -rw-r--r--  src/video_core/shader/decode/memory.cpp                 | 157
 -rw-r--r--  src/video_core/shader/shader_ir.h                       |  22
14 files changed, 379 insertions, 241 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 92acc57b1..21ccfe1f8 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -25,9 +25,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
                         u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
                         const MathUtil::Rectangle<int>& crop_rect) {
     VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
-    LOG_WARNING(Service,
-                "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
-                addr, offset, width, height, stride, format);
+    LOG_TRACE(Service,
+              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
+              addr, offset, width, height, stride, format);
 
     using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
     const Tegra::FramebufferConfig framebuffer{
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 9f1533263..ec1a57226 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
 
     switch (method_call.method) {
-    case FERMI2D_REG_INDEX(trigger): {
+    // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
+    // so trigger on the second 32-bit write.
+    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
         HandleSurfaceCopy();
         break;
     }
@@ -32,57 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
     LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
                 static_cast<u32>(regs.operation));
 
-    const GPUVAddr source = regs.src.Address();
-    const GPUVAddr dest = regs.dst.Address();
-
-    // TODO(Subv): Only same-format and same-size copies are allowed for now.
-    ASSERT(regs.src.format == regs.dst.format);
-    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
-
     // TODO(Subv): Only raw copies are implemented.
     ASSERT(regs.operation == Regs::Operation::SrcCopy);
 
-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
-    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
-    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
-
-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
-        // All copies here update the main memory, so mark all rasterizer states as invalid.
-        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
+    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
+    const u32 src_blit_x2{
+        static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
+    const u32 src_blit_y2{
+        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
 
-        rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu,
-                                    dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
+    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                            regs.blit_dst_x + regs.blit_dst_width,
+                                            regs.blit_dst_y + regs.blit_dst_height};
 
-        if (regs.src.linear == regs.dst.linear) {
-            // If the input layout and the output layout are the same, just perform a raw copy.
-            ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
-            Memory::CopyBlock(*dest_cpu, *source_cpu,
-                              src_bytes_per_pixel * regs.dst.width * regs.dst.height);
-            return;
-        }
-        u8* src_buffer = Memory::GetPointer(*source_cpu);
-        u8* dst_buffer = Memory::GetPointer(*dest_cpu);
-        if (!regs.src.linear && regs.dst.linear) {
-            // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
-                                      dst_buffer, true, regs.src.BlockHeight(),
-                                      regs.src.BlockDepth(), 0);
-        } else {
-            // If the input is linear and the output is tiled, swizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
-                                      src_buffer, false, regs.dst.BlockHeight(),
-                                      regs.dst.BlockDepth(), 0);
-        }
+    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
+        UNIMPLEMENTED();
     }
 }
 
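Aside: the new HandleSurfaceCopy treats the blit source registers as 32.32 fixed-point values, which is why it shifts right by 32 after scaling by blit_du_dx/blit_dv_dy. A minimal standalone sketch of that conversion, using made-up register values rather than anything a real title would program:

#include <cstdint>

int main() {
    // Hypothetical 32.32 fixed-point register contents.
    const std::uint64_t blit_src_x = 0x10ull << 32; // source origin at x = 16.0
    const std::uint64_t du_dx = 1ull << 32;         // 1:1 horizontal scale
    const std::uint32_t dst_width = 256;

    // Integer source rectangle edges, as computed in the hunk above.
    const std::uint32_t x1 = static_cast<std::uint32_t>(blit_src_x >> 32);                       // 16
    const std::uint32_t x2 = static_cast<std::uint32_t>((blit_src_x + dst_width * du_dx) >> 32); // 272
    return (x1 == 16 && x2 == 272) ? 0 : 1;
}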
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..c69f74cc5 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -94,12 +94,22 @@ public:
 
                 Operation operation;
 
-                INSERT_PADDING_WORDS(0x9);
+                INSERT_PADDING_WORDS(0x177);
 
-                // TODO(Subv): This is only a guess.
-                u32 trigger;
+                u32 blit_control;
 
-                INSERT_PADDING_WORDS(0x1A3);
+                INSERT_PADDING_WORDS(0x8);
+
+                u32 blit_dst_x;
+                u32 blit_dst_y;
+                u32 blit_dst_width;
+                u32 blit_dst_height;
+                u64 blit_du_dx;
+                u64 blit_dv_dy;
+                u64 blit_src_x;
+                u64 blit_src_y;
+
+                INSERT_PADDING_WORDS(0x21);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
@@ -122,7 +132,16 @@ private:
 ASSERT_REG_POSITION(dst, 0x80);
 ASSERT_REG_POSITION(src, 0x8C);
 ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(trigger, 0xB5);
+ASSERT_REG_POSITION(blit_control, 0x223);
+ASSERT_REG_POSITION(blit_dst_x, 0x22c);
+ASSERT_REG_POSITION(blit_dst_y, 0x22d);
+ASSERT_REG_POSITION(blit_dst_width, 0x22e);
+ASSERT_REG_POSITION(blit_dst_height, 0x22f);
+ASSERT_REG_POSITION(blit_du_dx, 0x230);
+ASSERT_REG_POSITION(blit_dv_dy, 0x232);
+ASSERT_REG_POSITION(blit_src_x, 0x234);
+ASSERT_REG_POSITION(blit_src_y, 0x236);
+
 #undef ASSERT_REG_POSITION
 
 } // namespace Tegra::Engines
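Aside: the new padding constants can be cross-checked against the asserted offsets with simple 32-bit-word arithmetic. A small sketch of that check; the *_pos names are hypothetical, only the offsets and padding counts come from the header above:

#include <cstdint>

constexpr std::uint32_t operation_pos = 0xAB;
constexpr std::uint32_t blit_control_pos = operation_pos + 1 + 0x177; // skip INSERT_PADDING_WORDS(0x177)
constexpr std::uint32_t blit_dst_x_pos = blit_control_pos + 1 + 0x8;  // skip INSERT_PADDING_WORDS(0x8)
constexpr std::uint32_t blit_du_dx_pos = blit_dst_x_pos + 4;          // four u32 blit_dst_* fields
constexpr std::uint32_t blit_src_x_pos = blit_du_dx_pos + 4;          // two u64 fields, two words each

static_assert(blit_control_pos == 0x223);
static_assert(blit_dst_x_pos == 0x22c);
static_assert(blit_du_dx_pos == 0x230);
static_assert(blit_src_x_pos == 0x234);

int main() {}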
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 269df9437..1f425f90b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -186,7 +186,7 @@ enum class SubOp : u64 {
 };
 
 enum class F2iRoundingOp : u64 {
-    None = 0,
+    RoundEven = 0,
     Floor = 1,
     Ceil = 2,
     Trunc = 3,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 77da135a0..b2a223705 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -46,7 +46,9 @@ public:
 
     /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
+                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                       const MathUtil::Rectangle<u32>& src_rect,
+                                       const MathUtil::Rectangle<u32>& dst_rect) {
         return false;
     }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 974ca6a20..12d876120 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -778,15 +778,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
+                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                             const MathUtil::Rectangle<u32>& src_rect,
+                                             const MathUtil::Rectangle<u32>& dst_rect) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Skip the accelerated copy and perform a slow but more accurate copy
-        return false;
-    }
-
-    res_cache.FermiCopySurface(src, dst);
+    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
     return true;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f3b607f4d..258d62259 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -61,7 +61,9 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                               const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
+                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                               const MathUtil::Rectangle<u32>& src_rect,
+                               const MathUtil::Rectangle<u32>& dst_rect) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     bool AccelerateDrawBatch(bool is_indexed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index a79eee03e..59f671048 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -125,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
 
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    if (!params.is_tiled) {
+        params.pitch = config.tic.Pitch();
+    }
     params.unaligned_height = config.tic.Height();
     params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
     params.identity = SurfaceClass::Uploaded;
@@ -191,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
                              config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
     params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
-    params.width = config.width;
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        params.pitch = config.width;
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.width = params.pitch / bpp;
+    }
     params.height = config.height;
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
@@ -428,7 +437,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
     }
 }
 
-static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
+void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
+                                            const Surface& dst_surface) {
     const auto& src_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
 
@@ -438,12 +448,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
     glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
                        0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
                        0, 0, width, height, 1);
+
+    dst_surface->MarkAsModified(true, *this);
 }
 
 MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
-static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
-                        const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
-                        const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
+void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        const GLuint copy_pbo_handle, const GLenum src_attachment,
+                                        const GLenum dst_attachment,
+                                        const std::size_t cubemap_face) {
     MICROPROFILE_SCOPE(OpenGL_CopySurface);
     ASSERT_MSG(dst_attachment == 0, "Unimplemented");
 
@@ -523,6 +536,8 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
         }
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     }
+
+    dst_surface->MarkAsModified(true, *this);
 }
 
 CachedSurface::CachedSurface(const SurfaceParams& params)
@@ -688,9 +703,20 @@ void CachedSurface::LoadGLBuffer() {
         for (u32 i = 0; i < params.max_mip_level; i++)
             SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
     } else {
-        const auto texture_src_data{Memory::GetPointer(params.addr)};
-        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
-        gl_buffer[0].assign(texture_src_data, texture_src_data_end);
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
+                        params.size_in_bytes_gl);
+        } else {
+            const u8* start = Memory::GetPointer(params.addr);
+            u8* write_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(write_to, start, copy_size);
+                start += params.pitch;
+                write_to += copy_size;
+            }
+        }
     }
     for (u32 i = 0; i < params.max_mip_level; i++) {
         ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
@@ -727,7 +753,19 @@ void CachedSurface::FlushGLBuffer() {
 
         SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
     } else {
-        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+        } else {
+            u8* start = Memory::GetPointer(params.addr);
+            const u8* read_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(start, read_to, copy_size);
+                start += params.pitch;
+                read_to += copy_size;
+            }
+        }
     }
 }
 
@@ -853,8 +891,8 @@ void CachedSurface::EnsureTextureView() {
     constexpr GLuint min_level = 0;
 
     glGenTextures(1, &texture_view.handle);
-    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
-                  params.max_mip_level, 0, 1);
+    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
+                  params.max_mip_level, min_layer, num_layers);
     ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
     glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
                          reinterpret_cast<const GLint*>(swizzle.data()));
@@ -1019,26 +1057,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
         }
         address += layer_size;
     }
+
+    dst_surface->MarkAsModified(true, *this);
+}
+
+static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
+                        const MathUtil::Rectangle<u32>& src_rect,
+                        const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
+                        std::size_t cubemap_face = 0) {
+
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    OpenGLState state;
+    state.draw.read_framebuffer = read_fb_handle;
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    u32 buffers{};
+
+    if (src_params.type == SurfaceType::ColorTexture) {
+        switch (src_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                      src_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+        case SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   SurfaceTargetToGL(src_params.target),
+                                   src_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                                   SurfaceTargetToGL(src_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        switch (dst_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                      dst_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+
+        case SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   SurfaceTargetToGL(dst_params.target),
+                                   dst_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        buffers = GL_COLOR_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+    }
+
+    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
+                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
+                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+
+    return true;
 }
 
 void RasterizerCacheOpenGL::FermiCopySurface(
     const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
+    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+    const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
 
     const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
     const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
 
-    ASSERT(src_params.width == dst_params.width);
-    ASSERT(src_params.height == dst_params.height);
     ASSERT(src_params.pixel_format == dst_params.pixel_format);
     ASSERT(src_params.block_height == dst_params.block_height);
     ASSERT(src_params.is_tiled == dst_params.is_tiled);
     ASSERT(src_params.depth == dst_params.depth);
-    ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
     ASSERT(src_params.target == dst_params.target);
     ASSERT(src_params.rt.index == dst_params.rt.index);
 
-    FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
+    auto src_surface = GetSurface(src_params, true);
+    auto dst_surface = GetSurface(dst_params, true);
+
+    BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
+                draw_framebuffer.handle);
+
+    dst_surface->MarkAsModified(true, *this);
 }
 
 void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 490b8252e..b81882d04 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,6 +8,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "common/alignment.h"
@@ -272,6 +273,7 @@ struct SurfaceParams {
     u32 height;
     u32 depth;
     u32 unaligned_height;
+    u32 pitch;
     SurfaceTarget target;
     SurfaceClass identity;
     u32 max_mip_level;
@@ -421,7 +423,9 @@ public:
 
     /// Copies the contents of one surface to another
     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config);
+                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+                          const MathUtil::Rectangle<u32>& src_rect,
+                          const MathUtil::Rectangle<u32>& dst_rect);
 
 private:
     void LoadSurface(const Surface& surface);
@@ -442,6 +446,10 @@ private:
     /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
     void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
     void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                     const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
+                     const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
 
     /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
     /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 70e124dc4..b39bb4843 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -719,45 +719,51 @@ private:
         constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
 
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
         ASSERT(meta);
 
+        const auto count = static_cast<u32>(operation.GetOperandsCount());
+        const bool has_array = meta->sampler.IsArray();
+        const bool has_shadow = meta->sampler.IsShadow();
+
         std::string expr = func;
         expr += '(';
         expr += GetSampler(meta->sampler);
         expr += ", ";
 
-        expr += coord_constructors[meta->coords_count - 1];
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
         expr += '(';
         for (u32 i = 0; i < count; ++i) {
-            const bool is_extra = i >= meta->coords_count;
-            const bool is_array = i == meta->array_index;
-
-            std::string operand = [&]() {
-                if (is_extra && is_extra_int) {
-                    if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
-                        return std::to_string(static_cast<s32>(immediate->GetValue()));
-                    } else {
-                        return "ftoi(" + Visit(operation[i]) + ')';
-                    }
-                } else {
-                    return Visit(operation[i]);
-                }
-            }();
-            if (is_array) {
-                ASSERT(!is_extra);
-                operand = "float(ftoi(" + operand + "))";
-            }
-
-            expr += operand;
+            expr += Visit(operation[i]);
 
-            if (i + 1 == meta->coords_count) {
-                expr += ')';
-            }
-            if (i + 1 < count) {
+            const u32 next = i + 1;
+            if (next < count || has_array || has_shadow)
                 expr += ", ";
+        }
+        if (has_array) {
+            expr += "float(ftoi(" + Visit(meta->array) + "))";
+        }
+        if (has_shadow) {
+            if (has_array)
                 expr += ", ";
+            expr += Visit(meta->depth_compare);
+        }
+        expr += ')';
+
+        for (const Node extra : meta->extras) {
+            expr += ", ";
+            if (is_extra_int) {
+                if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
+                    // Inline the string as an immediate integer in GLSL (some extra arguments are
+                    // required to be constant)
+                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+                } else {
+                    expr += "ftoi(" + Visit(extra) + ')';
+                }
+            } else {
+                expr += Visit(extra);
             }
         }
+
         expr += ')';
         return expr;
     }
@@ -1134,7 +1140,7 @@ private:
                                   Type::HalfFloat);
     }
 
-    std::string F4Texture(Operation operation) {
+    std::string Texture(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
 
@@ -1145,7 +1151,7 @@ private:
         return expr + GetSwizzle(meta->element);
     }
 
-    std::string F4TextureLod(Operation operation) {
+    std::string TextureLod(Operation operation) {
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
@@ -1156,7 +1162,7 @@ private:
         return expr + GetSwizzle(meta->element);
     }
 
-    std::string F4TextureGather(Operation operation) {
+    std::string TextureGather(Operation operation) {
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
@@ -1164,7 +1170,7 @@ private:
                GetSwizzle(meta->element);
     }
 
-    std::string F4TextureQueryDimensions(Operation operation) {
+    std::string TextureQueryDimensions(Operation operation) {
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
@@ -1184,7 +1190,7 @@ private:
         return "0";
     }
 
-    std::string F4TextureQueryLod(Operation operation) {
+    std::string TextureQueryLod(Operation operation) {
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
@@ -1195,29 +1201,32 @@ private:
         return "0";
     }
 
-    std::string F4TexelFetch(Operation operation) {
+    std::string TexelFetch(Operation operation) {
         constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
         ASSERT(meta);
+        UNIMPLEMENTED_IF(meta->sampler.IsArray());
+        UNIMPLEMENTED_IF(!meta->extras.empty());
+
+        const auto count = static_cast<u32>(operation.GetOperandsCount());
 
         std::string expr = "texelFetch(";
         expr += GetSampler(meta->sampler);
         expr += ", ";
 
-        expr += constructors[meta->coords_count - 1];
+        expr += constructors.at(count - 1);
         expr += '(';
         for (u32 i = 0; i < count; ++i) {
             expr += VisitOperand(operation, i, Type::Int);
 
-            if (i + 1 == meta->coords_count) {
+            const u32 next = i + 1;
+            if (next == count)
                 expr += ')';
-            }
-            if (i + 1 < count) {
+            if (next < count)
                 expr += ", ";
-            }
         }
         expr += ')';
+
         return expr + GetSwizzle(meta->element);
     }
 
@@ -1454,12 +1463,12 @@ private:
         &GLSLDecompiler::Logical2HNotEqual,
         &GLSLDecompiler::Logical2HGreaterEqual,
 
-        &GLSLDecompiler::F4Texture,
-        &GLSLDecompiler::F4TextureLod,
-        &GLSLDecompiler::F4TextureGather,
-        &GLSLDecompiler::F4TextureQueryDimensions,
-        &GLSLDecompiler::F4TextureQueryLod,
-        &GLSLDecompiler::F4TexelFetch,
+        &GLSLDecompiler::Texture,
+        &GLSLDecompiler::TextureLod,
+        &GLSLDecompiler::TextureGather,
+        &GLSLDecompiler::TextureQueryDimensions,
+        &GLSLDecompiler::TextureQueryLod,
+        &GLSLDecompiler::TexelFetch,
 
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::PushFlowStack,
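Aside: the rewritten coordinate builder sizes the GLSL constructor as coords + array + shadow because GLSL packs the array layer and the depth-compare reference into the coordinate vector (for example, a 2D array shadow lookup reads all four components of a vec4). A tiny sketch of that computation, mirroring the expression in the first hunk above; the function name and test values here are invented:

#include <cassert>
#include <string>

// Mirrors coord_constructors.at(count + has_array + has_shadow - 1) from the diff.
static std::string CoordConstructor(unsigned coord_count, bool has_array, bool has_shadow) {
    const char* constructors[4] = {"float", "vec2", "vec3", "vec4"};
    return constructors[coord_count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1];
}

int main() {
    assert(CoordConstructor(2, false, false) == "vec2"); // plain 2D: (x, y)
    assert(CoordConstructor(2, true, false) == "vec3");  // 2D array: (x, y, layer)
    assert(CoordConstructor(2, true, true) == "vec4");   // 2D array shadow: (x, y, layer, compare)
}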
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 38bb692d6..9fd4b273e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
 
         const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
 
-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
     SetRegister(bb, dest, value);
 }
 
-} // namespace VideoCommon::Shader
\ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index a992f73f8..55a6fbbf2 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
 
         value = [&]() {
             switch (instr.conversion.f2i.rounding) {
-            case Tegra::Shader::F2iRoundingOp::None:
-                return value;
+            case Tegra::Shader::F2iRoundingOp::RoundEven:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
             case Tegra::Shader::F2iRoundingOp::Floor:
                 return Operation(OperationCode::FFloor, PRECISE, value);
             case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-} // namespace VideoCommon::Shader
\ No newline at end of file
+} // namespace VideoCommon::Shader
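Aside: F2iRoundingOp value 0 is now decoded as round-to-nearest-even (FRoundEven) instead of passing the value through unrounded. In host C++ terms the behavioural difference looks roughly like this sketch, relying on std::nearbyint under the default round-to-nearest-even mode:

#include <cassert>
#include <cmath>

int main() {
    // Round-to-nearest-even vs. truncation for an F2I-style conversion.
    assert(std::nearbyint(2.5f) == 2.0f); // ties go to the even neighbour
    assert(std::nearbyint(3.5f) == 4.0f);
    assert(std::trunc(2.5f) == 2.0f);     // F2iRoundingOp::Trunc behaviour
    assert(std::trunc(-2.5f) == -2.0f);
}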
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e006f8138..523421794 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -306,7 +306,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 306 | case OpCode::Id::TLD4S: { | 306 | case OpCode::Id::TLD4S: { |
| 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), |
| 308 | "AOFFI is not implemented"); | 308 | "AOFFI is not implemented"); |
| 309 | |||
| 310 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 311 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); |
| 312 | } | 311 | } |
| @@ -315,9 +314,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 315 | const Node op_a = GetRegister(instr.gpr8); | 314 | const Node op_a = GetRegister(instr.gpr8); |
| 316 | const Node op_b = GetRegister(instr.gpr20); | 315 | const Node op_b = GetRegister(instr.gpr20); |
| 317 | 316 | ||
| 318 | std::vector<Node> coords; | ||
| 319 | |||
| 320 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 317 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| 318 | std::vector<Node> coords; | ||
| 321 | if (depth_compare) { | 319 | if (depth_compare) { |
| 322 | // Note: TLD4S coordinate encoding works just like TEXS's | 320 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 323 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | 321 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); |
| @@ -328,18 +326,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 328 | coords.push_back(op_a); | 326 | coords.push_back(op_a); |
| 329 | coords.push_back(op_b); | 327 | coords.push_back(op_b); |
| 330 | } | 328 | } |
| 331 | const auto num_coords = static_cast<u32>(coords.size()); | 329 | std::vector<Node> extras; |
| 332 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | 330 | extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); |
| 333 | 331 | ||
| 334 | const auto& sampler = | 332 | const auto& sampler = |
| 335 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 333 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |
| 336 | 334 | ||
| 337 | Node4 values; | 335 | Node4 values; |
| 338 | for (u32 element = 0; element < values.size(); ++element) { | 336 | for (u32 element = 0; element < values.size(); ++element) { |
| 339 | auto params = coords; | 337 | auto coords_copy = coords; |
| 340 | MetaTexture meta{sampler, element, num_coords}; | 338 | MetaTexture meta{sampler, {}, {}, extras, element}; |
| 341 | values[element] = | 339 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 342 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 343 | } | 340 | } |
| 344 | 341 | ||
| 345 | WriteTexsInstructionFloat(bb, instr, values); | 342 | WriteTexsInstructionFloat(bb, instr, values); |
| @@ -360,12 +357,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 360 | switch (instr.txq.query_type) { | 357 | switch (instr.txq.query_type) { |
| 361 | case Tegra::Shader::TextureQueryType::Dimension: { | 358 | case Tegra::Shader::TextureQueryType::Dimension: { |
| 362 | for (u32 element = 0; element < 4; ++element) { | 359 | for (u32 element = 0; element < 4; ++element) { |
| 363 | if (instr.txq.IsComponentEnabled(element)) { | 360 | if (!instr.txq.IsComponentEnabled(element)) { |
| 364 | MetaTexture meta{sampler, element}; | 361 | continue; |
| 365 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 366 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 367 | SetTemporal(bb, indexer++, value); | ||
| 368 | } | 362 | } |
| 363 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 364 | const Node value = | ||
| 365 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 369 | } | 367 | } |
| 370 | for (u32 i = 0; i < indexer; ++i) { | 368 | for (u32 i = 0; i < indexer; ++i) { |
| 371 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | 369 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| @@ -412,9 +410,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 412 | 410 | ||
| 413 | for (u32 element = 0; element < 2; ++element) { | 411 | for (u32 element = 0; element < 2; ++element) { |
| 414 | auto params = coords; | 412 | auto params = coords; |
| 415 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | 413 | MetaTexture meta{sampler, {}, {}, {}, element}; |
| 416 | const Node value = | 414 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 417 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 418 | SetTemporal(bb, element, value); | 415 | SetTemporal(bb, element, value); |
| 419 | } | 416 | } |
| 420 | for (u32 element = 0; element < 2; ++element) { | 417 | for (u32 element = 0; element < 2; ++element) { |
| @@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 535 | } | 532 | } |
| 536 | 533 | ||
| 537 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 534 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 538 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 535 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 539 | std::size_t array_offset, std::size_t bias_offset, | 536 | Node array, Node depth_compare, u32 bias_offset) { |
| 540 | std::vector<Node>&& coords) { | 537 | const bool is_array = array; |
| 541 | UNIMPLEMENTED_IF_MSG( | 538 | const bool is_shadow = depth_compare; |
| 542 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || | ||
| 543 | (texture_type == TextureType::TextureCube && is_array && depth_compare), | ||
| 544 | "This method is not supported."); | ||
| 545 | 539 | ||
| 546 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 540 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || |
| 541 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 542 | "This method is not supported."); | ||
| 543 | |||
| 544 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 547 | 545 | ||
| 548 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 546 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 549 | process_mode == TextureProcessMode::LL || | 547 | process_mode == TextureProcessMode::LL || |
| @@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 552 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | 550 | // LOD selection (either via bias or explicit textureLod) not supported in GL for |
| 553 | // sampler2DArrayShadow and samplerCubeArrayShadow. | 551 | // sampler2DArrayShadow and samplerCubeArrayShadow. |
| 554 | const bool gl_lod_supported = | 552 | const bool gl_lod_supported = |
| 555 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | 553 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || |
| 556 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | 554 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); |
| 557 | 555 | ||
| 558 | const OperationCode read_method = | 556 | const OperationCode read_method = |
| 559 | lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; | 557 | lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; |
| 560 | 558 | ||
| 561 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | 559 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); |
| 562 | 560 | ||
| 563 | std::optional<u32> array_offset_value; | 561 | std::vector<Node> extras; |
| 564 | if (is_array) | ||
| 565 | array_offset_value = static_cast<u32>(array_offset); | ||
| 566 | |||
| 567 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 568 | |||
| 569 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 562 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 570 | if (process_mode == TextureProcessMode::LZ) { | 563 | if (process_mode == TextureProcessMode::LZ) { |
| 571 | coords.push_back(Immediate(0.0f)); | 564 | extras.push_back(Immediate(0.0f)); |
| 572 | } else { | 565 | } else { |
| 573 | // If present, lod or bias are always stored in the register indexed by the gpr20 | 566 | // If present, lod or bias are always stored in the register indexed by the gpr20 |
| 574 | // field with an offset depending on the usage of the other registers | 567 | // field with an offset depending on the usage of the other registers |
| 575 | coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | 568 | extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); |
| 576 | } | 569 | } |
| 577 | } | 570 | } |
| 578 | 571 | ||
| 579 | Node4 values; | 572 | Node4 values; |
| 580 | for (u32 element = 0; element < values.size(); ++element) { | 573 | for (u32 element = 0; element < values.size(); ++element) { |
| 581 | auto params = coords; | 574 | auto copy_coords = coords; |
| 582 | MetaTexture meta{sampler, element, coords_count, array_offset_value}; | 575 | MetaTexture meta{sampler, array, depth_compare, extras, element}; |
| 583 | values[element] = Operation(read_method, std::move(meta), std::move(params)); | 576 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 584 | } | 577 | } |
| 585 | 578 | ||
| 586 | return values; | 579 | return values; |
| @@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 602 | for (std::size_t i = 0; i < coord_count; ++i) { | 595 | for (std::size_t i = 0; i < coord_count; ++i) { |
| 603 | coords.push_back(GetRegister(coord_register + i)); | 596 | coords.push_back(GetRegister(coord_register + i)); |
| 604 | } | 597 | } |
| 605 | // 1D.DC in opengl the 2nd component is ignored. | 598 | // 1D.DC: in OpenGL, the 2nd component is ignored. |
| 606 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | 599 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { |
| 607 | coords.push_back(Immediate(0.0f)); | 600 | coords.push_back(Immediate(0.0f)); |
| 608 | } | 601 | } |
| 609 | std::size_t array_offset{}; | 602 | |
| 610 | if (is_array) { | 603 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 611 | array_offset = coords.size(); | 604 | |
| 612 | coords.push_back(GetRegister(array_register)); | 605 | Node dc{}; |
| 613 | } | ||
| 614 | if (depth_compare) { | 606 | if (depth_compare) { |
| 615 | // Depth is always stored in the register signaled by gpr20 | 607 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 616 | // or in the next register if lod or bias are used | 608 | // or bias are used |
| 617 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 609 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 618 | coords.push_back(GetRegister(depth_register)); | 610 | dc = GetRegister(depth_register); |
| 619 | } | ||
| 620 | // Fill ignored coordinates | ||
| 621 | while (coords.size() < total_coord_count) { | ||
| 622 | coords.push_back(Immediate(0)); | ||
| 623 | } | 611 | } |
| 624 | 612 | ||
| 625 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 613 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); |
| 626 | 0, std::move(coords)); | ||
| 627 | } | 614 | } |
| 628 | 615 | ||
| 629 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 616 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 641 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | 628 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) |
| 642 | ? static_cast<u64>(instr.gpr20.Value()) | 629 | ? static_cast<u64>(instr.gpr20.Value()) |
| 643 | : coord_register + 1; | 630 | : coord_register + 1; |
| 631 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 644 | 632 | ||
| 645 | std::vector<Node> coords; | 633 | std::vector<Node> coords; |
| 646 | for (std::size_t i = 0; i < coord_count; ++i) { | 634 | for (std::size_t i = 0; i < coord_count; ++i) { |
| @@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 648 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 636 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 649 | } | 637 | } |
| 650 | 638 | ||
| 651 | std::size_t array_offset{}; | 639 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 652 | if (is_array) { | 640 | |
| 653 | array_offset = coords.size(); | 641 | Node dc{}; |
| 654 | coords.push_back(GetRegister(array_register)); | ||
| 655 | } | ||
| 656 | if (depth_compare) { | 642 | if (depth_compare) { |
| 657 | // Depth is always stored in the register signaled by gpr20 | 643 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 658 | // or in the next register if lod or bias are used | 644 | // or bias are used |
| 659 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 645 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |
| 660 | coords.push_back(GetRegister(depth_register)); | 646 | dc = GetRegister(depth_register); |
| 661 | } | ||
| 662 | // Fill ignored coordinates | ||
| 663 | while (coords.size() < total_coord_count) { | ||
| 664 | coords.push_back(Immediate(0)); | ||
| 665 | } | 647 | } |
| 666 | 648 | ||
| 667 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | 649 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); |
| 668 | (coord_count > 2 ? 1 : 0), std::move(coords)); | ||
| 669 | } | 650 | } |
| 670 | 651 | ||
| 671 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 652 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| @@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 680 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 661 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 681 | 662 | ||
| 682 | std::vector<Node> coords; | 663 | std::vector<Node> coords; |
| 683 | 664 | for (size_t i = 0; i < coord_count; ++i) | |
| 684 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 685 | coords.push_back(GetRegister(coord_register + i)); | 665 | coords.push_back(GetRegister(coord_register + i)); |
| 686 | } | ||
| 687 | std::optional<u32> array_offset; | ||
| 688 | if (is_array) { | ||
| 689 | array_offset = static_cast<u32>(coords.size()); | ||
| 690 | coords.push_back(GetRegister(array_register)); | ||
| 691 | } | ||
| 692 | 666 | ||
| 693 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 667 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 694 | 668 | ||
| 695 | Node4 values; | 669 | Node4 values; |
| 696 | for (u32 element = 0; element < values.size(); ++element) { | 670 | for (u32 element = 0; element < values.size(); ++element) { |
| 697 | auto params = coords; | 671 | auto coords_copy = coords; |
| 698 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | 672 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; |
| 699 | values[element] = | 673 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 700 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 701 | } | 674 | } |
| 702 | 675 | ||
| 703 | return values; | 676 | return values; |
| @@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 705 | 678 | ||
| 706 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 679 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 707 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 680 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 708 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 709 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 681 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| 710 | 682 | ||
| 711 | // If enabled arrays index is always stored in the gpr8 field | 683 | // If enabled arrays index is always stored in the gpr8 field |
| @@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 719 | : coord_register + 1; | 691 | : coord_register + 1; |
| 720 | 692 | ||
| 721 | std::vector<Node> coords; | 693 | std::vector<Node> coords; |
| 722 | |||
| 723 | for (std::size_t i = 0; i < type_coord_count; ++i) { | 694 | for (std::size_t i = 0; i < type_coord_count; ++i) { |
| 724 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | 695 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |
| 725 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 696 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 726 | } | 697 | } |
| 727 | std::optional<u32> array_offset; | ||
| 728 | if (is_array) { | ||
| 729 | array_offset = static_cast<u32>(coords.size()); | ||
| 730 | coords.push_back(GetRegister(array_register)); | ||
| 731 | } | ||
| 732 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 733 | 698 | ||
| 734 | if (lod_enabled) { | 699 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 735 | // When lod is used always is in grp20 | 700 | // When lod is used, it is always in gpr20 |
| 736 | coords.push_back(GetRegister(instr.gpr20)); | 701 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 737 | } else { | ||
| 738 | coords.push_back(Immediate(0)); | ||
| 739 | } | ||
| 740 | 702 | ||
| 741 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 703 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |
| 742 | 704 | ||
| 743 | Node4 values; | 705 | Node4 values; |
| 744 | for (u32 element = 0; element < values.size(); ++element) { | 706 | for (u32 element = 0; element < values.size(); ++element) { |
| 745 | auto params = coords; | 707 | auto coords_copy = coords; |
| 746 | MetaTexture meta{sampler, element, coords_count, array_offset}; | 708 | MetaTexture meta{sampler, array, {}, {lod}, element}; |
| 747 | values[element] = | 709 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 748 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 749 | } | 710 | } |
| 750 | return values; | 711 | return values; |
| 751 | } | 712 | } |
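
Note (illustrative, not part of the commit): the call sites above initialize some MetaTexture fields with `{}`, relying on the convention that an absent operand is an empty Node (or an empty extras vector). A minimal standalone sketch of that convention follows; the types are simplified stand-ins, not yuzu's real shader IR headers.

// Sketch only: optional texture operands (array layer, depth-compare reference,
// lod/bias) are carried as empty Nodes or an empty vector, so consumers just
// null-check the MetaTexture fields instead of counting coordinates.
#include <cstdio>
#include <memory>
#include <vector>

struct NodeData {};
using Node = std::shared_ptr<NodeData>; // pointer-like handle in this sketch

struct Sampler {};

struct MetaTexture {
    const Sampler& sampler;
    Node array{};             // empty unless the lookup targets an array layer
    Node depth_compare{};     // empty unless this is a shadow comparison
    std::vector<Node> extras; // lod or bias operands, possibly empty
    unsigned element{};       // destination component (0..3)
};

// Spatial coordinates plus whichever optional operands are actually present.
static std::size_t OperandCount(const MetaTexture& meta, std::size_t coord_count) {
    return coord_count + (meta.array ? 1 : 0) + (meta.depth_compare ? 1 : 0) +
           meta.extras.size();
}

int main() {
    const Sampler sampler{};
    const auto reg = [] { return std::make_shared<NodeData>(); };
    // TLDS-style fetch on a 2D array: two coordinates, a layer, no depth compare, one lod.
    const MetaTexture meta{sampler, reg(), {}, {reg()}, 0};
    std::printf("operands: %zu\n", OperandCount(meta, 2)); // prints "operands: 4"
    return 0;
}
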
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1d4fbef53..52c7f2c4e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -156,12 +156,12 @@ enum class OperationCode { | |||
| 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 156 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 157 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 158 | 158 | ||
| 159 | F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 159 | Texture, /// (MetaTexture, float[N] coords) -> float4 |
| 160 | F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 160 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 |
| 161 | F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 | 161 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 |
| 162 | F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | 162 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 |
| 163 | F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 163 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 164 | F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 164 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 165 | 165 | ||
| 166 | Branch, /// (uint branch_target) -> void | 166 | Branch, /// (uint branch_target) -> void |
| 167 | PushFlowStack, /// (uint branch_target) -> void | 167 | PushFlowStack, /// (uint branch_target) -> void |
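
Note (illustrative, not part of the commit): the hunk above drops the F4 prefix from the texture operation codes. A hedged sketch of how a GLSL backend could map the renamed codes to builtins is shown below; the enum mirrors the diff, but the mapping table itself is an illustration, not the decompiler's actual code.

#include <string_view>

enum class OperationCode {
    Texture,
    TextureLod,
    TextureGather,
    TextureQueryDimensions,
    TextureQueryLod,
    TexelFetch,
};

// Hypothetical mapping from IR texture operations to GLSL builtin names.
constexpr std::string_view GlslBuiltin(OperationCode code) {
    switch (code) {
    case OperationCode::Texture:
        return "texture";
    case OperationCode::TextureLod:
        return "textureLod";
    case OperationCode::TextureGather:
        return "textureGather";
    case OperationCode::TextureQueryDimensions:
        return "textureSize";
    case OperationCode::TextureQueryLod:
        return "textureQueryLod";
    case OperationCode::TexelFetch:
        return "texelFetch";
    }
    return "texture"; // unreachable fallback
}

static_assert(GlslBuiltin(OperationCode::TexelFetch) == "texelFetch");
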
| @@ -288,9 +288,10 @@ struct MetaHalfArithmetic { | |||
| 288 | 288 | ||
| 289 | struct MetaTexture { | 289 | struct MetaTexture { |
| 290 | const Sampler& sampler; | 290 | const Sampler& sampler; |
| 291 | Node array{}; | ||
| 292 | Node depth_compare{}; | ||
| 293 | std::vector<Node> extras; | ||
| 291 | u32 element{}; | 294 | u32 element{}; |
| 292 | u32 coords_count{}; | ||
| 293 | std::optional<u32> array_index; | ||
| 294 | }; | 295 | }; |
| 295 | 296 | ||
| 296 | constexpr MetaArithmetic PRECISE = {true}; | 297 | constexpr MetaArithmetic PRECISE = {true}; |
| @@ -754,9 +755,8 @@ private: | |||
| 754 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 755 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 755 | 756 | ||
| 756 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 757 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 757 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 758 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 758 | bool is_array, std::size_t array_offset, std::size_t bias_offset, | 759 | Node array, Node depth_compare, u32 bias_offset); |
| 759 | std::vector<Node>&& coords); | ||
| 760 | 760 | ||
| 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 761 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 762 | u64 byte_height); | 762 | u64 byte_height); |
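
Note (illustrative, not part of the commit): with the new GetTextureCode signature, coords carry only spatial components while the array layer, depth-compare reference, and lod/bias travel through MetaTexture. The self-contained sketch below shows one way those pieces can be folded back into a GLSL call at codegen time; Nodes are reduced to pre-rendered expression strings purely to keep the example standalone, and BuildTextureCall is a made-up helper, not the decompiler function changed elsewhere in this commit.

#include <cstdio>
#include <string>
#include <vector>

using Expr = std::string; // stand-in for an already-visited IR node

// The coordinate vector gains the array layer and depth-compare reference (in that
// order), while lod/bias remain separate trailing arguments of the GLSL builtin.
static std::string BuildTextureCall(const std::string& func, const std::string& sampler,
                                    std::vector<Expr> coords, const Expr& array,
                                    const Expr& depth_compare, const std::vector<Expr>& extras) {
    if (!array.empty()) {
        coords.push_back(array);
    }
    if (!depth_compare.empty()) {
        coords.push_back(depth_compare);
    }
    std::string packed;
    for (std::size_t i = 0; i < coords.size(); ++i) {
        packed += (i == 0 ? "" : ", ");
        packed += coords[i];
    }
    std::string call = func + '(' + sampler + ", vec" + std::to_string(coords.size()) + '(' +
                       packed + ')';
    for (const Expr& extra : extras) {
        call += ", " + extra;
    }
    return call + ')';
}

int main() {
    // 2D array sample with explicit zero lod (the LZ path above): coords stay two-wide,
    // the layer travels through the metadata, and the lod is an extra argument.
    std::puts(BuildTextureCall("textureLod", "tex0", {"u", "v"}, "layer", "", {"0.0"}).c_str());
    // Prints: textureLod(tex0, vec3(u, v, layer), 0.0)
    return 0;
}
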