diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 134 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 13 |
2 files changed, 119 insertions, 28 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 801d45144..1bb842fe7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -336,20 +336,22 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d | |||
| 336 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 336 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 337 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 337 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 338 | 338 | ||
| 339 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 340 | // pixel values. | ||
| 341 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | ||
| 342 | |||
| 339 | if (morton_to_gl) { | 343 | if (morton_to_gl) { |
| 340 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 341 | // pixel values. | ||
| 342 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | ||
| 343 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( | 344 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( |
| 344 | addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth); | 345 | addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth); |
| 345 | const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; | 346 | const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; |
| 346 | memcpy(gl_buffer, data.data(), size_to_copy); | 347 | memcpy(gl_buffer, data.data(), size_to_copy); |
| 347 | } else { | 348 | } else { |
| 348 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | 349 | std::vector<u8> data(height * stride * bytes_per_pixel); |
| 349 | // check the configuration for this and perform more generic un/swizzle | 350 | Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth, |
| 350 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 351 | bytes_per_pixel, bytes_per_pixel, data.data(), gl_buffer, |
| 351 | VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 352 | false, block_height, block_depth); |
| 352 | Memory::GetPointer(addr), gl_buffer, morton_to_gl); | 353 | const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; |
| 354 | memcpy(Memory::GetPointer(addr), data.data(), size_to_copy); | ||
| 353 | } | 355 | } |
| 354 | } | 356 | } |
| 355 | 357 | ||
| @@ -430,17 +432,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, | |||
| 430 | MortonCopy<false, PixelFormat::RGBA16UI>, | 432 | MortonCopy<false, PixelFormat::RGBA16UI>, |
| 431 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | 433 | MortonCopy<false, PixelFormat::R11FG11FB10F>, |
| 432 | MortonCopy<false, PixelFormat::RGBA32UI>, | 434 | MortonCopy<false, PixelFormat::RGBA32UI>, |
| 433 | // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4 | 435 | MortonCopy<false, PixelFormat::DXT1>, |
| 434 | // formats are not supported | 436 | MortonCopy<false, PixelFormat::DXT23>, |
| 435 | nullptr, | 437 | MortonCopy<false, PixelFormat::DXT45>, |
| 436 | nullptr, | 438 | MortonCopy<false, PixelFormat::DXN1>, |
| 437 | nullptr, | 439 | MortonCopy<false, PixelFormat::DXN2UNORM>, |
| 438 | nullptr, | 440 | MortonCopy<false, PixelFormat::DXN2SNORM>, |
| 439 | nullptr, | 441 | MortonCopy<false, PixelFormat::BC7U>, |
| 440 | nullptr, | 442 | MortonCopy<false, PixelFormat::BC6H_UF16>, |
| 441 | nullptr, | 443 | MortonCopy<false, PixelFormat::BC6H_SF16>, |
| 442 | nullptr, | 444 | // TODO(Subv): Swizzling ASTC formats are not supported |
| 443 | nullptr, | ||
| 444 | nullptr, | 445 | nullptr, |
| 445 | MortonCopy<false, PixelFormat::G8R8U>, | 446 | MortonCopy<false, PixelFormat::G8R8U>, |
| 446 | MortonCopy<false, PixelFormat::G8R8S>, | 447 | MortonCopy<false, PixelFormat::G8R8S>, |
| @@ -754,7 +755,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 754 | SurfaceParams::SurfaceTargetName(params.target)); | 755 | SurfaceParams::SurfaceTargetName(params.target)); |
| 755 | } | 756 | } |
| 756 | 757 | ||
| 757 | static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | 758 | static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { |
| 758 | union S8Z24 { | 759 | union S8Z24 { |
| 759 | BitField<0, 24, u32> z24; | 760 | BitField<0, 24, u32> z24; |
| 760 | BitField<24, 8, u32> s8; | 761 | BitField<24, 8, u32> s8; |
| @@ -767,16 +768,23 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | |||
| 767 | }; | 768 | }; |
| 768 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | 769 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); |
| 769 | 770 | ||
| 770 | S8Z24 input_pixel{}; | 771 | S8Z24 s8z24_pixel{}; |
| 771 | Z24S8 output_pixel{}; | 772 | Z24S8 z24s8_pixel{}; |
| 772 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; | 773 | constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; |
| 773 | for (std::size_t y = 0; y < height; ++y) { | 774 | for (std::size_t y = 0; y < height; ++y) { |
| 774 | for (std::size_t x = 0; x < width; ++x) { | 775 | for (std::size_t x = 0; x < width; ++x) { |
| 775 | const std::size_t offset{bpp * (y * width + x)}; | 776 | const std::size_t offset{bpp * (y * width + x)}; |
| 776 | std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); | 777 | if (reverse) { |
| 777 | output_pixel.s8.Assign(input_pixel.s8); | 778 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); |
| 778 | output_pixel.z24.Assign(input_pixel.z24); | 779 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); |
| 779 | std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8)); | 780 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); |
| 781 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 782 | } else { | ||
| 783 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 784 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 785 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 786 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 787 | } | ||
| 780 | } | 788 | } |
| 781 | } | 789 | } |
| 782 | } | 790 | } |
| @@ -814,7 +822,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma | |||
| 814 | } | 822 | } |
| 815 | case PixelFormat::S8Z24: | 823 | case PixelFormat::S8Z24: |
| 816 | // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. | 824 | // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. |
| 817 | ConvertS8Z24ToZ24S8(data, width, height); | 825 | ConvertS8Z24ToZ24S8(data, width, height, false); |
| 818 | break; | 826 | break; |
| 819 | 827 | ||
| 820 | case PixelFormat::G8R8U: | 828 | case PixelFormat::G8R8U: |
| @@ -825,6 +833,30 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma | |||
| 825 | } | 833 | } |
| 826 | } | 834 | } |
| 827 | 835 | ||
| 836 | /** | ||
| 837 | * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to | ||
| 838 | * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or | ||
| 839 | * with typical desktop GPUs. | ||
| 840 | */ | ||
| 841 | static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 842 | u32 width, u32 height) { | ||
| 843 | switch (pixel_format) { | ||
| 844 | case PixelFormat::G8R8U: | ||
| 845 | case PixelFormat::G8R8S: | ||
| 846 | case PixelFormat::ASTC_2D_4X4: | ||
| 847 | case PixelFormat::ASTC_2D_8X8: { | ||
| 848 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 849 | static_cast<u32>(pixel_format)); | ||
| 850 | UNREACHABLE(); | ||
| 851 | break; | ||
| 852 | } | ||
| 853 | case PixelFormat::S8Z24: | ||
| 854 | // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. | ||
| 855 | ConvertS8Z24ToZ24S8(data, width, height, true); | ||
| 856 | break; | ||
| 857 | } | ||
| 858 | } | ||
| 859 | |||
| 828 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); | 860 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); |
| 829 | void CachedSurface::LoadGLBuffer() { | 861 | void CachedSurface::LoadGLBuffer() { |
| 830 | ASSERT(params.type != SurfaceType::Fill); | 862 | ASSERT(params.type != SurfaceType::Fill); |
| @@ -864,11 +896,57 @@ void CachedSurface::LoadGLBuffer() { | |||
| 864 | } | 896 | } |
| 865 | 897 | ||
| 866 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); | 898 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); |
| 899 | |||
| 900 | dirty = false; | ||
| 867 | } | 901 | } |
| 868 | 902 | ||
| 869 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | 903 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
| 870 | void CachedSurface::FlushGLBuffer() { | 904 | void CachedSurface::FlushGLBuffer() { |
| 871 | ASSERT_MSG(false, "Unimplemented"); | 905 | MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); |
| 906 | const auto& rect{params.GetRect()}; | ||
| 907 | // Load data from memory to the surface | ||
| 908 | const GLint x0 = static_cast<GLint>(rect.left); | ||
| 909 | const GLint y0 = static_cast<GLint>(rect.bottom); | ||
| 910 | const size_t buffer_offset = | ||
| 911 | static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * | ||
| 912 | GetGLBytesPerPixel(params.pixel_format); | ||
| 913 | const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); | ||
| 914 | const u32 copy_size = params.width * params.height * bytes_per_pixel; | ||
| 915 | gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); | ||
| 916 | const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); | ||
| 917 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT | ||
| 918 | ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); | ||
| 919 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); | ||
| 920 | ASSERT(!tuple.compressed); | ||
| 921 | ASSERT(x0 == 0 && y0 == 0); | ||
| 922 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 923 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(), | ||
| 924 | gl_buffer.data()); | ||
| 925 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||
| 926 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, | ||
| 927 | params.height); | ||
| 928 | ASSERT(params.type != SurfaceType::Fill); | ||
| 929 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | ||
| 930 | ASSERT(texture_src_data); | ||
| 931 | if (params.is_tiled) { | ||
| 932 | u32 depth = params.depth; | ||
| 933 | u32 block_depth = params.block_depth; | ||
| 934 | |||
| 935 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | ||
| 936 | params.block_width, static_cast<u32>(params.target)); | ||
| 937 | |||
| 938 | if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { | ||
| 939 | // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. | ||
| 940 | depth = 1U; | ||
| 941 | block_depth = 1U; | ||
| 942 | } | ||
| 943 | gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( | ||
| 944 | params.width, params.block_height, params.height, block_depth, depth, | ||
| 945 | &gl_buffer[buffer_offset], copy_size, params.addr + buffer_offset); | ||
| 946 | } else { | ||
| 947 | Memory::WriteBlock(params.addr + buffer_offset, &gl_buffer[buffer_offset], | ||
| 948 | gl_buffer.size() - buffer_offset); | ||
| 949 | } | ||
| 872 | } | 950 | } |
| 873 | 951 | ||
| 874 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); | 952 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0b8ae3eb4..a15fb7b07 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -802,6 +802,18 @@ public: | |||
| 802 | return params.size_in_bytes_total; | 802 | return params.size_in_bytes_total; |
| 803 | } | 803 | } |
| 804 | 804 | ||
| 805 | void Flush() { | ||
| 806 | // There is no need to flush the surface if it hasn't been modified by us. | ||
| 807 | if (!dirty) | ||
| 808 | return; | ||
| 809 | FlushGLBuffer(); | ||
| 810 | dirty = false; | ||
| 811 | } | ||
| 812 | |||
| 813 | void MarkAsDirty() { | ||
| 814 | dirty = true; | ||
| 815 | } | ||
| 816 | |||
| 805 | const OGLTexture& Texture() const { | 817 | const OGLTexture& Texture() const { |
| 806 | return texture; | 818 | return texture; |
| 807 | } | 819 | } |
| @@ -833,6 +845,7 @@ private: | |||
| 833 | std::vector<u8> gl_buffer; | 845 | std::vector<u8> gl_buffer; |
| 834 | SurfaceParams params; | 846 | SurfaceParams params; |
| 835 | GLenum gl_target; | 847 | GLenum gl_target; |
| 848 | bool dirty = false; | ||
| 836 | }; | 849 | }; |
| 837 | 850 | ||
| 838 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | 851 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { |