diff options
| author | 2019-03-06 21:56:20 -0500 | |
|---|---|---|
| committer | 2019-03-06 21:56:20 -0500 | |
| commit | 076c76f4e41602f94c06e23f5d1f1ff1cddca95b (patch) | |
| tree | b24cdf1f99ae12647dab27acec7fd0e00efdf506 | |
| parent | Merge pull request #2197 from lioncash/include (diff) | |
| parent | gl_rasterizer_cache: Move format conversion to its own file (diff) | |
| download | yuzu-076c76f4e41602f94c06e23f5d1f1ff1cddca95b.tar.gz yuzu-076c76f4e41602f94c06e23f5d1f1ff1cddca95b.tar.xz yuzu-076c76f4e41602f94c06e23f5d1f1ff1cddca95b.zip | |
Merge pull request #2149 from ReinUsesLisp/decoders-style
gl_rasterizer_cache: Move format conversion functions to their own file
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 115 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 2 | ||||
| -rw-r--r-- | src/video_core/textures/convert.cpp | 92 | ||||
| -rw-r--r-- | src/video_core/textures/convert.h | 18 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 18 |
8 files changed, 183 insertions, 150 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..c1ae83f4d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -94,6 +94,8 @@ add_library(video_core STATIC | |||
| 94 | surface.h | 94 | surface.h |
| 95 | textures/astc.cpp | 95 | textures/astc.cpp |
| 96 | textures/astc.h | 96 | textures/astc.h |
| 97 | textures/convert.cpp | ||
| 98 | textures/convert.h | ||
| 97 | textures/decoders.cpp | 99 | textures/decoders.cpp |
| 98 | textures/decoders.h | 100 | textures/decoders.h |
| 99 | textures/texture.h | 101 | textures/texture.h |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index b5a9722f9..876698b37 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 22 | #include "video_core/renderer_opengl/utils.h" | 22 | #include "video_core/renderer_opengl/utils.h" |
| 23 | #include "video_core/surface.h" | 23 | #include "video_core/surface.h" |
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/convert.h" |
| 25 | #include "video_core/textures/decoders.h" | 25 | #include "video_core/textures/decoders.h" |
| 26 | 26 | ||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| @@ -597,103 +597,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 597 | } | 597 | } |
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { | ||
| 601 | union S8Z24 { | ||
| 602 | BitField<0, 24, u32> z24; | ||
| 603 | BitField<24, 8, u32> s8; | ||
| 604 | }; | ||
| 605 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 606 | |||
| 607 | union Z24S8 { | ||
| 608 | BitField<0, 8, u32> s8; | ||
| 609 | BitField<8, 24, u32> z24; | ||
| 610 | }; | ||
| 611 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 612 | |||
| 613 | S8Z24 s8z24_pixel{}; | ||
| 614 | Z24S8 z24s8_pixel{}; | ||
| 615 | constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; | ||
| 616 | for (std::size_t y = 0; y < height; ++y) { | ||
| 617 | for (std::size_t x = 0; x < width; ++x) { | ||
| 618 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 619 | if (reverse) { | ||
| 620 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 621 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 622 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 623 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 624 | } else { | ||
| 625 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 626 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 627 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 628 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 629 | } | ||
| 630 | } | ||
| 631 | } | ||
| 632 | } | ||
| 633 | |||
| 634 | /** | ||
| 635 | * Helper function to perform software conversion (as needed) when loading a buffer from Switch | ||
| 636 | * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with | ||
| 637 | * typical desktop GPUs. | ||
| 638 | */ | ||
| 639 | static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 640 | u32 width, u32 height, u32 depth) { | ||
| 641 | switch (pixel_format) { | ||
| 642 | case PixelFormat::ASTC_2D_4X4: | ||
| 643 | case PixelFormat::ASTC_2D_8X8: | ||
| 644 | case PixelFormat::ASTC_2D_8X5: | ||
| 645 | case PixelFormat::ASTC_2D_5X4: | ||
| 646 | case PixelFormat::ASTC_2D_5X5: | ||
| 647 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 648 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 649 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 650 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 651 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 652 | case PixelFormat::ASTC_2D_10X8: | ||
| 653 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 654 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 655 | u32 block_width{}; | ||
| 656 | u32 block_height{}; | ||
| 657 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 658 | data = | ||
| 659 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 660 | break; | ||
| 661 | } | ||
| 662 | case PixelFormat::S8Z24: | ||
| 663 | // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. | ||
| 664 | ConvertS8Z24ToZ24S8(data, width, height, false); | ||
| 665 | break; | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | /** | ||
| 670 | * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to | ||
| 671 | * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or | ||
| 672 | * with typical desktop GPUs. | ||
| 673 | */ | ||
| 674 | static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 675 | u32 width, u32 height) { | ||
| 676 | switch (pixel_format) { | ||
| 677 | case PixelFormat::ASTC_2D_4X4: | ||
| 678 | case PixelFormat::ASTC_2D_8X8: | ||
| 679 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 680 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 681 | case PixelFormat::ASTC_2D_5X5: | ||
| 682 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 683 | case PixelFormat::ASTC_2D_10X8: | ||
| 684 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 685 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 686 | static_cast<u32>(pixel_format)); | ||
| 687 | UNREACHABLE(); | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | case PixelFormat::S8Z24: | ||
| 691 | // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. | ||
| 692 | ConvertS8Z24ToZ24S8(data, width, height, true); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | } | ||
| 696 | |||
| 697 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | 600 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); |
| 698 | void CachedSurface::LoadGLBuffer() { | 601 | void CachedSurface::LoadGLBuffer() { |
| 699 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | 602 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); |
| @@ -722,8 +625,16 @@ void CachedSurface::LoadGLBuffer() { | |||
| 722 | } | 625 | } |
| 723 | } | 626 | } |
| 724 | for (u32 i = 0; i < params.max_mip_level; i++) { | 627 | for (u32 i = 0; i < params.max_mip_level; i++) { |
| 725 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), | 628 | const u32 width = params.MipWidth(i); |
| 726 | params.MipHeight(i), params.MipDepth(i)); | 629 | const u32 height = params.MipHeight(i); |
| 630 | const u32 depth = params.MipDepth(i); | ||
| 631 | if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { | ||
| 632 | // Reserve size for RGBA8 conversion | ||
| 633 | constexpr std::size_t rgba_bpp = 4; | ||
| 634 | gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); | ||
| 635 | } | ||
| 636 | Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, | ||
| 637 | height, depth, true, true); | ||
| 727 | } | 638 | } |
| 728 | } | 639 | } |
| 729 | 640 | ||
| @@ -746,8 +657,8 @@ void CachedSurface::FlushGLBuffer() { | |||
| 746 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, | 657 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, |
| 747 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); | 658 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); |
| 748 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 659 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| 749 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, | 660 | Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, |
| 750 | params.height); | 661 | params.height, params.depth, true, true); |
| 751 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | 662 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 752 | ASSERT(texture_src_data); | 663 | ASSERT(texture_src_data); |
| 753 | if (params.is_tiled) { | 664 | if (params.is_tiled) { |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index bc50a4876..b508d64e9 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -23,28 +23,12 @@ | |||
| 23 | 23 | ||
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/astc.h" |
| 25 | 25 | ||
| 26 | class BitStream { | 26 | class InputBitStream { |
| 27 | public: | 27 | public: |
| 28 | explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 28 | explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0) |
| 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 30 | ||
| 31 | ~BitStream() = default; | 31 | ~InputBitStream() = default; |
| 32 | |||
| 33 | int GetBitsWritten() const { | ||
| 34 | return m_BitsWritten; | ||
| 35 | } | ||
| 36 | |||
| 37 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 38 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 39 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 44 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 45 | WriteBit((val >> i) & 1); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | 32 | ||
| 49 | int GetBitsRead() const { | 33 | int GetBitsRead() const { |
| 50 | return m_BitsRead; | 34 | return m_BitsRead; |
| @@ -71,6 +55,38 @@ public: | |||
| 71 | } | 55 | } |
| 72 | 56 | ||
| 73 | private: | 57 | private: |
| 58 | const int m_NumBits; | ||
| 59 | const unsigned char* m_CurByte; | ||
| 60 | int m_NextBit = 0; | ||
| 61 | int m_BitsRead = 0; | ||
| 62 | |||
| 63 | bool done = false; | ||
| 64 | }; | ||
| 65 | |||
| 66 | class OutputBitStream { | ||
| 67 | public: | ||
| 68 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | ||
| 69 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | ||
| 70 | |||
| 71 | ~OutputBitStream() = default; | ||
| 72 | |||
| 73 | int GetBitsWritten() const { | ||
| 74 | return m_BitsWritten; | ||
| 75 | } | ||
| 76 | |||
| 77 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 78 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 79 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 84 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 85 | WriteBit((val >> i) & 1); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | private: | ||
| 74 | void WriteBit(int b) { | 90 | void WriteBit(int b) { |
| 75 | 91 | ||
| 76 | if (done) | 92 | if (done) |
| @@ -238,8 +254,8 @@ public: | |||
| 238 | // Fills result with the values that are encoded in the given | 254 | // Fills result with the values that are encoded in the given |
| 239 | // bitstream. We must know beforehand what the maximum possible | 255 | // bitstream. We must know beforehand what the maximum possible |
| 240 | // value is, and how many values we're decoding. | 256 | // value is, and how many values we're decoding. |
| 241 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, | 257 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, |
| 242 | uint32_t maxRange, uint32_t nValues) { | 258 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { |
| 243 | // Determine encoding parameters | 259 | // Determine encoding parameters |
| 244 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | 260 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); |
| 245 | 261 | ||
| @@ -267,7 +283,7 @@ public: | |||
| 267 | } | 283 | } |
| 268 | 284 | ||
| 269 | private: | 285 | private: |
| 270 | static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 286 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 271 | uint32_t nBitsPerValue) { | 287 | uint32_t nBitsPerValue) { |
| 272 | // Implement the algorithm in section C.2.12 | 288 | // Implement the algorithm in section C.2.12 |
| 273 | uint32_t m[5]; | 289 | uint32_t m[5]; |
| @@ -327,7 +343,7 @@ private: | |||
| 327 | } | 343 | } |
| 328 | } | 344 | } |
| 329 | 345 | ||
| 330 | static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 346 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 331 | uint32_t nBitsPerValue) { | 347 | uint32_t nBitsPerValue) { |
| 332 | // Implement the algorithm in section C.2.12 | 348 | // Implement the algorithm in section C.2.12 |
| 333 | uint32_t m[3]; | 349 | uint32_t m[3]; |
| @@ -406,7 +422,7 @@ struct TexelWeightParams { | |||
| 406 | } | 422 | } |
| 407 | }; | 423 | }; |
| 408 | 424 | ||
| 409 | static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | 425 | static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { |
| 410 | TexelWeightParams params; | 426 | TexelWeightParams params; |
| 411 | 427 | ||
| 412 | // Read the entire block mode all at once | 428 | // Read the entire block mode all at once |
| @@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | |||
| 605 | return params; | 621 | return params; |
| 606 | } | 622 | } |
| 607 | 623 | ||
| 608 | static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 624 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, |
| 609 | uint32_t blockHeight) { | 625 | uint32_t blockHeight) { |
| 610 | // Don't actually care about the void extent, just read the bits... | 626 | // Don't actually care about the void extent, just read the bits... |
| 611 | for (int i = 0; i < 4; ++i) { | 627 | for (int i = 0; i < 4; ++i) { |
| @@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 821 | 837 | ||
| 822 | // We now have enough to decode our integer sequence. | 838 | // We now have enough to decode our integer sequence. |
| 823 | std::vector<IntegerEncodedValue> decodedColorValues; | 839 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 824 | BitStream colorStream(data); | 840 | InputBitStream colorStream(data); |
| 825 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 841 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 826 | 842 | ||
| 827 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 843 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| @@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1365 | #undef READ_INT_VALUES | 1381 | #undef READ_INT_VALUES |
| 1366 | } | 1382 | } |
| 1367 | 1383 | ||
| 1368 | static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | 1384 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, |
| 1369 | const uint32_t blockHeight, uint32_t* outBuf) { | 1385 | const uint32_t blockHeight, uint32_t* outBuf) { |
| 1370 | BitStream strm(inBuf); | 1386 | InputBitStream strm(inBuf); |
| 1371 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1387 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1372 | 1388 | ||
| 1373 | // Was there an error? | 1389 | // Was there an error? |
| @@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1421 | // Define color data. | 1437 | // Define color data. |
| 1422 | uint8_t colorEndpointData[16]; | 1438 | uint8_t colorEndpointData[16]; |
| 1423 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1439 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); |
| 1424 | BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1440 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); |
| 1425 | 1441 | ||
| 1426 | // Read extra config data... | 1442 | // Read extra config data... |
| 1427 | uint32_t baseCEM = 0; | 1443 | uint32_t baseCEM = 0; |
| @@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1549 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1550 | 1566 | ||
| 1551 | std::vector<IntegerEncodedValue> texelWeightValues; | 1567 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1552 | BitStream weightStream(texelWeightData); | 1568 | InputBitStream weightStream(texelWeightData); |
| 1553 | 1569 | ||
| 1554 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, |
| 1555 | weightParams.m_MaxWeight, | 1571 | weightParams.m_MaxWeight, |
| @@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1597 | 1613 | ||
| 1598 | namespace Tegra::Texture::ASTC { | 1614 | namespace Tegra::Texture::ASTC { |
| 1599 | 1615 | ||
| 1600 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 1601 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { |
| 1602 | uint32_t blockIdx = 0; | 1618 | uint32_t blockIdx = 0; |
| 1603 | std::vector<uint8_t> outData(height * width * depth * 4); | 1619 | std::vector<uint8_t> outData(height * width * depth * 4); |
| @@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint | |||
| 1605 | for (uint32_t j = 0; j < height; j += block_height) { | 1621 | for (uint32_t j = 0; j < height; j += block_height) { |
| 1606 | for (uint32_t i = 0; i < width; i += block_width) { | 1622 | for (uint32_t i = 0; i < width; i += block_width) { |
| 1607 | 1623 | ||
| 1608 | uint8_t* blockPtr = data.data() + blockIdx * 16; | 1624 | const uint8_t* blockPtr = data + blockIdx * 16; |
| 1609 | 1625 | ||
| 1610 | // Blocks can be at most 12x12 | 1626 | // Blocks can be at most 12x12 |
| 1611 | uint32_t uncompData[144]; | 1627 | uint32_t uncompData[144]; |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index d419dd025..991cdba72 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | namespace Tegra::Texture::ASTC { | 10 | namespace Tegra::Texture::ASTC { |
| 11 | 11 | ||
| 12 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); |
| 14 | 14 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 15 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp new file mode 100644 index 000000000..5e439f036 --- /dev/null +++ b/src/video_core/textures/convert.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/textures/astc.h" | ||
| 14 | #include "video_core/textures/convert.h" | ||
| 15 | |||
| 16 | namespace Tegra::Texture { | ||
| 17 | |||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | template <bool reverse> | ||
| 21 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 22 | union S8Z24 { | ||
| 23 | BitField<0, 24, u32> z24; | ||
| 24 | BitField<24, 8, u32> s8; | ||
| 25 | }; | ||
| 26 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 27 | |||
| 28 | union Z24S8 { | ||
| 29 | BitField<0, 8, u32> s8; | ||
| 30 | BitField<8, 24, u32> z24; | ||
| 31 | }; | ||
| 32 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 33 | |||
| 34 | S8Z24 s8z24_pixel{}; | ||
| 35 | Z24S8 z24s8_pixel{}; | ||
| 36 | constexpr auto bpp{ | ||
| 37 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; | ||
| 38 | for (std::size_t y = 0; y < height; ++y) { | ||
| 39 | for (std::size_t x = 0; x < width; ++x) { | ||
| 40 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 41 | if constexpr (reverse) { | ||
| 42 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 43 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 44 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 45 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 46 | } else { | ||
| 47 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 48 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 49 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 50 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 57 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 58 | } | ||
| 59 | |||
| 60 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 61 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 62 | } | ||
| 63 | |||
| 64 | void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 65 | bool convert_astc, bool convert_s8z24) { | ||
| 66 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 67 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 68 | u32 block_width{}; | ||
| 69 | u32 block_height{}; | ||
| 70 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 71 | const std::vector<u8> rgba8_data = | ||
| 72 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 73 | std::copy(rgba8_data.begin(), rgba8_data.end(), data); | ||
| 74 | |||
| 75 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 76 | Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 81 | bool convert_astc, bool convert_s8z24) { | ||
| 82 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 83 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 84 | static_cast<u32>(pixel_format)); | ||
| 85 | UNREACHABLE(); | ||
| 86 | |||
| 87 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 88 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Tegra::Texture \ No newline at end of file | ||
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h new file mode 100644 index 000000000..07cd8b5da --- /dev/null +++ b/src/video_core/textures/convert.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace Tegra::Texture { | ||
| 11 | |||
| 12 | void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 13 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 14 | |||
| 15 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 16 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 17 | |||
| 18 | } // namespace Tegra::Texture \ No newline at end of file | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 5db75de22..cad7340f5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const | |||
| 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; | 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; |
| 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; |
| 105 | const u32 pixel_index{out_x + pixel_base}; | 105 | const u32 pixel_index{out_x + pixel_base}; |
| 106 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 106 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; |
| 107 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 107 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; |
| 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); | 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); |
| 109 | } | 109 | } |
| 110 | pixel_base += stride_x; | 110 | pixel_base += stride_x; |
| @@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { |
| 155 | const u32 x_start = xb * block_x_elements; | 155 | const u32 x_start = xb * block_x_elements; |
| 156 | const u32 x_end = std::min(width, x_start + block_x_elements); | 156 | const u32 x_end = std::min(width, x_start + block_x_elements); |
| 157 | if (fast) { | 157 | if constexpr (fast) { |
| 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, |
| 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, |
| 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 85b7e9f7b..65df86890 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() { | |||
| 16 | return 512; | 16 | return 512; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /** | 19 | /// Unswizzles a swizzled texture without changing its format. |
| 20 | * Unswizzles a swizzled texture without changing its format. | ||
| 21 | */ | ||
| 22 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | 20 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 23 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 21 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 24 | u32 block_height = TICEntry::DefaultBlockHeight, | 22 | u32 block_height = TICEntry::DefaultBlockHeight, |
| 25 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 23 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); |
| 26 | /** | 24 | |
| 27 | * Unswizzles a swizzled texture without changing its format. | 25 | /// Unswizzles a swizzled texture without changing its format. |
| 28 | */ | ||
| 29 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | 26 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 30 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 27 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 31 | u32 block_height = TICEntry::DefaultBlockHeight, | 28 | u32 block_height = TICEntry::DefaultBlockHeight, |
| @@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | |||
| 37 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 34 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 38 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | 35 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); |
| 39 | 36 | ||
| 40 | /** | 37 | /// Decodes an unswizzled texture into a A8R8G8B8 texture. |
| 41 | * Decodes an unswizzled texture into a A8R8G8B8 texture. | ||
| 42 | */ | ||
| 43 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 38 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 44 | u32 height); | 39 | u32 height); |
| 45 | 40 | ||
| 46 | /** | 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 47 | * This function calculates the correct size of a texture depending if it's tiled or not. | ||
| 48 | */ | ||
| 49 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 50 | u32 block_height, u32 block_depth); | 43 | u32 block_height, u32 block_depth); |
| 51 | 44 | ||
| @@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 53 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 54 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 47 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |
| 55 | u32 block_height); | 48 | u32 block_height); |
| 49 | |||
| 56 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 57 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 58 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 52 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |