diff options
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 14 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 24 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 82 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 17 |
8 files changed, 203 insertions, 21 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 7555bbe7d..8d194e175 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -167,10 +167,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 167 | auto& system_instance = Core::System::GetInstance(); | 167 | auto& system_instance = Core::System::GetInstance(); |
| 168 | 168 | ||
| 169 | // Remove this memory region from the rasterizer cache. | 169 | // Remove this memory region from the rasterizer cache. |
| 170 | system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset, | ||
| 171 | itr->second.size); | ||
| 172 | |||
| 173 | auto& gpu = system_instance.GPU(); | 170 | auto& gpu = system_instance.GPU(); |
| 171 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | ||
| 172 | ASSERT(cpu_addr); | ||
| 173 | system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); | ||
| 174 | |||
| 174 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | 175 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 175 | 176 | ||
| 176 | buffer_mappings.erase(itr->second.offset); | 177 | buffer_mappings.erase(itr->second.offset); |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 81d15c62a..2a6e8bbbb 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -36,9 +36,9 @@ public: | |||
| 36 | RenderTargetFormat format; | 36 | RenderTargetFormat format; |
| 37 | BitField<0, 1, u32> linear; | 37 | BitField<0, 1, u32> linear; |
| 38 | union { | 38 | union { |
| 39 | BitField<0, 4, u32> block_depth; | 39 | BitField<0, 4, u32> block_width; |
| 40 | BitField<4, 4, u32> block_height; | 40 | BitField<4, 4, u32> block_height; |
| 41 | BitField<8, 4, u32> block_width; | 41 | BitField<8, 4, u32> block_depth; |
| 42 | }; | 42 | }; |
| 43 | u32 depth; | 43 | u32 depth; |
| 44 | u32 layer; | 44 | u32 layer; |
| @@ -53,10 +53,20 @@ public: | |||
| 53 | address_low); | 53 | address_low); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | u32 BlockWidth() const { | ||
| 57 | // The block width is stored in log2 format. | ||
| 58 | return 1 << block_width; | ||
| 59 | } | ||
| 60 | |||
| 56 | u32 BlockHeight() const { | 61 | u32 BlockHeight() const { |
| 57 | // The block height is stored in log2 format. | 62 | // The block height is stored in log2 format. |
| 58 | return 1 << block_height; | 63 | return 1 << block_height; |
| 59 | } | 64 | } |
| 65 | |||
| 66 | u32 BlockDepth() const { | ||
| 67 | // The block depth is stored in log2 format. | ||
| 68 | return 1 << block_depth; | ||
| 69 | } | ||
| 60 | }; | 70 | }; |
| 61 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | 71 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); |
| 62 | 72 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 20e1884da..c8d1b6478 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -347,6 +347,16 @@ public: | |||
| 347 | DecrWrap = 8, | 347 | DecrWrap = 8, |
| 348 | }; | 348 | }; |
| 349 | 349 | ||
| 350 | enum class MemoryLayout : u32 { | ||
| 351 | Linear = 0, | ||
| 352 | BlockLinear = 1, | ||
| 353 | }; | ||
| 354 | |||
| 355 | enum class InvMemoryLayout : u32 { | ||
| 356 | BlockLinear = 0, | ||
| 357 | Linear = 1, | ||
| 358 | }; | ||
| 359 | |||
| 350 | struct Cull { | 360 | struct Cull { |
| 351 | enum class FrontFace : u32 { | 361 | enum class FrontFace : u32 { |
| 352 | ClockWise = 0x0900, | 362 | ClockWise = 0x0900, |
| @@ -432,7 +442,12 @@ public: | |||
| 432 | u32 width; | 442 | u32 width; |
| 433 | u32 height; | 443 | u32 height; |
| 434 | Tegra::RenderTargetFormat format; | 444 | Tegra::RenderTargetFormat format; |
| 435 | u32 block_dimensions; | 445 | union { |
| 446 | BitField<0, 3, u32> block_width; | ||
| 447 | BitField<4, 3, u32> block_height; | ||
| 448 | BitField<8, 3, u32> block_depth; | ||
| 449 | BitField<12, 1, InvMemoryLayout> type; | ||
| 450 | } memory_layout; | ||
| 436 | u32 array_mode; | 451 | u32 array_mode; |
| 437 | u32 layer_stride; | 452 | u32 layer_stride; |
| 438 | u32 base_layer; | 453 | u32 base_layer; |
| @@ -562,7 +577,12 @@ public: | |||
| 562 | u32 address_high; | 577 | u32 address_high; |
| 563 | u32 address_low; | 578 | u32 address_low; |
| 564 | Tegra::DepthFormat format; | 579 | Tegra::DepthFormat format; |
| 565 | u32 block_dimensions; | 580 | union { |
| 581 | BitField<0, 4, u32> block_width; | ||
| 582 | BitField<4, 4, u32> block_height; | ||
| 583 | BitField<8, 4, u32> block_depth; | ||
| 584 | BitField<20, 1, InvMemoryLayout> type; | ||
| 585 | } memory_layout; | ||
| 566 | u32 layer_stride; | 586 | u32 layer_stride; |
| 567 | 587 | ||
| 568 | GPUVAddr Address() const { | 588 | GPUVAddr Address() const { |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 550ab1148..9a59b65b3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 { | |||
| 214 | XHi = 3, | 214 | XHi = 3, |
| 215 | }; | 215 | }; |
| 216 | 216 | ||
| 217 | enum class VmadType : u64 { | ||
| 218 | Size16_Low = 0, | ||
| 219 | Size16_High = 1, | ||
| 220 | Size32 = 2, | ||
| 221 | Invalid = 3, | ||
| 222 | }; | ||
| 223 | |||
| 224 | enum class VmadShr : u64 { | ||
| 225 | Shr7 = 1, | ||
| 226 | Shr15 = 2, | ||
| 227 | }; | ||
| 228 | |||
| 217 | enum class XmadMode : u64 { | 229 | enum class XmadMode : u64 { |
| 218 | None = 0, | 230 | None = 0, |
| 219 | CLo = 1, | 231 | CLo = 1, |
| @@ -452,6 +464,7 @@ union Instruction { | |||
| 452 | BitField<48, 16, u64> opcode; | 464 | BitField<48, 16, u64> opcode; |
| 453 | 465 | ||
| 454 | union { | 466 | union { |
| 467 | BitField<20, 16, u64> imm20_16; | ||
| 455 | BitField<20, 19, u64> imm20_19; | 468 | BitField<20, 19, u64> imm20_19; |
| 456 | BitField<20, 32, s64> imm20_32; | 469 | BitField<20, 32, s64> imm20_32; |
| 457 | BitField<45, 1, u64> negate_b; | 470 | BitField<45, 1, u64> negate_b; |
| @@ -493,6 +506,10 @@ union Instruction { | |||
| 493 | } | 506 | } |
| 494 | } lop3; | 507 | } lop3; |
| 495 | 508 | ||
| 509 | u16 GetImm20_16() const { | ||
| 510 | return static_cast<u16>(imm20_16); | ||
| 511 | } | ||
| 512 | |||
| 496 | u32 GetImm20_19() const { | 513 | u32 GetImm20_19() const { |
| 497 | u32 imm{static_cast<u32>(imm20_19)}; | 514 | u32 imm{static_cast<u32>(imm20_19)}; |
| 498 | imm <<= 12; | 515 | imm <<= 12; |
| @@ -1017,6 +1034,23 @@ union Instruction { | |||
| 1017 | } isberd; | 1034 | } isberd; |
| 1018 | 1035 | ||
| 1019 | union { | 1036 | union { |
| 1037 | BitField<48, 1, u64> signed_a; | ||
| 1038 | BitField<38, 1, u64> is_byte_chunk_a; | ||
| 1039 | BitField<36, 2, VmadType> type_a; | ||
| 1040 | BitField<36, 2, u64> byte_height_a; | ||
| 1041 | |||
| 1042 | BitField<49, 1, u64> signed_b; | ||
| 1043 | BitField<50, 1, u64> use_register_b; | ||
| 1044 | BitField<30, 1, u64> is_byte_chunk_b; | ||
| 1045 | BitField<28, 2, VmadType> type_b; | ||
| 1046 | BitField<28, 2, u64> byte_height_b; | ||
| 1047 | |||
| 1048 | BitField<51, 2, VmadShr> shr; | ||
| 1049 | BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) | ||
| 1050 | BitField<47, 1, u64> cc; | ||
| 1051 | } vmad; | ||
| 1052 | |||
| 1053 | union { | ||
| 1020 | BitField<20, 16, u64> imm20_16; | 1054 | BitField<20, 16, u64> imm20_16; |
| 1021 | BitField<36, 1, u64> product_shift_left; | 1055 | BitField<36, 1, u64> product_shift_left; |
| 1022 | BitField<37, 1, u64> merge_37; | 1056 | BitField<37, 1, u64> merge_37; |
| @@ -1083,6 +1117,7 @@ public: | |||
| 1083 | IPA, | 1117 | IPA, |
| 1084 | OUT_R, // Emit vertex/primitive | 1118 | OUT_R, // Emit vertex/primitive |
| 1085 | ISBERD, | 1119 | ISBERD, |
| 1120 | VMAD, | ||
| 1086 | FFMA_IMM, // Fused Multiply and Add | 1121 | FFMA_IMM, // Fused Multiply and Add |
| 1087 | FFMA_CR, | 1122 | FFMA_CR, |
| 1088 | FFMA_RC, | 1123 | FFMA_RC, |
| @@ -1320,6 +1355,7 @@ private: | |||
| 1320 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1355 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1321 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1356 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1322 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1357 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| 1358 | INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), | ||
| 1323 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | 1359 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), |
| 1324 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | 1360 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), |
| 1325 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | 1361 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 56ff83eff..65a220c41 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -45,7 +45,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | |||
| 45 | SurfaceParams params{}; | 45 | SurfaceParams params{}; |
| 46 | params.addr = TryGetCpuAddr(config.tic.Address()); | 46 | params.addr = TryGetCpuAddr(config.tic.Address()); |
| 47 | params.is_tiled = config.tic.IsTiled(); | 47 | params.is_tiled = config.tic.IsTiled(); |
| 48 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 48 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | 49 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, |
| 50 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 49 | params.pixel_format = | 51 | params.pixel_format = |
| 50 | PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value()); | 52 | PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value()); |
| 51 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | 53 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); |
| @@ -97,8 +99,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | |||
| 97 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; | 99 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; |
| 98 | SurfaceParams params{}; | 100 | SurfaceParams params{}; |
| 99 | params.addr = TryGetCpuAddr(config.Address()); | 101 | params.addr = TryGetCpuAddr(config.Address()); |
| 100 | params.is_tiled = true; | 102 | params.is_tiled = |
| 101 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 103 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 104 | params.block_width = 1 << config.memory_layout.block_width; | ||
| 105 | params.block_height = 1 << config.memory_layout.block_height; | ||
| 106 | params.block_depth = 1 << config.memory_layout.block_depth; | ||
| 102 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 107 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 103 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 108 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| 104 | params.type = GetFormatType(params.pixel_format); | 109 | params.type = GetFormatType(params.pixel_format); |
| @@ -120,13 +125,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | |||
| 120 | return params; | 125 | return params; |
| 121 | } | 126 | } |
| 122 | 127 | ||
| 123 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | 128 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( |
| 124 | Tegra::GPUVAddr zeta_address, | 129 | u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, |
| 125 | Tegra::DepthFormat format) { | 130 | u32 block_width, u32 block_height, u32 block_depth, |
| 131 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 126 | SurfaceParams params{}; | 132 | SurfaceParams params{}; |
| 127 | params.addr = TryGetCpuAddr(zeta_address); | 133 | params.addr = TryGetCpuAddr(zeta_address); |
| 128 | params.is_tiled = true; | 134 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; |
| 129 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 135 | params.block_width = 1 << std::min(block_width, 5U); |
| 136 | params.block_height = 1 << std::min(block_height, 5U); | ||
| 137 | params.block_depth = 1 << std::min(block_depth, 5U); | ||
| 130 | params.pixel_format = PixelFormatFromDepthFormat(format); | 138 | params.pixel_format = PixelFormatFromDepthFormat(format); |
| 131 | params.component_type = ComponentTypeFromDepthFormat(format); | 139 | params.component_type = ComponentTypeFromDepthFormat(format); |
| 132 | params.type = GetFormatType(params.pixel_format); | 140 | params.type = GetFormatType(params.pixel_format); |
| @@ -148,7 +156,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | |||
| 148 | SurfaceParams params{}; | 156 | SurfaceParams params{}; |
| 149 | params.addr = TryGetCpuAddr(config.Address()); | 157 | params.addr = TryGetCpuAddr(config.Address()); |
| 150 | params.is_tiled = !config.linear; | 158 | params.is_tiled = !config.linear; |
| 151 | params.block_height = params.is_tiled ? config.BlockHeight() : 0, | 159 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, |
| 160 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | ||
| 161 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | ||
| 152 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 162 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| 153 | params.component_type = ComponentTypeFromRenderTarget(config.format); | 163 | params.component_type = ComponentTypeFromRenderTarget(config.format); |
| 154 | params.type = GetFormatType(params.pixel_format); | 164 | params.type = GetFormatType(params.pixel_format); |
| @@ -818,6 +828,11 @@ void CachedSurface::LoadGLBuffer() { | |||
| 818 | if (params.is_tiled) { | 828 | if (params.is_tiled) { |
| 819 | gl_buffer.resize(total_size); | 829 | gl_buffer.resize(total_size); |
| 820 | 830 | ||
| 831 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | ||
| 832 | params.block_width, static_cast<u32>(params.target)); | ||
| 833 | ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}", | ||
| 834 | params.block_depth, static_cast<u32>(params.target)); | ||
| 835 | |||
| 821 | // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do | 836 | // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do |
| 822 | // this for 3D textures, etc. | 837 | // this for 3D textures, etc. |
| 823 | switch (params.target) { | 838 | switch (params.target) { |
| @@ -989,7 +1004,9 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { | |||
| 989 | } | 1004 | } |
| 990 | 1005 | ||
| 991 | SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( | 1006 | SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( |
| 992 | regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)}; | 1007 | regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format, |
| 1008 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 1009 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 993 | 1010 | ||
| 994 | return GetSurface(depth_params, preserve_contents); | 1011 | return GetSurface(depth_params, preserve_contents); |
| 995 | } | 1012 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0b4940b3c..66d98ad4e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -716,9 +716,10 @@ struct SurfaceParams { | |||
| 716 | static SurfaceParams CreateForFramebuffer(std::size_t index); | 716 | static SurfaceParams CreateForFramebuffer(std::size_t index); |
| 717 | 717 | ||
| 718 | /// Creates SurfaceParams for a depth buffer configuration | 718 | /// Creates SurfaceParams for a depth buffer configuration |
| 719 | static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | 719 | static SurfaceParams CreateForDepthBuffer( |
| 720 | Tegra::GPUVAddr zeta_address, | 720 | u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, |
| 721 | Tegra::DepthFormat format); | 721 | u32 block_width, u32 block_height, u32 block_depth, |
| 722 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 722 | 723 | ||
| 723 | /// Creates SurfaceParams for a Fermi2D surface copy | 724 | /// Creates SurfaceParams for a Fermi2D surface copy |
| 724 | static SurfaceParams CreateForFermiCopySurface( | 725 | static SurfaceParams CreateForFermiCopySurface( |
| @@ -733,7 +734,9 @@ struct SurfaceParams { | |||
| 733 | 734 | ||
| 734 | VAddr addr; | 735 | VAddr addr; |
| 735 | bool is_tiled; | 736 | bool is_tiled; |
| 737 | u32 block_width; | ||
| 736 | u32 block_height; | 738 | u32 block_height; |
| 739 | u32 block_depth; | ||
| 737 | PixelFormat pixel_format; | 740 | PixelFormat pixel_format; |
| 738 | ComponentType component_type; | 741 | ComponentType component_type; |
| 739 | SurfaceType type; | 742 | SurfaceType type; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c82a0dcfa..8dfb49507 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2953,6 +2953,88 @@ private: | |||
| 2953 | LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); | 2953 | LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); |
| 2954 | break; | 2954 | break; |
| 2955 | } | 2955 | } |
| 2956 | case OpCode::Id::VMAD: { | ||
| 2957 | const bool signed_a = instr.vmad.signed_a == 1; | ||
| 2958 | const bool signed_b = instr.vmad.signed_b == 1; | ||
| 2959 | const bool result_signed = signed_a || signed_b; | ||
| 2960 | boost::optional<std::string> forced_result; | ||
| 2961 | |||
| 2962 | auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed, | ||
| 2963 | Tegra::Shader::VmadType type, u64 byte_height) { | ||
| 2964 | const std::string value = [&]() { | ||
| 2965 | if (!is_chunk) { | ||
| 2966 | const auto offset = static_cast<u32>(byte_height * 8); | ||
| 2967 | return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||
| 2968 | } | ||
| 2969 | const std::string zero = "0"; | ||
| 2970 | |||
| 2971 | switch (type) { | ||
| 2972 | case Tegra::Shader::VmadType::Size16_Low: | ||
| 2973 | return '(' + op + " & 0xffff)"; | ||
| 2974 | case Tegra::Shader::VmadType::Size16_High: | ||
| 2975 | return '(' + op + " >> 16)"; | ||
| 2976 | case Tegra::Shader::VmadType::Size32: | ||
| 2977 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 2978 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 2979 | // explanation is found: assert. | ||
| 2980 | UNREACHABLE_MSG("Unimplemented"); | ||
| 2981 | return zero; | ||
| 2982 | case Tegra::Shader::VmadType::Invalid: | ||
| 2983 | // Note(Rodrigo): This flag is invalid according to nvdisasm. From my | ||
| 2984 | // testing (even though it's invalid) this makes the whole instruction | ||
| 2985 | // assign zero to target register. | ||
| 2986 | forced_result = boost::make_optional(zero); | ||
| 2987 | return zero; | ||
| 2988 | default: | ||
| 2989 | UNREACHABLE(); | ||
| 2990 | return zero; | ||
| 2991 | } | ||
| 2992 | }(); | ||
| 2993 | |||
| 2994 | if (is_signed) { | ||
| 2995 | return "int(" + value + ')'; | ||
| 2996 | } | ||
| 2997 | return value; | ||
| 2998 | }; | ||
| 2999 | |||
| 3000 | const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 3001 | instr.vmad.is_byte_chunk_a != 0, signed_a, | ||
| 3002 | instr.vmad.type_a, instr.vmad.byte_height_a); | ||
| 3003 | |||
| 3004 | std::string op_b; | ||
| 3005 | if (instr.vmad.use_register_b) { | ||
| 3006 | op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3007 | instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b, | ||
| 3008 | instr.vmad.byte_height_b); | ||
| 3009 | } else { | ||
| 3010 | op_b = '(' + | ||
| 3011 | std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 3012 | : instr.alu.GetImm20_16()) + | ||
| 3013 | ')'; | ||
| 3014 | } | ||
| 3015 | |||
| 3016 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | ||
| 3017 | |||
| 3018 | std::string result; | ||
| 3019 | if (forced_result) { | ||
| 3020 | result = *forced_result; | ||
| 3021 | } else { | ||
| 3022 | result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 3023 | |||
| 3024 | switch (instr.vmad.shr) { | ||
| 3025 | case Tegra::Shader::VmadShr::Shr7: | ||
| 3026 | result = '(' + result + " >> 7)"; | ||
| 3027 | break; | ||
| 3028 | case Tegra::Shader::VmadShr::Shr15: | ||
| 3029 | result = '(' + result + " >> 15)"; | ||
| 3030 | break; | ||
| 3031 | } | ||
| 3032 | } | ||
| 3033 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | ||
| 3034 | instr.vmad.saturate == 1, 0, Register::Size::Word, | ||
| 3035 | instr.vmad.cc); | ||
| 3036 | break; | ||
| 3037 | } | ||
| 2956 | default: { | 3038 | default: { |
| 2957 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); | 3039 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); |
| 2958 | UNREACHABLE(); | 3040 | UNREACHABLE(); |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 8f31d825a..58d17abcb 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -161,7 +161,9 @@ struct TICEntry { | |||
| 161 | BitField<21, 3, TICHeaderVersion> header_version; | 161 | BitField<21, 3, TICHeaderVersion> header_version; |
| 162 | }; | 162 | }; |
| 163 | union { | 163 | union { |
| 164 | BitField<0, 3, u32> block_width; | ||
| 164 | BitField<3, 3, u32> block_height; | 165 | BitField<3, 3, u32> block_height; |
| 166 | BitField<6, 3, u32> block_depth; | ||
| 165 | 167 | ||
| 166 | // High 16 bits of the pitch value | 168 | // High 16 bits of the pitch value |
| 167 | BitField<0, 16, u32> pitch_high; | 169 | BitField<0, 16, u32> pitch_high; |
| @@ -202,13 +204,24 @@ struct TICEntry { | |||
| 202 | return depth_minus_1 + 1; | 204 | return depth_minus_1 + 1; |
| 203 | } | 205 | } |
| 204 | 206 | ||
| 207 | u32 BlockWidth() const { | ||
| 208 | ASSERT(IsTiled()); | ||
| 209 | // The block width is stored in log2 format. | ||
| 210 | return 1 << block_width; | ||
| 211 | } | ||
| 212 | |||
| 205 | u32 BlockHeight() const { | 213 | u32 BlockHeight() const { |
| 206 | ASSERT(header_version == TICHeaderVersion::BlockLinear || | 214 | ASSERT(IsTiled()); |
| 207 | header_version == TICHeaderVersion::BlockLinearColorKey); | ||
| 208 | // The block height is stored in log2 format. | 215 | // The block height is stored in log2 format. |
| 209 | return 1 << block_height; | 216 | return 1 << block_height; |
| 210 | } | 217 | } |
| 211 | 218 | ||
| 219 | u32 BlockDepth() const { | ||
| 220 | ASSERT(IsTiled()); | ||
| 221 | // The block depth is stored in log2 format. | ||
| 222 | return 1 << block_depth; | ||
| 223 | } | ||
| 224 | |||
| 212 | bool IsTiled() const { | 225 | bool IsTiled() const { |
| 213 | return header_version == TICHeaderVersion::BlockLinear || | 226 | return header_version == TICHeaderVersion::BlockLinear || |
| 214 | header_version == TICHeaderVersion::BlockLinearColorKey; | 227 | header_version == TICHeaderVersion::BlockLinearColorKey; |