diff options
| author | 2018-08-11 10:35:47 +1000 | |
|---|---|---|
| committer | 2018-08-11 10:35:47 +1000 | |
| commit | b76ddb7647cbb390cce4143d91a1db171b0fa503 (patch) | |
| tree | a6e2e334e82b035923c41458150604dd5fb31d65 /src/video_core | |
| parent | Added IsUserRegistrationRequestPermitted (diff) | |
| parent | Merge pull request #1007 from MerryMage/dynarmic (diff) | |
| download | yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.tar.gz yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.tar.xz yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.zip | |
Merge remote-tracking branch 'origin/master' into better-account
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 15 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 82 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 147 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 86 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 4 |
14 files changed, 165 insertions, 248 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5c0ae8009..a46ed4bd7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& | |||
| 23 | : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} | 23 | : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} |
| 24 | 24 | ||
| 25 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | 25 | void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { |
| 26 | auto macro_code = uploaded_macros.find(method); | 26 | // Reset the current macro. |
| 27 | executing_macro = 0; | ||
| 28 | |||
| 27 | // The requested macro must have been uploaded already. | 29 | // The requested macro must have been uploaded already. |
| 28 | ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); | 30 | auto macro_code = uploaded_macros.find(method); |
| 31 | if (macro_code == uploaded_macros.end()) { | ||
| 32 | LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); | ||
| 33 | return; | ||
| 34 | } | ||
| 29 | 35 | ||
| 30 | // Reset the current macro and execute it. | 36 | // Execute the current macro. |
| 31 | executing_macro = 0; | ||
| 32 | macro_interpreter.Execute(macro_code->second, std::move(parameters)); | 37 | macro_interpreter.Execute(macro_code->second, std::move(parameters)); |
| 33 | } | 38 | } |
| 34 | 39 | ||
| @@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | |||
| 238 | 243 | ||
| 239 | auto& buffer = shader.const_buffers[bind_data.index]; | 244 | auto& buffer = shader.const_buffers[bind_data.index]; |
| 240 | 245 | ||
| 246 | ASSERT(bind_data.index < Regs::MaxConstBuffers); | ||
| 247 | |||
| 241 | buffer.enabled = bind_data.valid.Value() != 0; | 248 | buffer.enabled = bind_data.valid.Value() != 0; |
| 242 | buffer.index = bind_data.index; | 249 | buffer.index = bind_data.index; |
| 243 | buffer.address = regs.const_buffer.BufferAddress(); | 250 | buffer.address = regs.const_buffer.BufferAddress(); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4d0ff96a5..0506ac8fe 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -44,7 +44,7 @@ public: | |||
| 44 | static constexpr size_t MaxShaderProgram = 6; | 44 | static constexpr size_t MaxShaderProgram = 6; |
| 45 | static constexpr size_t MaxShaderStage = 5; | 45 | static constexpr size_t MaxShaderStage = 5; |
| 46 | // Maximum number of const buffers per shader stage. | 46 | // Maximum number of const buffers per shader stage. |
| 47 | static constexpr size_t MaxConstBuffers = 16; | 47 | static constexpr size_t MaxConstBuffers = 18; |
| 48 | 48 | ||
| 49 | enum class QueryMode : u32 { | 49 | enum class QueryMode : u32 { |
| 50 | Write = 0, | 50 | Write = 0, |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7e3fb4b1..3d4557b7e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -78,6 +78,8 @@ union Attribute { | |||
| 78 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval | 78 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval |
| 79 | // shader. | 79 | // shader. |
| 80 | TessCoordInstanceIDVertexID = 47, | 80 | TessCoordInstanceIDVertexID = 47, |
| 81 | // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. | ||
| 82 | Unknown_63 = 63, | ||
| 81 | }; | 83 | }; |
| 82 | 84 | ||
| 83 | union { | 85 | union { |
| @@ -254,20 +256,15 @@ union Instruction { | |||
| 254 | BitField<56, 1, u64> invert_b; | 256 | BitField<56, 1, u64> invert_b; |
| 255 | } lop32i; | 257 | } lop32i; |
| 256 | 258 | ||
| 257 | float GetImm20_19() const { | 259 | u32 GetImm20_19() const { |
| 258 | float result{}; | ||
| 259 | u32 imm{static_cast<u32>(imm20_19)}; | 260 | u32 imm{static_cast<u32>(imm20_19)}; |
| 260 | imm <<= 12; | 261 | imm <<= 12; |
| 261 | imm |= negate_imm ? 0x80000000 : 0; | 262 | imm |= negate_imm ? 0x80000000 : 0; |
| 262 | std::memcpy(&result, &imm, sizeof(imm)); | 263 | return imm; |
| 263 | return result; | ||
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | float GetImm20_32() const { | 266 | u32 GetImm20_32() const { |
| 267 | float result{}; | 267 | return static_cast<u32>(imm20_32); |
| 268 | s32 imm{static_cast<s32>(imm20_32)}; | ||
| 269 | std::memcpy(&result, &imm, sizeof(imm)); | ||
| 270 | return result; | ||
| 271 | } | 268 | } |
| 272 | 269 | ||
| 273 | s32 GetSignedImm20_20() const { | 270 | s32 GetSignedImm20_20() const { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b2a83ce0b..4ff4d71c5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | |||
| 42 | case RenderTargetFormat::RGB10_A2_UNORM: | 42 | case RenderTargetFormat::RGB10_A2_UNORM: |
| 43 | case RenderTargetFormat::BGRA8_UNORM: | 43 | case RenderTargetFormat::BGRA8_UNORM: |
| 44 | case RenderTargetFormat::R32_FLOAT: | 44 | case RenderTargetFormat::R32_FLOAT: |
| 45 | case RenderTargetFormat::R11G11B10_FLOAT: | ||
| 45 | return 4; | 46 | return 4; |
| 46 | default: | 47 | default: |
| 47 | UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); | 48 | UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 440505c9d..874eddd78 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 { | |||
| 34 | RG16_FLOAT = 0xDE, | 34 | RG16_FLOAT = 0xDE, |
| 35 | R11G11B10_FLOAT = 0xE0, | 35 | R11G11B10_FLOAT = 0xE0, |
| 36 | R32_FLOAT = 0xE5, | 36 | R32_FLOAT = 0xE5, |
| 37 | B5G6R5_UNORM = 0xE8, | ||
| 37 | R16_FLOAT = 0xF2, | 38 | R16_FLOAT = 0xF2, |
| 38 | R8_UNORM = 0xF3, | 39 | R8_UNORM = 0xF3, |
| 39 | }; | 40 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c2a931469..8360feb5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -161,7 +161,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | |||
| 161 | // assume every shader uses them all. | 161 | // assume every shader uses them all. |
| 162 | for (unsigned index = 0; index < 16; ++index) { | 162 | for (unsigned index = 0; index < 16; ++index) { |
| 163 | auto& attrib = regs.vertex_attrib_format[index]; | 163 | auto& attrib = regs.vertex_attrib_format[index]; |
| 164 | LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | 164 | LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |
| 165 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | 165 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |
| 166 | attrib.offset.Value(), attrib.IsNormalized()); | 166 | attrib.offset.Value(), attrib.IsNormalized()); |
| 167 | 167 | ||
| @@ -324,11 +324,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c | |||
| 324 | bool using_depth_fb) { | 324 | bool using_depth_fb) { |
| 325 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 325 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 326 | 326 | ||
| 327 | if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { | ||
| 328 | LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured"); | ||
| 329 | using_color_fb = false; | ||
| 330 | } | ||
| 331 | |||
| 327 | // TODO(bunnei): Implement this | 332 | // TODO(bunnei): Implement this |
| 328 | const bool has_stencil = false; | 333 | const bool has_stencil = false; |
| 329 | 334 | ||
| 330 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; | ||
| 331 | |||
| 332 | const bool write_color_fb = | 335 | const bool write_color_fb = |
| 333 | state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || | 336 | state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || |
| 334 | state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; | 337 | state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; |
| @@ -341,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c | |||
| 341 | Surface depth_surface; | 344 | Surface depth_surface; |
| 342 | MathUtil::Rectangle<u32> surfaces_rect; | 345 | MathUtil::Rectangle<u32> surfaces_rect; |
| 343 | std::tie(color_surface, depth_surface, surfaces_rect) = | 346 | std::tie(color_surface, depth_surface, surfaces_rect) = |
| 344 | res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); | 347 | res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); |
| 345 | 348 | ||
| 346 | MathUtil::Rectangle<u32> draw_rect{ | 349 | const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; |
| 350 | const MathUtil::Rectangle<u32> draw_rect{ | ||
| 347 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, | 351 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, |
| 348 | surfaces_rect.left, surfaces_rect.right)), // Left | 352 | surfaces_rect.left, surfaces_rect.right)), // Left |
| 349 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, | 353 | static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, |
| @@ -659,7 +663,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | |||
| 659 | auto& buffer_draw_state = | 663 | auto& buffer_draw_state = |
| 660 | state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; | 664 | state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; |
| 661 | 665 | ||
| 662 | ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); | 666 | if (!buffer.enabled) { |
| 667 | continue; | ||
| 668 | } | ||
| 669 | |||
| 663 | buffer_draw_state.enabled = true; | 670 | buffer_draw_state.enabled = true; |
| 664 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; | 671 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; |
| 665 | 672 | ||
| @@ -804,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() { | |||
| 804 | void RasterizerOpenGL::SyncCullMode() { | 811 | void RasterizerOpenGL::SyncCullMode() { |
| 805 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 812 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 806 | 813 | ||
| 807 | // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions | 814 | state.cull.enabled = regs.cull.enabled != 0; |
| 808 | // state.cull.enabled = regs.cull.enabled != 0; | ||
| 809 | state.cull.enabled = false; | ||
| 810 | 815 | ||
| 811 | if (state.cull.enabled) { | 816 | if (state.cull.enabled) { |
| 812 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); | 817 | state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 257aa9571..9fb734b77 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -109,6 +109,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form | |||
| 109 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | 109 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, |
| 110 | true}, // DXT45 | 110 | true}, // DXT45 |
| 111 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 | 111 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 |
| 112 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 113 | true}, // DXN2UNORM | ||
| 114 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM | ||
| 112 | {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | 115 | {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, |
| 113 | true}, // BC7U | 116 | true}, // BC7U |
| 114 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 | 117 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 |
| @@ -180,36 +183,49 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { | |||
| 180 | return {0, actual_height, width, 0}; | 183 | return {0, actual_height, width, 0}; |
| 181 | } | 184 | } |
| 182 | 185 | ||
| 186 | /// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN | ||
| 187 | static bool IsFormatBCn(PixelFormat format) { | ||
| 188 | switch (format) { | ||
| 189 | case PixelFormat::DXT1: | ||
| 190 | case PixelFormat::DXT23: | ||
| 191 | case PixelFormat::DXT45: | ||
| 192 | case PixelFormat::DXN1: | ||
| 193 | case PixelFormat::DXN2SNORM: | ||
| 194 | case PixelFormat::DXN2UNORM: | ||
| 195 | case PixelFormat::BC7U: | ||
| 196 | return true; | ||
| 197 | } | ||
| 198 | return false; | ||
| 199 | } | ||
| 200 | |||
| 183 | template <bool morton_to_gl, PixelFormat format> | 201 | template <bool morton_to_gl, PixelFormat format> |
| 184 | void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { | 202 | void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, |
| 203 | Tegra::GPUVAddr addr) { | ||
| 185 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 204 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 186 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 205 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 187 | const auto& gpu = Core::System::GetInstance().GPU(); | 206 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 188 | 207 | ||
| 189 | if (morton_to_gl) { | 208 | if (morton_to_gl) { |
| 190 | if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { | 209 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| 191 | auto data = Tegra::Texture::UnswizzleTexture( | 210 | // pixel values. |
| 192 | *gpu.memory_manager->GpuToCpuAddress(addr), | 211 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
| 193 | SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); | 212 | const std::vector<u8> data = |
| 194 | std::memcpy(gl_buffer, data.data(), data.size()); | 213 | Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, |
| 195 | } else { | 214 | bytes_per_pixel, stride, height, block_height); |
| 196 | auto data = Tegra::Texture::UnswizzleDepthTexture( | 215 | const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; |
| 197 | *gpu.memory_manager->GpuToCpuAddress(addr), | 216 | gl_buffer.assign(data.begin(), data.begin() + size_to_copy); |
| 198 | SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); | ||
| 199 | std::memcpy(gl_buffer, data.data(), data.size()); | ||
| 200 | } | ||
| 201 | } else { | 217 | } else { |
| 202 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | 218 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should |
| 203 | // check the configuration for this and perform more generic un/swizzle | 219 | // check the configuration for this and perform more generic un/swizzle |
| 204 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 220 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
| 205 | VideoCore::MortonCopyPixels128( | 221 | VideoCore::MortonCopyPixels128( |
| 206 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 222 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
| 207 | Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, | 223 | Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(), |
| 208 | morton_to_gl); | 224 | morton_to_gl); |
| 209 | } | 225 | } |
| 210 | } | 226 | } |
| 211 | 227 | ||
| 212 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), | 228 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), |
| 213 | SurfaceParams::MaxPixelFormat> | 229 | SurfaceParams::MaxPixelFormat> |
| 214 | morton_to_gl_fns = { | 230 | morton_to_gl_fns = { |
| 215 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, | 231 | MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, |
| @@ -218,6 +234,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), | |||
| 218 | MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, | 234 | MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, |
| 219 | MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, | 235 | MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, |
| 220 | MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, | 236 | MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, |
| 237 | MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, | ||
| 221 | MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | 238 | MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, |
| 222 | MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, | 239 | MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, |
| 223 | MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, | 240 | MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, |
| @@ -231,7 +248,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), | |||
| 231 | MortonCopy<true, PixelFormat::Z32FS8>, | 248 | MortonCopy<true, PixelFormat::Z32FS8>, |
| 232 | }; | 249 | }; |
| 233 | 250 | ||
| 234 | static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), | 251 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), |
| 235 | SurfaceParams::MaxPixelFormat> | 252 | SurfaceParams::MaxPixelFormat> |
| 236 | gl_to_morton_fns = { | 253 | gl_to_morton_fns = { |
| 237 | MortonCopy<false, PixelFormat::ABGR8>, | 254 | MortonCopy<false, PixelFormat::ABGR8>, |
| @@ -242,7 +259,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), | |||
| 242 | MortonCopy<false, PixelFormat::RGBA16F>, | 259 | MortonCopy<false, PixelFormat::RGBA16F>, |
| 243 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | 260 | MortonCopy<false, PixelFormat::R11FG11FB10F>, |
| 244 | MortonCopy<false, PixelFormat::RGBA32UI>, | 261 | MortonCopy<false, PixelFormat::RGBA32UI>, |
| 245 | // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported | 262 | // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not |
| 263 | // supported | ||
| 264 | nullptr, | ||
| 265 | nullptr, | ||
| 246 | nullptr, | 266 | nullptr, |
| 247 | nullptr, | 267 | nullptr, |
| 248 | nullptr, | 268 | nullptr, |
| @@ -447,22 +467,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 | |||
| 447 | void CachedSurface::LoadGLBuffer() { | 467 | void CachedSurface::LoadGLBuffer() { |
| 448 | ASSERT(params.type != SurfaceType::Fill); | 468 | ASSERT(params.type != SurfaceType::Fill); |
| 449 | 469 | ||
| 450 | u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); | 470 | const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); |
| 451 | 471 | ||
| 452 | ASSERT(texture_src_data); | 472 | ASSERT(texture_src_data); |
| 453 | 473 | ||
| 454 | gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); | 474 | const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); |
| 475 | const u32 copy_size = params.width * params.height * bytes_per_pixel; | ||
| 455 | 476 | ||
| 456 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | 477 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); |
| 457 | 478 | ||
| 458 | if (!params.is_tiled) { | 479 | if (params.is_tiled) { |
| 459 | const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; | 480 | gl_buffer.resize(copy_size); |
| 460 | 481 | ||
| 461 | std::memcpy(gl_buffer.data(), texture_src_data, | ||
| 462 | bytes_per_pixel * params.width * params.height); | ||
| 463 | } else { | ||
| 464 | morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( | 482 | morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( |
| 465 | params.width, params.block_height, params.height, gl_buffer.data(), params.addr); | 483 | params.width, params.block_height, params.height, gl_buffer, params.addr); |
| 484 | } else { | ||
| 485 | const u8* const texture_src_data_end = texture_src_data + copy_size; | ||
| 486 | |||
| 487 | gl_buffer.assign(texture_src_data, texture_src_data_end); | ||
| 466 | } | 488 | } |
| 467 | 489 | ||
| 468 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); | 490 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); |
| @@ -485,7 +507,7 @@ void CachedSurface::FlushGLBuffer() { | |||
| 485 | std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); | 507 | std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); |
| 486 | } else { | 508 | } else { |
| 487 | gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( | 509 | gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( |
| 488 | params.width, params.block_height, params.height, gl_buffer.data(), params.addr); | 510 | params.width, params.block_height, params.height, gl_buffer, params.addr); |
| 489 | } | 511 | } |
| 490 | } | 512 | } |
| 491 | 513 | ||
| @@ -600,8 +622,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu | |||
| 600 | return GetSurface(SurfaceParams::CreateForTexture(config)); | 622 | return GetSurface(SurfaceParams::CreateForTexture(config)); |
| 601 | } | 623 | } |
| 602 | 624 | ||
| 603 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | 625 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, |
| 604 | bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { | 626 | bool using_depth_fb) { |
| 605 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 627 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 606 | 628 | ||
| 607 | // TODO(bunnei): This is hard corded to use just the first render buffer | 629 | // TODO(bunnei): This is hard corded to use just the first render buffer |
| @@ -757,10 +779,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size* | |||
| 757 | } | 779 | } |
| 758 | 780 | ||
| 759 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { | 781 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { |
| 760 | for (const auto& pair : surface_cache) { | 782 | for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { |
| 761 | const auto& surface{pair.second}; | 783 | const auto& surface{iter->second}; |
| 762 | const auto& params{surface->GetSurfaceParams()}; | 784 | const auto& params{surface->GetSurfaceParams()}; |
| 763 | 785 | ||
| 786 | ++iter; | ||
| 787 | |||
| 764 | if (params.IsOverlappingRegion(addr, size)) { | 788 | if (params.IsOverlappingRegion(addr, size)) { |
| 765 | UnregisterSurface(surface); | 789 | UnregisterSurface(surface); |
| 766 | } | 790 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0c6652c7a..829a76dfe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -35,31 +35,33 @@ struct SurfaceParams { | |||
| 35 | DXT23 = 9, | 35 | DXT23 = 9, |
| 36 | DXT45 = 10, | 36 | DXT45 = 10, |
| 37 | DXN1 = 11, // This is also known as BC4 | 37 | DXN1 = 11, // This is also known as BC4 |
| 38 | BC7U = 12, | 38 | DXN2UNORM = 12, |
| 39 | ASTC_2D_4X4 = 13, | 39 | DXN2SNORM = 13, |
| 40 | G8R8 = 14, | 40 | BC7U = 14, |
| 41 | BGRA8 = 15, | 41 | ASTC_2D_4X4 = 15, |
| 42 | RGBA32F = 16, | 42 | G8R8 = 16, |
| 43 | RG32F = 17, | 43 | BGRA8 = 17, |
| 44 | R32F = 18, | 44 | RGBA32F = 18, |
| 45 | R16F = 19, | 45 | RG32F = 19, |
| 46 | R16UNORM = 20, | 46 | R32F = 20, |
| 47 | RG16 = 21, | 47 | R16F = 21, |
| 48 | RG16F = 22, | 48 | R16UNORM = 22, |
| 49 | RG16UI = 23, | 49 | RG16 = 23, |
| 50 | RG16I = 24, | 50 | RG16F = 24, |
| 51 | RG16S = 25, | 51 | RG16UI = 25, |
| 52 | RGB32F = 26, | 52 | RG16I = 26, |
| 53 | SRGBA8 = 27, | 53 | RG16S = 27, |
| 54 | RGB32F = 28, | ||
| 55 | SRGBA8 = 29, | ||
| 54 | 56 | ||
| 55 | MaxColorFormat, | 57 | MaxColorFormat, |
| 56 | 58 | ||
| 57 | // DepthStencil formats | 59 | // DepthStencil formats |
| 58 | Z24S8 = 28, | 60 | Z24S8 = 30, |
| 59 | S8Z24 = 29, | 61 | S8Z24 = 31, |
| 60 | Z32F = 30, | 62 | Z32F = 32, |
| 61 | Z16 = 31, | 63 | Z16 = 33, |
| 62 | Z32FS8 = 32, | 64 | Z32FS8 = 34, |
| 63 | 65 | ||
| 64 | MaxDepthStencilFormat, | 66 | MaxDepthStencilFormat, |
| 65 | 67 | ||
| @@ -109,6 +111,8 @@ struct SurfaceParams { | |||
| 109 | 4, // DXT23 | 111 | 4, // DXT23 |
| 110 | 4, // DXT45 | 112 | 4, // DXT45 |
| 111 | 4, // DXN1 | 113 | 4, // DXN1 |
| 114 | 4, // DXN2UNORM | ||
| 115 | 4, // DXN2SNORM | ||
| 112 | 4, // BC7U | 116 | 4, // BC7U |
| 113 | 4, // ASTC_2D_4X4 | 117 | 4, // ASTC_2D_4X4 |
| 114 | 1, // G8R8 | 118 | 1, // G8R8 |
| @@ -153,6 +157,8 @@ struct SurfaceParams { | |||
| 153 | 128, // DXT23 | 157 | 128, // DXT23 |
| 154 | 128, // DXT45 | 158 | 128, // DXT45 |
| 155 | 64, // DXN1 | 159 | 64, // DXN1 |
| 160 | 128, // DXN2UNORM | ||
| 161 | 128, // DXN2SNORM | ||
| 156 | 128, // BC7U | 162 | 128, // BC7U |
| 157 | 32, // ASTC_2D_4X4 | 163 | 32, // ASTC_2D_4X4 |
| 158 | 16, // G8R8 | 164 | 16, // G8R8 |
| @@ -221,6 +227,8 @@ struct SurfaceParams { | |||
| 221 | return PixelFormat::RG32F; | 227 | return PixelFormat::RG32F; |
| 222 | case Tegra::RenderTargetFormat::R11G11B10_FLOAT: | 228 | case Tegra::RenderTargetFormat::R11G11B10_FLOAT: |
| 223 | return PixelFormat::R11FG11FB10F; | 229 | return PixelFormat::R11FG11FB10F; |
| 230 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: | ||
| 231 | return PixelFormat::B5G6R5; | ||
| 224 | case Tegra::RenderTargetFormat::RGBA32_UINT: | 232 | case Tegra::RenderTargetFormat::RGBA32_UINT: |
| 225 | return PixelFormat::RGBA32UI; | 233 | return PixelFormat::RGBA32UI; |
| 226 | case Tegra::RenderTargetFormat::R8_UNORM: | 234 | case Tegra::RenderTargetFormat::R8_UNORM: |
| @@ -303,6 +311,16 @@ struct SurfaceParams { | |||
| 303 | return PixelFormat::DXT45; | 311 | return PixelFormat::DXT45; |
| 304 | case Tegra::Texture::TextureFormat::DXN1: | 312 | case Tegra::Texture::TextureFormat::DXN1: |
| 305 | return PixelFormat::DXN1; | 313 | return PixelFormat::DXN1; |
| 314 | case Tegra::Texture::TextureFormat::DXN2: | ||
| 315 | switch (component_type) { | ||
| 316 | case Tegra::Texture::ComponentType::UNORM: | ||
| 317 | return PixelFormat::DXN2UNORM; | ||
| 318 | case Tegra::Texture::ComponentType::SNORM: | ||
| 319 | return PixelFormat::DXN2SNORM; | ||
| 320 | } | ||
| 321 | LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", | ||
| 322 | static_cast<u32>(component_type)); | ||
| 323 | UNREACHABLE(); | ||
| 306 | case Tegra::Texture::TextureFormat::BC7U: | 324 | case Tegra::Texture::TextureFormat::BC7U: |
| 307 | return PixelFormat::BC7U; | 325 | return PixelFormat::BC7U; |
| 308 | case Tegra::Texture::TextureFormat::ASTC_2D_4X4: | 326 | case Tegra::Texture::TextureFormat::ASTC_2D_4X4: |
| @@ -330,89 +348,6 @@ struct SurfaceParams { | |||
| 330 | } | 348 | } |
| 331 | } | 349 | } |
| 332 | 350 | ||
| 333 | static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { | ||
| 334 | // TODO(Subv): Properly implement this | ||
| 335 | switch (format) { | ||
| 336 | case PixelFormat::ABGR8: | ||
| 337 | case PixelFormat::SRGBA8: | ||
| 338 | return Tegra::Texture::TextureFormat::A8R8G8B8; | ||
| 339 | case PixelFormat::B5G6R5: | ||
| 340 | return Tegra::Texture::TextureFormat::B5G6R5; | ||
| 341 | case PixelFormat::A2B10G10R10: | ||
| 342 | return Tegra::Texture::TextureFormat::A2B10G10R10; | ||
| 343 | case PixelFormat::A1B5G5R5: | ||
| 344 | return Tegra::Texture::TextureFormat::A1B5G5R5; | ||
| 345 | case PixelFormat::R8: | ||
| 346 | return Tegra::Texture::TextureFormat::R8; | ||
| 347 | case PixelFormat::G8R8: | ||
| 348 | return Tegra::Texture::TextureFormat::G8R8; | ||
| 349 | case PixelFormat::RGBA16F: | ||
| 350 | return Tegra::Texture::TextureFormat::R16_G16_B16_A16; | ||
| 351 | case PixelFormat::R11FG11FB10F: | ||
| 352 | return Tegra::Texture::TextureFormat::BF10GF11RF11; | ||
| 353 | case PixelFormat::RGBA32UI: | ||
| 354 | return Tegra::Texture::TextureFormat::R32_G32_B32_A32; | ||
| 355 | case PixelFormat::DXT1: | ||
| 356 | return Tegra::Texture::TextureFormat::DXT1; | ||
| 357 | case PixelFormat::DXT23: | ||
| 358 | return Tegra::Texture::TextureFormat::DXT23; | ||
| 359 | case PixelFormat::DXT45: | ||
| 360 | return Tegra::Texture::TextureFormat::DXT45; | ||
| 361 | case PixelFormat::DXN1: | ||
| 362 | return Tegra::Texture::TextureFormat::DXN1; | ||
| 363 | case PixelFormat::BC7U: | ||
| 364 | return Tegra::Texture::TextureFormat::BC7U; | ||
| 365 | case PixelFormat::ASTC_2D_4X4: | ||
| 366 | return Tegra::Texture::TextureFormat::ASTC_2D_4X4; | ||
| 367 | case PixelFormat::BGRA8: | ||
| 368 | // TODO(bunnei): This is fine for unswizzling (since we just need the right component | ||
| 369 | // sizes), but could be a bug if we used this function in different ways. | ||
| 370 | return Tegra::Texture::TextureFormat::A8R8G8B8; | ||
| 371 | case PixelFormat::RGBA32F: | ||
| 372 | return Tegra::Texture::TextureFormat::R32_G32_B32_A32; | ||
| 373 | case PixelFormat::RGB32F: | ||
| 374 | return Tegra::Texture::TextureFormat::R32_G32_B32; | ||
| 375 | case PixelFormat::RG32F: | ||
| 376 | return Tegra::Texture::TextureFormat::R32_G32; | ||
| 377 | case PixelFormat::R32F: | ||
| 378 | return Tegra::Texture::TextureFormat::R32; | ||
| 379 | case PixelFormat::R16F: | ||
| 380 | case PixelFormat::R16UNORM: | ||
| 381 | return Tegra::Texture::TextureFormat::R16; | ||
| 382 | case PixelFormat::Z32F: | ||
| 383 | return Tegra::Texture::TextureFormat::ZF32; | ||
| 384 | case PixelFormat::Z24S8: | ||
| 385 | return Tegra::Texture::TextureFormat::Z24S8; | ||
| 386 | case PixelFormat::RG16F: | ||
| 387 | case PixelFormat::RG16: | ||
| 388 | case PixelFormat::RG16UI: | ||
| 389 | case PixelFormat::RG16I: | ||
| 390 | case PixelFormat::RG16S: | ||
| 391 | return Tegra::Texture::TextureFormat::R16_G16; | ||
| 392 | default: | ||
| 393 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | ||
| 394 | UNREACHABLE(); | ||
| 395 | } | ||
| 396 | } | ||
| 397 | |||
| 398 | static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { | ||
| 399 | switch (format) { | ||
| 400 | case PixelFormat::S8Z24: | ||
| 401 | return Tegra::DepthFormat::S8_Z24_UNORM; | ||
| 402 | case PixelFormat::Z24S8: | ||
| 403 | return Tegra::DepthFormat::Z24_S8_UNORM; | ||
| 404 | case PixelFormat::Z32F: | ||
| 405 | return Tegra::DepthFormat::Z32_FLOAT; | ||
| 406 | case PixelFormat::Z16: | ||
| 407 | return Tegra::DepthFormat::Z16_UNORM; | ||
| 408 | case PixelFormat::Z32FS8: | ||
| 409 | return Tegra::DepthFormat::Z32_S8_X24_FLOAT; | ||
| 410 | default: | ||
| 411 | LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); | ||
| 412 | UNREACHABLE(); | ||
| 413 | } | ||
| 414 | } | ||
| 415 | |||
| 416 | static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { | 351 | static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { |
| 417 | // TODO(Subv): Implement more component types | 352 | // TODO(Subv): Implement more component types |
| 418 | switch (type) { | 353 | switch (type) { |
| @@ -441,6 +376,7 @@ struct SurfaceParams { | |||
| 441 | case Tegra::RenderTargetFormat::RGB10_A2_UNORM: | 376 | case Tegra::RenderTargetFormat::RGB10_A2_UNORM: |
| 442 | case Tegra::RenderTargetFormat::R8_UNORM: | 377 | case Tegra::RenderTargetFormat::R8_UNORM: |
| 443 | case Tegra::RenderTargetFormat::RG16_UNORM: | 378 | case Tegra::RenderTargetFormat::RG16_UNORM: |
| 379 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: | ||
| 444 | return ComponentType::UNorm; | 380 | return ComponentType::UNorm; |
| 445 | case Tegra::RenderTargetFormat::RG16_SNORM: | 381 | case Tegra::RenderTargetFormat::RG16_SNORM: |
| 446 | return ComponentType::SNorm; | 382 | return ComponentType::SNorm; |
| @@ -612,8 +548,7 @@ public: | |||
| 612 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); | 548 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); |
| 613 | 549 | ||
| 614 | /// Get the color and depth surfaces based on the framebuffer configuration | 550 | /// Get the color and depth surfaces based on the framebuffer configuration |
| 615 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, | 551 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); |
| 616 | const MathUtil::Rectangle<s32>& viewport); | ||
| 617 | 552 | ||
| 618 | /// Flushes the surface to Switch memory | 553 | /// Flushes the surface to Switch memory |
| 619 | void FlushSurface(const Surface& surface); | 554 | void FlushSurface(const Surface& surface); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e3217db81..32f06f409 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -507,6 +507,8 @@ private: | |||
| 507 | 507 | ||
| 508 | /// Build the GLSL register list. | 508 | /// Build the GLSL register list. |
| 509 | void BuildRegisterList() { | 509 | void BuildRegisterList() { |
| 510 | regs.reserve(Register::NumRegisters); | ||
| 511 | |||
| 510 | for (size_t index = 0; index < Register::NumRegisters; ++index) { | 512 | for (size_t index = 0; index < Register::NumRegisters; ++index) { |
| 511 | regs.emplace_back(index, suffix); | 513 | regs.emplace_back(index, suffix); |
| 512 | } | 514 | } |
| @@ -523,6 +525,11 @@ private: | |||
| 523 | // shader. | 525 | // shader. |
| 524 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | 526 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); |
| 525 | return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; | 527 | return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; |
| 528 | case Attribute::Index::Unknown_63: | ||
| 529 | // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. | ||
| 530 | LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63"); | ||
| 531 | UNREACHABLE(); | ||
| 532 | break; | ||
| 526 | default: | 533 | default: |
| 527 | const u32 index{static_cast<u32>(attribute) - | 534 | const u32 index{static_cast<u32>(attribute) - |
| 528 | static_cast<u32>(Attribute::Index::Attribute_0)}; | 535 | static_cast<u32>(Attribute::Index::Attribute_0)}; |
| @@ -534,6 +541,8 @@ private: | |||
| 534 | LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); | 541 | LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); |
| 535 | UNREACHABLE(); | 542 | UNREACHABLE(); |
| 536 | } | 543 | } |
| 544 | |||
| 545 | return "vec4(0, 0, 0, 0)"; | ||
| 537 | } | 546 | } |
| 538 | 547 | ||
| 539 | /// Generates code representing an output attribute register. | 548 | /// Generates code representing an output attribute register. |
| @@ -602,12 +611,12 @@ private: | |||
| 602 | 611 | ||
| 603 | /// Generates code representing a 19-bit immediate value | 612 | /// Generates code representing a 19-bit immediate value |
| 604 | static std::string GetImmediate19(const Instruction& instr) { | 613 | static std::string GetImmediate19(const Instruction& instr) { |
| 605 | return std::to_string(instr.alu.GetImm20_19()); | 614 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); |
| 606 | } | 615 | } |
| 607 | 616 | ||
| 608 | /// Generates code representing a 32-bit immediate value | 617 | /// Generates code representing a 32-bit immediate value |
| 609 | static std::string GetImmediate32(const Instruction& instr) { | 618 | static std::string GetImmediate32(const Instruction& instr) { |
| 610 | return std::to_string(instr.alu.GetImm20_32()); | 619 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); |
| 611 | } | 620 | } |
| 612 | 621 | ||
| 613 | /// Generates code representing a texture sampler. | 622 | /// Generates code representing a texture sampler. |
| @@ -650,16 +659,17 @@ private: | |||
| 650 | * @param instr Instruction to generate the if condition for. | 659 | * @param instr Instruction to generate the if condition for. |
| 651 | * @returns string containing the predicate condition. | 660 | * @returns string containing the predicate condition. |
| 652 | */ | 661 | */ |
| 653 | std::string GetPredicateCondition(u64 index, bool negate) const { | 662 | std::string GetPredicateCondition(u64 index, bool negate) { |
| 654 | using Tegra::Shader::Pred; | 663 | using Tegra::Shader::Pred; |
| 655 | std::string variable; | 664 | std::string variable; |
| 656 | 665 | ||
| 657 | // Index 7 is used as an 'Always True' condition. | 666 | // Index 7 is used as an 'Always True' condition. |
| 658 | if (index == static_cast<u64>(Pred::UnusedIndex)) | 667 | if (index == static_cast<u64>(Pred::UnusedIndex)) { |
| 659 | variable = "true"; | 668 | variable = "true"; |
| 660 | else | 669 | } else { |
| 661 | variable = 'p' + std::to_string(index) + '_' + suffix; | 670 | variable = 'p' + std::to_string(index) + '_' + suffix; |
| 662 | 671 | declr_predicates.insert(variable); | |
| 672 | } | ||
| 663 | if (negate) { | 673 | if (negate) { |
| 664 | return "!(" + variable + ')'; | 674 | return "!(" + variable + ')'; |
| 665 | } | 675 | } |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 24b1d956b..5c7b636e4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -7,6 +7,10 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 9 | 9 | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | |||
| 12 | using Regs = Tegra::Engines::Maxwell3D::Regs; | ||
| 13 | |||
| 10 | namespace TextureUnits { | 14 | namespace TextureUnits { |
| 11 | 15 | ||
| 12 | struct TextureUnit { | 16 | struct TextureUnit { |
| @@ -120,7 +124,7 @@ public: | |||
| 120 | GLuint bindpoint; | 124 | GLuint bindpoint; |
| 121 | GLuint ssbo; | 125 | GLuint ssbo; |
| 122 | }; | 126 | }; |
| 123 | std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{}; | 127 | std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; |
| 124 | } draw; | 128 | } draw; |
| 125 | 129 | ||
| 126 | struct { | 130 | struct { |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 16b1bd606..c439446b1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -27,9 +27,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 27 | case Maxwell::VertexAttribute::Type::UnsignedNorm: { | 27 | case Maxwell::VertexAttribute::Type::UnsignedNorm: { |
| 28 | 28 | ||
| 29 | switch (attrib.size) { | 29 | switch (attrib.size) { |
| 30 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 30 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 31 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 31 | return GL_UNSIGNED_BYTE; | 32 | return GL_UNSIGNED_BYTE; |
| 32 | case Maxwell::VertexAttribute::Size::Size_16_16: | 33 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 34 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 33 | return GL_UNSIGNED_SHORT; | 35 | return GL_UNSIGNED_SHORT; |
| 34 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | 36 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 35 | return GL_UNSIGNED_INT_2_10_10_10_REV; | 37 | return GL_UNSIGNED_INT_2_10_10_10_REV; |
| @@ -43,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 43 | case Maxwell::VertexAttribute::Type::SignedNorm: { | 45 | case Maxwell::VertexAttribute::Type::SignedNorm: { |
| 44 | 46 | ||
| 45 | switch (attrib.size) { | 47 | switch (attrib.size) { |
| 48 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 49 | return GL_INT; | ||
| 50 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 46 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 51 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 47 | return GL_BYTE; | 52 | return GL_BYTE; |
| 48 | case Maxwell::VertexAttribute::Size::Size_16_16: | 53 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| @@ -84,6 +89,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { | |||
| 84 | 89 | ||
| 85 | inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | 90 | inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { |
| 86 | switch (topology) { | 91 | switch (topology) { |
| 92 | case Maxwell::PrimitiveTopology::Points: | ||
| 93 | return GL_POINTS; | ||
| 87 | case Maxwell::PrimitiveTopology::Triangles: | 94 | case Maxwell::PrimitiveTopology::Triangles: |
| 88 | return GL_TRIANGLES; | 95 | return GL_TRIANGLES; |
| 89 | case Maxwell::PrimitiveTopology::TriangleStrip: | 96 | case Maxwell::PrimitiveTopology::TriangleStrip: |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bf9131193..899865e3b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum | |||
| 430 | break; | 430 | break; |
| 431 | case GL_DEBUG_SEVERITY_NOTIFICATION: | 431 | case GL_DEBUG_SEVERITY_NOTIFICATION: |
| 432 | case GL_DEBUG_SEVERITY_LOW: | 432 | case GL_DEBUG_SEVERITY_LOW: |
| 433 | LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); | 433 | LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); |
| 434 | break; | 434 | break; |
| 435 | } | 435 | } |
| 436 | } | 436 | } |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 65db84ad3..70746a34e 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) { | |||
| 54 | return 8; | 54 | return 8; |
| 55 | case TextureFormat::DXT23: | 55 | case TextureFormat::DXT23: |
| 56 | case TextureFormat::DXT45: | 56 | case TextureFormat::DXT45: |
| 57 | case TextureFormat::DXN2: | ||
| 57 | case TextureFormat::BC7U: | 58 | case TextureFormat::BC7U: |
| 58 | // In this case a 'pixel' actually refers to a 4x4 tile. | 59 | // In this case a 'pixel' actually refers to a 4x4 tile. |
| 59 | return 16; | 60 | return 16; |
| @@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) { | |||
| 85 | } | 86 | } |
| 86 | } | 87 | } |
| 87 | 88 | ||
| 88 | static u32 DepthBytesPerPixel(DepthFormat format) { | 89 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, |
| 89 | switch (format) { | 90 | u32 height, u32 block_height) { |
| 90 | case DepthFormat::Z16_UNORM: | ||
| 91 | return 2; | ||
| 92 | case DepthFormat::S8_Z24_UNORM: | ||
| 93 | case DepthFormat::Z24_S8_UNORM: | ||
| 94 | case DepthFormat::Z32_FLOAT: | ||
| 95 | return 4; | ||
| 96 | case DepthFormat::Z32_S8_X24_FLOAT: | ||
| 97 | return 8; | ||
| 98 | default: | ||
| 99 | UNIMPLEMENTED_MSG("Format not implemented"); | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, | ||
| 105 | u32 block_height) { | ||
| 106 | u8* data = Memory::GetPointer(address); | ||
| 107 | u32 bytes_per_pixel = BytesPerPixel(format); | ||
| 108 | |||
| 109 | std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); | 91 | std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); |
| 110 | 92 | CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | |
| 111 | switch (format) { | 93 | Memory::GetPointer(address), unswizzled_data.data(), true, block_height); |
| 112 | case TextureFormat::DXT1: | ||
| 113 | case TextureFormat::DXT23: | ||
| 114 | case TextureFormat::DXT45: | ||
| 115 | case TextureFormat::DXN1: | ||
| 116 | case TextureFormat::BC7U: | ||
| 117 | // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel | ||
| 118 | // values. | ||
| 119 | CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, | ||
| 120 | unswizzled_data.data(), true, block_height); | ||
| 121 | break; | ||
| 122 | case TextureFormat::A8R8G8B8: | ||
| 123 | case TextureFormat::A2B10G10R10: | ||
| 124 | case TextureFormat::A1B5G5R5: | ||
| 125 | case TextureFormat::B5G6R5: | ||
| 126 | case TextureFormat::R8: | ||
| 127 | case TextureFormat::G8R8: | ||
| 128 | case TextureFormat::R16_G16_B16_A16: | ||
| 129 | case TextureFormat::R32_G32_B32_A32: | ||
| 130 | case TextureFormat::R32_G32: | ||
| 131 | case TextureFormat::R32: | ||
| 132 | case TextureFormat::R16: | ||
| 133 | case TextureFormat::R16_G16: | ||
| 134 | case TextureFormat::BF10GF11RF11: | ||
| 135 | case TextureFormat::ASTC_2D_4X4: | ||
| 136 | case TextureFormat::R32_G32_B32: | ||
| 137 | CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, | ||
| 138 | unswizzled_data.data(), true, block_height); | ||
| 139 | break; | ||
| 140 | default: | ||
| 141 | UNIMPLEMENTED_MSG("Format not implemented"); | ||
| 142 | break; | ||
| 143 | } | ||
| 144 | |||
| 145 | return unswizzled_data; | ||
| 146 | } | ||
| 147 | |||
| 148 | std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, | ||
| 149 | u32 block_height) { | ||
| 150 | u8* data = Memory::GetPointer(address); | ||
| 151 | u32 bytes_per_pixel = DepthBytesPerPixel(format); | ||
| 152 | |||
| 153 | std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); | ||
| 154 | |||
| 155 | switch (format) { | ||
| 156 | case DepthFormat::Z16_UNORM: | ||
| 157 | case DepthFormat::S8_Z24_UNORM: | ||
| 158 | case DepthFormat::Z24_S8_UNORM: | ||
| 159 | case DepthFormat::Z32_FLOAT: | ||
| 160 | case DepthFormat::Z32_S8_X24_FLOAT: | ||
| 161 | CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, | ||
| 162 | unswizzled_data.data(), true, block_height); | ||
| 163 | break; | ||
| 164 | default: | ||
| 165 | UNIMPLEMENTED_MSG("Format not implemented"); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | |||
| 169 | return unswizzled_data; | 94 | return unswizzled_data; |
| 170 | } | 95 | } |
| 171 | 96 | ||
| @@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 179 | case TextureFormat::DXT23: | 104 | case TextureFormat::DXT23: |
| 180 | case TextureFormat::DXT45: | 105 | case TextureFormat::DXT45: |
| 181 | case TextureFormat::DXN1: | 106 | case TextureFormat::DXN1: |
| 107 | case TextureFormat::DXN2: | ||
| 182 | case TextureFormat::BC7U: | 108 | case TextureFormat::BC7U: |
| 183 | case TextureFormat::ASTC_2D_4X4: | 109 | case TextureFormat::ASTC_2D_4X4: |
| 184 | case TextureFormat::A8R8G8B8: | 110 | case TextureFormat::A8R8G8B8: |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 73a4924d1..1f7b731be 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -13,8 +13,8 @@ namespace Tegra::Texture { | |||
| 13 | /** | 13 | /** |
| 14 | * Unswizzles a swizzled texture without changing its format. | 14 | * Unswizzles a swizzled texture without changing its format. |
| 15 | */ | 15 | */ |
| 16 | std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, | 16 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, |
| 17 | u32 block_height = TICEntry::DefaultBlockHeight); | 17 | u32 height, u32 block_height = TICEntry::DefaultBlockHeight); |
| 18 | 18 | ||
| 19 | /** | 19 | /** |
| 20 | * Unswizzles a swizzled depth texture without changing its format. | 20 | * Unswizzles a swizzled depth texture without changing its format. |