summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
author: David Marcec, 2018-08-11 10:35:47 +1000
committer: David Marcec, 2018-08-11 10:35:47 +1000
commit: b76ddb7647cbb390cce4143d91a1db171b0fa503 (patch)
tree: a6e2e334e82b035923c41458150604dd5fb31d65 /src/video_core
parent: Added IsUserRegistrationRequestPermitted (diff)
parent: Merge pull request #1007 from MerryMage/dynarmic (diff)
download: yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.tar.gz
download: yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.tar.xz
download: yuzu-b76ddb7647cbb390cce4143d91a1db171b0fa503.zip
Merge remote-tracking branch 'origin/master' into better-account
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 15
-rw-r--r-- src/video_core/engines/maxwell_3d.h 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 15
-rw-r--r-- src/video_core/gpu.cpp 1
-rw-r--r-- src/video_core/gpu.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 23
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 82
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 147
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 22
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 6
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h 7
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 2
-rw-r--r-- src/video_core/textures/decoders.cpp 86
-rw-r--r-- src/video_core/textures/decoders.h 4
14 files changed, 165 insertions, 248 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5c0ae8009..a46ed4bd7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager&
23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} 23 : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
24 24
25void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 25void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
26 auto macro_code = uploaded_macros.find(method); 26 // Reset the current macro.
27 executing_macro = 0;
28
27 // The requested macro must have been uploaded already. 29 // The requested macro must have been uploaded already.
28 ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); 30 auto macro_code = uploaded_macros.find(method);
31 if (macro_code == uploaded_macros.end()) {
32 LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
33 return;
34 }
29 35
30 // Reset the current macro and execute it. 36 // Execute the current macro.
31 executing_macro = 0;
32 macro_interpreter.Execute(macro_code->second, std::move(parameters)); 37 macro_interpreter.Execute(macro_code->second, std::move(parameters));
33} 38}
34 39
@@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
238 243
239 auto& buffer = shader.const_buffers[bind_data.index]; 244 auto& buffer = shader.const_buffers[bind_data.index];
240 245
246 ASSERT(bind_data.index < Regs::MaxConstBuffers);
247
241 buffer.enabled = bind_data.valid.Value() != 0; 248 buffer.enabled = bind_data.valid.Value() != 0;
242 buffer.index = bind_data.index; 249 buffer.index = bind_data.index;
243 buffer.address = regs.const_buffer.BufferAddress(); 250 buffer.address = regs.const_buffer.BufferAddress();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4d0ff96a5..0506ac8fe 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -44,7 +44,7 @@ public:
44 static constexpr size_t MaxShaderProgram = 6; 44 static constexpr size_t MaxShaderProgram = 6;
45 static constexpr size_t MaxShaderStage = 5; 45 static constexpr size_t MaxShaderStage = 5;
46 // Maximum number of const buffers per shader stage. 46 // Maximum number of const buffers per shader stage.
47 static constexpr size_t MaxConstBuffers = 16; 47 static constexpr size_t MaxConstBuffers = 18;
48 48
49 enum class QueryMode : u32 { 49 enum class QueryMode : u32 {
50 Write = 0, 50 Write = 0,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7e3fb4b1..3d4557b7e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,6 +78,8 @@ union Attribute {
78 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 78 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
79 // shader. 79 // shader.
80 TessCoordInstanceIDVertexID = 47, 80 TessCoordInstanceIDVertexID = 47,
81 // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
82 Unknown_63 = 63,
81 }; 83 };
82 84
83 union { 85 union {
@@ -254,20 +256,15 @@ union Instruction {
254 BitField<56, 1, u64> invert_b; 256 BitField<56, 1, u64> invert_b;
255 } lop32i; 257 } lop32i;
256 258
257 float GetImm20_19() const { 259 u32 GetImm20_19() const {
258 float result{};
259 u32 imm{static_cast<u32>(imm20_19)}; 260 u32 imm{static_cast<u32>(imm20_19)};
260 imm <<= 12; 261 imm <<= 12;
261 imm |= negate_imm ? 0x80000000 : 0; 262 imm |= negate_imm ? 0x80000000 : 0;
262 std::memcpy(&result, &imm, sizeof(imm)); 263 return imm;
263 return result;
264 } 264 }
265 265
266 float GetImm20_32() const { 266 u32 GetImm20_32() const {
267 float result{}; 267 return static_cast<u32>(imm20_32);
268 s32 imm{static_cast<s32>(imm20_32)};
269 std::memcpy(&result, &imm, sizeof(imm));
270 return result;
271 } 268 }
272 269
273 s32 GetSignedImm20_20() const { 270 s32 GetSignedImm20_20() const {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b2a83ce0b..4ff4d71c5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
42 case RenderTargetFormat::RGB10_A2_UNORM: 42 case RenderTargetFormat::RGB10_A2_UNORM:
43 case RenderTargetFormat::BGRA8_UNORM: 43 case RenderTargetFormat::BGRA8_UNORM:
44 case RenderTargetFormat::R32_FLOAT: 44 case RenderTargetFormat::R32_FLOAT:
45 case RenderTargetFormat::R11G11B10_FLOAT:
45 return 4; 46 return 4;
46 default: 47 default:
47 UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); 48 UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 440505c9d..874eddd78 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 {
34 RG16_FLOAT = 0xDE, 34 RG16_FLOAT = 0xDE,
35 R11G11B10_FLOAT = 0xE0, 35 R11G11B10_FLOAT = 0xE0,
36 R32_FLOAT = 0xE5, 36 R32_FLOAT = 0xE5,
37 B5G6R5_UNORM = 0xE8,
37 R16_FLOAT = 0xF2, 38 R16_FLOAT = 0xF2,
38 R8_UNORM = 0xF3, 39 R8_UNORM = 0xF3,
39}; 40};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c2a931469..8360feb5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -161,7 +161,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
161 // assume every shader uses them all. 161 // assume every shader uses them all.
162 for (unsigned index = 0; index < 16; ++index) { 162 for (unsigned index = 0; index < 16; ++index) {
163 auto& attrib = regs.vertex_attrib_format[index]; 163 auto& attrib = regs.vertex_attrib_format[index];
164 LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 164 LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
165 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 165 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
166 attrib.offset.Value(), attrib.IsNormalized()); 166 attrib.offset.Value(), attrib.IsNormalized());
167 167
@@ -324,11 +324,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
324 bool using_depth_fb) { 324 bool using_depth_fb) {
325 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 325 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
326 326
327 if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
328 LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
329 using_color_fb = false;
330 }
331
327 // TODO(bunnei): Implement this 332 // TODO(bunnei): Implement this
328 const bool has_stencil = false; 333 const bool has_stencil = false;
329 334
330 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
331
332 const bool write_color_fb = 335 const bool write_color_fb =
333 state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || 336 state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
334 state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; 337 state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
@@ -341,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
341 Surface depth_surface; 344 Surface depth_surface;
342 MathUtil::Rectangle<u32> surfaces_rect; 345 MathUtil::Rectangle<u32> surfaces_rect;
343 std::tie(color_surface, depth_surface, surfaces_rect) = 346 std::tie(color_surface, depth_surface, surfaces_rect) =
344 res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); 347 res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
345 348
346 MathUtil::Rectangle<u32> draw_rect{ 349 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
350 const MathUtil::Rectangle<u32> draw_rect{
347 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, 351 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
348 surfaces_rect.left, surfaces_rect.right)), // Left 352 surfaces_rect.left, surfaces_rect.right)), // Left
349 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, 353 static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
@@ -659,7 +663,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
659 auto& buffer_draw_state = 663 auto& buffer_draw_state =
660 state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; 664 state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
661 665
662 ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); 666 if (!buffer.enabled) {
667 continue;
668 }
669
663 buffer_draw_state.enabled = true; 670 buffer_draw_state.enabled = true;
664 buffer_draw_state.bindpoint = current_bindpoint + bindpoint; 671 buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
665 672
@@ -804,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() {
804void RasterizerOpenGL::SyncCullMode() { 811void RasterizerOpenGL::SyncCullMode() {
805 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 812 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
806 813
807 // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions 814 state.cull.enabled = regs.cull.enabled != 0;
808 // state.cull.enabled = regs.cull.enabled != 0;
809 state.cull.enabled = false;
810 815
811 if (state.cull.enabled) { 816 if (state.cull.enabled) {
812 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 817 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 257aa9571..9fb734b77 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -109,6 +109,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
109 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 109 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
110 true}, // DXT45 110 true}, // DXT45
111 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 111 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
112 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
113 true}, // DXN2UNORM
114 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
112 {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 115 {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
113 true}, // BC7U 116 true}, // BC7U
114 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 117 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
@@ -180,36 +183,49 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
180 return {0, actual_height, width, 0}; 183 return {0, actual_height, width, 0};
181} 184}
182 185
186/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
187static bool IsFormatBCn(PixelFormat format) {
188 switch (format) {
189 case PixelFormat::DXT1:
190 case PixelFormat::DXT23:
191 case PixelFormat::DXT45:
192 case PixelFormat::DXN1:
193 case PixelFormat::DXN2SNORM:
194 case PixelFormat::DXN2UNORM:
195 case PixelFormat::BC7U:
196 return true;
197 }
198 return false;
199}
200
183template <bool morton_to_gl, PixelFormat format> 201template <bool morton_to_gl, PixelFormat format>
184void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { 202void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer,
203 Tegra::GPUVAddr addr) {
185 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; 204 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
186 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); 205 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
187 const auto& gpu = Core::System::GetInstance().GPU(); 206 const auto& gpu = Core::System::GetInstance().GPU();
188 207
189 if (morton_to_gl) { 208 if (morton_to_gl) {
190 if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { 209 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
191 auto data = Tegra::Texture::UnswizzleTexture( 210 // pixel values.
192 *gpu.memory_manager->GpuToCpuAddress(addr), 211 const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
193 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); 212 const std::vector<u8> data =
194 std::memcpy(gl_buffer, data.data(), data.size()); 213 Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size,
195 } else { 214 bytes_per_pixel, stride, height, block_height);
196 auto data = Tegra::Texture::UnswizzleDepthTexture( 215 const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
197 *gpu.memory_manager->GpuToCpuAddress(addr), 216 gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
198 SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height);
199 std::memcpy(gl_buffer, data.data(), data.size());
200 }
201 } else { 217 } else {
202 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should 218 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
203 // check the configuration for this and perform more generic un/swizzle 219 // check the configuration for this and perform more generic un/swizzle
204 LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); 220 LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
205 VideoCore::MortonCopyPixels128( 221 VideoCore::MortonCopyPixels128(
206 stride, height, bytes_per_pixel, gl_bytes_per_pixel, 222 stride, height, bytes_per_pixel, gl_bytes_per_pixel,
207 Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, 223 Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(),
208 morton_to_gl); 224 morton_to_gl);
209 } 225 }
210} 226}
211 227
212static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), 228static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
213 SurfaceParams::MaxPixelFormat> 229 SurfaceParams::MaxPixelFormat>
214 morton_to_gl_fns = { 230 morton_to_gl_fns = {
215 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, 231 MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
@@ -218,6 +234,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
218 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, 234 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
219 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, 235 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
220 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, 236 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
237 MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
221 MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, 238 MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
222 MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, 239 MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
223 MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, 240 MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
@@ -231,7 +248,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
231 MortonCopy<true, PixelFormat::Z32FS8>, 248 MortonCopy<true, PixelFormat::Z32FS8>,
232}; 249};
233 250
234static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), 251static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
235 SurfaceParams::MaxPixelFormat> 252 SurfaceParams::MaxPixelFormat>
236 gl_to_morton_fns = { 253 gl_to_morton_fns = {
237 MortonCopy<false, PixelFormat::ABGR8>, 254 MortonCopy<false, PixelFormat::ABGR8>,
@@ -242,7 +259,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
242 MortonCopy<false, PixelFormat::RGBA16F>, 259 MortonCopy<false, PixelFormat::RGBA16F>,
243 MortonCopy<false, PixelFormat::R11FG11FB10F>, 260 MortonCopy<false, PixelFormat::R11FG11FB10F>,
244 MortonCopy<false, PixelFormat::RGBA32UI>, 261 MortonCopy<false, PixelFormat::RGBA32UI>,
245 // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported 262 // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
263 // supported
264 nullptr,
265 nullptr,
246 nullptr, 266 nullptr,
247 nullptr, 267 nullptr,
248 nullptr, 268 nullptr,
@@ -447,22 +467,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
447void CachedSurface::LoadGLBuffer() { 467void CachedSurface::LoadGLBuffer() {
448 ASSERT(params.type != SurfaceType::Fill); 468 ASSERT(params.type != SurfaceType::Fill);
449 469
450 u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); 470 const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
451 471
452 ASSERT(texture_src_data); 472 ASSERT(texture_src_data);
453 473
454 gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); 474 const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
475 const u32 copy_size = params.width * params.height * bytes_per_pixel;
455 476
456 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); 477 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
457 478
458 if (!params.is_tiled) { 479 if (params.is_tiled) {
459 const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; 480 gl_buffer.resize(copy_size);
460 481
461 std::memcpy(gl_buffer.data(), texture_src_data,
462 bytes_per_pixel * params.width * params.height);
463 } else {
464 morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( 482 morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
465 params.width, params.block_height, params.height, gl_buffer.data(), params.addr); 483 params.width, params.block_height, params.height, gl_buffer, params.addr);
484 } else {
485 const u8* const texture_src_data_end = texture_src_data + copy_size;
486
487 gl_buffer.assign(texture_src_data, texture_src_data_end);
466 } 488 }
467 489
468 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); 490 ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
@@ -485,7 +507,7 @@ void CachedSurface::FlushGLBuffer() {
485 std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); 507 std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
486 } else { 508 } else {
487 gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( 509 gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
488 params.width, params.block_height, params.height, gl_buffer.data(), params.addr); 510 params.width, params.block_height, params.height, gl_buffer, params.addr);
489 } 511 }
490} 512}
491 513
@@ -600,8 +622,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
600 return GetSurface(SurfaceParams::CreateForTexture(config)); 622 return GetSurface(SurfaceParams::CreateForTexture(config));
601} 623}
602 624
603SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( 625SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
604 bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { 626 bool using_depth_fb) {
605 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 627 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
606 628
607 // TODO(bunnei): This is hard corded to use just the first render buffer 629 // TODO(bunnei): This is hard corded to use just the first render buffer
@@ -757,10 +779,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*
757} 779}
758 780
759void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { 781void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
760 for (const auto& pair : surface_cache) { 782 for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
761 const auto& surface{pair.second}; 783 const auto& surface{iter->second};
762 const auto& params{surface->GetSurfaceParams()}; 784 const auto& params{surface->GetSurfaceParams()};
763 785
786 ++iter;
787
764 if (params.IsOverlappingRegion(addr, size)) { 788 if (params.IsOverlappingRegion(addr, size)) {
765 UnregisterSurface(surface); 789 UnregisterSurface(surface);
766 } 790 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0c6652c7a..829a76dfe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -35,31 +35,33 @@ struct SurfaceParams {
35 DXT23 = 9, 35 DXT23 = 9,
36 DXT45 = 10, 36 DXT45 = 10,
37 DXN1 = 11, // This is also known as BC4 37 DXN1 = 11, // This is also known as BC4
38 BC7U = 12, 38 DXN2UNORM = 12,
39 ASTC_2D_4X4 = 13, 39 DXN2SNORM = 13,
40 G8R8 = 14, 40 BC7U = 14,
41 BGRA8 = 15, 41 ASTC_2D_4X4 = 15,
42 RGBA32F = 16, 42 G8R8 = 16,
43 RG32F = 17, 43 BGRA8 = 17,
44 R32F = 18, 44 RGBA32F = 18,
45 R16F = 19, 45 RG32F = 19,
46 R16UNORM = 20, 46 R32F = 20,
47 RG16 = 21, 47 R16F = 21,
48 RG16F = 22, 48 R16UNORM = 22,
49 RG16UI = 23, 49 RG16 = 23,
50 RG16I = 24, 50 RG16F = 24,
51 RG16S = 25, 51 RG16UI = 25,
52 RGB32F = 26, 52 RG16I = 26,
53 SRGBA8 = 27, 53 RG16S = 27,
54 RGB32F = 28,
55 SRGBA8 = 29,
54 56
55 MaxColorFormat, 57 MaxColorFormat,
56 58
57 // DepthStencil formats 59 // DepthStencil formats
58 Z24S8 = 28, 60 Z24S8 = 30,
59 S8Z24 = 29, 61 S8Z24 = 31,
60 Z32F = 30, 62 Z32F = 32,
61 Z16 = 31, 63 Z16 = 33,
62 Z32FS8 = 32, 64 Z32FS8 = 34,
63 65
64 MaxDepthStencilFormat, 66 MaxDepthStencilFormat,
65 67
@@ -109,6 +111,8 @@ struct SurfaceParams {
109 4, // DXT23 111 4, // DXT23
110 4, // DXT45 112 4, // DXT45
111 4, // DXN1 113 4, // DXN1
114 4, // DXN2UNORM
115 4, // DXN2SNORM
112 4, // BC7U 116 4, // BC7U
113 4, // ASTC_2D_4X4 117 4, // ASTC_2D_4X4
114 1, // G8R8 118 1, // G8R8
@@ -153,6 +157,8 @@ struct SurfaceParams {
153 128, // DXT23 157 128, // DXT23
154 128, // DXT45 158 128, // DXT45
155 64, // DXN1 159 64, // DXN1
160 128, // DXN2UNORM
161 128, // DXN2SNORM
156 128, // BC7U 162 128, // BC7U
157 32, // ASTC_2D_4X4 163 32, // ASTC_2D_4X4
158 16, // G8R8 164 16, // G8R8
@@ -221,6 +227,8 @@ struct SurfaceParams {
221 return PixelFormat::RG32F; 227 return PixelFormat::RG32F;
222 case Tegra::RenderTargetFormat::R11G11B10_FLOAT: 228 case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
223 return PixelFormat::R11FG11FB10F; 229 return PixelFormat::R11FG11FB10F;
230 case Tegra::RenderTargetFormat::B5G6R5_UNORM:
231 return PixelFormat::B5G6R5;
224 case Tegra::RenderTargetFormat::RGBA32_UINT: 232 case Tegra::RenderTargetFormat::RGBA32_UINT:
225 return PixelFormat::RGBA32UI; 233 return PixelFormat::RGBA32UI;
226 case Tegra::RenderTargetFormat::R8_UNORM: 234 case Tegra::RenderTargetFormat::R8_UNORM:
@@ -303,6 +311,16 @@ struct SurfaceParams {
303 return PixelFormat::DXT45; 311 return PixelFormat::DXT45;
304 case Tegra::Texture::TextureFormat::DXN1: 312 case Tegra::Texture::TextureFormat::DXN1:
305 return PixelFormat::DXN1; 313 return PixelFormat::DXN1;
314 case Tegra::Texture::TextureFormat::DXN2:
315 switch (component_type) {
316 case Tegra::Texture::ComponentType::UNORM:
317 return PixelFormat::DXN2UNORM;
318 case Tegra::Texture::ComponentType::SNORM:
319 return PixelFormat::DXN2SNORM;
320 }
321 LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
322 static_cast<u32>(component_type));
323 UNREACHABLE();
306 case Tegra::Texture::TextureFormat::BC7U: 324 case Tegra::Texture::TextureFormat::BC7U:
307 return PixelFormat::BC7U; 325 return PixelFormat::BC7U;
308 case Tegra::Texture::TextureFormat::ASTC_2D_4X4: 326 case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
@@ -330,89 +348,6 @@ struct SurfaceParams {
330 } 348 }
331 } 349 }
332 350
333 static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
334 // TODO(Subv): Properly implement this
335 switch (format) {
336 case PixelFormat::ABGR8:
337 case PixelFormat::SRGBA8:
338 return Tegra::Texture::TextureFormat::A8R8G8B8;
339 case PixelFormat::B5G6R5:
340 return Tegra::Texture::TextureFormat::B5G6R5;
341 case PixelFormat::A2B10G10R10:
342 return Tegra::Texture::TextureFormat::A2B10G10R10;
343 case PixelFormat::A1B5G5R5:
344 return Tegra::Texture::TextureFormat::A1B5G5R5;
345 case PixelFormat::R8:
346 return Tegra::Texture::TextureFormat::R8;
347 case PixelFormat::G8R8:
348 return Tegra::Texture::TextureFormat::G8R8;
349 case PixelFormat::RGBA16F:
350 return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
351 case PixelFormat::R11FG11FB10F:
352 return Tegra::Texture::TextureFormat::BF10GF11RF11;
353 case PixelFormat::RGBA32UI:
354 return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
355 case PixelFormat::DXT1:
356 return Tegra::Texture::TextureFormat::DXT1;
357 case PixelFormat::DXT23:
358 return Tegra::Texture::TextureFormat::DXT23;
359 case PixelFormat::DXT45:
360 return Tegra::Texture::TextureFormat::DXT45;
361 case PixelFormat::DXN1:
362 return Tegra::Texture::TextureFormat::DXN1;
363 case PixelFormat::BC7U:
364 return Tegra::Texture::TextureFormat::BC7U;
365 case PixelFormat::ASTC_2D_4X4:
366 return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
367 case PixelFormat::BGRA8:
368 // TODO(bunnei): This is fine for unswizzling (since we just need the right component
369 // sizes), but could be a bug if we used this function in different ways.
370 return Tegra::Texture::TextureFormat::A8R8G8B8;
371 case PixelFormat::RGBA32F:
372 return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
373 case PixelFormat::RGB32F:
374 return Tegra::Texture::TextureFormat::R32_G32_B32;
375 case PixelFormat::RG32F:
376 return Tegra::Texture::TextureFormat::R32_G32;
377 case PixelFormat::R32F:
378 return Tegra::Texture::TextureFormat::R32;
379 case PixelFormat::R16F:
380 case PixelFormat::R16UNORM:
381 return Tegra::Texture::TextureFormat::R16;
382 case PixelFormat::Z32F:
383 return Tegra::Texture::TextureFormat::ZF32;
384 case PixelFormat::Z24S8:
385 return Tegra::Texture::TextureFormat::Z24S8;
386 case PixelFormat::RG16F:
387 case PixelFormat::RG16:
388 case PixelFormat::RG16UI:
389 case PixelFormat::RG16I:
390 case PixelFormat::RG16S:
391 return Tegra::Texture::TextureFormat::R16_G16;
392 default:
393 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
394 UNREACHABLE();
395 }
396 }
397
398 static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
399 switch (format) {
400 case PixelFormat::S8Z24:
401 return Tegra::DepthFormat::S8_Z24_UNORM;
402 case PixelFormat::Z24S8:
403 return Tegra::DepthFormat::Z24_S8_UNORM;
404 case PixelFormat::Z32F:
405 return Tegra::DepthFormat::Z32_FLOAT;
406 case PixelFormat::Z16:
407 return Tegra::DepthFormat::Z16_UNORM;
408 case PixelFormat::Z32FS8:
409 return Tegra::DepthFormat::Z32_S8_X24_FLOAT;
410 default:
411 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
412 UNREACHABLE();
413 }
414 }
415
416 static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { 351 static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
417 // TODO(Subv): Implement more component types 352 // TODO(Subv): Implement more component types
418 switch (type) { 353 switch (type) {
@@ -441,6 +376,7 @@ struct SurfaceParams {
441 case Tegra::RenderTargetFormat::RGB10_A2_UNORM: 376 case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
442 case Tegra::RenderTargetFormat::R8_UNORM: 377 case Tegra::RenderTargetFormat::R8_UNORM:
443 case Tegra::RenderTargetFormat::RG16_UNORM: 378 case Tegra::RenderTargetFormat::RG16_UNORM:
379 case Tegra::RenderTargetFormat::B5G6R5_UNORM:
444 return ComponentType::UNorm; 380 return ComponentType::UNorm;
445 case Tegra::RenderTargetFormat::RG16_SNORM: 381 case Tegra::RenderTargetFormat::RG16_SNORM:
446 return ComponentType::SNorm; 382 return ComponentType::SNorm;
@@ -612,8 +548,7 @@ public:
612 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); 548 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
613 549
614 /// Get the color and depth surfaces based on the framebuffer configuration 550 /// Get the color and depth surfaces based on the framebuffer configuration
615 SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, 551 SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
616 const MathUtil::Rectangle<s32>& viewport);
617 552
618 /// Flushes the surface to Switch memory 553 /// Flushes the surface to Switch memory
619 void FlushSurface(const Surface& surface); 554 void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e3217db81..32f06f409 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -507,6 +507,8 @@ private:
507 507
508 /// Build the GLSL register list. 508 /// Build the GLSL register list.
509 void BuildRegisterList() { 509 void BuildRegisterList() {
510 regs.reserve(Register::NumRegisters);
511
510 for (size_t index = 0; index < Register::NumRegisters; ++index) { 512 for (size_t index = 0; index < Register::NumRegisters; ++index) {
511 regs.emplace_back(index, suffix); 513 regs.emplace_back(index, suffix);
512 } 514 }
@@ -523,6 +525,11 @@ private:
523 // shader. 525 // shader.
524 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); 526 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
525 return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; 527 return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
528 case Attribute::Index::Unknown_63:
529 // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
530 LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63");
531 UNREACHABLE();
532 break;
526 default: 533 default:
527 const u32 index{static_cast<u32>(attribute) - 534 const u32 index{static_cast<u32>(attribute) -
528 static_cast<u32>(Attribute::Index::Attribute_0)}; 535 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -534,6 +541,8 @@ private:
534 LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); 541 LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
535 UNREACHABLE(); 542 UNREACHABLE();
536 } 543 }
544
545 return "vec4(0, 0, 0, 0)";
537 } 546 }
538 547
539 /// Generates code representing an output attribute register. 548 /// Generates code representing an output attribute register.
@@ -602,12 +611,12 @@ private:
602 611
603 /// Generates code representing a 19-bit immediate value 612 /// Generates code representing a 19-bit immediate value
604 static std::string GetImmediate19(const Instruction& instr) { 613 static std::string GetImmediate19(const Instruction& instr) {
605 return std::to_string(instr.alu.GetImm20_19()); 614 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
606 } 615 }
607 616
608 /// Generates code representing a 32-bit immediate value 617 /// Generates code representing a 32-bit immediate value
609 static std::string GetImmediate32(const Instruction& instr) { 618 static std::string GetImmediate32(const Instruction& instr) {
610 return std::to_string(instr.alu.GetImm20_32()); 619 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
611 } 620 }
612 621
613 /// Generates code representing a texture sampler. 622 /// Generates code representing a texture sampler.
@@ -650,16 +659,17 @@ private:
650 * @param instr Instruction to generate the if condition for. 659 * @param instr Instruction to generate the if condition for.
651 * @returns string containing the predicate condition. 660 * @returns string containing the predicate condition.
652 */ 661 */
653 std::string GetPredicateCondition(u64 index, bool negate) const { 662 std::string GetPredicateCondition(u64 index, bool negate) {
654 using Tegra::Shader::Pred; 663 using Tegra::Shader::Pred;
655 std::string variable; 664 std::string variable;
656 665
657 // Index 7 is used as an 'Always True' condition. 666 // Index 7 is used as an 'Always True' condition.
658 if (index == static_cast<u64>(Pred::UnusedIndex)) 667 if (index == static_cast<u64>(Pred::UnusedIndex)) {
659 variable = "true"; 668 variable = "true";
660 else 669 } else {
661 variable = 'p' + std::to_string(index) + '_' + suffix; 670 variable = 'p' + std::to_string(index) + '_' + suffix;
662 671 declr_predicates.insert(variable);
672 }
663 if (negate) { 673 if (negate) {
664 return "!(" + variable + ')'; 674 return "!(" + variable + ')';
665 } 675 }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24b1d956b..5c7b636e4 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -7,6 +7,10 @@
7#include <array> 7#include <array>
8#include <glad/glad.h> 8#include <glad/glad.h>
9 9
10#include "video_core/engines/maxwell_3d.h"
11
12using Regs = Tegra::Engines::Maxwell3D::Regs;
13
10namespace TextureUnits { 14namespace TextureUnits {
11 15
12struct TextureUnit { 16struct TextureUnit {
@@ -120,7 +124,7 @@ public:
120 GLuint bindpoint; 124 GLuint bindpoint;
121 GLuint ssbo; 125 GLuint ssbo;
122 }; 126 };
123 std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{}; 127 std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
124 } draw; 128 } draw;
125 129
126 struct { 130 struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 16b1bd606..c439446b1 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,9 +27,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
27 case Maxwell::VertexAttribute::Type::UnsignedNorm: { 27 case Maxwell::VertexAttribute::Type::UnsignedNorm: {
28 28
29 switch (attrib.size) { 29 switch (attrib.size) {
30 case Maxwell::VertexAttribute::Size::Size_8_8:
30 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 31 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
31 return GL_UNSIGNED_BYTE; 32 return GL_UNSIGNED_BYTE;
32 case Maxwell::VertexAttribute::Size::Size_16_16: 33 case Maxwell::VertexAttribute::Size::Size_16_16:
34 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
33 return GL_UNSIGNED_SHORT; 35 return GL_UNSIGNED_SHORT;
34 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 36 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
35 return GL_UNSIGNED_INT_2_10_10_10_REV; 37 return GL_UNSIGNED_INT_2_10_10_10_REV;
@@ -43,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
43 case Maxwell::VertexAttribute::Type::SignedNorm: { 45 case Maxwell::VertexAttribute::Type::SignedNorm: {
44 46
45 switch (attrib.size) { 47 switch (attrib.size) {
48 case Maxwell::VertexAttribute::Size::Size_32_32_32:
49 return GL_INT;
50 case Maxwell::VertexAttribute::Size::Size_8_8:
46 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 51 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
47 return GL_BYTE; 52 return GL_BYTE;
48 case Maxwell::VertexAttribute::Size::Size_16_16: 53 case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -84,6 +89,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
84 89
85inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { 90inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
86 switch (topology) { 91 switch (topology) {
92 case Maxwell::PrimitiveTopology::Points:
93 return GL_POINTS;
87 case Maxwell::PrimitiveTopology::Triangles: 94 case Maxwell::PrimitiveTopology::Triangles:
88 return GL_TRIANGLES; 95 return GL_TRIANGLES;
89 case Maxwell::PrimitiveTopology::TriangleStrip: 96 case Maxwell::PrimitiveTopology::TriangleStrip:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bf9131193..899865e3b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
430 break; 430 break;
431 case GL_DEBUG_SEVERITY_NOTIFICATION: 431 case GL_DEBUG_SEVERITY_NOTIFICATION:
432 case GL_DEBUG_SEVERITY_LOW: 432 case GL_DEBUG_SEVERITY_LOW:
433 LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); 433 LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message);
434 break; 434 break;
435 } 435 }
436} 436}
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 65db84ad3..70746a34e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) {
54 return 8; 54 return 8;
55 case TextureFormat::DXT23: 55 case TextureFormat::DXT23:
56 case TextureFormat::DXT45: 56 case TextureFormat::DXT45:
57 case TextureFormat::DXN2:
57 case TextureFormat::BC7U: 58 case TextureFormat::BC7U:
58 // In this case a 'pixel' actually refers to a 4x4 tile. 59 // In this case a 'pixel' actually refers to a 4x4 tile.
59 return 16; 60 return 16;
@@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) {
85 } 86 }
86} 87}
87 88
88static u32 DepthBytesPerPixel(DepthFormat format) { 89std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
89 switch (format) { 90 u32 height, u32 block_height) {
90 case DepthFormat::Z16_UNORM:
91 return 2;
92 case DepthFormat::S8_Z24_UNORM:
93 case DepthFormat::Z24_S8_UNORM:
94 case DepthFormat::Z32_FLOAT:
95 return 4;
96 case DepthFormat::Z32_S8_X24_FLOAT:
97 return 8;
98 default:
99 UNIMPLEMENTED_MSG("Format not implemented");
100 break;
101 }
102}
103
104std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
105 u32 block_height) {
106 u8* data = Memory::GetPointer(address);
107 u32 bytes_per_pixel = BytesPerPixel(format);
108
109 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); 91 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
110 92 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
111 switch (format) { 93 Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
112 case TextureFormat::DXT1:
113 case TextureFormat::DXT23:
114 case TextureFormat::DXT45:
115 case TextureFormat::DXN1:
116 case TextureFormat::BC7U:
117 // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
118 // values.
119 CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
120 unswizzled_data.data(), true, block_height);
121 break;
122 case TextureFormat::A8R8G8B8:
123 case TextureFormat::A2B10G10R10:
124 case TextureFormat::A1B5G5R5:
125 case TextureFormat::B5G6R5:
126 case TextureFormat::R8:
127 case TextureFormat::G8R8:
128 case TextureFormat::R16_G16_B16_A16:
129 case TextureFormat::R32_G32_B32_A32:
130 case TextureFormat::R32_G32:
131 case TextureFormat::R32:
132 case TextureFormat::R16:
133 case TextureFormat::R16_G16:
134 case TextureFormat::BF10GF11RF11:
135 case TextureFormat::ASTC_2D_4X4:
136 case TextureFormat::R32_G32_B32:
137 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
138 unswizzled_data.data(), true, block_height);
139 break;
140 default:
141 UNIMPLEMENTED_MSG("Format not implemented");
142 break;
143 }
144
145 return unswizzled_data;
146}
147
148std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
149 u32 block_height) {
150 u8* data = Memory::GetPointer(address);
151 u32 bytes_per_pixel = DepthBytesPerPixel(format);
152
153 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
154
155 switch (format) {
156 case DepthFormat::Z16_UNORM:
157 case DepthFormat::S8_Z24_UNORM:
158 case DepthFormat::Z24_S8_UNORM:
159 case DepthFormat::Z32_FLOAT:
160 case DepthFormat::Z32_S8_X24_FLOAT:
161 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
162 unswizzled_data.data(), true, block_height);
163 break;
164 default:
165 UNIMPLEMENTED_MSG("Format not implemented");
166 break;
167 }
168
169 return unswizzled_data; 94 return unswizzled_data;
170} 95}
171 96
@@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
179 case TextureFormat::DXT23: 104 case TextureFormat::DXT23:
180 case TextureFormat::DXT45: 105 case TextureFormat::DXT45:
181 case TextureFormat::DXN1: 106 case TextureFormat::DXN1:
107 case TextureFormat::DXN2:
182 case TextureFormat::BC7U: 108 case TextureFormat::BC7U:
183 case TextureFormat::ASTC_2D_4X4: 109 case TextureFormat::ASTC_2D_4X4:
184 case TextureFormat::A8R8G8B8: 110 case TextureFormat::A8R8G8B8:
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 73a4924d1..1f7b731be 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -13,8 +13,8 @@ namespace Tegra::Texture {
13/** 13/**
14 * Unswizzles a swizzled texture without changing its format. 14 * Unswizzles a swizzled texture without changing its format.
15 */ 15 */
16std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, 16std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
17 u32 block_height = TICEntry::DefaultBlockHeight); 17 u32 height, u32 block_height = TICEntry::DefaultBlockHeight);
18 18
19/** 19/**
20 * Unswizzles a swizzled depth texture without changing its format. 20 * Unswizzles a swizzled depth texture without changing its format.