summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/engines/maxwell_3d.h22
-rw-r--r--src/video_core/engines/shader_bytecode.h15
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp33
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h8
-rw-r--r--src/video_core/textures/decoders.cpp2
8 files changed, 103 insertions, 7 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 988a6433e..cc1f90de6 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -478,7 +478,9 @@ public:
478 478
479 u32 depth_write_enabled; 479 u32 depth_write_enabled;
480 480
481 INSERT_PADDING_WORDS(0x8); 481 INSERT_PADDING_WORDS(0x7);
482
483 u32 d3d_cull_mode;
482 484
483 BitField<0, 3, ComparisonOp> depth_test_func; 485 BitField<0, 3, ComparisonOp> depth_test_func;
484 486
@@ -498,7 +500,13 @@ public:
498 u32 enable[NumRenderTargets]; 500 u32 enable[NumRenderTargets];
499 } blend; 501 } blend;
500 502
501 INSERT_PADDING_WORDS(0x2D); 503 INSERT_PADDING_WORDS(0xB);
504
505 union {
506 BitField<4, 1, u32> triangle_rast_flip;
507 } screen_y_control;
508
509 INSERT_PADDING_WORDS(0x21);
502 510
503 u32 vb_element_base; 511 u32 vb_element_base;
504 512
@@ -528,7 +536,12 @@ public:
528 } 536 }
529 } tic; 537 } tic;
530 538
531 INSERT_PADDING_WORDS(0x22); 539 INSERT_PADDING_WORDS(0x21);
540
541 union {
542 BitField<2, 1, u32> coord_origin;
543 BitField<3, 10, u32> enable;
544 } point_coord_replace;
532 545
533 struct { 546 struct {
534 u32 code_address_high; 547 u32 code_address_high;
@@ -818,11 +831,14 @@ ASSERT_REG_POSITION(rt_control, 0x487);
818ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 831ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
819ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 832ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
820ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 833ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
834ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
821ASSERT_REG_POSITION(depth_test_func, 0x4C3); 835ASSERT_REG_POSITION(depth_test_func, 0x4C3);
822ASSERT_REG_POSITION(blend, 0x4CF); 836ASSERT_REG_POSITION(blend, 0x4CF);
837ASSERT_REG_POSITION(screen_y_control, 0x4EB);
823ASSERT_REG_POSITION(vb_element_base, 0x50D); 838ASSERT_REG_POSITION(vb_element_base, 0x50D);
824ASSERT_REG_POSITION(tsc, 0x557); 839ASSERT_REG_POSITION(tsc, 0x557);
825ASSERT_REG_POSITION(tic, 0x55D); 840ASSERT_REG_POSITION(tic, 0x55D);
841ASSERT_REG_POSITION(point_coord_replace, 0x581);
826ASSERT_REG_POSITION(code_address, 0x582); 842ASSERT_REG_POSITION(code_address, 0x582);
827ASSERT_REG_POSITION(draw, 0x585); 843ASSERT_REG_POSITION(draw, 0x585);
828ASSERT_REG_POSITION(index_array, 0x5F2); 844ASSERT_REG_POSITION(index_array, 0x5F2);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ec9050d3d..3b70efeec 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -329,6 +329,19 @@ union Instruction {
329 } isetp; 329 } isetp;
330 330
331 union { 331 union {
332 BitField<0, 3, u64> pred0;
333 BitField<3, 3, u64> pred3;
334 BitField<12, 3, u64> pred12;
335 BitField<15, 1, u64> neg_pred12;
336 BitField<24, 2, PredOperation> cond;
337 BitField<29, 3, u64> pred29;
338 BitField<32, 1, u64> neg_pred29;
339 BitField<39, 3, u64> pred39;
340 BitField<42, 1, u64> neg_pred39;
341 BitField<45, 2, PredOperation> op;
342 } psetp;
343
344 union {
332 BitField<39, 3, u64> pred39; 345 BitField<39, 3, u64> pred39;
333 BitField<42, 1, u64> neg_pred; 346 BitField<42, 1, u64> neg_pred;
334 BitField<43, 1, u64> neg_a; 347 BitField<43, 1, u64> neg_a;
@@ -646,7 +659,7 @@ private:
646 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 659 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
647 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), 660 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
648 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 661 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
649 INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), 662 INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
650 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), 663 INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
651 INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), 664 INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
652 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 665 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e516eb1ad..bacb389e1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -686,7 +686,10 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
686 686
687 // Bind the uniform to the sampler. 687 // Bind the uniform to the sampler.
688 GLint uniform = glGetUniformLocation(program, entry.GetName().c_str()); 688 GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
689 ASSERT(uniform != -1); 689 if (uniform == -1) {
690 continue;
691 }
692
690 glProgramUniform1i(program, uniform, current_bindpoint); 693 glProgramUniform1i(program, uniform, current_bindpoint);
691 694
692 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); 695 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
@@ -771,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() {
771 if (state.cull.enabled) { 774 if (state.cull.enabled) {
772 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 775 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
773 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 776 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
777
778 // If the GPU is configured to flip the rasterized triangles, then we need to flip the
779 // notion of front and back. Note: We flip the triangles when the value of the register is 0
780 // because OpenGL already does it for us.
781 if (regs.screen_y_control.triangle_rast_flip == 0) {
782 if (state.cull.front_face == GL_CCW)
783 state.cull.front_face = GL_CW;
784 else if (state.cull.front_face == GL_CW)
785 state.cull.front_face = GL_CCW;
786 }
774 } 787 }
775} 788}
776 789
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 50469c05c..57d7763ff 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -108,7 +108,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
108 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, 108 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
109 false}, // Z24S8 109 false}, // Z24S8
110 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, 110 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
111 false}, // S8Z24 111 false}, // S8Z24
112 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
112}}; 113}};
113 114
114static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 115static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -191,7 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
191 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, 192 MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
192 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, 193 MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
193 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>, 194 MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>,
194 MortonCopy<true, PixelFormat::S8Z24>, 195 MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
195}; 196};
196 197
197static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), 198static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -213,6 +214,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
213 MortonCopy<false, PixelFormat::ABGR8>, 214 MortonCopy<false, PixelFormat::ABGR8>,
214 MortonCopy<false, PixelFormat::Z24S8>, 215 MortonCopy<false, PixelFormat::Z24S8>,
215 MortonCopy<false, PixelFormat::S8Z24>, 216 MortonCopy<false, PixelFormat::S8Z24>,
217 MortonCopy<false, PixelFormat::Z32F>,
216}; 218};
217 219
218// Allocate an uninitialized texture of appropriate size and format for the surface 220// Allocate an uninitialized texture of appropriate size and format for the surface
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8005a81b8..b4d7f8ebe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -42,6 +42,7 @@ struct SurfaceParams {
42 // DepthStencil formats 42 // DepthStencil formats
43 Z24S8 = 13, 43 Z24S8 = 13,
44 S8Z24 = 14, 44 S8Z24 = 14,
45 Z32F = 15,
45 46
46 MaxDepthStencilFormat, 47 MaxDepthStencilFormat,
47 48
@@ -94,6 +95,7 @@ struct SurfaceParams {
94 4, // ASTC_2D_4X4 95 4, // ASTC_2D_4X4
95 1, // Z24S8 96 1, // Z24S8
96 1, // S8Z24 97 1, // S8Z24
98 1, // Z32F
97 }}; 99 }};
98 100
99 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 101 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -120,6 +122,7 @@ struct SurfaceParams {
120 32, // ASTC_2D_4X4 122 32, // ASTC_2D_4X4
121 32, // Z24S8 123 32, // Z24S8
122 32, // S8Z24 124 32, // S8Z24
125 32, // Z32F
123 }}; 126 }};
124 127
125 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 128 ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -135,6 +138,8 @@ struct SurfaceParams {
135 return PixelFormat::S8Z24; 138 return PixelFormat::S8Z24;
136 case Tegra::DepthFormat::Z24_S8_UNORM: 139 case Tegra::DepthFormat::Z24_S8_UNORM:
137 return PixelFormat::Z24S8; 140 return PixelFormat::Z24S8;
141 case Tegra::DepthFormat::Z32_FLOAT:
142 return PixelFormat::Z32F;
138 default: 143 default:
139 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 144 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
140 UNREACHABLE(); 145 UNREACHABLE();
@@ -235,6 +240,8 @@ struct SurfaceParams {
235 return Tegra::DepthFormat::S8_Z24_UNORM; 240 return Tegra::DepthFormat::S8_Z24_UNORM;
236 case PixelFormat::Z24S8: 241 case PixelFormat::Z24S8:
237 return Tegra::DepthFormat::Z24_S8_UNORM; 242 return Tegra::DepthFormat::Z24_S8_UNORM;
243 case PixelFormat::Z32F:
244 return Tegra::DepthFormat::Z32_FLOAT;
238 default: 245 default:
239 UNREACHABLE(); 246 UNREACHABLE();
240 } 247 }
@@ -284,6 +291,8 @@ struct SurfaceParams {
284 case Tegra::DepthFormat::S8_Z24_UNORM: 291 case Tegra::DepthFormat::S8_Z24_UNORM:
285 case Tegra::DepthFormat::Z24_S8_UNORM: 292 case Tegra::DepthFormat::Z24_S8_UNORM:
286 return ComponentType::UNorm; 293 return ComponentType::UNorm;
294 case Tegra::DepthFormat::Z32_FLOAT:
295 return ComponentType::Float;
287 default: 296 default:
288 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 297 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
289 UNREACHABLE(); 298 UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d5259e0b1..e817aca5a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1213,6 +1213,9 @@ private:
1213 switch (instr.conversion.f2f.rounding) { 1213 switch (instr.conversion.f2f.rounding) {
1214 case Tegra::Shader::F2fRoundingOp::None: 1214 case Tegra::Shader::F2fRoundingOp::None:
1215 break; 1215 break;
1216 case Tegra::Shader::F2fRoundingOp::Round:
1217 op_a = "roundEven(" + op_a + ')';
1218 break;
1216 case Tegra::Shader::F2fRoundingOp::Floor: 1219 case Tegra::Shader::F2fRoundingOp::Floor:
1217 op_a = "floor(" + op_a + ')'; 1220 op_a = "floor(" + op_a + ')';
1218 break; 1221 break;
@@ -1477,6 +1480,36 @@ private:
1477 } 1480 }
1478 break; 1481 break;
1479 } 1482 }
1483 case OpCode::Type::PredicateSetPredicate: {
1484 std::string op_a =
1485 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
1486 std::string op_b =
1487 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
1488
1489 using Tegra::Shader::Pred;
1490 // We can't use the constant predicate as destination.
1491 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
1492
1493 std::string second_pred =
1494 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
1495
1496 std::string combiner = GetPredicateCombiner(instr.psetp.op);
1497
1498 std::string predicate =
1499 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
1500
1501 // Set the primary predicate to the result of Predicate OP SecondPredicate
1502 SetPredicate(instr.psetp.pred3,
1503 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
1504
1505 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
1506 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
1507 // if enabled
1508 SetPredicate(instr.psetp.pred0,
1509 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
1510 }
1511 break;
1512 }
1480 case OpCode::Type::FloatSet: { 1513 case OpCode::Type::FloatSet: {
1481 std::string op_a = instr.fset.neg_a ? "-" : ""; 1514 std::string op_a = instr.fset.neg_a ? "-" : "";
1482 op_a += regs.GetRegisterAsFloat(instr.gpr8); 1515 op_a += regs.GetRegisterAsFloat(instr.gpr8);
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 6b9bb3df1..6ce53bbd9 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -29,6 +29,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
29 switch (attrib.size) { 29 switch (attrib.size) {
30 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 30 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
31 return GL_UNSIGNED_BYTE; 31 return GL_UNSIGNED_BYTE;
32 case Maxwell::VertexAttribute::Size::Size_16_16:
33 return GL_UNSIGNED_SHORT;
34 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
35 return GL_UNSIGNED_INT_2_10_10_10_REV;
32 } 36 }
33 37
34 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); 38 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -41,6 +45,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
41 switch (attrib.size) { 45 switch (attrib.size) {
42 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 46 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
43 return GL_BYTE; 47 return GL_BYTE;
48 case Maxwell::VertexAttribute::Size::Size_16_16:
49 return GL_SHORT;
50 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
51 return GL_INT_2_10_10_10_REV;
44 } 52 }
45 53
46 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); 54 LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7b06fea3e..d5ab4e4f9 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -78,6 +78,7 @@ static u32 DepthBytesPerPixel(DepthFormat format) {
78 switch (format) { 78 switch (format) {
79 case DepthFormat::S8_Z24_UNORM: 79 case DepthFormat::S8_Z24_UNORM:
80 case DepthFormat::Z24_S8_UNORM: 80 case DepthFormat::Z24_S8_UNORM:
81 case DepthFormat::Z32_FLOAT:
81 return 4; 82 return 4;
82 default: 83 default:
83 UNIMPLEMENTED_MSG("Format not implemented"); 84 UNIMPLEMENTED_MSG("Format not implemented");
@@ -132,6 +133,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
132 switch (format) { 133 switch (format) {
133 case DepthFormat::S8_Z24_UNORM: 134 case DepthFormat::S8_Z24_UNORM:
134 case DepthFormat::Z24_S8_UNORM: 135 case DepthFormat::Z24_S8_UNORM:
136 case DepthFormat::Z32_FLOAT:
135 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, 137 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
136 unswizzled_data.data(), true, block_height); 138 unswizzled_data.data(), true, block_height);
137 break; 139 break;