summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/engines/shader_bytecode.h11
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp288
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp2
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h142
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
15 files changed, 320 insertions, 224 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
911 } fadd32i; 911 } fadd32i;
912 912
913 union { 913 union {
914 BitField<20, 8, u64> shift_position; 914 BitField<40, 1, u64> brev;
915 BitField<28, 8, u64> shift_length; 915 BitField<47, 1, u64> rd_cc;
916 BitField<48, 1, u64> negate_b; 916 BitField<48, 1, u64> is_signed;
917 BitField<49, 1, u64> negate_a;
918
919 u64 GetLeftShiftValue() const {
920 return 32 - (shift_position + shift_length);
921 }
922 } bfe; 917 } bfe;
923 918
924 union { 919 union {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
39 RGBA32_FLOAT = 0xC0, 39 RGBA32_FLOAT = 0xC0,
40 RGBA32_UINT = 0xC2, 40 RGBA32_UINT = 0xC2,
41 RGBA16_UNORM = 0xC6, 41 RGBA16_UNORM = 0xC6,
42 RGBA16_SNORM = 0xC7,
42 RGBA16_UINT = 0xC9, 43 RGBA16_UINT = 0xC9,
43 RGBA16_FLOAT = 0xCA, 44 RGBA16_FLOAT = 0xCA,
44 RG32_FLOAT = 0xCB, 45 RG32_FLOAT = 0xCB,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
51 MortonCopy<true, PixelFormat::R8UI>, 51 MortonCopy<true, PixelFormat::R8UI>,
52 MortonCopy<true, PixelFormat::RGBA16F>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
53 MortonCopy<true, PixelFormat::RGBA16U>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
54 MortonCopy<true, PixelFormat::RGBA16S>,
54 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::RGBA16UI>,
55 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
56 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
131 MortonCopy<false, PixelFormat::R8U>, 132 MortonCopy<false, PixelFormat::R8U>,
132 MortonCopy<false, PixelFormat::R8UI>, 133 MortonCopy<false, PixelFormat::R8UI>,
133 MortonCopy<false, PixelFormat::RGBA16F>, 134 MortonCopy<false, PixelFormat::RGBA16F>,
135 MortonCopy<false, PixelFormat::RGBA16S>,
134 MortonCopy<false, PixelFormat::RGBA16U>, 136 MortonCopy<false, PixelFormat::RGBA16U>,
135 MortonCopy<false, PixelFormat::RGBA16UI>, 137 MortonCopy<false, PixelFormat::RGBA16UI>,
136 MortonCopy<false, PixelFormat::R11FG11FB10F>, 138 MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3adf7f0cb..849839fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2009,16 +2009,19 @@ private:
2009 expr += GetSampler(meta->sampler); 2009 expr += GetSampler(meta->sampler);
2010 expr += ", "; 2010 expr += ", ";
2011 2011
2012 expr += constructors.at(operation.GetOperandsCount() - 1); 2012 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
2013 expr += '('; 2013 expr += '(';
2014 for (std::size_t i = 0; i < count; ++i) { 2014 for (std::size_t i = 0; i < count; ++i) {
2015 expr += VisitOperand(operation, i).AsInt(); 2015 if (i > 0) {
2016 const std::size_t next = i + 1;
2017 if (next == count)
2018 expr += ')';
2019 else if (next < count)
2020 expr += ", "; 2016 expr += ", ";
2017 }
2018 expr += VisitOperand(operation, i).AsInt();
2021 } 2019 }
2020 if (meta->array) {
2021 expr += ", ";
2022 expr += Visit(meta->array).AsInt();
2023 }
2024 expr += ')';
2022 2025
2023 if (meta->lod && !meta->sampler.IsBuffer()) { 2026 if (meta->lod && !meta->sampler.IsBuffer()) {
2024 expr += ", "; 2027 expr += ", ";
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 2d3838a7a..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI 53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U 55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
56 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
56 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI 57 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
57 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F 58 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
58 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI 59 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 12333e8c9..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,8 +5,11 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <cstring>
8#include <memory> 9#include <memory>
10
9#include <glad/glad.h> 11#include <glad/glad.h>
12
10#include "common/assert.h" 13#include "common/assert.h"
11#include "common/logging/log.h" 14#include "common/logging/log.h"
12#include "common/microprofile.h" 15#include "common/microprofile.h"
@@ -25,6 +28,8 @@
25 28
26namespace OpenGL { 29namespace OpenGL {
27 30
31namespace {
32
28// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have 33// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
29// to wait on available presentation frames. 34// to wait on available presentation frames.
30constexpr std::size_t SWAP_CHAIN_SIZE = 3; 35constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -41,124 +46,6 @@ struct Frame {
41 bool is_srgb{}; /// Framebuffer is sRGB or RGB 46 bool is_srgb{}; /// Framebuffer is sRGB or RGB
42}; 47};
43 48
44/**
45 * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
46 * but also make sure that rendering happens at the pace that the frontend dictates. This is a
47 * helper class that the renderer uses to sync frames between the render thread and the presentation
48 * thread
49 */
50class FrameMailbox {
51public:
52 std::mutex swap_chain_lock;
53 std::condition_variable present_cv;
54 std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
55 std::queue<Frame*> free_queue;
56 std::deque<Frame*> present_queue;
57 Frame* previous_frame{};
58
59 FrameMailbox() {
60 for (auto& frame : swap_chain) {
61 free_queue.push(&frame);
62 }
63 }
64
65 ~FrameMailbox() {
66 // lock the mutex and clear out the present and free_queues and notify any people who are
67 // blocked to prevent deadlock on shutdown
68 std::scoped_lock lock{swap_chain_lock};
69 std::queue<Frame*>().swap(free_queue);
70 present_queue.clear();
71 present_cv.notify_all();
72 }
73
74 void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
75 frame->present.Release();
76 frame->present.Create();
77 GLint previous_draw_fbo{};
78 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
79 glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
80 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
81 frame->color.handle);
82 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
83 LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
84 }
85 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
86 frame->color_reloaded = false;
87 }
88
89 void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
90 // Recreate the color texture attachment
91 frame->color.Release();
92 frame->color.Create();
93 const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
94 glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
95
96 // Recreate the FBO for the render target
97 frame->render.Release();
98 frame->render.Create();
99 glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
100 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
101 frame->color.handle);
102 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
103 LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
104 }
105
106 frame->width = width;
107 frame->height = height;
108 frame->color_reloaded = true;
109 }
110
111 Frame* GetRenderFrame() {
112 std::unique_lock lock{swap_chain_lock};
113
114 // If theres no free frames, we will reuse the oldest render frame
115 if (free_queue.empty()) {
116 auto frame = present_queue.back();
117 present_queue.pop_back();
118 return frame;
119 }
120
121 Frame* frame = free_queue.front();
122 free_queue.pop();
123 return frame;
124 }
125
126 void ReleaseRenderFrame(Frame* frame) {
127 std::unique_lock lock{swap_chain_lock};
128 present_queue.push_front(frame);
129 present_cv.notify_one();
130 }
131
132 Frame* TryGetPresentFrame(int timeout_ms) {
133 std::unique_lock lock{swap_chain_lock};
134 // wait for new entries in the present_queue
135 present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
136 [&] { return !present_queue.empty(); });
137 if (present_queue.empty()) {
138 // timed out waiting for a frame to draw so return the previous frame
139 return previous_frame;
140 }
141
142 // free the previous frame and add it back to the free queue
143 if (previous_frame) {
144 free_queue.push(previous_frame);
145 }
146
147 // the newest entries are pushed to the front of the queue
148 Frame* frame = present_queue.front();
149 present_queue.pop_front();
150 // remove all old entries from the present queue and move them back to the free_queue
151 for (auto f : present_queue) {
152 free_queue.push(f);
153 }
154 present_queue.clear();
155 previous_frame = frame;
156 return frame;
157 }
158};
159
160namespace {
161
162constexpr char VERTEX_SHADER[] = R"( 49constexpr char VERTEX_SHADER[] = R"(
163#version 430 core 50#version 430 core
164 51
@@ -211,6 +98,24 @@ struct ScreenRectVertex {
211 std::array<GLfloat, 2> tex_coord; 98 std::array<GLfloat, 2> tex_coord;
212}; 99};
213 100
101/// Returns true if any debug tool is attached
102bool HasDebugTool() {
103 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
104 if (nsight) {
105 return true;
106 }
107
108 GLint num_extensions;
109 glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
110 for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
111 const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
112 if (!std::strcmp(name, "GL_EXT_debug_tool")) {
113 return true;
114 }
115 }
116 return false;
117}
118
214/** 119/**
215 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left 120 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
216 * corner and (width, height) on the lower-bottom. 121 * corner and (width, height) on the lower-bottom.
@@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
294 199
295} // Anonymous namespace 200} // Anonymous namespace
296 201
202/**
203 * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
204 * but also make sure that rendering happens at the pace that the frontend dictates. This is a
205 * helper class that the renderer uses to sync frames between the render thread and the presentation
206 * thread
207 */
208class FrameMailbox {
209public:
210 std::mutex swap_chain_lock;
211 std::condition_variable present_cv;
212 std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
213 std::queue<Frame*> free_queue;
214 std::deque<Frame*> present_queue;
215 Frame* previous_frame{};
216
217 FrameMailbox() : has_debug_tool{HasDebugTool()} {
218 for (auto& frame : swap_chain) {
219 free_queue.push(&frame);
220 }
221 }
222
223 ~FrameMailbox() {
224 // lock the mutex and clear out the present and free_queues and notify any people who are
225 // blocked to prevent deadlock on shutdown
226 std::scoped_lock lock{swap_chain_lock};
227 std::queue<Frame*>().swap(free_queue);
228 present_queue.clear();
229 present_cv.notify_all();
230 }
231
232 void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
233 frame->present.Release();
234 frame->present.Create();
235 GLint previous_draw_fbo{};
236 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
237 glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
238 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
239 frame->color.handle);
240 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
241 LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
242 }
243 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
244 frame->color_reloaded = false;
245 }
246
247 void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
248 // Recreate the color texture attachment
249 frame->color.Release();
250 frame->color.Create();
251 const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
252 glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
253
254 // Recreate the FBO for the render target
255 frame->render.Release();
256 frame->render.Create();
257 glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
258 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
259 frame->color.handle);
260 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
261 LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
262 }
263
264 frame->width = width;
265 frame->height = height;
266 frame->color_reloaded = true;
267 }
268
269 Frame* GetRenderFrame() {
270 std::unique_lock lock{swap_chain_lock};
271
272 // If theres no free frames, we will reuse the oldest render frame
273 if (free_queue.empty()) {
274 auto frame = present_queue.back();
275 present_queue.pop_back();
276 return frame;
277 }
278
279 Frame* frame = free_queue.front();
280 free_queue.pop();
281 return frame;
282 }
283
284 void ReleaseRenderFrame(Frame* frame) {
285 std::unique_lock lock{swap_chain_lock};
286 present_queue.push_front(frame);
287 present_cv.notify_one();
288
289 DebugNotifyNextFrame();
290 }
291
292 Frame* TryGetPresentFrame(int timeout_ms) {
293 DebugWaitForNextFrame();
294
295 std::unique_lock lock{swap_chain_lock};
296 // wait for new entries in the present_queue
297 present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
298 [&] { return !present_queue.empty(); });
299 if (present_queue.empty()) {
300 // timed out waiting for a frame to draw so return the previous frame
301 return previous_frame;
302 }
303
304 // free the previous frame and add it back to the free queue
305 if (previous_frame) {
306 free_queue.push(previous_frame);
307 }
308
309 // the newest entries are pushed to the front of the queue
310 Frame* frame = present_queue.front();
311 present_queue.pop_front();
312 // remove all old entries from the present queue and move them back to the free_queue
313 for (auto f : present_queue) {
314 free_queue.push(f);
315 }
316 present_queue.clear();
317 previous_frame = frame;
318 return frame;
319 }
320
321private:
322 std::mutex debug_synch_mutex;
323 std::condition_variable debug_synch_condition;
324 std::atomic_int frame_for_debug{};
325 const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
326
327 /// Signal that a new frame is available (called from GPU thread)
328 void DebugNotifyNextFrame() {
329 if (!has_debug_tool) {
330 return;
331 }
332 frame_for_debug++;
333 std::lock_guard lock{debug_synch_mutex};
334 debug_synch_condition.notify_one();
335 }
336
337 /// Wait for a new frame to be available (called from presentation thread)
338 void DebugWaitForNextFrame() {
339 if (!has_debug_tool) {
340 return;
341 }
342 const int last_frame = frame_for_debug;
343 std::unique_lock lock{debug_synch_mutex};
344 debug_synch_condition.wait(lock,
345 [this, last_frame] { return frame_for_debug > last_frame; });
346 }
347};
348
297RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) 349RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
298 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, 350 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
299 frame_mailbox{std::make_unique<FrameMailbox>()} {} 351 frame_mailbox{std::make_unique<FrameMailbox>()} {}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 72880d7ea..0e2e5e6c7 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -125,6 +125,7 @@ struct FormatTuple {
125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI 125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI
126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F 126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U 127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
128 {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
128 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI 129 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
129 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F 130 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
130 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI 131 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -331,6 +332,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
331 return vk::Format::eR16G16B16Unorm; 332 return vk::Format::eR16G16B16Unorm;
332 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 333 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
333 return vk::Format::eR16G16B16A16Unorm; 334 return vk::Format::eR16G16B16A16Unorm;
335 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
336 return vk::Format::eA2B10G10R10UnormPack32;
334 default: 337 default:
335 break; 338 break;
336 } 339 }
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 3847bd722..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -535,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
535 vk::Format::eR32G32Sfloat, 535 vk::Format::eR32G32Sfloat,
536 vk::Format::eR32G32Uint, 536 vk::Format::eR32G32Uint,
537 vk::Format::eR16G16B16A16Uint, 537 vk::Format::eR16G16B16A16Uint,
538 vk::Format::eR16G16B16A16Snorm,
538 vk::Format::eR16G16B16A16Unorm, 539 vk::Format::eR16G16B16A16Unorm,
539 vk::Format::eR16G16Unorm, 540 vk::Format::eR16G16Unorm,
540 vk::Format::eR16G16Snorm, 541 vk::Format::eR16G16Snorm,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f889019c1..c9886cc16 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1151,7 +1151,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1151 // This implementation assumes that all attributes are used in the shader. 1151 // This implementation assumes that all attributes are used in the shader.
1152 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1152 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1153 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 1153 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1154 DEBUG_ASSERT(end > start); 1154 DEBUG_ASSERT(end >= start);
1155 1155
1156 size += (end - start + 1) * regs.vertex_array[index].enable; 1156 size += (end - start + 1) * regs.vertex_array[index].enable;
1157 } 1157 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b2c298051..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -837,7 +837,7 @@ private:
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); 837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 } 838 }
839 839
840 element += static_cast<u8>(num_components); 840 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
841 } 841 }
842 } 842 }
843 843
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF(instr.bfe.negate_b);
21
22 Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
23 op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); 21 Node op_b = [&] {
24 22 switch (opcode->get().GetId()) {
25 switch (opcode->get().GetId()) { 23 case OpCode::Id::BFE_R:
26 case OpCode::Id::BFE_IMM: { 24 return GetRegister(instr.gpr20);
27 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 25 case OpCode::Id::BFE_C:
28 "Condition codes generation in BFE is not implemented"); 26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
29 34
30 const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); 35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
31 const Node outer_shift_imm =
32 Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
33 36
34 const Node inner_shift = 37 const bool is_signed = instr.bfe.is_signed;
35 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
36 const Node outer_shift =
37 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
38 38
39 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); 39 // using reverse parallel method in
40 SetRegister(bb, instr.gpr0, outer_shift); 40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41 break; 41 // note for later if possible to implement faster method.
42 } 42 if (instr.bfe.brev) {
43 default: 43 const auto swap = [&](u32 s, u32 mask) {
44 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); 44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
45 } 65 }
46 66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
47 return pc; 74 return pc;
48} 75}
49 76
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
68 return OperationCode::UBitwiseXor; 68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot: 69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot; 70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
71 case OperationCode::IBitfieldInsert: 73 case OperationCode::IBitfieldInsert:
72 return OperationCode::UBitfieldInsert; 74 return OperationCode::UBitfieldInsert;
73 case OperationCode::IBitCount: 75 case OperationCode::IBitCount:
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9707c353d..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
111 return PixelFormat::RGBA16F; 111 return PixelFormat::RGBA16F;
112 case Tegra::RenderTargetFormat::RGBA16_UNORM: 112 case Tegra::RenderTargetFormat::RGBA16_UNORM:
113 return PixelFormat::RGBA16U; 113 return PixelFormat::RGBA16U;
114 case Tegra::RenderTargetFormat::RGBA16_SNORM:
115 return PixelFormat::RGBA16S;
114 case Tegra::RenderTargetFormat::RGBA16_UINT: 116 case Tegra::RenderTargetFormat::RGBA16_UINT:
115 return PixelFormat::RGBA16UI; 117 return PixelFormat::RGBA16UI;
116 case Tegra::RenderTargetFormat::RGBA32_FLOAT: 118 case Tegra::RenderTargetFormat::RGBA32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index d88109e5a..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,82 +25,83 @@ enum class PixelFormat {
25 R8UI = 7, 25 R8UI = 7,
26 RGBA16F = 8, 26 RGBA16F = 8,
27 RGBA16U = 9, 27 RGBA16U = 9,
28 RGBA16UI = 10, 28 RGBA16S = 10,
29 R11FG11FB10F = 11, 29 RGBA16UI = 11,
30 RGBA32UI = 12, 30 R11FG11FB10F = 12,
31 DXT1 = 13, 31 RGBA32UI = 13,
32 DXT23 = 14, 32 DXT1 = 14,
33 DXT45 = 15, 33 DXT23 = 15,
34 DXN1 = 16, // This is also known as BC4 34 DXT45 = 16,
35 DXN2UNORM = 17, 35 DXN1 = 17, // This is also known as BC4
36 DXN2SNORM = 18, 36 DXN2UNORM = 18,
37 BC7U = 19, 37 DXN2SNORM = 19,
38 BC6H_UF16 = 20, 38 BC7U = 20,
39 BC6H_SF16 = 21, 39 BC6H_UF16 = 21,
40 ASTC_2D_4X4 = 22, 40 BC6H_SF16 = 22,
41 BGRA8 = 23, 41 ASTC_2D_4X4 = 23,
42 RGBA32F = 24, 42 BGRA8 = 24,
43 RG32F = 25, 43 RGBA32F = 25,
44 R32F = 26, 44 RG32F = 26,
45 R16F = 27, 45 R32F = 27,
46 R16U = 28, 46 R16F = 28,
47 R16S = 29, 47 R16U = 29,
48 R16UI = 30, 48 R16S = 30,
49 R16I = 31, 49 R16UI = 31,
50 RG16 = 32, 50 R16I = 32,
51 RG16F = 33, 51 RG16 = 33,
52 RG16UI = 34, 52 RG16F = 34,
53 RG16I = 35, 53 RG16UI = 35,
54 RG16S = 36, 54 RG16I = 36,
55 RGB32F = 37, 55 RG16S = 37,
56 RGBA8_SRGB = 38, 56 RGB32F = 38,
57 RG8U = 39, 57 RGBA8_SRGB = 39,
58 RG8S = 40, 58 RG8U = 40,
59 RG32UI = 41, 59 RG8S = 41,
60 RGBX16F = 42, 60 RG32UI = 42,
61 R32UI = 43, 61 RGBX16F = 43,
62 R32I = 44, 62 R32UI = 44,
63 ASTC_2D_8X8 = 45, 63 R32I = 45,
64 ASTC_2D_8X5 = 46, 64 ASTC_2D_8X8 = 46,
65 ASTC_2D_5X4 = 47, 65 ASTC_2D_8X5 = 47,
66 BGRA8_SRGB = 48, 66 ASTC_2D_5X4 = 48,
67 DXT1_SRGB = 49, 67 BGRA8_SRGB = 49,
68 DXT23_SRGB = 50, 68 DXT1_SRGB = 50,
69 DXT45_SRGB = 51, 69 DXT23_SRGB = 51,
70 BC7U_SRGB = 52, 70 DXT45_SRGB = 52,
71 R4G4B4A4U = 53, 71 BC7U_SRGB = 53,
72 ASTC_2D_4X4_SRGB = 54, 72 R4G4B4A4U = 54,
73 ASTC_2D_8X8_SRGB = 55, 73 ASTC_2D_4X4_SRGB = 55,
74 ASTC_2D_8X5_SRGB = 56, 74 ASTC_2D_8X8_SRGB = 56,
75 ASTC_2D_5X4_SRGB = 57, 75 ASTC_2D_8X5_SRGB = 57,
76 ASTC_2D_5X5 = 58, 76 ASTC_2D_5X4_SRGB = 58,
77 ASTC_2D_5X5_SRGB = 59, 77 ASTC_2D_5X5 = 59,
78 ASTC_2D_10X8 = 60, 78 ASTC_2D_5X5_SRGB = 60,
79 ASTC_2D_10X8_SRGB = 61, 79 ASTC_2D_10X8 = 61,
80 ASTC_2D_6X6 = 62, 80 ASTC_2D_10X8_SRGB = 62,
81 ASTC_2D_6X6_SRGB = 63, 81 ASTC_2D_6X6 = 63,
82 ASTC_2D_10X10 = 64, 82 ASTC_2D_6X6_SRGB = 64,
83 ASTC_2D_10X10_SRGB = 65, 83 ASTC_2D_10X10 = 65,
84 ASTC_2D_12X12 = 66, 84 ASTC_2D_10X10_SRGB = 66,
85 ASTC_2D_12X12_SRGB = 67, 85 ASTC_2D_12X12 = 67,
86 ASTC_2D_8X6 = 68, 86 ASTC_2D_12X12_SRGB = 68,
87 ASTC_2D_8X6_SRGB = 69, 87 ASTC_2D_8X6 = 69,
88 ASTC_2D_6X5 = 70, 88 ASTC_2D_8X6_SRGB = 70,
89 ASTC_2D_6X5_SRGB = 71, 89 ASTC_2D_6X5 = 71,
90 E5B9G9R9F = 72, 90 ASTC_2D_6X5_SRGB = 72,
91 E5B9G9R9F = 73,
91 92
92 MaxColorFormat, 93 MaxColorFormat,
93 94
94 // Depth formats 95 // Depth formats
95 Z32F = 73, 96 Z32F = 74,
96 Z16 = 74, 97 Z16 = 75,
97 98
98 MaxDepthFormat, 99 MaxDepthFormat,
99 100
100 // DepthStencil formats 101 // DepthStencil formats
101 Z24S8 = 75, 102 Z24S8 = 76,
102 S8Z24 = 76, 103 S8Z24 = 77,
103 Z32FS8 = 77, 104 Z32FS8 = 78,
104 105
105 MaxDepthStencilFormat, 106 MaxDepthStencilFormat,
106 107
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
138 0, // R8UI 139 0, // R8UI
139 0, // RGBA16F 140 0, // RGBA16F
140 0, // RGBA16U 141 0, // RGBA16U
142 0, // RGBA16S
141 0, // RGBA16UI 143 0, // RGBA16UI
142 0, // R11FG11FB10F 144 0, // R11FG11FB10F
143 0, // RGBA32UI 145 0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
235 1, // R8UI 237 1, // R8UI
236 1, // RGBA16F 238 1, // RGBA16F
237 1, // RGBA16U 239 1, // RGBA16U
240 1, // RGBA16S
238 1, // RGBA16UI 241 1, // RGBA16UI
239 1, // R11FG11FB10F 242 1, // R11FG11FB10F
240 1, // RGBA32UI 243 1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
324 1, // R8UI 327 1, // R8UI
325 1, // RGBA16F 328 1, // RGBA16F
326 1, // RGBA16U 329 1, // RGBA16U
330 1, // RGBA16S
327 1, // RGBA16UI 331 1, // RGBA16UI
328 1, // R11FG11FB10F 332 1, // R11FG11FB10F
329 1, // RGBA32UI 333 1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
413 8, // R8UI 417 8, // R8UI
414 64, // RGBA16F 418 64, // RGBA16F
415 64, // RGBA16U 419 64, // RGBA16U
420 64, // RGBA16S
416 64, // RGBA16UI 421 64, // RGBA16UI
417 32, // R11FG11FB10F 422 32, // R11FG11FB10F
418 128, // RGBA32UI 423 128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
517 SurfaceCompression::None, // R8UI 522 SurfaceCompression::None, // R8UI
518 SurfaceCompression::None, // RGBA16F 523 SurfaceCompression::None, // RGBA16F
519 SurfaceCompression::None, // RGBA16U 524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
520 SurfaceCompression::None, // RGBA16UI 526 SurfaceCompression::None, // RGBA16UI
521 SurfaceCompression::None, // R11FG11FB10F 527 SurfaceCompression::None, // R11FG11FB10F
522 SurfaceCompression::None, // RGBA32UI 528 SurfaceCompression::None, // RGBA32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index cc3ad8417..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 75> DefinitionTable = {{ 44constexpr std::array<Table, 76> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, 61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, 62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
63 63
64 {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
64 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, 65 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
65 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, 66 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
66 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, 67 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},