Diffstat (limited to 'src')
-rw-r--r--  src/common/CMakeLists.txt | 2
-rw-r--r--  src/common/stream.cpp | 47
-rw-r--r--  src/common/stream.h | 50
-rw-r--r--  src/core/CMakeLists.txt | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 100
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 71
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 234
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 168
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 90
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.h | 88
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.h | 1
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp | 4
-rw-r--r--  src/core/settings.cpp | 2
-rw-r--r--  src/core/settings.h | 1
-rw-r--r--  src/core/telemetry_session.cpp | 2
-rw-r--r--  src/video_core/CMakeLists.txt | 26
-rw-r--r--  src/video_core/cdma_pusher.cpp | 171
-rw-r--r--  src/video_core/cdma_pusher.h | 138
-rw-r--r--  src/video_core/command_classes/codecs/codec.cpp | 114
-rw-r--r--  src/video_core/command_classes/codecs/codec.h | 68
-rw-r--r--  src/video_core/command_classes/codecs/h264.cpp | 276
-rw-r--r--  src/video_core/command_classes/codecs/h264.h | 130
-rw-r--r--  src/video_core/command_classes/codecs/vp9.cpp | 1010
-rw-r--r--  src/video_core/command_classes/codecs/vp9.h | 216
-rw-r--r--  src/video_core/command_classes/codecs/vp9_types.h | 369
-rw-r--r--  src/video_core/command_classes/host1x.cpp | 39
-rw-r--r--  src/video_core/command_classes/host1x.h | 78
-rw-r--r--  src/video_core/command_classes/nvdec.cpp | 56
-rw-r--r--  src/video_core/command_classes/nvdec.h | 39
-rw-r--r--  src/video_core/command_classes/nvdec_common.h | 48
-rw-r--r--  src/video_core/command_classes/sync_manager.cpp | 60
-rw-r--r--  src/video_core/command_classes/sync_manager.h | 64
-rw-r--r--  src/video_core/command_classes/vic.cpp | 180
-rw-r--r--  src/video_core/command_classes/vic.h | 110
-rw-r--r--  src/video_core/gpu.cpp | 11
-rw-r--r--  src/video_core/gpu.h | 23
-rw-r--r--  src/video_core/gpu_asynch.cpp | 26
-rw-r--r--  src/video_core/gpu_asynch.h | 3
-rw-r--r--  src/video_core/gpu_synch.cpp | 18
-rw-r--r--  src/video_core/gpu_synch.h | 3
-rw-r--r--  src/video_core/gpu_thread.cpp | 16
-rw-r--r--  src/video_core/gpu_thread.h | 19
-rw-r--r--  src/video_core/memory_manager.cpp | 12
-rw-r--r--  src/video_core/memory_manager.h | 5
-rw-r--r--  src/video_core/video_core.cpp | 5
-rw-r--r--  src/yuzu/CMakeLists.txt | 2
-rw-r--r--  src/yuzu/configuration/config.cpp | 4
-rw-r--r--  src/yuzu/configuration/configure_graphics.cpp | 10
-rw-r--r--  src/yuzu/configuration/configure_graphics.h | 1
-rw-r--r--  src/yuzu/configuration/configure_graphics.ui | 7
50 files changed, 3909 insertions, 310 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 0fb5d9708..e50ab2922 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -150,6 +150,8 @@ add_library(common STATIC
     scope_exit.h
     spin_lock.cpp
     spin_lock.h
+    stream.cpp
+    stream.h
     string_util.cpp
     string_util.h
     swap.h
diff --git a/src/common/stream.cpp b/src/common/stream.cpp
new file mode 100644
index 000000000..bf0496c26
--- /dev/null
+++ b/src/common/stream.cpp
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <stdexcept>
+#include "common/common_types.h"
+#include "common/stream.h"
+
+namespace Common {
+
+Stream::Stream() = default;
+Stream::~Stream() = default;
+
+void Stream::Seek(s32 offset, SeekOrigin origin) {
+    if (origin == SeekOrigin::SetOrigin) {
+        if (offset < 0) {
+            position = 0;
+        } else if (position >= buffer.size()) {
+            position = buffer.size();
+        } else {
+            position = offset;
+        }
+    } else if (origin == SeekOrigin::FromCurrentPos) {
+        Seek(static_cast<s32>(position) + offset, SeekOrigin::SetOrigin);
+    } else if (origin == SeekOrigin::FromEnd) {
+        Seek(static_cast<s32>(buffer.size()) - offset, SeekOrigin::SetOrigin);
+    }
+}
+
+u8 Stream::ReadByte() {
+    if (position < buffer.size()) {
+        return buffer[position++];
+    } else {
+        throw std::out_of_range("Attempting to read a byte not within the buffer range");
+    }
+}
+
+void Stream::WriteByte(u8 byte) {
+    if (position == buffer.size()) {
+        buffer.push_back(byte);
+        position++;
+    } else {
+        buffer.insert(buffer.begin() + position, byte);
+    }
+}
+
+} // namespace Common
diff --git a/src/common/stream.h b/src/common/stream.h
new file mode 100644
index 000000000..2585c16af
--- /dev/null
+++ b/src/common/stream.h
@@ -0,0 +1,50 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common {
+
+enum class SeekOrigin {
+    SetOrigin,
+    FromCurrentPos,
+    FromEnd,
+};
+
+class Stream {
+public:
+    /// Stream creates a bitstream and provides common functionality on the stream.
+    explicit Stream();
+    ~Stream();
+
+    /// Reposition bitstream "cursor" to the specified offset from origin
+    void Seek(s32 offset, SeekOrigin origin);
+
+    /// Reads next byte in the stream buffer and increments position
+    u8 ReadByte();
+
+    /// Writes byte at current position
+    void WriteByte(u8 byte);
+
+    std::size_t GetPosition() const {
+        return position;
+    }
+
+    std::vector<u8>& GetBuffer() {
+        return buffer;
+    }
+
+    const std::vector<u8>& GetBuffer() const {
+        return buffer;
+    }
+
+private:
+    std::vector<u8> buffer;
+    std::size_t position{0};
+};
+
+} // namespace Common
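
A quick usage sketch of the new Common::Stream API (illustration for this page only, not part of the change; the function name is hypothetical). WriteByte appends when the cursor sits at the end of the buffer and inserts in the middle otherwise, while ReadByte throws std::out_of_range once the cursor passes the end:

// Illustrative sketch only.
#include <cassert>
#include "common/stream.h"

void StreamExample() {
    Common::Stream stream;
    stream.WriteByte(0xAA); // cursor at end -> byte appended, position becomes 1
    stream.WriteByte(0xBB); // appended, position becomes 2

    stream.Seek(0, Common::SeekOrigin::SetOrigin); // back to the start
    assert(stream.ReadByte() == 0xAA);             // position advances to 1

    stream.Seek(1, Common::SeekOrigin::FromEnd); // one byte before the end
    stream.WriteByte(0xCC);                      // cursor not at end -> inserted before 0xBB

    assert(stream.GetBuffer().size() == 3); // buffer is now {0xAA, 0xCC, 0xBB}
    // Reading with the cursor at GetBuffer().size() throws std::out_of_range.
}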
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index db1c9fdef..e0f207f3e 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -439,6 +439,8 @@ add_library(core STATIC
     hle/service/nvdrv/devices/nvhost_gpu.h
     hle/service/nvdrv/devices/nvhost_nvdec.cpp
     hle/service/nvdrv/devices/nvhost_nvdec.h
+    hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+    hle/service/nvdrv/devices/nvhost_nvdec_common.h
     hle/service/nvdrv/devices/nvhost_nvjpg.cpp
     hle/service/nvdrv/devices/nvhost_nvjpg.h
     hle/service/nvdrv/devices/nvhost_vic.cpp
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index fcb612864..b6df48360 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -2,15 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <cstring>
-
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {}
+nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
 nvhost_nvdec::~nvhost_nvdec() = default;
 
 u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -21,7 +23,7 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
 
     switch (static_cast<IoctlCommand>(command.raw)) {
     case IoctlCommand::IocSetNVMAPfdCommand:
-        return SetNVMAPfd(input, output);
+        return SetNVMAPfd(input);
     case IoctlCommand::IocSubmit:
         return Submit(input, output);
     case IoctlCommand::IocGetSyncpoint:
@@ -29,79 +31,29 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
     case IoctlCommand::IocGetWaitbase:
         return GetWaitbase(input, output);
     case IoctlCommand::IocMapBuffer:
-        return MapBuffer(input, output);
+    case IoctlCommand::IocMapBuffer2:
+    case IoctlCommand::IocMapBuffer3:
     case IoctlCommand::IocMapBufferEx:
-        return MapBufferEx(input, output);
-    case IoctlCommand::IocUnmapBufferEx:
-        return UnmapBufferEx(input, output);
+        return MapBuffer(input, output);
+    case IoctlCommand::IocUnmapBufferEx: {
+        // This command is sent when the video stream has ended, flush all video contexts
+        // This is usually sent in the following order: vic, nvdec, vic.
+        // Inform the GPU to clear any remaining nvdec buffers when this is detected.
+        LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
+        Tegra::ChCommandHeaderList cmdlist(1);
+        cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
+        system.GPU().PushCommandBuffer(cmdlist);
+        [[fallthrough]]; // fallthrough to unmap buffers
+    };
+    case IoctlCommand::IocUnmapBuffer:
+    case IoctlCommand::IocUnmapBuffer2:
+    case IoctlCommand::IocUnmapBuffer3:
+        return UnmapBuffer(input, output);
+    case IoctlCommand::IocSetSubmitTimeout:
+        return SetSubmitTimeout(input, output);
     }
 
-    UNIMPLEMENTED_MSG("Unimplemented ioctl");
-    return 0;
-}
-
-u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSetNvmapFD params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
-    LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
-
-    nvmap_fd = params.nvmap_fd;
-    return 0;
-}
-
-u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSubmit params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
-    return 0;
-}
-
-u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetSyncpoint params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
-    return 0;
-}
-
-u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetWaitbase params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
-    return 0;
-}
-
-u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBuffer params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
-    return 0;
-}
-
-u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
-    return 0;
-}
-
-u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlUnmapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-    std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
+    UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
     return 0;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 4332db118..102777ddd 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -4,16 +4,14 @@
 
 #pragma once
 
-#include <vector>
-#include "common/common_types.h"
-#include "common/swap.h"
-#include "core/hle/service/nvdrv/devices/nvdevice.h"
+#include <memory>
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
 
 namespace Service::Nvidia::Devices {
 
-class nvhost_nvdec final : public nvdevice {
+class nvhost_nvdec final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_nvdec(Core::System& system);
+    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
     ~nvhost_nvdec() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -27,62 +25,15 @@ private:
         IocGetSyncpoint = 0xC0080002,
         IocGetWaitbase = 0xC0080003,
         IocMapBuffer = 0xC01C0009,
+        IocMapBuffer2 = 0xC16C0009,
+        IocMapBuffer3 = 0xC15C0009,
         IocMapBufferEx = 0xC0A40009,
-        IocUnmapBufferEx = 0xC0A4000A,
+        IocUnmapBuffer = 0xC0A4000A,
+        IocUnmapBuffer2 = 0xC16C000A,
+        IocUnmapBufferEx = 0xC01C000A,
+        IocUnmapBuffer3 = 0xC15C000A,
+        IocSetSubmitTimeout = 0x40040007,
     };
-
-    struct IoctlSetNvmapFD {
-        u32_le nvmap_fd;
-    };
-    static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size");
-
-    struct IoctlSubmit {
-        INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size");
-
-    struct IoctlGetSyncpoint {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size");
-
-    struct IoctlGetWaitbase {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size");
-
-    struct IoctlMapBuffer {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
-
-    struct IoctlMapBufferEx {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size");
-
-    struct IoctlUnmapBufferEx {
-        INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size");
-
-    u32_le nvmap_fd{};
-
-    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
 };
 
 } // namespace Service::Nvidia::Devices
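
The ioctl values above follow the standard Linux _IOC encoding: bits 0-7 are the command number, bits 8-15 the type, bits 16-29 the payload size, and bits 30-31 the transfer direction. That is why IocMapBuffer, IocMapBuffer2 and IocMapBuffer3 share command number 0x09 and differ only in the encoded payload size. A small sketch of that decoding (illustration for this page only, not part of the change):

// Sketch only: decodes the _IOC fields of the nvhost ioctl numbers listed above.
#include <cstdint>

constexpr std::uint32_t IocNr(std::uint32_t ioc)   { return ioc & 0xFF; }            // command number
constexpr std::uint32_t IocType(std::uint32_t ioc) { return (ioc >> 8) & 0xFF; }     // driver "magic"
constexpr std::uint32_t IocSize(std::uint32_t ioc) { return (ioc >> 16) & 0x3FFF; }  // payload size
constexpr std::uint32_t IocDir(std::uint32_t ioc)  { return ioc >> 30; }             // 3 = read/write

static_assert(IocNr(0xC01C0009) == 0x09 && IocSize(0xC01C0009) == 0x1C); // IocMapBuffer
static_assert(IocNr(0xC0A40009) == 0x09 && IocSize(0xC0A40009) == 0xA4); // IocMapBufferEx
static_assert(IocNr(0xC0A4000A) == 0x0A && IocSize(0xC0A4000A) == 0xA4); // IocUnmapBuffer
static_assert(IocNr(0x40040007) == 0x07 && IocSize(0x40040007) == 0x04); // IocSetSubmitTimeout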
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
new file mode 100644
index 000000000..85792495f
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -0,0 +1,234 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "common/logging/log.h"
11#include "core/core.h"
12#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
13#include "core/hle/service/nvdrv/devices/nvmap.h"
14#include "core/memory.h"
15#include "video_core/memory_manager.h"
16#include "video_core/renderer_base.h"
17
18namespace Service::Nvidia::Devices {
19
20namespace {
21// Splice vectors will copy count amount of type T from the input vector into the dst vector.
22template <typename T>
23std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
24 std::size_t offset) {
25 std::memcpy(dst.data(), input.data() + offset, count * sizeof(T));
26 offset += count * sizeof(T);
27 return offset;
28}
29
30// Write vectors will write data to the output buffer
31template <typename T>
32std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
33 std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T));
34 offset += src.size() * sizeof(T);
35 return offset;
36}
37} // Anonymous namespace
38
39namespace NvErrCodes {
40constexpr u32 Success{};
41constexpr u32 OutOfMemory{static_cast<u32>(-12)};
42constexpr u32 InvalidInput{static_cast<u32>(-22)};
43} // namespace NvErrCodes
44
45nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
46 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
47nvhost_nvdec_common::~nvhost_nvdec_common() = default;
48
49u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
50 IoctlSetNvmapFD params{};
51 std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
52 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
53
54 nvmap_fd = params.nvmap_fd;
55 return 0;
56}
57
58u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
59 IoctlSubmit params{};
60 std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
61 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
62
63 // Instantiate param buffers
64 std::size_t offset = sizeof(IoctlSubmit);
65 std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
66 std::vector<Reloc> relocs(params.relocation_count);
67 std::vector<u32> reloc_shifts(params.relocation_count);
68 std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count);
69 std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
70 std::vector<Fence> fences(params.fence_count);
71
72 // Splice input into their respective buffers
73 offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
74 offset = SpliceVectors(input, relocs, params.relocation_count, offset);
75 offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
76 offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
77 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
78 offset = SpliceVectors(input, fences, params.fence_count, offset);
79
80 // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
81
82 auto& gpu = system.GPU();
83
84 for (const auto& cmd_buffer : command_buffers) {
85 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
86 ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;);
87 const auto map = FindBufferMap(object->dma_map_addr);
88 if (!map) {
89 LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}",
90 object->addr, object->dma_map_addr);
91 return 0;
92 }
93 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
94 gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(),
95 cmdlist.size() * sizeof(u32));
96 gpu.PushCommandBuffer(cmdlist);
97 }
98
99 std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
100 // Some games expect command_buffers to be written back
101 offset = sizeof(IoctlSubmit);
102 offset = WriteVectors(output, command_buffers, offset);
103 offset = WriteVectors(output, relocs, offset);
104 offset = WriteVectors(output, reloc_shifts, offset);
105 offset = WriteVectors(output, syncpt_increments, offset);
106 offset = WriteVectors(output, wait_checks, offset);
107
108 return NvErrCodes::Success;
109}
110
111u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
112 IoctlGetSyncpoint params{};
113 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
114 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
115
116 // We found that implementing this causes deadlocks with async gpu, along with degraded
117 // performance. TODO: RE the nvdec async implementation
118 params.value = 0;
119 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
120
121 return NvErrCodes::Success;
122}
123
124u32 nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
125 IoctlGetWaitbase params{};
126 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
127 params.value = 0; // Seems to be hard coded at 0
128 std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
129 return 0;
130}
131
132u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
133 IoctlMapBuffer params{};
134 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
135 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
136
137 SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
138
139 auto& gpu = system.GPU();
140
141 for (auto& cmf_buff : cmd_buffer_handles) {
142 auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
143 if (!object) {
144 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
145 std::memcpy(output.data(), &params, output.size());
146 return NvErrCodes::InvalidInput;
147 }
148 if (object->dma_map_addr == 0) {
149 // NVDEC and VIC memory is in the 32-bit address space
150 // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space
151 const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size);
152 object->dma_map_addr = static_cast<u32>(low_addr);
153 // Ensure that the dma_map_addr is indeed in the lower 32-bit address space.
154 ASSERT(object->dma_map_addr == low_addr);
155 }
156 if (!object->dma_map_addr) {
157 LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
158 } else {
159 cmf_buff.map_address = object->dma_map_addr;
160 AddBufferMap(object->dma_map_addr, object->size, object->addr,
161 object->status == nvmap::Object::Status::Allocated);
162 }
163 }
164 std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
165 std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
166 cmd_buffer_handles.size() * sizeof(MapBufferEntry));
167
168 return NvErrCodes::Success;
169}
170
171u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
172 IoctlMapBuffer params{};
173 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
174 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
175 SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
176
177 auto& gpu = system.GPU();
178
179 for (auto& cmf_buff : cmd_buffer_handles) {
180 const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
181 if (!object) {
182 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
183 std::memcpy(output.data(), &params, output.size());
184 return NvErrCodes::InvalidInput;
185 }
186 if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
187 gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
188 } else {
189 // This occurs quite frequently, however does not seem to impact functionality
190 LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,
191 object->dma_map_addr);
192 }
193 object->dma_map_addr = 0;
194 }
195 std::memset(output.data(), 0, output.size());
196 return NvErrCodes::Success;
197}
198
199u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
200 std::memcpy(&submit_timeout, input.data(), input.size());
201 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
202 return NvErrCodes::Success;
203}
204
205std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(
206 GPUVAddr gpu_addr) const {
207 const auto it = std::find_if(
208 buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) {
209 return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr());
210 });
211
212 ASSERT(it != buffer_mappings.end());
213 return it->second;
214}
215
216void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
217 bool is_allocated) {
218 buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated});
219}
220
221std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) {
222 const auto iter{buffer_mappings.find(gpu_addr)};
223 if (iter == buffer_mappings.end()) {
224 return std::nullopt;
225 }
226 std::size_t size = 0;
227 if (iter->second.IsAllocated()) {
228 size = iter->second.Size();
229 }
230 buffer_mappings.erase(iter);
231 return size;
232}
233
234} // namespace Service::Nvidia::Devices
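
For readers tracing Submit() above: the ioctl input is an IoctlSubmit header followed by variable-length arrays in a fixed order, and SpliceVectors simply advances a byte offset across that layout. A minimal, self-contained sketch of the payload size implied by the struct sizes asserted in nvhost_nvdec_common.h (illustration only, not part of the change; the function name is hypothetical):

// Sketch only: byte size of the buffer Submit() consumes.
#include <cstddef>

constexpr std::size_t SubmitInputSize(std::size_t cmd_buffers, std::size_t relocations,
                                      std::size_t syncpoints, std::size_t fences) {
    return 0x10 +               // IoctlSubmit header
           cmd_buffers * 0xC +  // CommandBuffer entries
           relocations * 0x10 + // Reloc entries
           relocations * 4 +    // u32 reloc shift values
           syncpoints * 0x8 +   // SyncptIncr entries
           syncpoints * 0x8 +   // wait checks (also SyncptIncr-sized)
           fences * 0x8;        // Fence entries
}

// e.g. one command buffer and one fence: 0x10 + 0xC + 0x8 = 0x24 bytes.
static_assert(SubmitInputSize(1, 0, 0, 1) == 0x24);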
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
new file mode 100644
index 000000000..c249c5349
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -0,0 +1,168 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <vector>
9#include "common/common_types.h"
10#include "common/swap.h"
11#include "core/hle/service/nvdrv/devices/nvdevice.h"
12
13namespace Service::Nvidia::Devices {
14class nvmap;
15
16class nvhost_nvdec_common : public nvdevice {
17public:
18 explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
19 ~nvhost_nvdec_common() override;
20
21 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
22 std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
23 IoctlVersion version) = 0;
24
25protected:
26 class BufferMap final {
27 public:
28 constexpr BufferMap() = default;
29
30 constexpr BufferMap(GPUVAddr start_addr, std::size_t size)
31 : start_addr{start_addr}, end_addr{start_addr + size} {}
32
33 constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr,
34 bool is_allocated)
35 : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr},
36 is_allocated{is_allocated} {}
37
38 constexpr VAddr StartAddr() const {
39 return start_addr;
40 }
41
42 constexpr VAddr EndAddr() const {
43 return end_addr;
44 }
45
46 constexpr std::size_t Size() const {
47 return end_addr - start_addr;
48 }
49
50 constexpr VAddr CpuAddr() const {
51 return cpu_addr;
52 }
53
54 constexpr bool IsAllocated() const {
55 return is_allocated;
56 }
57
58 private:
59 GPUVAddr start_addr{};
60 GPUVAddr end_addr{};
61 VAddr cpu_addr{};
62 bool is_allocated{};
63 };
64
65 struct IoctlSetNvmapFD {
66 u32_le nvmap_fd;
67 };
68 static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
69
70 struct IoctlSubmitCommandBuffer {
71 u32_le id;
72 u32_le offset;
73 u32_le count;
74 };
75 static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
76 "IoctlSubmitCommandBuffer is incorrect size");
77 struct IoctlSubmit {
78 u32_le cmd_buffer_count;
79 u32_le relocation_count;
80 u32_le syncpoint_count;
81 u32_le fence_count;
82 };
83 static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size");
84
85 struct CommandBuffer {
86 s32 memory_id;
87 u32 offset;
88 s32 word_count;
89 };
90 static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size");
91
92 struct Reloc {
93 s32 cmdbuffer_memory;
94 s32 cmdbuffer_offset;
95 s32 target;
96 s32 target_offset;
97 };
98 static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size");
99
100 struct SyncptIncr {
101 u32 id;
102 u32 increments;
103 };
104 static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size");
105
106 struct Fence {
107 u32 id;
108 u32 value;
109 };
110 static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size");
111
112 struct IoctlGetSyncpoint {
113 // Input
114 u32_le param;
115 // Output
116 u32_le value;
117 };
118 static_assert(sizeof(IoctlGetSyncpoint) == 8, "IocGetIdParams has wrong size");
119
120 struct IoctlGetWaitbase {
121 u32_le unknown; // seems to be ignored? Nintendo added this
122 u32_le value;
123 };
124 static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
125
126 struct IoctlMapBuffer {
127 u32_le num_entries;
128 u32_le data_address; // Ignored by the driver.
129 u32_le attach_host_ch_das;
130 };
131 static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
132
133 struct IocGetIdParams {
134 // Input
135 u32_le param;
136 // Output
137 u32_le value;
138 };
139 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
140
141 // Used for mapping and unmapping command buffers
142 struct MapBufferEntry {
143 u32_le map_handle;
144 u32_le map_address;
145 };
146 static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
147
148 /// Ioctl command implementations
149 u32 SetNVMAPfd(const std::vector<u8>& input);
150 u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
151 u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
152 u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
153 u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
154 u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
155 u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
156
157 std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
158 void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
159 std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
160
161 u32_le nvmap_fd{};
162 u32_le submit_timeout{};
163 std::shared_ptr<nvmap> nvmap_dev;
164
165 // This is expected to be ordered, therefore we must use a map, not unordered_map
166 std::map<GPUVAddr, BufferMap> buffer_mappings;
167};
168}; // namespace Service::Nvidia::Devices
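
The comment above explains why buffer_mappings must be an ordered std::map: FindBufferMap bounds its scan with upper_bound(gpu_addr), which only works when the keys (range start addresses) are sorted. A reduced, self-contained sketch of that lookup pattern (illustrative names, not part of the change):

// Sketch only: range lookup over an ordered map keyed by range start address,
// mirroring nvhost_nvdec_common::FindBufferMap.
#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>

struct Range {
    std::uint64_t start;
    std::uint64_t end; // one past the last mapped address
};

std::optional<Range> FindRange(const std::map<std::uint64_t, Range>& ranges, std::uint64_t addr) {
    // Only entries whose key is <= addr can contain addr, so upper_bound(addr)
    // bounds the linear scan; this is what requires an ordered container.
    const auto bound = ranges.upper_bound(addr);
    const auto it = std::find_if(ranges.begin(), bound, [addr](const auto& entry) {
        return addr >= entry.second.start && addr < entry.second.end;
    });
    if (it == bound) {
        return std::nullopt;
    }
    return it->second;
}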
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 9da19ad56..60db54d00 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -2,15 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <cstring>
-
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"
 
 namespace Service::Nvidia::Devices {
+nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
 
-nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {}
 nvhost_vic::~nvhost_vic() = default;
 
 u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -21,7 +23,7 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
 
     switch (static_cast<IoctlCommand>(command.raw)) {
     case IoctlCommand::IocSetNVMAPfdCommand:
-        return SetNVMAPfd(input, output);
+        return SetNVMAPfd(input);
     case IoctlCommand::IocSubmit:
         return Submit(input, output);
     case IoctlCommand::IocGetSyncpoint:
@@ -29,83 +31,19 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
29 case IoctlCommand::IocGetWaitbase: 31 case IoctlCommand::IocGetWaitbase:
30 return GetWaitbase(input, output); 32 return GetWaitbase(input, output);
31 case IoctlCommand::IocMapBuffer: 33 case IoctlCommand::IocMapBuffer:
32 return MapBuffer(input, output); 34 case IoctlCommand::IocMapBuffer2:
35 case IoctlCommand::IocMapBuffer3:
36 case IoctlCommand::IocMapBuffer4:
33 case IoctlCommand::IocMapBufferEx: 37 case IoctlCommand::IocMapBufferEx:
34 return MapBuffer(input, output); 38 return MapBuffer(input, output);
39 case IoctlCommand::IocUnmapBuffer:
40 case IoctlCommand::IocUnmapBuffer2:
41 case IoctlCommand::IocUnmapBuffer3:
35 case IoctlCommand::IocUnmapBufferEx: 42 case IoctlCommand::IocUnmapBufferEx:
36 return UnmapBufferEx(input, output); 43 return UnmapBuffer(input, output);
37 } 44 }
38 45
39 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 46 UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
40 return 0;
41}
42
43u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
44 IoctlSetNvmapFD params{};
45 std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
46 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
47
48 nvmap_fd = params.nvmap_fd;
49 return 0;
50}
51
52u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
53 IoctlSubmit params{};
54 std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
55 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
56
57 // Workaround for Luigi's Mansion 3, as nvhost_vic is not implemented for asynch GPU
58 params.command_buffer = {};
59
60 std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
61 return 0;
62}
63
64u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
65 IoctlGetSyncpoint params{};
66 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
67 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
68 params.value = 0; // Seems to be hard coded at 0
69 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
70 return 0;
71}
72
73u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
74 IoctlGetWaitbase params{};
75 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
76 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
77 params.value = 0; // Seems to be hard coded at 0
78 std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
79 return 0;
80}
81
82u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
83 IoctlMapBuffer params{};
84 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
85 LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
86 params.address_1);
87 params.address_1 = 0;
88 params.address_2 = 0;
89 std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
90 return 0;
91}
92
93u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
94 IoctlMapBufferEx params{};
95 std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
96 LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
97 params.address_1);
98 params.address_1 = 0;
99 params.address_2 = 0;
100 std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
101 return 0;
102}
103
104u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
105 IoctlUnmapBufferEx params{};
106 std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
107 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
108 std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
109 return 0; 47 return 0;
110} 48}
111 49
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index a7bb7bbd5..f975b190c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -4,19 +4,15 @@
 
 #pragma once
 
-#include <array>
-#include <vector>
-#include "common/common_types.h"
-#include "common/swap.h"
-#include "core/hle/service/nvdrv/devices/nvdevice.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
 
 namespace Service::Nvidia::Devices {
+class nvmap;
 
-class nvhost_vic final : public nvdevice {
+class nvhost_vic final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_vic(Core::System& system);
-    ~nvhost_vic() override;
-
+    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    ~nvhost_vic();
     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
               std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
               IoctlVersion version) override;
@@ -28,74 +24,14 @@ private:
         IocGetSyncpoint = 0xC0080002,
         IocGetWaitbase = 0xC0080003,
         IocMapBuffer = 0xC01C0009,
+        IocMapBuffer2 = 0xC0340009,
+        IocMapBuffer3 = 0xC0140009,
+        IocMapBuffer4 = 0xC00C0009,
         IocMapBufferEx = 0xC03C0009,
-        IocUnmapBufferEx = 0xC03C000A,
+        IocUnmapBuffer = 0xC03C000A,
+        IocUnmapBuffer2 = 0xC034000A,
+        IocUnmapBuffer3 = 0xC00C000A,
+        IocUnmapBufferEx = 0xC01C000A,
     };
-
-    struct IoctlSetNvmapFD {
-        u32_le nvmap_fd;
-    };
-    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
-
-    struct IoctlSubmitCommandBuffer {
-        u32 id;
-        u32 offset;
-        u32 count;
-    };
-    static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
-                  "IoctlSubmitCommandBuffer is incorrect size");
-
-    struct IoctlSubmit {
-        u32 command_buffer_count;
-        u32 relocations_count;
-        u32 syncpt_count;
-        u32 wait_count;
-        std::array<IoctlSubmitCommandBuffer, 4> command_buffer;
-    };
-    static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size");
-
-    struct IoctlGetSyncpoint {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size");
-
-    struct IoctlGetWaitbase {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
-
-    struct IoctlMapBuffer {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
-
-    struct IoctlMapBufferEx {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size");
-
-    struct IoctlUnmapBufferEx {
-        INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size");
-
-    u32_le nvmap_fd{};
-
-    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
 };
-
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 84624be00..04b9ef540 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -37,6 +37,7 @@ public:
         VAddr addr;
         Status status;
         u32 refcount;
+        u32 dma_map_addr;
     };
 
     std::shared_ptr<Object> GetObject(u32 handle) const {
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 197c77db0..803c1a984 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -51,9 +51,9 @@ Module::Module(Core::System& system) {
     devices["/dev/nvmap"] = nvmap_dev;
     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
     devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
-    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system);
+    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
     devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
-    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system);
+    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
 }
 
 Module::~Module() = default;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 28d3f9099..e14c02045 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -63,6 +63,7 @@ void LogSettings() {
     log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
     log_setting("Renderer_UseAsynchronousGpuEmulation",
                 values.use_asynchronous_gpu_emulation.GetValue());
+    log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
     log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
     log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
     log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
@@ -119,6 +120,7 @@ void RestoreGlobalState() {
     values.use_disk_shader_cache.SetGlobal(true);
     values.gpu_accuracy.SetGlobal(true);
     values.use_asynchronous_gpu_emulation.SetGlobal(true);
+    values.use_nvdec_emulation.SetGlobal(true);
     values.use_vsync.SetGlobal(true);
     values.use_assembly_shaders.SetGlobal(true);
     values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/core/settings.h b/src/core/settings.h
index 9834f44bb..604805615 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -111,6 +111,7 @@ struct Values {
     Setting<bool> use_disk_shader_cache;
     Setting<GPUAccuracy> gpu_accuracy;
     Setting<bool> use_asynchronous_gpu_emulation;
+    Setting<bool> use_nvdec_emulation;
     Setting<bool> use_vsync;
     Setting<bool> use_assembly_shaders;
     Setting<bool> use_asynchronous_shaders;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index da09c0dbc..ebc19e18a 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -206,6 +206,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
              TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
              Settings::values.use_asynchronous_gpu_emulation.GetValue());
+    AddField(field_type, "Renderer_UseNvdecEmulation",
+             Settings::values.use_nvdec_emulation.GetValue());
     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
     AddField(field_type, "Renderer_UseAssemblyShaders",
              Settings::values.use_assembly_shaders.GetValue());
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 77ebac19f..fdfc885fc 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,24 @@ add_library(video_core STATIC
     buffer_cache/buffer_cache.h
     buffer_cache/map_interval.cpp
     buffer_cache/map_interval.h
+    cdma_pusher.cpp
+    cdma_pusher.h
+    command_classes/codecs/codec.cpp
+    command_classes/codecs/codec.h
+    command_classes/codecs/h264.cpp
+    command_classes/codecs/h264.h
+    command_classes/codecs/vp9.cpp
+    command_classes/codecs/vp9.h
+    command_classes/codecs/vp9_types.h
+    command_classes/host1x.cpp
+    command_classes/host1x.h
+    command_classes/nvdec.cpp
+    command_classes/nvdec.h
+    command_classes/nvdec_common.h
+    command_classes/sync_manager.cpp
+    command_classes/sync_manager.h
+    command_classes/vic.cpp
+    command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
     dirty_flags.cpp
@@ -250,6 +268,14 @@ create_target_directory_groups(video_core)
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad xbyak)
 
+if (MSVC)
+    target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
+    target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib)
+else()
+    target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
+    target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES})
+endif()
+
 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
 
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
new file mode 100644
index 000000000..d774db107
--- /dev/null
+++ b/src/video_core/cdma_pusher.cpp
@@ -0,0 +1,171 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include "command_classes/host1x.h"
22#include "command_classes/nvdec.h"
23#include "command_classes/vic.h"
24#include "common/bit_util.h"
25#include "video_core/cdma_pusher.h"
26#include "video_core/command_classes/nvdec_common.h"
27#include "video_core/engines/maxwell_3d.h"
28#include "video_core/gpu.h"
29#include "video_core/memory_manager.h"
30
31namespace Tegra {
32CDmaPusher::CDmaPusher(GPU& gpu)
33 : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)),
34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
35 host1x_processor(std::make_unique<Host1x>(gpu)),
36 nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
37 vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
38
39CDmaPusher::~CDmaPusher() = default;
40
41void CDmaPusher::Push(ChCommandHeaderList&& entries) {
42 cdma_queue.push(std::move(entries));
43}
44
45void CDmaPusher::DispatchCalls() {
46 while (!cdma_queue.empty()) {
47 Step();
48 }
49}
50
51void CDmaPusher::Step() {
52 const auto entries{cdma_queue.front()};
53 cdma_queue.pop();
54
55 std::vector<u32> values(entries.size());
56 std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
57
58 for (const u32 value : values) {
59 if (mask != 0) {
60 const u32 lbs = Common::CountTrailingZeroes32(mask);
61 mask &= ~(1U << lbs);
62 ExecuteCommand(static_cast<u32>(offset + lbs), value);
63 continue;
64 } else if (count != 0) {
65 --count;
66 ExecuteCommand(static_cast<u32>(offset), value);
67 if (incrementing) {
68 ++offset;
69 }
70 continue;
71 }
72 const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
73 switch (mode) {
74 case ChSubmissionMode::SetClass: {
75 mask = value & 0x3f;
76 offset = (value >> 16) & 0xfff;
77 current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
78 break;
79 }
80 case ChSubmissionMode::Incrementing:
81 case ChSubmissionMode::NonIncrementing:
82 count = value & 0xffff;
83 offset = (value >> 16) & 0xfff;
84 incrementing = mode == ChSubmissionMode::Incrementing;
85 break;
86 case ChSubmissionMode::Mask:
87 mask = value & 0xffff;
88 offset = (value >> 16) & 0xfff;
89 break;
90 case ChSubmissionMode::Immediate: {
91 const u32 data = value & 0xfff;
92 offset = (value >> 16) & 0xfff;
93 ExecuteCommand(static_cast<u32>(offset), data);
94 break;
95 }
96 default:
97 UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
98 break;
99 }
100 }
101}
102
103void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
104 switch (current_class) {
105 case ChClassId::NvDec:
106 ThiStateWrite(nvdec_thi_state, offset, {data});
107 switch (static_cast<ThiMethod>(offset)) {
108 case ThiMethod::IncSyncpt: {
109 LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
110 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
111 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
112 if (cond == 0) {
113 nvdec_sync->Increment(syncpoint_id);
114 } else {
115 nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
116 nvdec_sync->SignalDone(syncpoint_id);
117 }
118 break;
119 }
120 case ThiMethod::SetMethod1:
121 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
122 static_cast<u32>(nvdec_thi_state.method_0));
123 nvdec_processor->ProcessMethod(
124 static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data});
125 break;
126 default:
127 break;
128 }
129 break;
130 case ChClassId::GraphicsVic:
131 ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data});
132 switch (static_cast<ThiMethod>(offset)) {
133 case ThiMethod::IncSyncpt: {
134 LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
135 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
136 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
137 if (cond == 0) {
138 vic_sync->Increment(syncpoint_id);
139 } else {
140 vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
141 vic_sync->SignalDone(syncpoint_id);
142 }
143 break;
144 }
145 case ThiMethod::SetMethod1:
146 LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
147 static_cast<u32>(vic_thi_state.method_0));
148 vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0),
149 {data});
150 break;
151 default:
152 break;
153 }
154 break;
155 case ChClassId::Host1x:
156 // This device is mainly for syncpoint synchronization
157 LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
158 host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data});
159 break;
160 default:
161 UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
162 break;
163 }
164}
165
166void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) {
167 u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset;
168 std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size());
169}
170
171} // namespace Tegra
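
Step() above decodes each command word with the same field layout that ChCommandHeader declares in cdma_pusher.h: bits 28-31 select the submission mode, bits 16-27 the method offset, and the low 16 bits carry a count, mask, or immediate value depending on the mode (for SetClass, bits 6-15 additionally carry the class id). A compact sketch of that decoding (illustration for this page only, not part of the change):

// Sketch only: the bit layout CDmaPusher::Step() extracts from a command word.
#include <cstdint>

struct DecodedChWord {
    std::uint32_t submission_mode; // bits 28..31, ChSubmissionMode
    std::uint32_t offset;          // bits 16..27, method/register offset
    std::uint32_t value;           // bits 0..15: count, mask, or immediate data
};

constexpr DecodedChWord DecodeChWord(std::uint32_t word) {
    return DecodedChWord{
        (word >> 28) & 0xF,
        (word >> 16) & 0xFFF,
        word & 0xFFFF,
    };
}

// Example: 0x20010005 -> NonIncrementing (2) writes 5 words starting at offset 0x001.
static_assert(DecodeChWord(0x20010005).submission_mode == 2);
static_assert(DecodeChWord(0x20010005).offset == 0x001);
static_assert(DecodeChWord(0x20010005).value == 5);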
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
new file mode 100644
index 000000000..982f309c5
--- /dev/null
+++ b/src/video_core/cdma_pusher.h
@@ -0,0 +1,138 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9#include <vector>
10#include <queue>
11
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "video_core/command_classes/sync_manager.h"
15
16namespace Tegra {
17
18class GPU;
19class Nvdec;
20class Vic;
21class Host1x;
22
23enum class ChSubmissionMode : u32 {
24 SetClass = 0,
25 Incrementing = 1,
26 NonIncrementing = 2,
27 Mask = 3,
28 Immediate = 4,
29 Restart = 5,
30 Gather = 6,
31};
32
33enum class ChClassId : u32 {
34 NoClass = 0x0,
35 Host1x = 0x1,
36 VideoEncodeMpeg = 0x20,
37 VideoEncodeNvEnc = 0x21,
38 VideoStreamingVi = 0x30,
39 VideoStreamingIsp = 0x32,
40 VideoStreamingIspB = 0x34,
41 VideoStreamingViI2c = 0x36,
42 GraphicsVic = 0x5d,
43 Graphics3D = 0x60,
44 GraphicsGpu = 0x61,
45 Tsec = 0xe0,
46 TsecB = 0xe1,
47 NvJpg = 0xc0,
48 NvDec = 0xf0
49};
50
51enum class ChMethod : u32 {
52 Empty = 0,
53 SetMethod = 0x10,
54 SetData = 0x11,
55};
56
57union ChCommandHeader {
58 u32 raw;
59 BitField<0, 16, u32> value;
60 BitField<16, 12, ChMethod> method_offset;
61 BitField<28, 4, ChSubmissionMode> submission_mode;
62};
63static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
64
65struct ChCommand {
66 ChClassId class_id{};
67 int method_offset{};
68 std::vector<u32> arguments;
69};
70
71using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>;
72using ChCommandList = std::vector<Tegra::ChCommand>;
73
74struct ThiRegisters {
75 u32_le increment_syncpt{};
76 INSERT_PADDING_WORDS(1);
77 u32_le increment_syncpt_error{};
78    u32_le ctx_switch_increment_syncpt{};
79 INSERT_PADDING_WORDS(4);
80 u32_le ctx_switch{};
81 INSERT_PADDING_WORDS(1);
82 u32_le ctx_syncpt_eof{};
83 INSERT_PADDING_WORDS(5);
84 u32_le method_0{};
85 u32_le method_1{};
86 INSERT_PADDING_WORDS(12);
87 u32_le int_status{};
88 u32_le int_mask{};
89};
90
91enum class ThiMethod : u32 {
92 IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32),
93 SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32),
94 SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32),
95};
96
97class CDmaPusher {
98public:
99 explicit CDmaPusher(GPU& gpu);
100 ~CDmaPusher();
101
102 /// Push NVDEC command buffer entries into queue
103 void Push(ChCommandHeaderList&& entries);
104
105 /// Process queued command buffer entries
106 void DispatchCalls();
107
108 /// Process one queue element
109 void Step();
110
111 /// Invoke command class devices to execute the command based on the current state
112 void ExecuteCommand(u32 offset, u32 data);
113
114private:
115    /// Write the argument values to the ThiRegisters member at the specified offset
116 void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments);
117
118 GPU& gpu;
119
120 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
121 std::unique_ptr<Tegra::Vic> vic_processor;
122 std::unique_ptr<Tegra::Host1x> host1x_processor;
123 std::unique_ptr<SyncptIncrManager> nvdec_sync;
124 std::unique_ptr<SyncptIncrManager> vic_sync;
125 ChClassId current_class{};
126 ThiRegisters vic_thi_state{};
127 ThiRegisters nvdec_thi_state{};
128
129 s32 count{};
130 s32 offset{};
131 s32 mask{};
132 bool incrementing{};
133
134 // Queue of command lists to be processed
135 std::queue<ChCommandHeaderList> cdma_queue;
136};
137
138} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
new file mode 100644
index 000000000..2df410be8
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -0,0 +1,114 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <fstream>
7#include "common/assert.h"
8#include "video_core/command_classes/codecs/codec.h"
9#include "video_core/command_classes/codecs/h264.h"
10#include "video_core/command_classes/codecs/vp9.h"
11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13
14extern "C" {
15#include <libavutil/opt.h>
16}
17
18namespace Tegra {
19
20Codec::Codec(GPU& gpu_)
21 : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
22 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
23
24Codec::~Codec() {
25 if (!initialized) {
26 return;
27 }
28 // Free libav memory
29 avcodec_send_packet(av_codec_ctx, nullptr);
30 avcodec_receive_frame(av_codec_ctx, av_frame);
31 avcodec_flush_buffers(av_codec_ctx);
32
33 av_frame_unref(av_frame);
34 av_free(av_frame);
35 avcodec_close(av_codec_ctx);
36}
37
38void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
39 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
40 current_codec = codec;
41}
42
43void Codec::StateWrite(u32 offset, u64 arguments) {
44 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
45 std::memcpy(state_offset, &arguments, sizeof(u64));
46}
47
48void Codec::Decode() {
49 bool is_first_frame = false;
50
51 if (!initialized) {
52 if (current_codec == NvdecCommon::VideoCodec::H264) {
53 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
54 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
55 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
56 } else {
57 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec));
58 return;
59 }
60
61 av_codec_ctx = avcodec_alloc_context3(av_codec);
62 av_frame = av_frame_alloc();
63 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
64
65 // TODO(ameerj): libavcodec gpu hw acceleration
66
67 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
68 if (av_error < 0) {
69 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
70 av_frame_unref(av_frame);
71 av_free(av_frame);
72 avcodec_close(av_codec_ctx);
73 return;
74 }
75 initialized = true;
76 is_first_frame = true;
77 }
78 bool vp9_hidden_frame = false;
79
80 AVPacket packet{};
81 av_init_packet(&packet);
82 std::vector<u8> frame_data;
83
84 if (current_codec == NvdecCommon::VideoCodec::H264) {
85 frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
86 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
87 frame_data = vp9_decoder->ComposeFrameHeader(state);
88 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
89 }
90
91 packet.data = frame_data.data();
92 packet.size = static_cast<int>(frame_data.size());
93
94 avcodec_send_packet(av_codec_ctx, &packet);
95
96 if (!vp9_hidden_frame) {
97 // Only receive/store visible frames
98 avcodec_receive_frame(av_codec_ctx, av_frame);
99 }
100}
101
102AVFrame* Codec::GetCurrentFrame() {
103 return av_frame;
104}
105
106const AVFrame* Codec::GetCurrentFrame() const {
107 return av_frame;
108}
109
110NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
111 return current_codec;
112}
113
114} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
new file mode 100644
index 000000000..2e56daf29
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -0,0 +1,68 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/command_classes/nvdec_common.h"
12
13extern "C" {
14#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#if defined(__GNUC__) || defined(__clang__)
19#pragma GCC diagnostic pop
20#endif
21}
22
23namespace Tegra {
24class GPU;
25struct VicRegisters;
26
27namespace Decoder {
28class H264;
29class VP9;
30} // namespace Decoder
31
32class Codec {
33public:
34 explicit Codec(GPU& gpu);
35 ~Codec();
36
37 /// Sets NVDEC video stream codec
38 void SetTargetCodec(NvdecCommon::VideoCodec codec);
39
40 /// Populate NvdecRegisters state with argument value at the provided offset
41 void StateWrite(u32 offset, u64 arguments);
42
43 /// Call decoders to construct headers, decode AVFrame with ffmpeg
44 void Decode();
45
46 /// Returns most recently decoded frame
47 AVFrame* GetCurrentFrame();
48 const AVFrame* GetCurrentFrame() const;
49
50 /// Returns the value of current_codec
51 NvdecCommon::VideoCodec GetCurrentCodec() const;
52
53private:
54 bool initialized{};
55 NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
56
57 AVCodec* av_codec{nullptr};
58 AVCodecContext* av_codec_ctx{nullptr};
59 AVFrame* av_frame{nullptr};
60
61 GPU& gpu;
62 std::unique_ptr<Decoder::H264> h264_decoder;
63 std::unique_ptr<Decoder::VP9> vp9_decoder;
64
65 NvdecCommon::NvdecRegisters state{};
66};
67
68} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
new file mode 100644
index 000000000..1a39f7b23
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -0,0 +1,276 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include "common/bit_util.h"
22#include "video_core/command_classes/codecs/h264.h"
23#include "video_core/gpu.h"
24#include "video_core/memory_manager.h"
25
26namespace Tegra::Decoder {
27H264::H264(GPU& gpu_) : gpu(gpu_) {}
28
29H264::~H264() = default;
30
31std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
32 H264DecoderContext context{};
33 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
34
35 const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
36 if (!is_first_frame && frame_number != 0) {
37 frame.resize(context.frame_data_size);
38
39 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
40 } else {
41 /// Encode header
42 H264BitWriter writer{};
43 writer.WriteU(1, 24);
44 writer.WriteU(0, 1);
45 writer.WriteU(3, 2);
46 writer.WriteU(7, 5);
47 writer.WriteU(100, 8);
48 writer.WriteU(0, 8);
49 writer.WriteU(31, 8);
50 writer.WriteUe(0);
51 const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
52 writer.WriteUe(chroma_format_idc);
53 if (chroma_format_idc == 3) {
54 writer.WriteBit(false);
55 }
56
57 writer.WriteUe(0);
58 writer.WriteUe(0);
59 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
60 writer.WriteBit(false); // Scaling matrix present flag
61
62 const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
63 writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
64 writer.WriteUe(order_cnt_type);
65 if (order_cnt_type == 0) {
66 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
67 } else if (order_cnt_type == 1) {
68 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
69
70 writer.WriteSe(0);
71 writer.WriteSe(0);
72 writer.WriteUe(0);
73 }
74
75 const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
76 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
77
78 writer.WriteUe(16);
79 writer.WriteBit(false);
80 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
81 writer.WriteUe(pic_height - 1);
82 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
83
84 if (!context.h264_parameter_set.frame_mbs_only_flag) {
85 writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
86 }
87
88 writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
89 writer.WriteBit(false); // Frame cropping flag
90 writer.WriteBit(false); // VUI parameter present flag
91
92 writer.End();
93
94 // H264 PPS
95 writer.WriteU(1, 24);
96 writer.WriteU(0, 1);
97 writer.WriteU(3, 2);
98 writer.WriteU(8, 5);
99
100 writer.WriteUe(0);
101 writer.WriteUe(0);
102
103        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
104 writer.WriteBit(false);
105 writer.WriteUe(0);
106 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
107 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
108 writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
109 writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
110 s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
111 pic_init_qp = (pic_init_qp << 26) >> 26;
112 writer.WriteSe(pic_init_qp);
113 writer.WriteSe(0);
114 s32 chroma_qp_index_offset =
115 static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
116 chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
117
118 writer.WriteSe(chroma_qp_index_offset);
119 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
120 writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
121 writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
122 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
123
124 writer.WriteBit(true);
125
126 for (s32 index = 0; index < 6; index++) {
127 writer.WriteBit(true);
128 const auto matrix_x4 =
129 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
130 writer.WriteScalingList(matrix_x4, index * 16, 16);
131 }
132
133 if (context.h264_parameter_set.transform_8x8_mode_flag) {
134 for (s32 index = 0; index < 2; index++) {
135 writer.WriteBit(true);
136 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
137 context.scaling_matrix_8.end());
138
139 writer.WriteScalingList(matrix_x8, index * 64, 64);
140 }
141 }
142
143 s32 chroma_qp_index_offset2 =
144 static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
145 chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
146
147 writer.WriteSe(chroma_qp_index_offset2);
148
149 writer.End();
150
151 const auto& encoded_header = writer.GetByteArray();
152 frame.resize(encoded_header.size() + context.frame_data_size);
153 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
154
155 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
156 frame.data() + encoded_header.size(),
157 context.frame_data_size);
158 }
159
160 return frame;
161}
162
163H264BitWriter::H264BitWriter() = default;
164
165H264BitWriter::~H264BitWriter() = default;
166
167void H264BitWriter::WriteU(s32 value, s32 value_sz) {
168 WriteBits(value, value_sz);
169}
170
171void H264BitWriter::WriteSe(s32 value) {
172 WriteExpGolombCodedInt(value);
173}
174
175void H264BitWriter::WriteUe(s32 value) {
176    WriteExpGolombCodedUInt(static_cast<u32>(value));
177}
178
179void H264BitWriter::End() {
180 WriteBit(true);
181 Flush();
182}
183
184void H264BitWriter::WriteBit(bool state) {
185 WriteBits(state ? 1 : 0, 1);
186}
187
188void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
189 std::vector<u8> scan(count);
190 if (count == 16) {
191 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
192 } else {
193 std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
194 }
195 u8 last_scale = 8;
196
197 for (s32 index = 0; index < count; index++) {
198 const u8 value = list[start + scan[index]];
199 const s32 delta_scale = static_cast<s32>(value - last_scale);
200
201 WriteSe(delta_scale);
202
203 last_scale = value;
204 }
205}
206
207std::vector<u8>& H264BitWriter::GetByteArray() {
208 return byte_array;
209}
210
211const std::vector<u8>& H264BitWriter::GetByteArray() const {
212 return byte_array;
213}
214
215void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
216 s32 value_pos = 0;
217
218 s32 remaining = bit_count;
219
220 while (remaining > 0) {
221 s32 copy_size = remaining;
222
223 const s32 free_bits = GetFreeBufferBits();
224
225 if (copy_size > free_bits) {
226 copy_size = free_bits;
227 }
228
229 const s32 mask = (1 << copy_size) - 1;
230
231 const s32 src_shift = (bit_count - value_pos) - copy_size;
232 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
233
234 buffer |= ((value >> src_shift) & mask) << dst_shift;
235
236 value_pos += copy_size;
237 buffer_pos += copy_size;
238 remaining -= copy_size;
239 }
240}
241
242void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
243 const s32 sign = value <= 0 ? 0 : 1;
244 if (value < 0) {
245 value = -value;
246 }
247 value = (value << 1) - sign;
248    WriteExpGolombCodedUInt(static_cast<u32>(value));
249}
250
251void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
252 const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
253 WriteBits(1, size);
254
255 value -= (1U << (size - 1)) - 1;
256 WriteBits(static_cast<s32>(value), size - 1);
257}
258
259s32 H264BitWriter::GetFreeBufferBits() {
260 if (buffer_pos == buffer_size) {
261 Flush();
262 }
263
264 return buffer_size - buffer_pos;
265}
266
267void H264BitWriter::Flush() {
268 if (buffer_pos == 0) {
269 return;
270 }
271 byte_array.push_back(static_cast<u8>(buffer));
272
273 buffer = 0;
274 buffer_pos = 0;
275}
276} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
new file mode 100644
index 000000000..21752dd90
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -0,0 +1,130 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <vector>
24#include "common/common_funcs.h"
25#include "common/common_types.h"
26#include "video_core/command_classes/nvdec_common.h"
27
28namespace Tegra {
29class GPU;
30namespace Decoder {
31
32class H264BitWriter {
33public:
34 H264BitWriter();
35 ~H264BitWriter();
36
37 /// The following Write methods are based on clause 9.1 in the H.264 specification.
38 /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
39 void WriteU(s32 value, s32 value_sz);
40 void WriteSe(s32 value);
41 void WriteUe(s32 value);
42
43 /// Finalize the bitstream
44 void End();
45
46    /// Append a bit to the stream with a value equal to the state parameter
47 void WriteBit(bool state);
48
49 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
50    /// Writes the scaling matrices of the stream
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
52
53 /// Return the bitstream as a vector.
54 std::vector<u8>& GetByteArray();
55 const std::vector<u8>& GetByteArray() const;
56
57private:
58 // ZigZag LUTs from libavcodec.
59 static constexpr std::array<u8, 64> zig_zag_direct{
60 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
61 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
62 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
63 };
64
65 static constexpr std::array<u8, 16> zig_zag_scan{
66 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
67 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
68 };
69
70 void WriteBits(s32 value, s32 bit_count);
71 void WriteExpGolombCodedInt(s32 value);
72 void WriteExpGolombCodedUInt(u32 value);
73 s32 GetFreeBufferBits();
74 void Flush();
75
76 s32 buffer_size{8};
77
78 s32 buffer{};
79 s32 buffer_pos{};
80 std::vector<u8> byte_array;
81};
82
83class H264 {
84public:
85 explicit H264(GPU& gpu);
86 ~H264();
87
88 /// Compose the H264 header of the frame for FFmpeg decoding
89 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
90 bool is_first_frame = false);
91
92private:
93 struct H264ParameterSet {
94 u32 log2_max_pic_order_cnt{};
95 u32 delta_pic_order_always_zero_flag{};
96 u32 frame_mbs_only_flag{};
97 u32 pic_width_in_mbs{};
98 u32 pic_height_in_map_units{};
99 INSERT_PADDING_WORDS(1);
100 u32 entropy_coding_mode_flag{};
101 u32 bottom_field_pic_order_flag{};
102 u32 num_refidx_l0_default_active{};
103 u32 num_refidx_l1_default_active{};
104 u32 deblocking_filter_control_flag{};
105 u32 redundant_pic_count_flag{};
106 u32 transform_8x8_mode_flag{};
107 INSERT_PADDING_WORDS(9);
108 u64 flags{};
109 u32 frame_number{};
110 u32 frame_number2{};
111 };
112 static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
113
114 struct H264DecoderContext {
115 INSERT_PADDING_BYTES(0x48);
116 u32 frame_data_size{};
117 INSERT_PADDING_BYTES(0xc);
118 H264ParameterSet h264_parameter_set{};
119 INSERT_PADDING_BYTES(0x100);
120 std::array<u8, 0x60> scaling_matrix_4;
121 std::array<u8, 0x80> scaling_matrix_8;
122 };
123 static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
124
125 std::vector<u8> frame;
126 GPU& gpu;
127};
128
129} // namespace Decoder
130} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
new file mode 100644
index 000000000..3bae0bb5d
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -0,0 +1,1010 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring> // for std::memcpy
6#include <numeric>
7#include "video_core/command_classes/codecs/vp9.h"
8#include "video_core/gpu.h"
9#include "video_core/memory_manager.h"
10
11namespace Tegra::Decoder {
12
13// Default compressed header probabilities once frame context resets
14constexpr Vp9EntropyProbs default_probs{
15 .y_mode_prob{
16 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
17 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
18 },
19 .partition_prob{
20 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
21 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
22 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
23 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
24 },
25 .coef_probs{
26 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0,
28 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0,
29 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0,
30 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0,
31 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0,
32 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0,
33 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0,
34 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 0,
35 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0,
36 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0,
37 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0,
38 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0,
39 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0,
40 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0,
42 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0,
43 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0,
44 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0,
45 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0,
46 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0,
47 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0,
48 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0,
49 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0,
50 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0,
51 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0,
52 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0,
53 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0,
54 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0,
55 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0,
57 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0,
58 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0,
59 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0,
60 3, 101, 156, 0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0,
61 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0,
62 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0,
64 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0,
65 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0,
66 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0,
67 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0,
68 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0,
69 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0,
70 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0,
71 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0,
72 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0,
73 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0,
74 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0,
75 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0,
76 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0,
77 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0,
78 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0,
79 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0,
80 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0,
81 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0,
82 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0,
83 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0,
84 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0,
85 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0,
86 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0,
87 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0,
88 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0,
89 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0,
90 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0,
91 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
92 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0,
93 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0,
94 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0,
95 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0,
96 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0,
97 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0,
98 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0,
100 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0,
101 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0,
102 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0,
103 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0,
104 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0,
105 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0,
106 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0,
107 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0,
108 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0,
109 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0,
110 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0,
111 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0,
112 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0,
113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0,
114 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0,
115 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0,
116 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0,
117 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0,
118 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0,
119 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0,
120 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0,
121 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0,
122 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0,
123 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0,
124 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0,
125 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0,
126 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0,
127 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128 47, 106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0,
129 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0,
130 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0,
131 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0,
132 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0,
133 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0,
134 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
135 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0,
136 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0,
137 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0,
138 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0,
139 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0,
140 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0,
141 1, 16, 6, 0,
142 },
143 .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
144 .inter_mode_prob{
145 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
146 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
147 },
148 .intra_inter_prob{9, 102, 187, 225},
149 .comp_inter_prob{9, 102, 187, 225, 0},
150 .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
151 .comp_ref_prob{50, 126, 123, 221, 226},
152 .tx_32x32_prob{3, 136, 37, 5, 52, 13},
153 .tx_16x16_prob{20, 152, 15, 101},
154 .tx_8x8_prob{100, 66},
155 .skip_probs{192, 128, 64},
156 .joints{32, 64, 96},
157 .sign{128, 128},
158 .classes{
159 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
160 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
161 },
162 .class_0{216, 208},
163 .prob_bits{
164 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
165 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
166 },
167 .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
168 .fr{64, 96, 64, 64, 96, 64},
169 .class_0_hp{160, 160},
170 .high_precision{128, 128},
171};
172
173VP9::VP9(GPU& gpu) : gpu(gpu) {}
174
175VP9::~VP9() = default;
176
177void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
178 const bool update = new_prob != old_prob;
179
180 writer.Write(update, diff_update_probability);
181
182 if (update) {
183 WriteProbabilityDelta(writer, new_prob, old_prob);
184 }
185}
186template <typename T, std::size_t N>
187void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
188 const std::array<T, N>& old_prob) {
189 for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
190 WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
191 }
192}
193
194template <typename T, std::size_t N>
195void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
196 const std::array<T, N>& old_prob) {
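    // Probabilities are packed in groups of four bytes; the fourth byte of each group is
    // alignment padding (see the note in WriteCoefProbabilityUpdate) and is skipped, so
    // only the first three entries of each group are updated.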
197 for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
198 WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
199 WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
200 WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
201 }
202}
203
204void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
205 const int delta = RemapProbability(new_prob, old_prob);
206
207 EncodeTermSubExp(writer, delta);
208}
209
210s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) {
211 new_prob--;
212 old_prob--;
213
214 std::size_t index{};
215
216 if (old_prob * 2 <= 0xff) {
217 index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
218 } else {
219 index = static_cast<std::size_t>(
220 std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
221 }
222
223 return map_lut[index];
224}
225
226s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) {
227 if (new_prob > old_prob * 2) {
228 return new_prob;
229 } else if (new_prob >= old_prob) {
230 return (new_prob - old_prob) * 2;
231 } else {
232 return (old_prob - new_prob) * 2 - 1;
233 }
234}
235
236void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
237 if (WriteLessThan(writer, value, 16)) {
238 writer.Write(value, 4);
239 } else if (WriteLessThan(writer, value, 32)) {
240 writer.Write(value - 16, 4);
241 } else if (WriteLessThan(writer, value, 64)) {
242 writer.Write(value - 32, 5);
243 } else {
244 value -= 64;
245
246 constexpr s32 size = 8;
247
248 const s32 mask = (1 << size) - 191;
249
250 const s32 delta = value - mask;
251
252 if (delta < 0) {
253 writer.Write(value, size - 1);
254 } else {
255 writer.Write(delta / 2 + mask, size - 1);
256 writer.Write(delta & 1, 1);
257 }
258 }
259}
260
261bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
262 const bool is_lt = value < test;
263 writer.Write(!is_lt);
264 return is_lt;
265}
266
267void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
268 const std::array<u8, 2304>& new_prob,
269 const std::array<u8, 2304>& old_prob) {
270    // Note: There is one byte added to each packet for alignment;
271    // this byte is ignored when doing updates.
272 constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4;
273
274 const auto needs_update = [&](s32 base_index) -> bool {
275 s32 index = base_index;
276 for (s32 i = 0; i < 2; i++) {
277 for (s32 j = 0; j < 2; j++) {
278 for (s32 k = 0; k < 6; k++) {
279 for (s32 l = 0; l < 6; l++) {
280 if (new_prob[index + 0] != old_prob[index + 0] ||
281 new_prob[index + 1] != old_prob[index + 1] ||
282 new_prob[index + 2] != old_prob[index + 2]) {
283 return true;
284 }
285
286 index += 4;
287 }
288 }
289 }
290 }
291 return false;
292 };
293
294 for (s32 block_index = 0; block_index < 4; block_index++) {
295 const s32 base_index = block_index * block_bytes;
296 const bool update = needs_update(base_index);
297 writer.Write(update);
298
299 if (update) {
300 s32 index = base_index;
301 for (s32 i = 0; i < 2; i++) {
302 for (s32 j = 0; j < 2; j++) {
303 for (s32 k = 0; k < 6; k++) {
304 for (s32 l = 0; l < 6; l++) {
305 if (k != 0 || l < 3) {
306 WriteProbabilityUpdate(writer, new_prob[index + 0],
307 old_prob[index + 0]);
308 WriteProbabilityUpdate(writer, new_prob[index + 1],
309 old_prob[index + 1]);
310 WriteProbabilityUpdate(writer, new_prob[index + 2],
311 old_prob[index + 2]);
312 }
313 index += 4;
314 }
315 }
316 }
317 }
318 }
319
320 if (block_index == tx_mode) {
321 break;
322 }
323 }
324}
325
326void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
327 const bool update = new_prob != old_prob;
328 writer.Write(update, diff_update_probability);
329
330 if (update) {
331 writer.Write(new_prob >> 1, 7);
332 }
333}
334
335s32 VP9::CalcMinLog2TileCols(s32 frame_width) {
336 const s32 sb64_cols = (frame_width + 63) / 64;
337 s32 min_log2 = 0;
338
339 while ((64 << min_log2) < sb64_cols) {
340 min_log2++;
341 }
342
343 return min_log2;
344}
345
346s32 VP9::CalcMaxLog2TileCols(s32 frame_width) {
347    const s32 sb64_cols = (frame_width + 63) / 64;
348 s32 max_log2 = 1;
349
350 while ((sb64_cols >> max_log2) >= 4) {
351 max_log2++;
352 }
353
354 return max_log2 - 1;
355}
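// For example, with a 1920-pixel-wide frame, sb64_cols = (1920 + 63) / 64 = 30, so
// CalcMinLog2TileCols returns 0 and CalcMaxLog2TileCols returns 2.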
356
357Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
358 PictureInfo picture_info{};
359 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
360 Vp9PictureInfo vp9_info = picture_info.Convert();
361
362 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
363
364    // surface_luma_offset[0:3] contains the reference frame addresses in the following order:
365    // last, golden, altref, current. It may be worthwhile to track the updates done here
366    // to avoid buffering frame data needed for reference frame updating during header composition.
367 std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64));
368
369    return vp9_info;
370}
371
372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
373 EntropyProbs entropy{};
374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
375 entropy.Convert(dst);
376}
377
378Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
379 Vp9FrameContainer frame{};
380 {
381 gpu.SyncGuestHost();
382        frame.info = GetVp9PictureInfo(state);
383
384 frame.bit_stream.resize(frame.info.bitstream_size);
385 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(),
386 frame.info.bitstream_size);
387 }
388 // Buffer two frames, saving the last show frame info
389 if (next_next_frame.bit_stream.size() != 0) {
390 Vp9FrameContainer temp{
391 .info = frame.info,
392 .bit_stream = frame.bit_stream,
393 };
394 next_next_frame.info.show_frame = frame.info.last_frame_shown;
395 frame.info = next_next_frame.info;
396 frame.bit_stream = next_next_frame.bit_stream;
397 next_next_frame = std::move(temp);
398
399 if (next_frame.bit_stream.size() != 0) {
400 Vp9FrameContainer temp{
401 .info = frame.info,
402 .bit_stream = frame.bit_stream,
403 };
404 next_frame.info.show_frame = frame.info.last_frame_shown;
405 frame.info = next_frame.info;
406 frame.bit_stream = next_frame.bit_stream;
407 next_frame = std::move(temp);
408 } else {
409 next_frame.info = frame.info;
410 next_frame.bit_stream = frame.bit_stream;
411 }
412 } else {
413 next_next_frame.info = frame.info;
414 next_next_frame.bit_stream = frame.bit_stream;
415 }
416 return frame;
417}
418
419std::vector<u8> VP9::ComposeCompressedHeader() {
420 VpxRangeEncoder writer{};
421
422 if (!current_frame_info.lossless) {
423 if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
424 writer.Write(3, 2);
425 writer.Write(current_frame_info.transform_mode == 4);
426 } else {
427 writer.Write(current_frame_info.transform_mode, 2);
428 }
429 }
430
431 if (current_frame_info.transform_mode == 4) {
432 // tx_mode_probs() in the spec
433 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
434 prev_frame_probs.tx_8x8_prob);
435 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
436 prev_frame_probs.tx_16x16_prob);
437 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
438 prev_frame_probs.tx_32x32_prob);
439 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
440 prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
441 prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
442 prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
443 }
444 }
445 // read_coef_probs() in the spec
446 WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
447 current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
448 // read_skip_probs() in the spec
449 WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
450 prev_frame_probs.skip_probs);
451
452 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
453 prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
454 prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
455 }
456
457 if (!current_frame_info.intra_only) {
458 // read_inter_probs() in the spec
459 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
460 prev_frame_probs.inter_mode_prob);
461 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
462 prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
463 }
464
465 if (current_frame_info.interp_filter == 4) {
466 // read_interp_filter_probs() in the spec
467 WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
468 prev_frame_probs.switchable_interp_prob);
469 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
470 prev_frame_probs.switchable_interp_prob =
471 current_frame_info.entropy.switchable_interp_prob;
472 }
473 }
474
475 // read_is_inter_probs() in the spec
476 WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
477 prev_frame_probs.intra_inter_prob);
478 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
479 prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
480 }
481 // frame_reference_mode() in the spec
482 if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
483 (current_frame_info.ref_frame_sign_bias[2] & 1) ||
484 (current_frame_info.ref_frame_sign_bias[1] & 1) !=
485 (current_frame_info.ref_frame_sign_bias[3] & 1)) {
486 if (current_frame_info.reference_mode >= 1) {
487 writer.Write(1, 1);
488 writer.Write(current_frame_info.reference_mode == 2);
489 } else {
490 writer.Write(0, 1);
491 }
492 }
493
494 // frame_reference_mode_probs() in the spec
495 if (current_frame_info.reference_mode == 2) {
496 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
497 prev_frame_probs.comp_inter_prob);
498 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
499 prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
500 }
501 }
502
503 if (current_frame_info.reference_mode != 1) {
504 WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
505 prev_frame_probs.single_ref_prob);
506 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
507 prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
508 }
509 }
510
511 if (current_frame_info.reference_mode != 0) {
512 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
513 prev_frame_probs.comp_ref_prob);
514 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
515 prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
516 }
517 }
518
519 // read_y_mode_probs
520 for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
521 ++index) {
522 WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
523 prev_frame_probs.y_mode_prob[index]);
524 }
525 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
526 prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
527 }
528 // read_partition_probs
529 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
530 prev_frame_probs.partition_prob);
531 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
532 prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
533 }
534
535 // mv_probs
536 for (s32 i = 0; i < 3; i++) {
537 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
538 prev_frame_probs.joints[i]);
539 }
540 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
541 prev_frame_probs.joints = current_frame_info.entropy.joints;
542 }
543
544 for (s32 i = 0; i < 2; i++) {
545 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
546 prev_frame_probs.sign[i]);
547
548 for (s32 j = 0; j < 10; j++) {
549 const int index = i * 10 + j;
550
551 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
552 prev_frame_probs.classes[index]);
553 }
554
555 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
556 prev_frame_probs.class_0[i]);
557
558 for (s32 j = 0; j < 10; j++) {
559 const int index = i * 10 + j;
560
561 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
562 prev_frame_probs.prob_bits[index]);
563 }
564 }
565
566 for (s32 i = 0; i < 2; i++) {
567 for (s32 j = 0; j < 2; j++) {
568 for (s32 k = 0; k < 3; k++) {
569 const int index = i * 2 * 3 + j * 3 + k;
570
571 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
572 prev_frame_probs.class_0_fr[index]);
573 }
574 }
575
576 for (s32 j = 0; j < 3; j++) {
577 const int index = i * 3 + j;
578
579 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
580 prev_frame_probs.fr[index]);
581 }
582 }
583
584 if (current_frame_info.allow_high_precision_mv) {
585 for (s32 index = 0; index < 2; index++) {
586 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
587 prev_frame_probs.class_0_hp[index]);
588 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
589 prev_frame_probs.high_precision[index]);
590 }
591 }
592
593 // save previous probs
594 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
595 prev_frame_probs.sign = current_frame_info.entropy.sign;
596 prev_frame_probs.classes = current_frame_info.entropy.classes;
597 prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
598 prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
599 prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
600 prev_frame_probs.fr = current_frame_info.entropy.fr;
601 prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
602 prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
603 }
604 }
605
606 writer.End();
607 return writer.GetBuffer();
614}
615
616VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
617 VpxBitStreamWriter uncomp_writer{};
618
619 uncomp_writer.WriteU(2, 2); // Frame marker.
620 uncomp_writer.WriteU(0, 2); // Profile.
621 uncomp_writer.WriteBit(false); // Show existing frame.
622    uncomp_writer.WriteBit(!current_frame_info.is_key_frame);        // Frame type (0 = key frame).
623    uncomp_writer.WriteBit(current_frame_info.show_frame);           // Show frame.
624    uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // Error resilient mode.
625
626 if (current_frame_info.is_key_frame) {
627 uncomp_writer.WriteU(frame_sync_code, 24);
628 uncomp_writer.WriteU(0, 3); // Color space.
629 uncomp_writer.WriteU(0, 1); // Color range.
630 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
631 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
632 uncomp_writer.WriteBit(false); // Render and frame size different.
633
634 // Reset context
635 prev_frame_probs = default_probs;
636 swap_next_golden = false;
637 loop_filter_ref_deltas.fill(0);
638 loop_filter_mode_deltas.fill(0);
639
640        // Allow frame offsets to stabilize before checking for golden frames
641 grace_period = 4;
642
643 // On key frames, all frame slots are set to the current frame,
644 // so the value of the selected slot doesn't really matter.
645 frame_ctxs.fill({current_frame_number, false, default_probs});
646
647 // intra only, meaning the frame can be recreated with no other references
648 current_frame_info.intra_only = true;
649
650 } else {
651 std::array<s32, 3> ref_frame_index;
652
653 if (!current_frame_info.show_frame) {
654 uncomp_writer.WriteBit(current_frame_info.intra_only);
655 if (!current_frame_info.last_frame_was_key) {
656 swap_next_golden = !swap_next_golden;
657 }
658 } else {
659 current_frame_info.intra_only = false;
660 }
661 if (!current_frame_info.error_resilient_mode) {
662 uncomp_writer.WriteU(0, 2); // Reset frame context.
663 }
664
665 // Last, Golden, Altref frames
666 ref_frame_index = std::array<s32, 3>{0, 1, 2};
667
668        // When the next frame is hidden, the altref and golden references
669        // are swapped.
670 if (swap_next_golden) {
671 ref_frame_index = std::array<s32, 3>{0, 2, 1};
672 }
673
674 // update Last Frame
675 u64 refresh_frame_flags = 1;
676
677        // The golden frame may refresh, detected by a change in the next golden frame offset
678 bool golden_refresh = false;
679 if (grace_period <= 0) {
680 for (s32 index = 1; index < 3; ++index) {
681 if (current_frame_info.frame_offsets[index] !=
682 next_frame.info.frame_offsets[index]) {
683 current_frame_info.refresh_frame[index] = true;
684 golden_refresh = true;
685 grace_period = 3;
686 }
687 }
688 }
689
690 if (current_frame_info.show_frame &&
691 (!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
692 // Update golden frame
693 refresh_frame_flags = swap_next_golden ? 2 : 4;
694 }
695
696 if (!current_frame_info.show_frame) {
697 // Update altref
698 refresh_frame_flags = swap_next_golden ? 2 : 4;
699 } else if (golden_refresh) {
700 refresh_frame_flags = 3;
701 }
702
703 if (current_frame_info.intra_only) {
704 uncomp_writer.WriteU(frame_sync_code, 24);
705 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
706 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
707 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
708 uncomp_writer.WriteBit(false); // Render and frame size different.
709 } else {
710 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
711
712 for (s32 index = 1; index < 4; index++) {
713 uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
714 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
715 }
716
717 uncomp_writer.WriteBit(true); // Frame size with refs.
718 uncomp_writer.WriteBit(false); // Render and frame size different.
719 uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
720 uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
721
722 if (current_frame_info.interp_filter != 4) {
723 uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
724 }
725 }
726 }
727
728 if (!current_frame_info.error_resilient_mode) {
729        uncomp_writer.WriteBit(true); // Refresh frame context. TODO: determine where this flag comes from.
730 uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
731 }
732
733 int frame_ctx_idx = 0;
734 if (!current_frame_info.show_frame) {
735 frame_ctx_idx = 1;
736 }
737
738 uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
739 prev_frame_probs =
740 frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
741 frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
742
743 uncomp_writer.WriteU(current_frame_info.first_level, 6);
744 uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
745 uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
746
747 if (current_frame_info.mode_ref_delta_enabled) {
748 // check if ref deltas are different, update accordingly
749 std::array<bool, 4> update_loop_filter_ref_deltas;
750 std::array<bool, 2> update_loop_filter_mode_deltas;
751
752 bool loop_filter_delta_update = false;
753
754 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
755 const s8 old_deltas = loop_filter_ref_deltas[index];
756 const s8 new_deltas = current_frame_info.ref_deltas[index];
757
758 loop_filter_delta_update |=
759 (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas);
760 }
761
762 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
763 const s8 old_deltas = loop_filter_mode_deltas[index];
764 const s8 new_deltas = current_frame_info.mode_deltas[index];
765
766 loop_filter_delta_update |=
767 (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas);
768 }
769
770 uncomp_writer.WriteBit(loop_filter_delta_update);
771
772 if (loop_filter_delta_update) {
773 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
774 uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
775
776 if (update_loop_filter_ref_deltas[index]) {
777 uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
778 }
779 }
780
781 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
782 uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
783
784 if (update_loop_filter_mode_deltas[index]) {
785 uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
786 }
787 }
788 // save new deltas
789 loop_filter_ref_deltas = current_frame_info.ref_deltas;
790 loop_filter_mode_deltas = current_frame_info.mode_deltas;
791 }
792 }
793
794 uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
795
796 uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
797 uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
798 uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
799
800 uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
801
802 const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
803 const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
804
805 const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
806 const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
807
808 // If it's less than the maximum, we need to add an extra 0 on the bitstream
809 // to indicate that it should stop reading.
810 if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
811 uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
812 } else {
813 uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
814 }
815
816 const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
817
818 uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
819
820 if (tile_rows_log2_is_nonzero) {
821 uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
822 }
823
824 return uncomp_writer;
825}
826
827std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
828 std::vector<u8> bitstream;
829 {
830 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
831 current_frame_info = curr_frame.info;
832 bitstream = curr_frame.bit_stream;
833 }
834
835    // The uncompressed header routine sets the prev_frame_probs state needed by the compressed header
836 auto uncomp_writer = ComposeUncompressedHeader();
837 std::vector<u8> compressed_header = ComposeCompressedHeader();
838
839 uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
840 uncomp_writer.Flush();
841 std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
842
843 // Write headers and frame to buffer
844 frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
845 std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size());
846 std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(),
847 compressed_header.size());
848 std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(),
849 bitstream.data(), bitstream.size());
850
851 // keep track of frame number
852 current_frame_number++;
853 grace_period--;
854
855 // don't display hidden frames
856 hidden = !current_frame_info.show_frame;
857 return frame;
858}
859
860VpxRangeEncoder::VpxRangeEncoder() {
861 Write(false);
862}
863
864VpxRangeEncoder::~VpxRangeEncoder() = default;
865
866void VpxRangeEncoder::Write(s32 value, s32 value_size) {
867 for (s32 bit = value_size - 1; bit >= 0; bit--) {
868 Write(((value >> bit) & 1) != 0);
869 }
870}
871
872void VpxRangeEncoder::Write(bool bit) {
873 Write(bit, half_probability);
874}
875
876void VpxRangeEncoder::Write(bool bit, s32 probability) {
877 u32 local_range = range;
878 const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
879 local_range = split;
880
881 if (bit) {
882 low_value += split;
883 local_range = range - split;
884 }
885
886 s32 shift = norm_lut[local_range];
887 local_range <<= shift;
888 count += shift;
889
890 if (count >= 0) {
891 const s32 offset = shift - count;
892
893 if (((low_value << (offset - 1)) >> 31) != 0) {
894 const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
895 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
896 while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) {
897 base_stream.WriteByte(0);
898
899 base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
900 }
901 base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
902 base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
903 }
904 base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
905
906 low_value <<= offset;
907 shift = count;
908 low_value &= 0xffffff;
909 count -= 8;
910 }
911
912 low_value <<= shift;
913 range = local_range;
914}
915
916void VpxRangeEncoder::End() {
917 for (std::size_t index = 0; index < 32; ++index) {
918 Write(false);
919 }
920}
921
922u8 VpxRangeEncoder::PeekByte() {
923 const u8 value = base_stream.ReadByte();
924 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
925
926 return value;
927}
928
929VpxBitStreamWriter::VpxBitStreamWriter() = default;
930
931VpxBitStreamWriter::~VpxBitStreamWriter() = default;
932
933void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
934 WriteBits(value, value_size);
935}
936
937void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
938 const bool sign = value < 0;
939 if (sign) {
940 value = -value;
941 }
942
943 WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
944}
945
946void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
947 const bool delta_coded = value != 0;
948 WriteBit(delta_coded);
949
950 if (delta_coded) {
951 WriteBits(value, 4);
952 }
953}
954
955void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
956 s32 value_pos = 0;
957 s32 remaining = bit_count;
958
959 while (remaining > 0) {
960 s32 copy_size = remaining;
961
962 const s32 free = GetFreeBufferBits();
963
964 if (copy_size > free) {
965 copy_size = free;
966 }
967
968 const s32 mask = (1 << copy_size) - 1;
969
970 const s32 src_shift = (bit_count - value_pos) - copy_size;
971 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
972
973 buffer |= ((value >> src_shift) & mask) << dst_shift;
974
975 value_pos += copy_size;
976 buffer_pos += copy_size;
977 remaining -= copy_size;
978 }
979}
980
981void VpxBitStreamWriter::WriteBit(bool state) {
982 WriteBits(state ? 1 : 0, 1);
983}
984
985s32 VpxBitStreamWriter::GetFreeBufferBits() {
986 if (buffer_pos == buffer_size) {
987 Flush();
988 }
989
990 return buffer_size - buffer_pos;
991}
992
993void VpxBitStreamWriter::Flush() {
994 if (buffer_pos == 0) {
995 return;
996 }
997 byte_array.push_back(static_cast<u8>(buffer));
998 buffer = 0;
999 buffer_pos = 0;
1000}
1001
1002std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
1003 return byte_array;
1004}
1005
1006const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
1007 return byte_array;
1008}
1009
1010} // namespace Tegra::Decoder
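
A minimal standalone sketch of the signed-value encoding implemented by VpxBitStreamWriter::WriteS above: the magnitude is written in value_size bits followed by a trailing sign bit. The helper name and example values below are hypothetical, not part of the commit.

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring VpxBitStreamWriter::WriteS: value_size magnitude bits,
// then one sign bit, packed into the low value_size + 1 bits of the result.
uint32_t EncodeSigned(int32_t value, uint32_t value_size) {
    const bool sign = value < 0;
    const uint32_t magnitude =
        static_cast<uint32_t>(sign ? -value : value) & ((1u << value_size) - 1u);
    return (magnitude << 1) | (sign ? 1u : 0u);
}

int main() {
    // A ref delta of -3 written with WriteS(-3, 6) spans 7 bits: 000011 followed by sign bit 1.
    std::printf("0x%X\n", EncodeSigned(-3, 6)); // prints 0x7
}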
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
new file mode 100644
index 000000000..748e11bae
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -0,0 +1,216 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include <vector>
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/stream.h"
12#include "video_core/command_classes/codecs/vp9_types.h"
13#include "video_core/command_classes/nvdec_common.h"
14
15namespace Tegra {
16class GPU;
17enum class FrameType { KeyFrame = 0, InterFrame = 1 };
18namespace Decoder {
19
20/// The VpxRangeEncoder and VpxBitStreamWriter classes are used to compose the
21/// VP9 header bitstreams.
22
23class VpxRangeEncoder {
24public:
25 VpxRangeEncoder();
26 ~VpxRangeEncoder();
27
28 /// Writes the rightmost value_size bits from value into the stream
29 void Write(s32 value, s32 value_size);
30
31 /// Writes a single bit with half probability
32 void Write(bool bit);
33
34 /// Writes a bit to the base_stream encoded with probability
35 void Write(bool bit, s32 probability);
36
37 /// Signal the end of the bitstream
38 void End();
39
40 std::vector<u8>& GetBuffer() {
41 return base_stream.GetBuffer();
42 }
43
44 const std::vector<u8>& GetBuffer() const {
45 return base_stream.GetBuffer();
46 }
47
48private:
49 u8 PeekByte();
50 Common::Stream base_stream{};
51 u32 low_value{};
52 u32 range{0xff};
53 s32 count{-24};
54 s32 half_probability{128};
55 static constexpr std::array<s32, 256> norm_lut{
56 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
57 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
58 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65 };
66};
67
68class VpxBitStreamWriter {
69public:
70 VpxBitStreamWriter();
71 ~VpxBitStreamWriter();
72
73 /// Write an unsigned integer value
74 void WriteU(u32 value, u32 value_size);
75
76 /// Write a signed integer value
77 void WriteS(s32 value, u32 value_size);
78
79 /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
80 void WriteDeltaQ(u32 value);
81
82 /// Write a single bit.
83 void WriteBit(bool state);
84
85 /// Pushes current buffer into byte_array, resets buffer
86 void Flush();
87
88 /// Returns byte_array
89 std::vector<u8>& GetByteArray();
90
91 /// Returns const byte_array
92 const std::vector<u8>& GetByteArray() const;
93
94private:
95 /// Write bit_count bits from value into buffer
96 void WriteBits(u32 value, u32 bit_count);
97
98 /// Gets next available position in buffer, invokes Flush() if buffer is full
99 s32 GetFreeBufferBits();
100
101 s32 buffer_size{8};
102
103 s32 buffer{};
104 s32 buffer_pos{};
105 std::vector<u8> byte_array;
106};
107
108class VP9 {
109public:
110 explicit VP9(GPU& gpu);
111 ~VP9();
112
113 /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
114 /// documentation
115 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
116
117 /// Returns true if the most recent frame was a hidden frame.
118 bool WasFrameHidden() const {
119 return hidden;
120 }
121
122private:
123 /// Generates compressed header probability updates in the bitstream writer
124 template <typename T, std::size_t N>
125 void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
126 const std::array<T, N>& old_prob);
127
128 /// Generates compressed header probability updates in the bitstream writer
129 /// If probs are not equal, WriteProbabilityDelta is invoked
130 void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
131
132 /// Generates compressed header probability deltas in the bitstream writer
133 void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
134
135 /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
136 s32 RemapProbability(s32 new_prob, s32 old_prob);
137
138 /// Recenters probability. Based on section 6.3.6 of VP9 Specification
139 s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
140
141 /// Inverse of 6.3.4 Decode term subexp
142 void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
143
144 /// Writes if the value is less than the test value
145 bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
146
147 /// Writes probability updates for the Coef probabilities
148 void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
149 const std::array<u8, 2304>& new_prob,
150 const std::array<u8, 2304>& old_prob);
151
152 /// Write probabilities for 4-byte aligned structures
153 template <typename T, std::size_t N>
154 void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
155 const std::array<T, N>& old_prob);
156
157 /// Write motion vector probability updates. 6.3.17 in the spec
158 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
159
160 /// 6.2.14 Tile size calculation
161 s32 CalcMinLog2TileCols(s32 frame_width);
162 s32 CalcMaxLog2TileCols(s32 frame_width);
163
164 /// Returns VP9 information from NVDEC provided offset and size
165 Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
166
167 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
168 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
169
170 /// Returns frame to be decoded after buffering
171 Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
172
173 /// Use NVDEC-provided information to compose the headers for the current frame
174 std::vector<u8> ComposeCompressedHeader();
175 VpxBitStreamWriter ComposeUncompressedHeader();
176
177 GPU& gpu;
178 std::vector<u8> frame;
179
180 std::array<s8, 4> loop_filter_ref_deltas{};
181 std::array<s8, 2> loop_filter_mode_deltas{};
182
183 bool hidden;
184 s64 current_frame_number = -2; // since we buffer 2 frames
185 s32 grace_period = 6; // frame offsets need to stabilize
186 std::array<FrameContexts, 4> frame_ctxs{};
187 Vp9FrameContainer next_frame{};
188 Vp9FrameContainer next_next_frame{};
189 bool swap_next_golden{};
190
191 Vp9PictureInfo current_frame_info{};
192 Vp9EntropyProbs prev_frame_probs{};
193
194 s32 diff_update_probability = 252;
195 s32 frame_sync_code = 0x498342;
196 static constexpr std::array<s32, 254> map_lut = {
197 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
198 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50,
199 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66,
200 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82,
201 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6,
202 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113,
203 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129,
204 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
205 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160,
206 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176,
207 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
208 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207,
209 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223,
210 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
211 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
212 };
213};
214
215} // namespace Decoder
216} // namespace Tegra
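
The norm_lut table in VpxRangeEncoder above appears to tabulate, for each non-zero byte value v, the left shift needed to bring its most significant bit up to bit 7 (that is, 7 - floor(log2(v))). A small sketch, assuming that reading of the table, which reproduces a few entries:

#include <cstdio>

int NormShift(unsigned v) {
    int shift = 0;
    while (v < 0x80) { // shift until the top bit of the byte range is set
        v <<= 1;
        ++shift;
    }
    return shift;
}

int main() {
    std::printf("%d %d %d %d\n", NormShift(1), NormShift(2), NormShift(64), NormShift(255));
    // Expected: 7 6 1 0, matching norm_lut[1], norm_lut[2], norm_lut[64], norm_lut[255].
}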
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
new file mode 100644
index 000000000..8688fdac0
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -0,0 +1,369 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <list>
9#include <vector>
10#include "common/cityhash.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/command_classes/nvdec_common.h"
14
15namespace Tegra {
16class GPU;
17
18namespace Decoder {
19struct Vp9FrameDimensions {
20 s16 width{};
21 s16 height{};
22 s16 luma_pitch{};
23 s16 chroma_pitch{};
24};
25static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
26
27enum FrameFlags : u32 {
28 IsKeyFrame = 1 << 0,
29 LastFrameIsKeyFrame = 1 << 1,
30 FrameSizeChanged = 1 << 2,
31 ErrorResilientMode = 1 << 3,
32 LastShowFrame = 1 << 4,
33 IntraOnly = 1 << 5,
34};
35
36enum class MvJointType {
37 MvJointZero = 0, /* Zero vector */
38 MvJointHnzvz = 1, /* Vert zero, hor nonzero */
39 MvJointHzvnz = 2, /* Hor zero, vert nonzero */
40 MvJointHnzvnz = 3, /* Both components nonzero */
41};
42enum class MvClassType {
43 MvClass0 = 0, /* (0, 2] integer pel */
44 MvClass1 = 1, /* (2, 4] integer pel */
45 MvClass2 = 2, /* (4, 8] integer pel */
46 MvClass3 = 3, /* (8, 16] integer pel */
47 MvClass4 = 4, /* (16, 32] integer pel */
48 MvClass5 = 5, /* (32, 64] integer pel */
49 MvClass6 = 6, /* (64, 128] integer pel */
50 MvClass7 = 7, /* (128, 256] integer pel */
51 MvClass8 = 8, /* (256, 512] integer pel */
52 MvClass9 = 9, /* (512, 1024] integer pel */
53 MvClass10 = 10, /* (1024,2048] integer pel */
54};
55
56enum class BlockSize {
57 Block4x4 = 0,
58 Block4x8 = 1,
59 Block8x4 = 2,
60 Block8x8 = 3,
61 Block8x16 = 4,
62 Block16x8 = 5,
63 Block16x16 = 6,
64 Block16x32 = 7,
65 Block32x16 = 8,
66 Block32x32 = 9,
67 Block32x64 = 10,
68 Block64x32 = 11,
69 Block64x64 = 12,
70 BlockSizes = 13,
71 BlockInvalid = BlockSizes
72};
73
74enum class PredictionMode {
75 DcPred = 0, // Average of above and left pixels
76 VPred = 1, // Vertical
77 HPred = 2, // Horizontal
78 D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
79 D135Pred = 4, // Directional 135 deg = 180 - 45
80 D117Pred = 5, // Directional 117 deg = 180 - 63
81 D153Pred = 6, // Directional 153 deg = 180 - 27
82 D207Pred = 7, // Directional 207 deg = 180 + 27
83 D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
84 TmPred = 9, // True-motion
85 NearestMv = 10,
86 NearMv = 11,
87 ZeroMv = 12,
88 NewMv = 13,
89 MbModeCount = 14
90};
91
92enum class TxSize {
93 Tx4x4 = 0, // 4x4 transform
94 Tx8x8 = 1, // 8x8 transform
95 Tx16x16 = 2, // 16x16 transform
96 Tx32x32 = 3, // 32x32 transform
97 TxSizes = 4
98};
99
100enum class TxMode {
101 Only4X4 = 0, // Only 4x4 transform used
102 Allow8X8 = 1, // Allow block transform size up to 8x8
103 Allow16X16 = 2, // Allow block transform size up to 16x16
104 Allow32X32 = 3, // Allow block transform size up to 32x32
105 TxModeSelect = 4, // Transform specified for each block
106 TxModes = 5
107};
108
109enum class reference_mode {
110 SingleReference = 0,
111 CompoundReference = 1,
112 ReferenceModeSelect = 2,
113 ReferenceModes = 3
114};
115
116struct Segmentation {
117 u8 enabled{};
118 u8 update_map{};
119 u8 temporal_update{};
120 u8 abs_delta{};
121 std::array<u32, 8> feature_mask{};
122 std::array<std::array<s16, 4>, 8> feature_data{};
123};
124static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
125
126struct LoopFilter {
127 u8 mode_ref_delta_enabled{};
128 std::array<s8, 4> ref_deltas{};
129 std::array<s8, 2> mode_deltas{};
130};
131static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
132
133struct Vp9EntropyProbs {
134 std::array<u8, 36> y_mode_prob{};
135 std::array<u8, 64> partition_prob{};
136 std::array<u8, 2304> coef_probs{};
137 std::array<u8, 8> switchable_interp_prob{};
138 std::array<u8, 28> inter_mode_prob{};
139 std::array<u8, 4> intra_inter_prob{};
140 std::array<u8, 5> comp_inter_prob{};
141 std::array<u8, 10> single_ref_prob{};
142 std::array<u8, 5> comp_ref_prob{};
143 std::array<u8, 6> tx_32x32_prob{};
144 std::array<u8, 4> tx_16x16_prob{};
145 std::array<u8, 2> tx_8x8_prob{};
146 std::array<u8, 3> skip_probs{};
147 std::array<u8, 3> joints{};
148 std::array<u8, 2> sign{};
149 std::array<u8, 20> classes{};
150 std::array<u8, 2> class_0{};
151 std::array<u8, 20> prob_bits{};
152 std::array<u8, 12> class_0_fr{};
153 std::array<u8, 6> fr{};
154 std::array<u8, 2> class_0_hp{};
155 std::array<u8, 2> high_precision{};
156};
157static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size");
158
159struct Vp9PictureInfo {
160 bool is_key_frame{};
161 bool intra_only{};
162 bool last_frame_was_key{};
163 bool frame_size_changed{};
164 bool error_resilient_mode{};
165 bool last_frame_shown{};
166 bool show_frame{};
167 std::array<s8, 4> ref_frame_sign_bias{};
168 s32 base_q_index{};
169 s32 y_dc_delta_q{};
170 s32 uv_dc_delta_q{};
171 s32 uv_ac_delta_q{};
172 bool lossless{};
173 s32 transform_mode{};
174 bool allow_high_precision_mv{};
175 s32 interp_filter{};
176 s32 reference_mode{};
177 s8 comp_fixed_ref{};
178 std::array<s8, 2> comp_var_ref{};
179 s32 log2_tile_cols{};
180 s32 log2_tile_rows{};
181 bool segment_enabled{};
182 bool segment_map_update{};
183 bool segment_map_temporal_update{};
184 s32 segment_abs_delta{};
185 std::array<u32, 8> segment_feature_enable{};
186 std::array<std::array<s16, 4>, 8> segment_feature_data{};
187 bool mode_ref_delta_enabled{};
188 bool use_prev_in_find_mv_refs{};
189 std::array<s8, 4> ref_deltas{};
190 std::array<s8, 2> mode_deltas{};
191 Vp9EntropyProbs entropy{};
192 Vp9FrameDimensions frame_size{};
193 u8 first_level{};
194 u8 sharpness_level{};
195 u32 bitstream_size{};
196 std::array<u64, 4> frame_offsets{};
197 std::array<bool, 4> refresh_frame{};
198};
199
200struct Vp9FrameContainer {
201 Vp9PictureInfo info{};
202 std::vector<u8> bit_stream;
203};
204
205struct PictureInfo {
206 INSERT_PADDING_WORDS(12);
207 u32 bitstream_size{};
208 INSERT_PADDING_WORDS(5);
209 Vp9FrameDimensions last_frame_size{};
210 Vp9FrameDimensions golden_frame_size{};
211 Vp9FrameDimensions alt_frame_size{};
212 Vp9FrameDimensions current_frame_size{};
213 u32 vp9_flags{};
214 std::array<s8, 4> ref_frame_sign_bias{};
215 u8 first_level{};
216 u8 sharpness_level{};
217 u8 base_q_index{};
218 u8 y_dc_delta_q{};
219 u8 uv_ac_delta_q{};
220 u8 uv_dc_delta_q{};
221 u8 lossless{};
222 u8 tx_mode{};
223 u8 allow_high_precision_mv{};
224 u8 interp_filter{};
225 u8 reference_mode{};
226 s8 comp_fixed_ref{};
227 std::array<s8, 2> comp_var_ref{};
228 u8 log2_tile_cols{};
229 u8 log2_tile_rows{};
230 Segmentation segmentation{};
231 LoopFilter loop_filter{};
232 INSERT_PADDING_BYTES(5);
233 u32 surface_params{};
234 INSERT_PADDING_WORDS(3);
235
236 Vp9PictureInfo Convert() const {
237
238 return Vp9PictureInfo{
239 .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
240 .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
241 .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
242 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
243 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
244 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
245 .ref_frame_sign_bias = ref_frame_sign_bias,
246 .base_q_index = base_q_index,
247 .y_dc_delta_q = y_dc_delta_q,
248 .uv_dc_delta_q = uv_dc_delta_q,
249 .uv_ac_delta_q = uv_ac_delta_q,
250 .lossless = lossless != 0,
251 .transform_mode = tx_mode,
252 .allow_high_precision_mv = allow_high_precision_mv != 0,
253 .interp_filter = interp_filter,
254 .reference_mode = reference_mode,
255 .comp_fixed_ref = comp_fixed_ref,
256 .comp_var_ref = comp_var_ref,
257 .log2_tile_cols = log2_tile_cols,
258 .log2_tile_rows = log2_tile_rows,
259 .segment_enabled = segmentation.enabled != 0,
260 .segment_map_update = segmentation.update_map != 0,
261 .segment_map_temporal_update = segmentation.temporal_update != 0,
262 .segment_abs_delta = segmentation.abs_delta,
263 .segment_feature_enable = segmentation.feature_mask,
264 .segment_feature_data = segmentation.feature_data,
265 .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
266 .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
267 !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
268 !(vp9_flags == (FrameFlags::IntraOnly)) &&
269 (vp9_flags == (FrameFlags::LastShowFrame)) &&
270 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
271 .ref_deltas = loop_filter.ref_deltas,
272 .mode_deltas = loop_filter.mode_deltas,
273 .frame_size = current_frame_size,
274 .first_level = first_level,
275 .sharpness_level = sharpness_level,
276 .bitstream_size = bitstream_size,
277 };
278 }
279};
280static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
281
282struct EntropyProbs {
283 INSERT_PADDING_BYTES(1024);
284 std::array<std::array<u8, 4>, 7> inter_mode_prob{};
285 std::array<u8, 4> intra_inter_prob{};
286 INSERT_PADDING_BYTES(80);
287 std::array<std::array<u8, 1>, 2> tx_8x8_prob{};
288 std::array<std::array<u8, 2>, 2> tx_16x16_prob{};
289 std::array<std::array<u8, 3>, 2> tx_32x32_prob{};
290 std::array<u8, 4> y_mode_prob_e8{};
291 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
292 INSERT_PADDING_BYTES(64);
293 std::array<std::array<u8, 4>, 16> partition_prob{};
294 INSERT_PADDING_BYTES(10);
295 std::array<std::array<u8, 2>, 4> switchable_interp_prob{};
296 std::array<u8, 5> comp_inter_prob{};
297 std::array<u8, 4> skip_probs{};
298 std::array<u8, 3> joints{};
299 std::array<u8, 2> sign{};
300 std::array<std::array<u8, 1>, 2> class_0{};
301 std::array<std::array<u8, 3>, 2> fr{};
302 std::array<u8, 2> class_0_hp{};
303 std::array<u8, 2> high_precision{};
304 std::array<std::array<u8, 10>, 2> classes{};
305 std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{};
306 std::array<std::array<u8, 10>, 2> pred_bits{};
307 std::array<std::array<u8, 2>, 5> single_ref_prob{};
308 std::array<u8, 5> comp_ref_prob{};
309 INSERT_PADDING_BYTES(17);
310 std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4>
311 coef_probs{};
312
313 void Convert(Vp9EntropyProbs& fc) {
314 std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size());
315
316 std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(),
317 fc.intra_inter_prob.size());
318
319 std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size());
320 std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size());
321 std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size());
322
323 for (s32 i = 0; i < 4; i++) {
324 for (s32 j = 0; j < 9; j++) {
325 fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
326 }
327 }
328
329 std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size());
330
331 std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(),
332 fc.switchable_interp_prob.size());
333 std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size());
334 std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size());
335
336 std::memcpy(fc.joints.data(), joints.data(), fc.joints.size());
337
338 std::memcpy(fc.sign.data(), sign.data(), fc.sign.size());
339 std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size());
340 std::memcpy(fc.fr.data(), fr.data(), fc.fr.size());
341 std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size());
342 std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size());
343 std::memcpy(fc.classes.data(), classes.data(), fc.classes.size());
344 std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size());
345 std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size());
346 std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size());
347 std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size());
348
349 std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size());
350 }
351};
352static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
353
354enum class Ref { Last, Golden, AltRef };
355
356struct RefPoolElement {
357 s64 frame{};
358 Ref ref{};
359 bool refresh{};
360};
361
362struct FrameContexts {
363 s64 from{};
364 bool adapted{};
365 Vp9EntropyProbs probs{};
366};
367
368}; // namespace Decoder
369}; // namespace Tegra
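
A toy illustration of how PictureInfo::Convert() above decomposes the packed vp9_flags word with the FrameFlags bitmask; the flag word used here is made up for the example:

#include <cstdint>
#include <cstdio>

enum FrameFlags : uint32_t {
    IsKeyFrame = 1 << 0,
    LastFrameIsKeyFrame = 1 << 1,
    FrameSizeChanged = 1 << 2,
    ErrorResilientMode = 1 << 3,
    LastShowFrame = 1 << 4,
    IntraOnly = 1 << 5,
};

int main() {
    const uint32_t vp9_flags = IsKeyFrame | LastShowFrame; // hypothetical flag word
    const bool is_key_frame = (vp9_flags & IsKeyFrame) != 0;
    const bool intra_only = (vp9_flags & IntraOnly) != 0;
    std::printf("key=%d intra_only=%d\n", is_key_frame, intra_only); // key=1 intra_only=0
}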
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
new file mode 100644
index 000000000..a5234ee47
--- /dev/null
+++ b/src/video_core/command_classes/host1x.cpp
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/command_classes/host1x.h"
7#include "video_core/gpu.h"
8
9Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
10
11Tegra::Host1x::~Host1x() = default;
12
13void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
14 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
15 std::memcpy(state_offset, &arguments, sizeof(u32));
16}
17
18void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) {
19 StateWrite(static_cast<u32>(method), arguments[0]);
20 switch (method) {
21 case Method::WaitSyncpt:
22 Execute(arguments[0]);
23 break;
24 case Method::LoadSyncptPayload32:
25 syncpoint_value = arguments[0];
26 break;
27 case Method::WaitSyncpt32:
28 Execute(arguments[0]);
29 break;
30 default:
31 UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
32 break;
33 }
34}
35
36void Tegra::Host1x::Execute(u32 data) {
37 // This method waits on a valid syncpoint.
38 // TODO: Implement when proper Async is in place
39}
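
A simplified sketch of the StateWrite pattern above: the method id is used as a word index into the register block and the 32-bit argument is memcpy'd into that slot. The toy struct below is hypothetical:

#include <cstdint>
#include <cstdio>
#include <cstring>

struct ToyRegs {
    uint32_t a;
    uint32_t b;
    uint32_t c;
};

int main() {
    ToyRegs state{};
    const uint32_t offset = 2;      // word index of 'c'
    const uint32_t argument = 0xAB;
    std::memcpy(reinterpret_cast<uint8_t*>(&state) + offset * sizeof(uint32_t), &argument,
                sizeof(uint32_t));
    std::printf("0x%X\n", state.c); // prints 0xAB
}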
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
new file mode 100644
index 000000000..501a5ed2e
--- /dev/null
+++ b/src/video_core/command_classes/host1x.h
@@ -0,0 +1,78 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra {
12class GPU;
13class Nvdec;
14
15class Host1x {
16public:
17 struct Host1xClassRegisters {
18 u32 incr_syncpt{};
19 u32 incr_syncpt_ctrl{};
20 u32 incr_syncpt_error{};
21 INSERT_PADDING_WORDS(5);
22 u32 wait_syncpt{};
23 u32 wait_syncpt_base{};
24 u32 wait_syncpt_incr{};
25 u32 load_syncpt_base{};
26 u32 incr_syncpt_base{};
27 u32 clear{};
28 u32 wait{};
29 u32 wait_with_interrupt{};
30 u32 delay_use{};
31 u32 tick_count_high{};
32 u32 tick_count_low{};
33 u32 tick_ctrl{};
34 INSERT_PADDING_WORDS(23);
35 u32 ind_ctrl{};
36 u32 ind_off2{};
37 u32 ind_off{};
38 std::array<u32, 31> ind_data{};
39 INSERT_PADDING_WORDS(1);
40 u32 load_syncpoint_payload32{};
41 u32 stall_ctrl{};
42 u32 wait_syncpt32{};
43 u32 wait_syncpt_base32{};
44 u32 load_syncpt_base32{};
45 u32 incr_syncpt_base32{};
46 u32 stall_count_high{};
47 u32 stall_count_low{};
48 u32 xref_ctrl{};
49 u32 channel_xref_high{};
50 u32 channel_xref_low{};
51 };
52 static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
53
54 enum class Method : u32 {
55 WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4,
56 LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4,
57 WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4,
58 };
59
60 explicit Host1x(GPU& gpu);
61 ~Host1x();
62
63 /// Writes the method into the state, invoking Execute() if encountered
64 void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);
65
66private:
67 /// For Host1x, Execute() waits on a syncpoint previously written into the state
68 void Execute(u32 data);
69
70 /// Write argument into the provided offset
71 void StateWrite(u32 offset, u32 arguments);
72
73 u32 syncpoint_value{};
74 Host1xClassRegisters state{};
75 GPU& gpu;
76};
77
78} // namespace Tegra
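
The Method enumerators above are register word indices computed as offsetof(...) / 4; for example, wait_syncpt follows three counter words and five padding words, giving index 8. A minimal mirror of that layout, with a hypothetical struct name:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ToyHost1xRegs {
    uint32_t incr_syncpt;
    uint32_t incr_syncpt_ctrl;
    uint32_t incr_syncpt_error;
    uint32_t padding[5];
    uint32_t wait_syncpt;
};

int main() {
    // Mirrors Method::WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4.
    std::printf("%zu\n", offsetof(ToyHost1xRegs, wait_syncpt) / 4); // prints 8
}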
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
new file mode 100644
index 000000000..ede9466eb
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -0,0 +1,56 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bitset>
6#include "common/assert.h"
7#include "common/bit_util.h"
8#include "core/memory.h"
9#include "video_core/command_classes/nvdec.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12
13namespace Tegra {
14
15Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
16
17Nvdec::~Nvdec() = default;
18
19void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) {
20 if (method == Method::SetVideoCodec) {
21 codec->StateWrite(static_cast<u32>(method), arguments[0]);
22 } else {
23 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
24 }
25
26 switch (method) {
27 case Method::SetVideoCodec:
28 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
29 break;
30 case Method::Execute:
31 Execute();
32 break;
33 }
34}
35
36AVFrame* Nvdec::GetFrame() {
37 return codec->GetCurrentFrame();
38}
39
40const AVFrame* Nvdec::GetFrame() const {
41 return codec->GetCurrentFrame();
42}
43
44void Nvdec::Execute() {
45 switch (codec->GetCurrentCodec()) {
46 case NvdecCommon::VideoCodec::H264:
47 case NvdecCommon::VideoCodec::Vp9:
48 codec->Decode();
49 break;
50 default:
51 UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
52 break;
53 }
54}
55
56} // namespace Tegra
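
For the non-codec methods above, the argument is stored shifted left by 8, so the register effectively holds a GPU address in 256-byte units. A small sketch with a made-up register value:

#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t argument = 0x12345;                              // hypothetical register value
    const uint64_t gpu_addr = static_cast<uint64_t>(argument) << 8; // byte address
    std::printf("0x%llX\n", static_cast<unsigned long long>(gpu_addr)); // prints 0x1234500
}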
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
new file mode 100644
index 000000000..c1a9d843e
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.h
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10#include "video_core/command_classes/codecs/codec.h"
11
12namespace Tegra {
13class GPU;
14
15class Nvdec {
16public:
17 enum class Method : u32 {
18 SetVideoCodec = 0x80,
19 Execute = 0xc0,
20 };
21
22 explicit Nvdec(GPU& gpu);
23 ~Nvdec();
24
25 /// Writes the method into the state, invoking Execute() if encountered
26 void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);
27
28 /// Return most recently decoded frame
29 AVFrame* GetFrame();
30 const AVFrame* GetFrame() const;
31
32private:
33 /// Invoke codec to decode a frame
34 void Execute();
35
36 GPU& gpu;
37 std::unique_ptr<Tegra::Codec> codec;
38};
39} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
new file mode 100644
index 000000000..01b5e086d
--- /dev/null
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9
10namespace Tegra::NvdecCommon {
11
12struct NvdecRegisters {
13 INSERT_PADDING_WORDS(256);
14 u64 set_codec_id{};
15 INSERT_PADDING_WORDS(254);
16 u64 set_platform_id{};
17 u64 picture_info_offset{};
18 u64 frame_bitstream_offset{};
19 u64 frame_number{};
20 u64 h264_slice_data_offsets{};
21 u64 h264_mv_dump_offset{};
22 INSERT_PADDING_WORDS(6);
23 u64 frame_stats_offset{};
24 u64 h264_last_surface_luma_offset{};
25 u64 h264_last_surface_chroma_offset{};
26 std::array<u64, 17> surface_luma_offset{};
27 std::array<u64, 17> surface_chroma_offset{};
28 INSERT_PADDING_WORDS(132);
29 u64 vp9_entropy_probs_offset{};
30 u64 vp9_backward_updates_offset{};
31 u64 vp9_last_frame_segmap_offset{};
32 u64 vp9_curr_frame_segmap_offset{};
33 INSERT_PADDING_WORDS(2);
34 u64 vp9_last_frame_mvs_offset{};
35 u64 vp9_curr_frame_mvs_offset{};
36 INSERT_PADDING_WORDS(2);
37};
38static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
39
40enum class VideoCodec : u32 {
41 None = 0x0,
42 H264 = 0x3,
43 Vp8 = 0x5,
44 H265 = 0x7,
45 Vp9 = 0x9,
46};
47
48} // namespace Tegra::NvdecCommon
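
The NvdecRegisters layout above places set_codec_id after 256 padding words, i.e. at byte offset 0x400. A collapsed toy layout that reproduces the offset:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ToyNvdecRegs {
    uint32_t padding[256]; // stands in for INSERT_PADDING_WORDS(256)
    uint64_t set_codec_id;
};

int main() {
    std::printf("0x%zX\n", offsetof(ToyNvdecRegs, set_codec_id)); // prints 0x400
}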
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
new file mode 100644
index 000000000..a0ab44855
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -0,0 +1,60 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include <algorithm>
22#include "sync_manager.h"
23#include "video_core/gpu.h"
24
25namespace Tegra {
26SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
27SyncptIncrManager::~SyncptIncrManager() = default;
28
29void SyncptIncrManager::Increment(u32 id) {
30 increments.push_back(SyncptIncr{0, id, true});
31 IncrementAllDone();
32}
33
34u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
35 const u32 handle = current_id++;
36 increments.push_back(SyncptIncr{handle, class_id, id});
37 return handle;
38}
39
40void SyncptIncrManager::SignalDone(u32 handle) {
41 auto done_incr = std::find_if(increments.begin(), increments.end(),
42 [handle](SyncptIncr incr) { return incr.id == handle; });
43 if (done_incr != increments.end()) {
44 const SyncptIncr incr = *done_incr;
45 *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true};
46 }
47 IncrementAllDone();
48}
49
50void SyncptIncrManager::IncrementAllDone() {
51 std::size_t done_count = 0;
52 for (; done_count < increments.size(); ++done_count) {
53 if (!increments[done_count].complete) {
54 break;
55 }
56 gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
57 }
58 increments.erase(increments.begin(), increments.begin() + done_count);
59}
60} // namespace Tegra
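
A simplified sketch of the in-order flush performed by IncrementAllDone above: increments are signalled only up to the first entry that is still pending. Types and values here are illustrative:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Incr {
    unsigned handle;
    bool complete;
};

int main() {
    std::vector<Incr> increments{{1, true}, {2, false}, {3, true}};
    std::size_t done = 0;
    for (; done < increments.size(); ++done) {
        if (!increments[done].complete) {
            break; // handle 2 is still pending, so handle 3 must wait even though it is done
        }
        std::printf("signal syncpoint for handle %u\n", increments[done].handle);
    }
    increments.erase(increments.begin(), increments.begin() + done);
    std::printf("%zu still pending\n", increments.size()); // prints "2 still pending"
}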
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
new file mode 100644
index 000000000..353b67573
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.h
@@ -0,0 +1,64 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <mutex>
24#include <vector>
25#include "common/common_types.h"
26
27namespace Tegra {
28class GPU;
29struct SyncptIncr {
30 u32 id;
31 u32 class_id;
32 u32 syncpt_id;
33 bool complete;
34
35 SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
36 : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
37};
38
39class SyncptIncrManager {
40public:
41 explicit SyncptIncrManager(GPU& gpu);
42 ~SyncptIncrManager();
43
44 /// Add syncpoint id and increment all
45 void Increment(u32 id);
46
47 /// Returns a handle to increment later
48 u32 IncrementWhenDone(u32 class_id, u32 id);
49
50 /// Marks the increment with the given handle as complete, then increments all done
51 void SignalDone(u32 handle);
52
53 /// Increment all sequential pending increments that are already done.
54 void IncrementAllDone();
55
56private:
57 std::vector<SyncptIncr> increments;
58 std::mutex increment_lock;
59 u32 current_id{};
60
61 GPU& gpu;
62};
63
64} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
new file mode 100644
index 000000000..66e15a1a8
--- /dev/null
+++ b/src/video_core/command_classes/vic.cpp
@@ -0,0 +1,180 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include "common/assert.h"
7#include "video_core/command_classes/nvdec.h"
8#include "video_core/command_classes/vic.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12#include "video_core/texture_cache/surface_params.h"
13
14extern "C" {
15#include <libswscale/swscale.h>
16}
17
18namespace Tegra {
19
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
21 : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
22Vic::~Vic() = default;
23
24void Vic::VicStateWrite(u32 offset, u32 arguments) {
25 u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
26 std::memcpy(state_offset, &arguments, sizeof(u32));
27}
28
29void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) {
30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
31 VicStateWrite(static_cast<u32>(method), arguments[0]);
32 const u64 arg = static_cast<u64>(arguments[0]) << 8;
33 switch (method) {
34 case Method::Execute:
35 Execute();
36 break;
37 case Method::SetConfigStructOffset:
38 config_struct_address = arg;
39 break;
40 case Method::SetOutputSurfaceLumaOffset:
41 output_surface_luma_address = arg;
42 break;
43 case Method::SetOutputSurfaceChromaUOffset:
44 output_surface_chroma_u_address = arg;
45 break;
46 case Method::SetOutputSurfaceChromaVOffset:
47 output_surface_chroma_v_address = arg;
48 break;
49 default:
50 break;
51 }
52}
53
54void Vic::Execute() {
55 if (output_surface_luma_address == 0) {
56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
57 vic_state.output_surface.luma_offset);
58 return;
59 }
60 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
61 const VideoPixelFormat pixel_format =
62 static_cast<VideoPixelFormat>(config.pixel_format.Value());
63 switch (pixel_format) {
64 case VideoPixelFormat::BGRA8:
65 case VideoPixelFormat::RGBA8: {
66 LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
67 const auto* frame = nvdec_processor->GetFrame();
68
69 if (!frame || frame->width == 0 || frame->height == 0) {
70 return;
71 }
72 if (scaler_ctx == nullptr || frame->width != scaler_width ||
73 frame->height != scaler_height) {
74 const AVPixelFormat target_format =
75 (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
76
77 sws_freeContext(scaler_ctx);
78 scaler_ctx = nullptr;
79
80 // FFmpeg returns all frames in YUV420; convert it into the expected format
81 scaler_ctx =
82 sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
83 frame->height, target_format, 0, nullptr, nullptr, nullptr);
84
85 scaler_width = frame->width;
86 scaler_height = frame->height;
87 }
88 // Get Converted frame
89 const std::size_t linear_size = frame->width * frame->height * 4;
90
91 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
92 AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
93
94 const int converted_stride{frame->width * 4};
95 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
96
97 sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
98 &converted_frame_buf_addr, &converted_stride);
99
100 const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
101 if (blk_kind != 0) {
102 // swizzle pitch linear to block linear
103 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
104 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
105 block_height, 0);
106 std::vector<u8> swizzled_data(size);
107 Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
108 swizzled_data.data(), converted_frame_buffer.get(),
109 false, block_height, 0, 1);
110
111 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
112 gpu.Maxwell3D().OnMemoryWrite();
113 } else {
114 // send pitch linear frame
115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
116 linear_size);
117 gpu.Maxwell3D().OnMemoryWrite();
118 }
119 break;
120 }
121 case VideoPixelFormat::Yuv420: {
122 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
123
124 const auto* frame = nvdec_processor->GetFrame();
125
126 if (!frame || frame->width == 0 || frame->height == 0) {
127 return;
128 }
129
130 const std::size_t surface_width = config.surface_width_minus1 + 1;
131 const std::size_t surface_height = config.surface_height_minus1 + 1;
132 const std::size_t half_width = surface_width / 2;
133 const std::size_t half_height = config.surface_height_minus1 / 2;
134 const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
135
136 const auto* luma_ptr = frame->data[0];
137 const auto* chroma_b_ptr = frame->data[1];
138 const auto* chroma_r_ptr = frame->data[2];
139 const auto stride = frame->linesize[0];
140 const auto half_stride = frame->linesize[1];
141
142 std::vector<u8> luma_buffer(aligned_width * surface_height);
143 std::vector<u8> chroma_buffer(aligned_width * half_height);
144
145 // Populate luma buffer
146 for (std::size_t y = 0; y < surface_height - 1; ++y) {
147 std::size_t src = y * stride;
148 std::size_t dst = y * aligned_width;
149
150 std::size_t size = surface_width;
151
152 for (std::size_t offset = 0; offset < size; ++offset) {
153 luma_buffer[dst + offset] = luma_ptr[src + offset];
154 }
155 }
156 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
157 luma_buffer.size());
158
159 // Populate chroma buffer from both channels with interleaving.
160 for (std::size_t y = 0; y < half_height; ++y) {
161 std::size_t src = y * half_stride;
162 std::size_t dst = y * aligned_width;
163
164 for (std::size_t x = 0; x < half_width; ++x) {
165 chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
166 chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
167 }
168 }
169 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
170 chroma_buffer.size());
171 gpu.Maxwell3D().OnMemoryWrite();
172 break;
173 }
174 default:
175 UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
176 break;
177 }
178}
179
180} // namespace Tegra
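
The YUV420 path above interleaves the decoder's separate U and V planes into one UV plane (NV12-style ordering). A standalone toy version of that inner loop, with made-up sample values:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<unsigned char> chroma_u{1, 2, 3, 4};
    const std::vector<unsigned char> chroma_v{5, 6, 7, 8};
    std::vector<unsigned char> interleaved(chroma_u.size() * 2);
    for (std::size_t x = 0; x < chroma_u.size(); ++x) {
        interleaved[x * 2] = chroma_u[x];     // U sample
        interleaved[x * 2 + 1] = chroma_v[x]; // V sample
    }
    for (const auto byte : interleaved) {
        std::printf("%d ", byte); // prints 1 5 2 6 3 7 4 8
    }
    std::printf("\n");
}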
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
new file mode 100644
index 000000000..dd0a2aed8
--- /dev/null
+++ b/src/video_core/command_classes/vic.h
@@ -0,0 +1,110 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
12struct SwsContext;
13
14namespace Tegra {
15class GPU;
16class Nvdec;
17
18struct PlaneOffsets {
19 u32 luma_offset{};
20 u32 chroma_u_offset{};
21 u32 chroma_v_offset{};
22};
23
24struct VicRegisters {
25 INSERT_PADDING_WORDS(64);
26 u32 nop{};
27 INSERT_PADDING_WORDS(15);
28 u32 pm_trigger{};
29 INSERT_PADDING_WORDS(47);
30 u32 set_application_id{};
31 u32 set_watchdog_timer{};
32 INSERT_PADDING_WORDS(17);
33 u32 context_save_area{};
34 u32 context_switch{};
35 INSERT_PADDING_WORDS(43);
36 u32 execute{};
37 INSERT_PADDING_WORDS(63);
38 std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
39 u32 picture_index{};
40 u32 control_params{};
41 u32 config_struct_offset{};
42 u32 filter_struct_offset{};
43 u32 palette_offset{};
44 u32 hist_offset{};
45 u32 context_id{};
46 u32 fce_ucode_size{};
47 PlaneOffsets output_surface{};
48 u32 fce_ucode_offset{};
49 INSERT_PADDING_WORDS(4);
50 std::array<u32, 8> slot_context_id{};
51 INSERT_PADDING_WORDS(16);
52};
53static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
54
55class Vic {
56public:
57 enum class Method : u32 {
58 Execute = 0xc0,
59 SetControlParams = 0x1c1,
60 SetConfigStructOffset = 0x1c2,
61 SetOutputSurfaceLumaOffset = 0x1c8,
62 SetOutputSurfaceChromaUOffset = 0x1c9,
63 SetOutputSurfaceChromaVOffset = 0x1ca
64 };
65
66 explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor);
67 ~Vic();
68
69 /// Write to the device state.
70 void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments);
71
72private:
73 void Execute();
74
75 void VicStateWrite(u32 offset, u32 arguments);
76 VicRegisters vic_state{};
77
78 enum class VideoPixelFormat : u64_le {
79 RGBA8 = 0x1f,
80 BGRA8 = 0x20,
81 Yuv420 = 0x44,
82 };
83
84 union VicConfig {
85 u64_le raw{};
86 BitField<0, 7, u64_le> pixel_format;
87 BitField<7, 2, u64_le> chroma_loc_horiz;
88 BitField<9, 2, u64_le> chroma_loc_vert;
89 BitField<11, 4, u64_le> block_linear_kind;
90 BitField<15, 4, u64_le> block_linear_height_log2;
91 BitField<19, 3, u64_le> reserved0;
92 BitField<22, 10, u64_le> reserved1;
93 BitField<32, 14, u64_le> surface_width_minus1;
94 BitField<46, 14, u64_le> surface_height_minus1;
95 };
96
97 GPU& gpu;
98 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
99
100 GPUVAddr config_struct_address{};
101 GPUVAddr output_surface_luma_address{};
102 GPUVAddr output_surface_chroma_u_address{};
103 GPUVAddr output_surface_chroma_v_address{};
104
105 SwsContext* scaler_ctx{};
106 s32 scaler_width{};
107 s32 scaler_height{};
108};
109
110} // namespace Tegra
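
A sketch of how the VicConfig bit fields above carve up the 64-bit config word: pixel_format in bits [0, 7), surface_width_minus1 in bits [32, 46), surface_height_minus1 in bits [46, 60). The raw value below is fabricated for the example:

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical raw config: pixel format 0x44 (Yuv420), 1280x720 surface.
    const uint64_t raw = 0x44ull | (1279ull << 32) | (719ull << 46);
    const uint64_t pixel_format = raw & 0x7F;           // bits 0..6
    const uint64_t width = ((raw >> 32) & 0x3FFF) + 1;  // bits 32..45
    const uint64_t height = ((raw >> 46) & 0x3FFF) + 1; // bits 46..59
    std::printf("fmt=0x%llX %llux%llu\n", static_cast<unsigned long long>(pixel_format),
                static_cast<unsigned long long>(width),
                static_cast<unsigned long long>(height)); // prints fmt=0x44 1280x720
}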
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4bb9256e9..171f78183 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,9 +27,10 @@ namespace Tegra {
27 27
28MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 28MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
29 29
30GPU::GPU(Core::System& system_, bool is_async_) 30GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
31 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 31 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
32 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, 32 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
33 cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
33 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 34 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
34 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 35 fermi_2d{std::make_unique<Engines::Fermi2D>()},
35 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 36 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
@@ -77,10 +78,18 @@ DmaPusher& GPU::DmaPusher() {
77 return *dma_pusher; 78 return *dma_pusher;
78} 79}
79 80
81Tegra::CDmaPusher& GPU::CDmaPusher() {
82 return *cdma_pusher;
83}
84
80const DmaPusher& GPU::DmaPusher() const { 85const DmaPusher& GPU::DmaPusher() const {
81 return *dma_pusher; 86 return *dma_pusher;
82} 87}
83 88
89const Tegra::CDmaPusher& GPU::CDmaPusher() const {
90 return *cdma_pusher;
91}
92
84void GPU::WaitFence(u32 syncpoint_id, u32 value) { 93void GPU::WaitFence(u32 syncpoint_id, u32 value) {
85 // Synced GPU, is always in sync 94 // Synced GPU, is always in sync
86 if (!is_async) { 95 if (!is_async) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2d15d1c6f..b8c613b11 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "core/hle/service/nvdrv/nvdata.h" 14#include "core/hle/service/nvdrv/nvdata.h"
15#include "core/hle/service/nvflinger/buffer_queue.h" 15#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "video_core/cdma_pusher.h"
16#include "video_core/dma_pusher.h" 17#include "video_core/dma_pusher.h"
17 18
18using CacheAddr = std::uintptr_t; 19using CacheAddr = std::uintptr_t;
@@ -157,7 +158,7 @@ public:
157 method_count(method_count) {} 158 method_count(method_count) {}
158 }; 159 };
159 160
160 explicit GPU(Core::System& system, bool is_async); 161 explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
161 virtual ~GPU(); 162 virtual ~GPU();
162 163
163 /// Binds a renderer to the GPU. 164 /// Binds a renderer to the GPU.
@@ -209,6 +210,15 @@ public:
209 /// Returns a reference to the GPU DMA pusher. 210 /// Returns a reference to the GPU DMA pusher.
210 Tegra::DmaPusher& DmaPusher(); 211 Tegra::DmaPusher& DmaPusher();
211 212
213 /// Returns a const reference to the GPU DMA pusher.
214 const Tegra::DmaPusher& DmaPusher() const;
215
216 /// Returns a reference to the GPU CDMA pusher.
217 Tegra::CDmaPusher& CDmaPusher();
218
219 /// Returns a const reference to the GPU CDMA pusher.
220 const Tegra::CDmaPusher& CDmaPusher() const;
221
212 VideoCore::RendererBase& Renderer() { 222 VideoCore::RendererBase& Renderer() {
213 return *renderer; 223 return *renderer;
214 } 224 }
@@ -249,8 +259,9 @@ public:
249 return is_async; 259 return is_async;
250 } 260 }
251 261
252 /// Returns a const reference to the GPU DMA pusher. 262 bool UseNvdec() const {
253 const Tegra::DmaPusher& DmaPusher() const; 263 return use_nvdec;
264 }
254 265
255 struct Regs { 266 struct Regs {
256 static constexpr size_t NUM_REGS = 0x40; 267 static constexpr size_t NUM_REGS = 0x40;
@@ -311,6 +322,9 @@ public:
311 /// Push GPU command entries to be processed 322 /// Push GPU command entries to be processed
312 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 323 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
313 324
325 /// Push GPU command buffer entries to be processed
326 virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
327
314 /// Swap buffers (render frame) 328 /// Swap buffers (render frame)
315 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 329 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
316 330
@@ -349,7 +363,9 @@ protected:
349 Core::System& system; 363 Core::System& system;
350 std::unique_ptr<Tegra::MemoryManager> memory_manager; 364 std::unique_ptr<Tegra::MemoryManager> memory_manager;
351 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 365 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
366 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
352 std::unique_ptr<VideoCore::RendererBase> renderer; 367 std::unique_ptr<VideoCore::RendererBase> renderer;
368 const bool use_nvdec;
353 369
354private: 370private:
355 /// Mapping of command subchannels to their bound engine ids 371 /// Mapping of command subchannels to their bound engine ids
@@ -372,6 +388,7 @@ private:
372 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; 388 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
373 389
374 std::mutex sync_mutex; 390 std::mutex sync_mutex;
391 std::mutex device_mutex;
375 392
376 std::condition_variable sync_cv; 393 std::condition_variable sync_cv;
377 394
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 70a3d5738..a9baaf7ef 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,12 +10,13 @@
10 10
11namespace VideoCommon { 11namespace VideoCommon {
12 12
13GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} 13GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec)
14 : GPU{system, true, use_nvdec}, gpu_thread{system} {}
14 15
15GPUAsynch::~GPUAsynch() = default; 16GPUAsynch::~GPUAsynch() = default;
16 17
17void GPUAsynch::Start() { 18void GPUAsynch::Start() {
18 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 19 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
19 cpu_context = renderer->GetRenderWindow().CreateSharedContext(); 20 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
20 cpu_context->MakeCurrent(); 21 cpu_context->MakeCurrent();
21} 22}
@@ -32,6 +33,27 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
32 gpu_thread.SubmitList(std::move(entries)); 33 gpu_thread.SubmitList(std::move(entries));
33} 34}
34 35
36void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
37 if (!use_nvdec) {
38 return;
39 }
40 // This condition fires when a video stream ends; clear all intermediary data
41 if (entries[0].raw == 0xDEADB33F) {
42 cdma_pusher.reset();
43 return;
44 }
45 if (!cdma_pusher) {
46 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
47 }
48
49 // SubmitCommandBuffer would make the nvdec operations async, but this is not currently working
50 // TODO(ameerj): RE proper async nvdec operation
51 // gpu_thread.SubmitCommandBuffer(std::move(entries));
52
53 cdma_pusher->Push(std::move(entries));
54 cdma_pusher->DispatchCalls();
55}
56
35void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 57void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
36 gpu_thread.SwapBuffers(framebuffer); 58 gpu_thread.SwapBuffers(framebuffer);
37} 59}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index f89c855a5..0c0872e73 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,13 +20,14 @@ namespace VideoCommon {
20/// Implementation of GPU interface that runs the GPU asynchronously 20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch final : public Tegra::GPU { 21class GPUAsynch final : public Tegra::GPU {
22public: 22public:
23 explicit GPUAsynch(Core::System& system); 23 explicit GPUAsynch(Core::System& system, bool use_nvdec);
24 ~GPUAsynch() override; 24 ~GPUAsynch() override;
25 25
26 void Start() override; 26 void Start() override;
27 void ObtainContext() override; 27 void ObtainContext() override;
28 void ReleaseContext() override; 28 void ReleaseContext() override;
29 void PushGPUEntries(Tegra::CommandList&& entries) override; 29 void PushGPUEntries(Tegra::CommandList&& entries) override;
30 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 31 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
31 void FlushRegion(VAddr addr, u64 size) override; 32 void FlushRegion(VAddr addr, u64 size) override;
32 void InvalidateRegion(VAddr addr, u64 size) override; 33 void InvalidateRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 1ca47ddef..ecf7bbdf3 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,7 +7,7 @@
7 7
8namespace VideoCommon { 8namespace VideoCommon {
9 9
10GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} 10GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {}
11 11
12GPUSynch::~GPUSynch() = default; 12GPUSynch::~GPUSynch() = default;
13 13
@@ -26,6 +26,22 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
26 dma_pusher->DispatchCalls(); 26 dma_pusher->DispatchCalls();
27} 27}
28 28
29void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
30 if (!use_nvdec) {
31 return;
32 }
33 // This condition fires when a video stream ends; clear all intermediary data
34 if (entries[0].raw == 0xDEADB33F) {
35 cdma_pusher.reset();
36 return;
37 }
38 if (!cdma_pusher) {
39 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
40 }
41 cdma_pusher->Push(std::move(entries));
42 cdma_pusher->DispatchCalls();
43}
44
29void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 45void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
30 renderer->SwapBuffers(framebuffer); 46 renderer->SwapBuffers(framebuffer);
31} 47}
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 297258cb1..9d778c71a 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,13 +19,14 @@ namespace VideoCommon {
19/// Implementation of GPU interface that runs the GPU synchronously 19/// Implementation of GPU interface that runs the GPU synchronously
20class GPUSynch final : public Tegra::GPU { 20class GPUSynch final : public Tegra::GPU {
21public: 21public:
22 explicit GPUSynch(Core::System& system); 22 explicit GPUSynch(Core::System& system, bool use_nvdec);
23 ~GPUSynch() override; 23 ~GPUSynch() override;
24 24
25 void Start() override; 25 void Start() override;
26 void ObtainContext() override; 26 void ObtainContext() override;
27 void ReleaseContext() override; 27 void ReleaseContext() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override; 28 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
30 void FlushRegion(VAddr addr, u64 size) override; 31 void FlushRegion(VAddr addr, u64 size) override;
31 void InvalidateRegion(VAddr addr, u64 size) override; 32 void InvalidateRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index bf761abf2..4b8f58283 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -18,7 +18,7 @@ namespace VideoCommon::GPUThread {
18/// Runs the GPU thread 18/// Runs the GPU thread
19static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, 19static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
20 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, 20 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
21 SynchState& state) { 21 SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
22 std::string name = "yuzu:GPU"; 22 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str()); 23 MicroProfileOnThreadCreate(name.c_str());
24 Common::SetCurrentThreadName(name.c_str()); 24 Common::SetCurrentThreadName(name.c_str());
@@ -42,6 +42,10 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
42 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 42 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
43 dma_pusher.Push(std::move(submit_list->entries)); 43 dma_pusher.Push(std::move(submit_list->entries));
44 dma_pusher.DispatchCalls(); 44 dma_pusher.DispatchCalls();
45 } else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
46 // NVDEC
47 cdma_pusher.Push(std::move(command_list->entries));
48 cdma_pusher.DispatchCalls();
45 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 49 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
46 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 50 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
47 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 51 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -75,15 +79,19 @@ ThreadManager::~ThreadManager() {
75 79
76void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 80void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
77 Core::Frontend::GraphicsContext& context, 81 Core::Frontend::GraphicsContext& context,
78 Tegra::DmaPusher& dma_pusher) { 82 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
79 thread = std::thread{RunThread, std::ref(system), std::ref(renderer), 83 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
80 std::ref(context), std::ref(dma_pusher), std::ref(state)}; 84 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
81} 85}
82 86
83void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 87void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
84 PushCommand(SubmitListCommand(std::move(entries))); 88 PushCommand(SubmitListCommand(std::move(entries)));
85} 89}
86 90
91void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) {
92 PushCommand(SubmitChCommandEntries(std::move(entries)));
93}
94
87void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 95void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
88 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 96 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
89} 97}
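RunThread drains a queue of CommandData variants, and the new branch routes NVDEC/VIC work to the CDmaPusher next to the existing DMA path. A condensed sketch of that dispatch with explanatory comments; every type comes from this diff, only the queue plumbing is omitted:

// Simplified shape of the RunThread branch added above.
if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
    dma_pusher.Push(std::move(submit_list->entries));   // 3D/compute GPU command lists
    dma_pusher.DispatchCalls();
} else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
    cdma_pusher.Push(std::move(command_list->entries)); // NVDEC/VIC channel command buffers
    cdma_pusher.DispatchCalls();
}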
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 5a28335d6..32a34e3a7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -37,6 +37,14 @@ struct SubmitListCommand final {
37 Tegra::CommandList entries; 37 Tegra::CommandList entries;
38}; 38};
39 39
40/// Command to signal to the GPU thread that a cdma command list is ready for processing
41struct SubmitChCommandEntries final {
42 explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries)
43 : entries{std::move(entries)} {}
44
45 Tegra::ChCommandHeaderList entries;
46};
47
40/// Command to signal to the GPU thread that a swap buffers is pending 48/// Command to signal to the GPU thread that a swap buffers is pending
41struct SwapBuffersCommand final { 49struct SwapBuffersCommand final {
42 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) 50 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
@@ -77,9 +85,9 @@ struct OnCommandListEndCommand final {};
77struct GPUTickCommand final {}; 85struct GPUTickCommand final {};
78 86
79using CommandData = 87using CommandData =
80 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, 88 std::variant<EndProcessingCommand, SubmitListCommand, SubmitChCommandEntries,
81 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, 89 SwapBuffersCommand, FlushRegionCommand, InvalidateRegionCommand,
82 GPUTickCommand>; 90 FlushAndInvalidateRegionCommand, OnCommandListEndCommand, GPUTickCommand>;
83 91
84struct CommandDataContainer { 92struct CommandDataContainer {
85 CommandDataContainer() = default; 93 CommandDataContainer() = default;
@@ -109,11 +117,14 @@ public:
109 117
110 /// Creates and starts the GPU thread. 118 /// Creates and starts the GPU thread.
111 void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, 119 void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
112 Tegra::DmaPusher& dma_pusher); 120 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher);
113 121
114 /// Push GPU command entries to be processed 122 /// Push GPU command entries to be processed
115 void SubmitList(Tegra::CommandList&& entries); 123 void SubmitList(Tegra::CommandList&& entries);
116 124
125 /// Push GPU CDMA command buffer entries to be processed
126 void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries);
127
117 /// Swap buffers (render frame) 128 /// Swap buffers (render frame)
118 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 129 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
119 130
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 02cf53d15..6e70bd362 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -11,6 +11,7 @@
11#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_base.h"
14 15
15namespace Tegra { 16namespace Tegra {
16 17
@@ -44,6 +45,12 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_
44 return Map(cpu_addr, *FindFreeRange(size, align), size); 45 return Map(cpu_addr, *FindFreeRange(size, align), size);
45} 46}
46 47
48GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
49 const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
50 ASSERT(gpu_addr);
51 return Map(cpu_addr, *gpu_addr, size);
52}
53
47void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { 54void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
48 if (!size) { 55 if (!size) {
49 return; 56 return;
@@ -108,7 +115,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
108 page_table[PageEntryIndex(gpu_addr)] = page_entry; 115 page_table[PageEntryIndex(gpu_addr)] = page_entry;
109} 116}
110 117
111std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const { 118std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
119 bool start_32bit_address) const {
112 if (!align) { 120 if (!align) {
113 align = page_size; 121 align = page_size;
114 } else { 122 } else {
@@ -116,7 +124,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
116 } 124 }
117 125
118 u64 available_size{}; 126 u64 available_size{};
119 GPUVAddr gpu_addr{address_space_start}; 127 GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
120 while (gpu_addr + available_size < address_space_size) { 128 while (gpu_addr + available_size < address_space_size) {
121 if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { 129 if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
122 available_size += page_size; 130 available_size += page_size;
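MapAllocate32 is the 32-bit counterpart of MapAllocate: FindFreeRange is told to start its search at the new address_space_start_low base instead of the usual 4 GiB heap base, so the returned GPU virtual address fits in 32 bits, which is presumably what the nvhost nvdec/vic buffer mappings added elsewhere in this change rely on. A hedged usage sketch; the call site and variable names are illustrative, only MapAllocate32 and ASSERT come from the diff:

// Hypothetical call site mapping a pinned CPU buffer into the low GPU address range.
Tegra::MemoryManager& mm = gpu.MemoryManager();
const GPUVAddr small_addr = mm.MapAllocate32(cpu_addr, size);
// The search starts at address_space_start_low (1 << 16), so the mapping is expected to
// stay below the 4 GiB mark, unlike MapAllocate which starts searching at 1 << 32.
ASSERT(small_addr + size <= (1ULL << 32));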
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 53c8d122a..c078193d9 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -116,6 +116,7 @@ public:
116 116
117 [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); 117 [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
118 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); 118 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
119 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
119 [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); 120 [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
120 [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); 121 [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
121 void Unmap(GPUVAddr gpu_addr, std::size_t size); 122 void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -124,7 +125,8 @@ private:
124 [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; 125 [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
125 void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); 126 void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
126 GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); 127 GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
127 [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; 128 [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
129 bool start_32bit_address = false) const;
128 130
129 void TryLockPage(PageEntry page_entry, std::size_t size); 131 void TryLockPage(PageEntry page_entry, std::size_t size);
130 void TryUnlockPage(PageEntry page_entry, std::size_t size); 132 void TryUnlockPage(PageEntry page_entry, std::size_t size);
@@ -135,6 +137,7 @@ private:
135 137
136 static constexpr u64 address_space_size = 1ULL << 40; 138 static constexpr u64 address_space_size = 1ULL << 40;
137 static constexpr u64 address_space_start = 1ULL << 32; 139 static constexpr u64 address_space_start = 1ULL << 32;
140 static constexpr u64 address_space_start_low = 1ULL << 16;
138 static constexpr u64 page_bits{16}; 141 static constexpr u64 page_bits{16};
139 static constexpr u64 page_size{1 << page_bits}; 142 static constexpr u64 page_size{1 << page_bits};
140 static constexpr u64 page_mask{page_size - 1}; 143 static constexpr u64 page_mask{page_size - 1};
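The new low base is 64 KiB, far below the existing 4 GiB heap base; a quick static_assert sketch spelling out the magnitudes of the constants above (values copied from this header):

// Magnitudes of the address-space constants, written out to make them easy to compare.
static_assert((1ULL << 16) == 0x1'0000);        // address_space_start_low: 64 KiB
static_assert((1ULL << 32) == 0x1'0000'0000);   // address_space_start:     4 GiB
static_assert((1ULL << 40) == 0x100'0000'0000); // address_space_size:      1 TiB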
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index a14df06a3..dd5cee4a1 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -44,10 +44,11 @@ namespace VideoCore {
44 44
45std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { 45std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
46 std::unique_ptr<Tegra::GPU> gpu; 46 std::unique_ptr<Tegra::GPU> gpu;
47 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
47 if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 48 if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
48 gpu = std::make_unique<VideoCommon::GPUAsynch>(system); 49 gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec);
49 } else { 50 } else {
50 gpu = std::make_unique<VideoCommon::GPUSynch>(system); 51 gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
51 } 52 }
52 53
53 auto context = emu_window.CreateSharedContext(); 54 auto context = emu_window.CreateSharedContext();
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index cc0291b15..4659e1f89 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -265,9 +265,11 @@ if (MSVC)
265 include(CopyYuzuQt5Deps) 265 include(CopyYuzuQt5Deps)
266 include(CopyYuzuSDLDeps) 266 include(CopyYuzuSDLDeps)
267 include(CopyYuzuUnicornDeps) 267 include(CopyYuzuUnicornDeps)
268 include(CopyYuzuFFmpegDeps)
268 copy_yuzu_Qt5_deps(yuzu) 269 copy_yuzu_Qt5_deps(yuzu)
269 copy_yuzu_SDL_deps(yuzu) 270 copy_yuzu_SDL_deps(yuzu)
270 copy_yuzu_unicorn_deps(yuzu) 271 copy_yuzu_unicorn_deps(yuzu)
272 copy_yuzu_FFmpeg_deps(yuzu)
271endif() 273endif()
272 274
273if (NOT APPLE) 275if (NOT APPLE)
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index d2913d613..abbc83929 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -717,6 +717,8 @@ void Config::ReadRendererValues() {
717 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); 717 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0);
718 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, 718 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
719 QStringLiteral("use_asynchronous_gpu_emulation"), false); 719 QStringLiteral("use_asynchronous_gpu_emulation"), false);
720 ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
721 true);
720 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); 722 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
721 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), 723 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
722 false); 724 false);
@@ -1265,6 +1267,8 @@ void Config::SaveRendererValues() {
1265 Settings::values.gpu_accuracy.UsingGlobal(), 0); 1267 Settings::values.gpu_accuracy.UsingGlobal(), 0);
1266 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), 1268 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
1267 Settings::values.use_asynchronous_gpu_emulation, false); 1269 Settings::values.use_asynchronous_gpu_emulation, false);
1270 WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
1271 true);
1268 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); 1272 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
1269 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), 1273 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
1270 Settings::values.use_assembly_shaders, false); 1274 Settings::values.use_assembly_shaders, false);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 07d818548..4f083ecda 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -70,9 +70,11 @@ void ConfigureGraphics::SetConfiguration() {
70 ui->api->setEnabled(runtime_lock); 70 ui->api->setEnabled(runtime_lock);
71 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); 71 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
72 ui->use_disk_shader_cache->setEnabled(runtime_lock); 72 ui->use_disk_shader_cache->setEnabled(runtime_lock);
73 ui->use_nvdec_emulation->setEnabled(runtime_lock);
73 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); 74 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
74 ui->use_asynchronous_gpu_emulation->setChecked( 75 ui->use_asynchronous_gpu_emulation->setChecked(
75 Settings::values.use_asynchronous_gpu_emulation.GetValue()); 76 Settings::values.use_asynchronous_gpu_emulation.GetValue());
77 ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
76 78
77 if (Settings::configuring_global) { 79 if (Settings::configuring_global) {
78 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); 80 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
@@ -116,6 +118,9 @@ void ConfigureGraphics::ApplyConfiguration() {
116 Settings::values.use_asynchronous_gpu_emulation.SetValue( 118 Settings::values.use_asynchronous_gpu_emulation.SetValue(
117 ui->use_asynchronous_gpu_emulation->isChecked()); 119 ui->use_asynchronous_gpu_emulation->isChecked());
118 } 120 }
121 if (Settings::values.use_nvdec_emulation.UsingGlobal()) {
122 Settings::values.use_nvdec_emulation.SetValue(ui->use_nvdec_emulation->isChecked());
123 }
119 if (Settings::values.bg_red.UsingGlobal()) { 124 if (Settings::values.bg_red.UsingGlobal()) {
120 Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF())); 125 Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF()));
121 Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF())); 126 Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF()));
@@ -144,6 +149,8 @@ void ConfigureGraphics::ApplyConfiguration() {
144 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, 149 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
145 ui->use_asynchronous_gpu_emulation, 150 ui->use_asynchronous_gpu_emulation,
146 use_asynchronous_gpu_emulation); 151 use_asynchronous_gpu_emulation);
152 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
153 ui->use_nvdec_emulation, use_nvdec_emulation);
147 154
148 if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { 155 if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
149 Settings::values.bg_red.SetGlobal(true); 156 Settings::values.bg_red.SetGlobal(true);
@@ -240,6 +247,7 @@ void ConfigureGraphics::SetupPerGameUI() {
240 ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); 247 ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
241 ui->use_asynchronous_gpu_emulation->setEnabled( 248 ui->use_asynchronous_gpu_emulation->setEnabled(
242 Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); 249 Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
250 ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
243 ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); 251 ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
244 ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); 252 ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
245 253
@@ -253,6 +261,8 @@ void ConfigureGraphics::SetupPerGameUI() {
253 261
254 ConfigurationShared::SetColoredTristate( 262 ConfigurationShared::SetColoredTristate(
255 ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); 263 ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
264 ConfigurationShared::SetColoredTristate(
265 ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
256 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, 266 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
257 Settings::values.use_asynchronous_gpu_emulation, 267 Settings::values.use_asynchronous_gpu_emulation,
258 use_asynchronous_gpu_emulation); 268 use_asynchronous_gpu_emulation);
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index b4961f719..1fefc88eb 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -46,6 +46,7 @@ private:
46 std::unique_ptr<Ui::ConfigureGraphics> ui; 46 std::unique_ptr<Ui::ConfigureGraphics> ui;
47 QColor bg_color; 47 QColor bg_color;
48 48
49 ConfigurationShared::CheckState use_nvdec_emulation;
49 ConfigurationShared::CheckState use_disk_shader_cache; 50 ConfigurationShared::CheckState use_disk_shader_cache;
50 ConfigurationShared::CheckState use_asynchronous_gpu_emulation; 51 ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
51 52
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 62aa337e7..58486eb1e 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -98,6 +98,13 @@
98 </widget> 98 </widget>
99 </item> 99 </item>
100 <item> 100 <item>
101 <widget class="QCheckBox" name="use_nvdec_emulation">
102 <property name="text">
103 <string>Use NVDEC emulation</string>
104 </property>
105 </widget>
106 </item>
107 <item>
101 <widget class="QWidget" name="aspect_ratio_layout" native="true"> 108 <widget class="QWidget" name="aspect_ratio_layout" native="true">
102 <layout class="QHBoxLayout" name="horizontalLayout_6"> 109 <layout class="QHBoxLayout" name="horizontalLayout_6">
103 <property name="leftMargin"> 110 <property name="leftMargin">