summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/kernel/errors.h2
-rw-r--r--src/core/hle/kernel/svc.cpp91
-rw-r--r--src/core/hle/kernel/thread.cpp5
-rw-r--r--src/core/hle/service/audio/hwopus.cpp37
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp7
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp72
-rw-r--r--src/video_core/engines/shader_bytecode.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp82
8 files changed, 298 insertions, 34 deletions
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index e5fa67ae8..885259618 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -22,6 +22,7 @@ enum {
22 HandleTableFull = 105, 22 HandleTableFull = 105,
23 InvalidMemoryState = 106, 23 InvalidMemoryState = 106,
24 InvalidMemoryPermissions = 108, 24 InvalidMemoryPermissions = 108,
25 InvalidMemoryRange = 110,
25 InvalidThreadPriority = 112, 26 InvalidThreadPriority = 112,
26 InvalidProcessorId = 113, 27 InvalidProcessorId = 113,
27 InvalidHandle = 114, 28 InvalidHandle = 114,
@@ -56,6 +57,7 @@ constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidA
56constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState); 57constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
57constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel, 58constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
58 ErrCodes::InvalidMemoryPermissions); 59 ErrCodes::InvalidMemoryPermissions);
60constexpr ResultCode ERR_INVALID_MEMORY_RANGE(ErrorModule::Kernel, ErrCodes::InvalidMemoryRange);
59constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle); 61constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
60constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId); 62constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
61constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize); 63constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 3afcce3fe..3e4dd61dc 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -39,6 +39,73 @@ namespace {
39constexpr bool Is4KBAligned(VAddr address) { 39constexpr bool Is4KBAligned(VAddr address) {
40 return (address & 0xFFF) == 0; 40 return (address & 0xFFF) == 0;
41} 41}
42
43// Checks if address + size is greater than the given address
44// This can return false if the size causes an overflow of a 64-bit type
45// or if the given size is zero.
46constexpr bool IsValidAddressRange(VAddr address, u64 size) {
47 return address + size > address;
48}
49
50// Checks if a given address range lies within a larger address range.
51constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
52 VAddr address_range_end) {
53 const VAddr end_address = address + size - 1;
54 return address_range_begin <= address && end_address <= address_range_end - 1;
55}
56
57bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
58 return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
59 vm.GetAddressSpaceEndAddress());
60}
61
62bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
63 return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
64 vm.GetNewMapRegionEndAddress());
65}
66
67// Helper function that performs the common sanity checks for svcMapMemory
68// and svcUnmapMemory. This is doable, as both functions perform their sanitizing
69// in the same order.
70ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr,
71 u64 size) {
72 if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
73 return ERR_INVALID_ADDRESS;
74 }
75
76 if (size == 0 || !Is4KBAligned(size)) {
77 return ERR_INVALID_SIZE;
78 }
79
80 if (!IsValidAddressRange(dst_addr, size)) {
81 return ERR_INVALID_ADDRESS_STATE;
82 }
83
84 if (!IsValidAddressRange(src_addr, size)) {
85 return ERR_INVALID_ADDRESS_STATE;
86 }
87
88 if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
89 return ERR_INVALID_ADDRESS_STATE;
90 }
91
92 if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
93 return ERR_INVALID_MEMORY_RANGE;
94 }
95
96 const VAddr dst_end_address = dst_addr + size;
97 if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() &&
98 dst_addr < vm_manager.GetHeapRegionEndAddress()) {
99 return ERR_INVALID_MEMORY_RANGE;
100 }
101
102 if (dst_end_address > vm_manager.GetNewMapRegionBaseAddress() &&
103 dst_addr < vm_manager.GetMapRegionEndAddress()) {
104 return ERR_INVALID_MEMORY_RANGE;
105 }
106
107 return RESULT_SUCCESS;
108}
42} // Anonymous namespace 109} // Anonymous namespace
43 110
44/// Set the process heap to a given Size. It can both extend and shrink the heap. 111/// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -69,15 +136,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
69 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 136 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
70 src_addr, size); 137 src_addr, size);
71 138
72 if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) { 139 auto* const current_process = Core::CurrentProcess();
73 return ERR_INVALID_ADDRESS; 140 const auto& vm_manager = current_process->VMManager();
74 }
75 141
76 if (size == 0 || !Is4KBAligned(size)) { 142 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
77 return ERR_INVALID_SIZE; 143 if (result != RESULT_SUCCESS) {
144 return result;
78 } 145 }
79 146
80 return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size); 147 return current_process->MirrorMemory(dst_addr, src_addr, size);
81} 148}
82 149
83/// Unmaps a region that was previously mapped with svcMapMemory 150/// Unmaps a region that was previously mapped with svcMapMemory
@@ -85,15 +152,15 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
85 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, 152 LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
86 src_addr, size); 153 src_addr, size);
87 154
88 if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) { 155 auto* const current_process = Core::CurrentProcess();
89 return ERR_INVALID_ADDRESS; 156 const auto& vm_manager = current_process->VMManager();
90 }
91 157
92 if (size == 0 || !Is4KBAligned(size)) { 158 const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
93 return ERR_INVALID_SIZE; 159 if (result != RESULT_SUCCESS) {
160 return result;
94 } 161 }
95 162
96 return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size); 163 return current_process->UnmapMemory(dst_addr, src_addr, size);
97} 164}
98 165
99/// Connect to an OS service given the port name, returns the handle to the port to out 166/// Connect to an OS service given the port name, returns the handle to the port to out
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 33aed8c23..352ce1725 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -183,13 +183,10 @@ void Thread::ResumeFromWait() {
183 */ 183 */
184static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, 184static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top,
185 VAddr entry_point, u64 arg) { 185 VAddr entry_point, u64 arg) {
186 memset(&context, 0, sizeof(Core::ARM_Interface::ThreadContext)); 186 context = {};
187
188 context.cpu_registers[0] = arg; 187 context.cpu_registers[0] = arg;
189 context.pc = entry_point; 188 context.pc = entry_point;
190 context.sp = stack_top; 189 context.sp = stack_top;
191 context.pstate = 0;
192 context.fpcr = 0;
193} 190}
194 191
195ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point, 192ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index fc6067e59..7168c6a10 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -2,8 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <chrono>
5#include <cstring> 6#include <cstring>
6#include <memory> 7#include <memory>
8#include <optional>
7#include <vector> 9#include <vector>
8 10
9#include <opus.h> 11#include <opus.h>
@@ -33,7 +35,8 @@ public:
33 {1, nullptr, "SetContext"}, 35 {1, nullptr, "SetContext"},
34 {2, nullptr, "DecodeInterleavedForMultiStream"}, 36 {2, nullptr, "DecodeInterleavedForMultiStream"},
35 {3, nullptr, "SetContextForMultiStream"}, 37 {3, nullptr, "SetContextForMultiStream"},
36 {4, nullptr, "Unknown4"}, 38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
39 "DecodeInterleavedWithPerformance"},
37 {5, nullptr, "Unknown5"}, 40 {5, nullptr, "Unknown5"},
38 {6, nullptr, "Unknown6"}, 41 {6, nullptr, "Unknown6"},
39 {7, nullptr, "Unknown7"}, 42 {7, nullptr, "Unknown7"},
@@ -59,8 +62,31 @@ private:
59 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 62 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
60 } 63 }
61 64
62 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 65 void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
63 std::vector<opus_int16>& output) { 66 u32 consumed = 0;
67 u32 sample_count = 0;
68 u64 performance = 0;
69 std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
70 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
71 performance)) {
72 IPC::ResponseBuilder rb{ctx, 2};
73 // TODO(ogniK): Use correct error code
74 rb.Push(ResultCode(-1));
75 return;
76 }
77 IPC::ResponseBuilder rb{ctx, 6};
78 rb.Push(RESULT_SUCCESS);
79 rb.Push<u32>(consumed);
80 rb.Push<u64>(performance);
81 rb.Push<u32>(sample_count);
82 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
83 }
84
85 bool Decoder_DecodeInterleaved(
86 u32& consumed, u32& sample_count, const std::vector<u8>& input,
87 std::vector<opus_int16>& output,
88 std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
89 const auto start_time = std::chrono::high_resolution_clock::now();
64 std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 90 std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
65 if (sizeof(OpusHeader) > input.size()) 91 if (sizeof(OpusHeader) > input.size())
66 return false; 92 return false;
@@ -80,8 +106,13 @@ private:
80 (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)), 0); 106 (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)), 0);
81 if (out_sample_count < 0) 107 if (out_sample_count < 0)
82 return false; 108 return false;
109 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
83 sample_count = out_sample_count; 110 sample_count = out_sample_count;
84 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 111 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
112 if (performance_time.has_value()) {
113 performance_time->get() =
114 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
115 }
85 return true; 116 return true;
86 } 117 }
87 118
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 884837b17..c41ef7058 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -174,10 +174,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
174 auto& system_instance = Core::System::GetInstance(); 174 auto& system_instance = Core::System::GetInstance();
175 175
176 // Remove this memory region from the rasterizer cache. 176 // Remove this memory region from the rasterizer cache.
177 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset,
178 itr->second.size);
179
180 auto& gpu = system_instance.GPU(); 177 auto& gpu = system_instance.GPU();
178 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
179 ASSERT(cpu_addr);
180 system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
181
181 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 182 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
182 183
183 buffer_mappings.erase(itr->second.offset); 184 buffer_mappings.erase(itr->second.offset);
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index a2287cc1b..43651d8a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -11,6 +11,13 @@
11 11
12namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
13 13
14namespace NvErrCodes {
15enum {
16 OperationNotPermitted = -1,
17 InvalidValue = -22,
18};
19}
20
14nvmap::nvmap() = default; 21nvmap::nvmap() = default;
15nvmap::~nvmap() = default; 22nvmap::~nvmap() = default;
16 23
@@ -44,7 +51,11 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& o
44u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { 51u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
45 IocCreateParams params; 52 IocCreateParams params;
46 std::memcpy(&params, input.data(), sizeof(params)); 53 std::memcpy(&params, input.data(), sizeof(params));
54 LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
47 55
56 if (!params.size) {
57 return static_cast<u32>(NvErrCodes::InvalidValue);
58 }
48 // Create a new nvmap object and obtain a handle to it. 59 // Create a new nvmap object and obtain a handle to it.
49 auto object = std::make_shared<Object>(); 60 auto object = std::make_shared<Object>();
50 object->id = next_id++; 61 object->id = next_id++;
@@ -55,8 +66,6 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
55 u32 handle = next_handle++; 66 u32 handle = next_handle++;
56 handles[handle] = std::move(object); 67 handles[handle] = std::move(object);
57 68
58 LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
59
60 params.handle = handle; 69 params.handle = handle;
61 70
62 std::memcpy(output.data(), &params, sizeof(params)); 71 std::memcpy(output.data(), &params, sizeof(params));
@@ -66,9 +75,29 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
66u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) { 75u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
67 IocAllocParams params; 76 IocAllocParams params;
68 std::memcpy(&params, input.data(), sizeof(params)); 77 std::memcpy(&params, input.data(), sizeof(params));
78 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
79
80 if (!params.handle) {
81 return static_cast<u32>(NvErrCodes::InvalidValue);
82 }
83
84 if ((params.align - 1) & params.align) {
85 return static_cast<u32>(NvErrCodes::InvalidValue);
86 }
87
88 const u32 min_alignment = 0x1000;
89 if (params.align < min_alignment) {
90 params.align = min_alignment;
91 }
69 92
70 auto object = GetObject(params.handle); 93 auto object = GetObject(params.handle);
71 ASSERT(object); 94 if (!object) {
95 return static_cast<u32>(NvErrCodes::InvalidValue);
96 }
97
98 if (object->status == Object::Status::Allocated) {
99 return static_cast<u32>(NvErrCodes::OperationNotPermitted);
100 }
72 101
73 object->flags = params.flags; 102 object->flags = params.flags;
74 object->align = params.align; 103 object->align = params.align;
@@ -76,8 +105,6 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
76 object->addr = params.addr; 105 object->addr = params.addr;
77 object->status = Object::Status::Allocated; 106 object->status = Object::Status::Allocated;
78 107
79 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
80
81 std::memcpy(output.data(), &params, sizeof(params)); 108 std::memcpy(output.data(), &params, sizeof(params));
82 return 0; 109 return 0;
83} 110}
@@ -88,8 +115,14 @@ u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
88 115
89 LOG_WARNING(Service_NVDRV, "called"); 116 LOG_WARNING(Service_NVDRV, "called");
90 117
118 if (!params.handle) {
119 return static_cast<u32>(NvErrCodes::InvalidValue);
120 }
121
91 auto object = GetObject(params.handle); 122 auto object = GetObject(params.handle);
92 ASSERT(object); 123 if (!object) {
124 return static_cast<u32>(NvErrCodes::OperationNotPermitted);
125 }
93 126
94 params.id = object->id; 127 params.id = object->id;
95 128
@@ -105,7 +138,14 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
105 138
106 auto itr = std::find_if(handles.begin(), handles.end(), 139 auto itr = std::find_if(handles.begin(), handles.end(),
107 [&](const auto& entry) { return entry.second->id == params.id; }); 140 [&](const auto& entry) { return entry.second->id == params.id; });
108 ASSERT(itr != handles.end()); 141 if (itr == handles.end()) {
142 return static_cast<u32>(NvErrCodes::InvalidValue);
143 }
144
145 auto& object = itr->second;
146 if (object->status != Object::Status::Allocated) {
147 return static_cast<u32>(NvErrCodes::InvalidValue);
148 }
109 149
110 itr->second->refcount++; 150 itr->second->refcount++;
111 151
@@ -125,8 +165,13 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
125 LOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param); 165 LOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param);
126 166
127 auto object = GetObject(params.handle); 167 auto object = GetObject(params.handle);
128 ASSERT(object); 168 if (!object) {
129 ASSERT(object->status == Object::Status::Allocated); 169 return static_cast<u32>(NvErrCodes::InvalidValue);
170 }
171
172 if (object->status != Object::Status::Allocated) {
173 return static_cast<u32>(NvErrCodes::OperationNotPermitted);
174 }
130 175
131 switch (static_cast<ParamTypes>(params.param)) { 176 switch (static_cast<ParamTypes>(params.param)) {
132 case ParamTypes::Size: 177 case ParamTypes::Size:
@@ -163,9 +208,12 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
163 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 208 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
164 209
165 auto itr = handles.find(params.handle); 210 auto itr = handles.find(params.handle);
166 ASSERT(itr != handles.end()); 211 if (itr == handles.end()) {
167 212 return static_cast<u32>(NvErrCodes::InvalidValue);
168 ASSERT(itr->second->refcount > 0); 213 }
214 if (!itr->second->refcount) {
215 return static_cast<u32>(NvErrCodes::InvalidValue);
216 }
169 217
170 itr->second->refcount--; 218 itr->second->refcount--;
171 219
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 550ab1148..9a59b65b3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
214 XHi = 3, 214 XHi = 3,
215}; 215};
216 216
217enum class VmadType : u64 {
218 Size16_Low = 0,
219 Size16_High = 1,
220 Size32 = 2,
221 Invalid = 3,
222};
223
224enum class VmadShr : u64 {
225 Shr7 = 1,
226 Shr15 = 2,
227};
228
217enum class XmadMode : u64 { 229enum class XmadMode : u64 {
218 None = 0, 230 None = 0,
219 CLo = 1, 231 CLo = 1,
@@ -452,6 +464,7 @@ union Instruction {
452 BitField<48, 16, u64> opcode; 464 BitField<48, 16, u64> opcode;
453 465
454 union { 466 union {
467 BitField<20, 16, u64> imm20_16;
455 BitField<20, 19, u64> imm20_19; 468 BitField<20, 19, u64> imm20_19;
456 BitField<20, 32, s64> imm20_32; 469 BitField<20, 32, s64> imm20_32;
457 BitField<45, 1, u64> negate_b; 470 BitField<45, 1, u64> negate_b;
@@ -493,6 +506,10 @@ union Instruction {
493 } 506 }
494 } lop3; 507 } lop3;
495 508
509 u16 GetImm20_16() const {
510 return static_cast<u16>(imm20_16);
511 }
512
496 u32 GetImm20_19() const { 513 u32 GetImm20_19() const {
497 u32 imm{static_cast<u32>(imm20_19)}; 514 u32 imm{static_cast<u32>(imm20_19)};
498 imm <<= 12; 515 imm <<= 12;
@@ -1017,6 +1034,23 @@ union Instruction {
1017 } isberd; 1034 } isberd;
1018 1035
1019 union { 1036 union {
1037 BitField<48, 1, u64> signed_a;
1038 BitField<38, 1, u64> is_byte_chunk_a;
1039 BitField<36, 2, VmadType> type_a;
1040 BitField<36, 2, u64> byte_height_a;
1041
1042 BitField<49, 1, u64> signed_b;
1043 BitField<50, 1, u64> use_register_b;
1044 BitField<30, 1, u64> is_byte_chunk_b;
1045 BitField<28, 2, VmadType> type_b;
1046 BitField<28, 2, u64> byte_height_b;
1047
1048 BitField<51, 2, VmadShr> shr;
1049 BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
1050 BitField<47, 1, u64> cc;
1051 } vmad;
1052
1053 union {
1020 BitField<20, 16, u64> imm20_16; 1054 BitField<20, 16, u64> imm20_16;
1021 BitField<36, 1, u64> product_shift_left; 1055 BitField<36, 1, u64> product_shift_left;
1022 BitField<37, 1, u64> merge_37; 1056 BitField<37, 1, u64> merge_37;
@@ -1083,6 +1117,7 @@ public:
1083 IPA, 1117 IPA,
1084 OUT_R, // Emit vertex/primitive 1118 OUT_R, // Emit vertex/primitive
1085 ISBERD, 1119 ISBERD,
1120 VMAD,
1086 FFMA_IMM, // Fused Multiply and Add 1121 FFMA_IMM, // Fused Multiply and Add
1087 FFMA_CR, 1122 FFMA_CR,
1088 FFMA_RC, 1123 FFMA_RC,
@@ -1320,6 +1355,7 @@ private:
1320 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1355 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1321 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1356 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1322 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1357 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
1358 INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
1323 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 1359 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
1324 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 1360 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
1325 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 1361 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c82a0dcfa..8dfb49507 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
2953 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); 2953 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
2954 break; 2954 break;
2955 } 2955 }
2956 case OpCode::Id::VMAD: {
2957 const bool signed_a = instr.vmad.signed_a == 1;
2958 const bool signed_b = instr.vmad.signed_b == 1;
2959 const bool result_signed = signed_a || signed_b;
2960 boost::optional<std::string> forced_result;
2961
2962 auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
2963 Tegra::Shader::VmadType type, u64 byte_height) {
2964 const std::string value = [&]() {
2965 if (!is_chunk) {
2966 const auto offset = static_cast<u32>(byte_height * 8);
2967 return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
2968 }
2969 const std::string zero = "0";
2970
2971 switch (type) {
2972 case Tegra::Shader::VmadType::Size16_Low:
2973 return '(' + op + " & 0xffff)";
2974 case Tegra::Shader::VmadType::Size16_High:
2975 return '(' + op + " >> 16)";
2976 case Tegra::Shader::VmadType::Size32:
2977 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
2978 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
2979 // explanation is found: assert.
2980 UNREACHABLE_MSG("Unimplemented");
2981 return zero;
2982 case Tegra::Shader::VmadType::Invalid:
2983 // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
2984 // testing (even though it's invalid) this makes the whole instruction
2985 // assign zero to target register.
2986 forced_result = boost::make_optional(zero);
2987 return zero;
2988 default:
2989 UNREACHABLE();
2990 return zero;
2991 }
2992 }();
2993
2994 if (is_signed) {
2995 return "int(" + value + ')';
2996 }
2997 return value;
2998 };
2999
3000 const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
3001 instr.vmad.is_byte_chunk_a != 0, signed_a,
3002 instr.vmad.type_a, instr.vmad.byte_height_a);
3003
3004 std::string op_b;
3005 if (instr.vmad.use_register_b) {
3006 op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3007 instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
3008 instr.vmad.byte_height_b);
3009 } else {
3010 op_b = '(' +
3011 std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
3012 : instr.alu.GetImm20_16()) +
3013 ')';
3014 }
3015
3016 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
3017
3018 std::string result;
3019 if (forced_result) {
3020 result = *forced_result;
3021 } else {
3022 result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
3023
3024 switch (instr.vmad.shr) {
3025 case Tegra::Shader::VmadShr::Shr7:
3026 result = '(' + result + " >> 7)";
3027 break;
3028 case Tegra::Shader::VmadShr::Shr15:
3029 result = '(' + result + " >> 15)";
3030 break;
3031 }
3032 }
3033 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
3034 instr.vmad.saturate == 1, 0, Register::Size::Word,
3035 instr.vmad.cc);
3036 break;
3037 }
2956 default: { 3038 default: {
2957 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); 3039 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
2958 UNREACHABLE(); 3040 UNREACHABLE();