author      bunnei                                      2018-04-14 20:40:39 -0400
committer   GitHub                                      2018-04-14 20:40:39 -0400
commit      fdca7b5f7a4ca626c15e70ae6f684e88686277f5 (patch)
tree        57b8c1f1952c53d54a0c14b00543dd21302d661b /src
parent      Merge pull request #323 from Hexagon12/stub-hid (diff)
parent      shaders: Add NumTextureSamplers const, remove unused #pragma. (diff)
download    yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.tar.gz
            yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.tar.xz
            yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.zip
Merge pull request #329 from bunnei/shader-gen-part-1
OpenGL shader generation part 1
Diffstat (limited to 'src')
-rw-r--r--  src/common/CMakeLists.txt                                |   3
-rw-r--r--  src/common/bit_field.h                                   |  10
-rw-r--r--  src/common/cityhash.cpp                                  | 340
-rw-r--r--  src/common/cityhash.h                                    | 110
-rw-r--r--  src/common/hash.cpp                                      | 141
-rw-r--r--  src/common/hash.h                                        |  55
-rw-r--r--  src/video_core/CMakeLists.txt                            |   3
-rw-r--r--  src/video_core/engines/maxwell_3d.h                      |  12
-rw-r--r--  src/video_core/engines/shader_bytecode.h                 | 327
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp         | 246
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h           |  64
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp   |   2
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h     |   2
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h     | 116
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp  | 514
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h    |  23
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp         |   4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.h           |  75
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp     |  65
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h       | 151
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.cpp        | 169
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.h          |  56
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h           |  10
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp       |  12
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h         |   2
-rw-r--r--  src/video_core/utils.h                                   |   2
26 files changed, 1872 insertions(+), 642 deletions(-)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2ba1da195..d6eb9055b 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_library(common STATIC
     break_points.cpp
     break_points.h
     chunk_file.h
+    cityhash.cpp
+    cityhash.h
     code_block.h
     color.h
     common_funcs.h
@@ -39,7 +41,6 @@ add_library(common STATIC
     common_types.h
     file_util.cpp
     file_util.h
-    hash.cpp
     hash.h
     linear_disk_cache.h
     logging/backend.cpp
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 0cc0a1be0..5638bdbba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -115,7 +115,7 @@ private:
     // assignment would copy the full storage value, rather than just the bits
     // relevant to this particular bit field.
     // We don't delete it because we want BitField to be trivially copyable.
-    BitField& operator=(const BitField&) = default;
+    constexpr BitField& operator=(const BitField&) = default;

     // StorageType is T for non-enum types and the underlying type of T if
     // T is an enumeration. Note that T is wrapped within an enable_if in the
@@ -166,20 +166,20 @@ public:
     // so that we can use this within unions
     constexpr BitField() = default;

-    FORCE_INLINE operator T() const {
+    constexpr FORCE_INLINE operator T() const {
         return Value();
     }

-    FORCE_INLINE void Assign(const T& value) {
+    constexpr FORCE_INLINE void Assign(const T& value) {
         storage = (storage & ~mask) | FormatValue(value);
     }

-    FORCE_INLINE T Value() const {
+    constexpr T Value() const {
         return ExtractValue(storage);
     }

     // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
-    FORCE_INLINE bool ToBool() const {
+    constexpr FORCE_INLINE bool ToBool() const {
         return Value() != 0;
     }

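Note on the change above: marking the accessors constexpr lets packed words be decoded in constant expressions. A minimal stand-alone sketch of the same idea, using a hypothetical ExtractBits helper in place of the real BitField template so it compiles without common/bit_field.h:

#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring BitField<Position, Bits, u64>::Value():
// extract Bits bits starting at Position from a packed 64-bit word.
template <std::size_t Position, std::size_t Bits>
constexpr std::uint64_t ExtractBits(std::uint64_t storage) {
    return (storage >> Position) & ((std::uint64_t{1} << Bits) - 1);
}

// Because the accessor is constexpr, decoding can be verified at compile time.
static_assert(ExtractBits<4, 4>(0xF0) == 0xF, "bits 4..7 of 0xF0 are 0xF");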
diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp
new file mode 100644
index 000000000..de31ffbd8
--- /dev/null
+++ b/src/common/cityhash.cpp
@@ -0,0 +1,340 @@
1// Copyright (c) 2011 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// CityHash, by Geoff Pike and Jyrki Alakuijala
22//
23// This file provides CityHash64() and related functions.
24//
25// It's probably possible to create even faster hash functions by
26// writing a program that systematically explores some of the space of
27// possible hash functions, by using SIMD instructions, or by
28// compromising on hash quality.
29
30#include <algorithm>
31#include <string.h> // for memcpy and memset
32#include "cityhash.h"
33#include "common/swap.h"
34
35// #include "config.h"
36#ifdef __GNUC__
37#define HAVE_BUILTIN_EXPECT 1
38#endif
39#ifdef COMMON_BIG_ENDIAN
40#define WORDS_BIGENDIAN 1
41#endif
42
43using namespace std;
44
45typedef uint8_t uint8;
46typedef uint32_t uint32;
47typedef uint64_t uint64;
48
49namespace Common {
50
51static uint64 UNALIGNED_LOAD64(const char* p) {
52 uint64 result;
53 memcpy(&result, p, sizeof(result));
54 return result;
55}
56
57static uint32 UNALIGNED_LOAD32(const char* p) {
58 uint32 result;
59 memcpy(&result, p, sizeof(result));
60 return result;
61}
62
63#ifdef WORDS_BIGENDIAN
64#define uint32_in_expected_order(x) (swap32(x))
65#define uint64_in_expected_order(x) (swap64(x))
66#else
67#define uint32_in_expected_order(x) (x)
68#define uint64_in_expected_order(x) (x)
69#endif
70
71#if !defined(LIKELY)
72#if HAVE_BUILTIN_EXPECT
73#define LIKELY(x) (__builtin_expect(!!(x), 1))
74#else
75#define LIKELY(x) (x)
76#endif
77#endif
78
79static uint64 Fetch64(const char* p) {
80 return uint64_in_expected_order(UNALIGNED_LOAD64(p));
81}
82
83static uint32 Fetch32(const char* p) {
84 return uint32_in_expected_order(UNALIGNED_LOAD32(p));
85}
86
87// Some primes between 2^63 and 2^64 for various uses.
88static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
89static const uint64 k1 = 0xb492b66fbe98f273ULL;
90static const uint64 k2 = 0x9ae16a3b2f90404fULL;
91
92// Bitwise right rotate. Normally this will compile to a single
93// instruction, especially if the shift is a manifest constant.
94static uint64 Rotate(uint64 val, int shift) {
95 // Avoid shifting by 64: doing so yields an undefined result.
96 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
97}
98
99static uint64 ShiftMix(uint64 val) {
100 return val ^ (val >> 47);
101}
102
103static uint64 HashLen16(uint64 u, uint64 v) {
104 return Hash128to64(uint128(u, v));
105}
106
107static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
108 // Murmur-inspired hashing.
109 uint64 a = (u ^ v) * mul;
110 a ^= (a >> 47);
111 uint64 b = (v ^ a) * mul;
112 b ^= (b >> 47);
113 b *= mul;
114 return b;
115}
116
117static uint64 HashLen0to16(const char* s, size_t len) {
118 if (len >= 8) {
119 uint64 mul = k2 + len * 2;
120 uint64 a = Fetch64(s) + k2;
121 uint64 b = Fetch64(s + len - 8);
122 uint64 c = Rotate(b, 37) * mul + a;
123 uint64 d = (Rotate(a, 25) + b) * mul;
124 return HashLen16(c, d, mul);
125 }
126 if (len >= 4) {
127 uint64 mul = k2 + len * 2;
128 uint64 a = Fetch32(s);
129 return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
130 }
131 if (len > 0) {
132 uint8 a = s[0];
133 uint8 b = s[len >> 1];
134 uint8 c = s[len - 1];
135 uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
136 uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
137 return ShiftMix(y * k2 ^ z * k0) * k2;
138 }
139 return k2;
140}
141
142// This probably works well for 16-byte strings as well, but it may be overkill
143// in that case.
144static uint64 HashLen17to32(const char* s, size_t len) {
145 uint64 mul = k2 + len * 2;
146 uint64 a = Fetch64(s) * k1;
147 uint64 b = Fetch64(s + 8);
148 uint64 c = Fetch64(s + len - 8) * mul;
149 uint64 d = Fetch64(s + len - 16) * k2;
150 return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul);
151}
152
153// Return a 16-byte hash for 48 bytes. Quick and dirty.
154// Callers do best to use "random-looking" values for a and b.
155static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
156 uint64 b) {
157 a += w;
158 b = Rotate(b + a + z, 21);
159 uint64 c = a;
160 a += x;
161 a += y;
162 b += Rotate(a, 44);
163 return make_pair(a + z, b + c);
164}
165
166// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
167static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
168 return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
169 b);
170}
171
172// Return an 8-byte hash for 33 to 64 bytes.
173static uint64 HashLen33to64(const char* s, size_t len) {
174 uint64 mul = k2 + len * 2;
175 uint64 a = Fetch64(s) * k2;
176 uint64 b = Fetch64(s + 8);
177 uint64 c = Fetch64(s + len - 24);
178 uint64 d = Fetch64(s + len - 32);
179 uint64 e = Fetch64(s + 16) * k2;
180 uint64 f = Fetch64(s + 24) * 9;
181 uint64 g = Fetch64(s + len - 8);
182 uint64 h = Fetch64(s + len - 16) * mul;
183 uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
184 uint64 v = ((a + g) ^ d) + f + 1;
185 uint64 w = swap64((u + v) * mul) + h;
186 uint64 x = Rotate(e + f, 42) + c;
187 uint64 y = (swap64((v + w) * mul) + g) * mul;
188 uint64 z = e + f + c;
189 a = swap64((x + z) * mul + y) + b;
190 b = ShiftMix((z + a) * mul + d + h) * mul;
191 return b + x;
192}
193
194uint64 CityHash64(const char* s, size_t len) {
195 if (len <= 32) {
196 if (len <= 16) {
197 return HashLen0to16(s, len);
198 } else {
199 return HashLen17to32(s, len);
200 }
201 } else if (len <= 64) {
202 return HashLen33to64(s, len);
203 }
204
205 // For strings over 64 bytes we hash the end first, and then as we
206 // loop we keep 56 bytes of state: v, w, x, y, and z.
207 uint64 x = Fetch64(s + len - 40);
208 uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
209 uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
210 pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
211 pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
212 x = x * k1 + Fetch64(s);
213
214 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
215 len = (len - 1) & ~static_cast<size_t>(63);
216 do {
217 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
218 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
219 x ^= w.second;
220 y += v.first + Fetch64(s + 40);
221 z = Rotate(z + w.first, 33) * k1;
222 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
223 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
224 std::swap(z, x);
225 s += 64;
226 len -= 64;
227 } while (len != 0);
228 return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
229 HashLen16(v.second, w.second) + x);
230}
231
232uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
233 return CityHash64WithSeeds(s, len, k2, seed);
234}
235
236uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
237 return HashLen16(CityHash64(s, len) - seed0, seed1);
238}
239
240// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
241// of any length representable in signed long. Based on City and Murmur.
242static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
243 uint64 a = Uint128Low64(seed);
244 uint64 b = Uint128High64(seed);
245 uint64 c = 0;
246 uint64 d = 0;
247 signed long l = static_cast<long>(len) - 16;
248 if (l <= 0) { // len <= 16
249 a = ShiftMix(a * k1) * k1;
250 c = b * k1 + HashLen0to16(s, len);
251 d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
252 } else { // len > 16
253 c = HashLen16(Fetch64(s + len - 8) + k1, a);
254 d = HashLen16(b + len, c + Fetch64(s + len - 16));
255 a += d;
256 do {
257 a ^= ShiftMix(Fetch64(s) * k1) * k1;
258 a *= k1;
259 b ^= a;
260 c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
261 c *= k1;
262 d ^= c;
263 s += 16;
264 l -= 16;
265 } while (l > 0);
266 }
267 a = HashLen16(a, c);
268 b = HashLen16(d, b);
269 return uint128(a ^ b, HashLen16(b, a));
270}
271
272uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
273 if (len < 128) {
274 return CityMurmur(s, len, seed);
275 }
276
277 // We expect len >= 128 to be the common case. Keep 56 bytes of state:
278 // v, w, x, y, and z.
279 pair<uint64, uint64> v, w;
280 uint64 x = Uint128Low64(seed);
281 uint64 y = Uint128High64(seed);
282 uint64 z = len * k1;
283 v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
284 v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
285 w.first = Rotate(y + z, 35) * k1 + x;
286 w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
287
288 // This is the same inner loop as CityHash64(), manually unrolled.
289 do {
290 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
291 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
292 x ^= w.second;
293 y += v.first + Fetch64(s + 40);
294 z = Rotate(z + w.first, 33) * k1;
295 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
296 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
297 std::swap(z, x);
298 s += 64;
299 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
300 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
301 x ^= w.second;
302 y += v.first + Fetch64(s + 40);
303 z = Rotate(z + w.first, 33) * k1;
304 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
305 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
306 std::swap(z, x);
307 s += 64;
308 len -= 128;
309 } while (LIKELY(len >= 128));
310 x += Rotate(v.first + z, 49) * k0;
311 y = y * k0 + Rotate(w.second, 37);
312 z = z * k0 + Rotate(w.first, 27);
313 w.first *= 9;
314 v.first *= k0;
315 // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
316 for (size_t tail_done = 0; tail_done < len;) {
317 tail_done += 32;
318 y = Rotate(x + y, 42) * k0 + v.second;
319 w.first += Fetch64(s + len - tail_done + 16);
320 x = x * k0 + w.first;
321 z += w.second + Fetch64(s + len - tail_done);
322 w.second += v.first;
323 v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
324 v.first *= k0;
325 }
326 // At this point our 56 bytes of state should contain more than
327 // enough information for a strong 128-bit hash. We use two
328 // different 56-byte-to-8-byte hashes to get a 16-byte final result.
329 x = HashLen16(x, v.first);
330 y = HashLen16(y + z, w.first);
331 return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
332}
333
334uint128 CityHash128(const char* s, size_t len) {
335 return len >= 16
336 ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0))
337 : CityHash128WithSeed(s, len, uint128(k0, k1));
338}
339
340} // namespace Common
diff --git a/src/common/cityhash.h b/src/common/cityhash.h
new file mode 100644
index 000000000..bcebdb150
--- /dev/null
+++ b/src/common/cityhash.h
@@ -0,0 +1,110 @@
1// Copyright (c) 2011 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// CityHash, by Geoff Pike and Jyrki Alakuijala
22//
23// http://code.google.com/p/cityhash/
24//
25// This file provides a few functions for hashing strings. All of them are
26// high-quality functions in the sense that they pass standard tests such
27// as Austin Appleby's SMHasher. They are also fast.
28//
29// For 64-bit x86 code, on short strings, we don't know of anything faster than
30// CityHash64 that is of comparable quality. We believe our nearest competitor
31// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
32// tables and most other hashing (excluding cryptography).
33//
34// For 64-bit x86 code, on long strings, the picture is more complicated.
35// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
36// CityHashCrc128 appears to be faster than all competitors of comparable
37// quality. CityHash128 is also good but not quite as fast. We believe our
38// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
39// other 64-bit CPUs, but for long strings we know that Spooky is slightly
40// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
41// Note that CityHashCrc128 is declared in citycrc.h.
42//
43// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
44// is of comparable quality. We believe our nearest competitor is Murmur3A.
45// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
46//
47// Functions in the CityHash family are not suitable for cryptography.
48//
49// Please see CityHash's README file for more details on our performance
50// measurements and so on.
51//
52// WARNING: This code has been only lightly tested on big-endian platforms!
53// It is known to work well on little-endian platforms that have a small penalty
54// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
55// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
56// bug reports are welcome.
57//
58// By the way, for some hash functions, given strings a and b, the hash
59// of a+b is easily derived from the hashes of a and b. This property
60// doesn't hold for any hash functions in this file.
61
62#pragma once
63
64#include <utility>
65#include <stdint.h>
66#include <stdlib.h> // for size_t.
67
68namespace Common {
69
70typedef std::pair<uint64_t, uint64_t> uint128;
71
72inline uint64_t Uint128Low64(const uint128& x) {
73 return x.first;
74}
75inline uint64_t Uint128High64(const uint128& x) {
76 return x.second;
77}
78
79// Hash function for a byte array.
80uint64_t CityHash64(const char* buf, size_t len);
81
82// Hash function for a byte array. For convenience, a 64-bit seed is also
83// hashed into the result.
84uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);
85
86// Hash function for a byte array. For convenience, two seeds are also
87// hashed into the result.
88uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);
89
90// Hash function for a byte array.
91uint128 CityHash128(const char* s, size_t len);
92
93// Hash function for a byte array. For convenience, a 128-bit seed is also
94// hashed into the result.
95uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);
96
97// Hash 128 input bits down to 64 bits of output.
98// This is intended to be a reasonably good hash function.
99inline uint64_t Hash128to64(const uint128& x) {
100 // Murmur-inspired hashing.
101 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
102 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
103 a ^= (a >> 47);
104 uint64_t b = (Uint128High64(x) ^ a) * kMul;
105 b ^= (b >> 47);
106 b *= kMul;
107 return b;
108}
109
110} // namespace Common
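For orientation, a minimal caller of the functions declared in this header; the sample string and the printed format are illustrative only, the call shapes come from the declarations above:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <string>
#include "common/cityhash.h"

int main() {
    const std::string data = "example payload"; // arbitrary sample input

    // 64-bit hash of a byte buffer.
    const std::uint64_t h64 = Common::CityHash64(data.data(), data.size());

    // 128-bit hash, folded down to 64 bits with Hash128to64.
    const Common::uint128 h128 = Common::CityHash128(data.data(), data.size());
    const std::uint64_t folded = Common::Hash128to64(h128);

    std::printf("CityHash64:           %016" PRIx64 "\n", h64);
    std::printf("CityHash128 (folded): %016" PRIx64 "\n", folded);
    return 0;
}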
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
deleted file mode 100644
index a02e9e5b9..000000000
--- a/src/common/hash.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#if defined(_MSC_VER)
6#include <stdlib.h>
7#endif
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10#include "common/hash.h"
11
12namespace Common {
13
14// MurmurHash3 was written by Austin Appleby, and is placed in the public
15// domain. The author hereby disclaims copyright to this source code.
16
17// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
18// the conversion here
19static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
20 return p[i];
21}
22
23// Finalization mix - force all bits of a hash block to avalanche
24static FORCE_INLINE u64 fmix64(u64 k) {
25 k ^= k >> 33;
26 k *= 0xff51afd7ed558ccdllu;
27 k ^= k >> 33;
28 k *= 0xc4ceb9fe1a85ec53llu;
29 k ^= k >> 33;
30
31 return k;
32}
33
34// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
35// platforms (MurmurHash3_x64_128). It was taken from:
36// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
37void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
38 const u8* data = (const u8*)key;
39 const size_t nblocks = len / 16;
40
41 u64 h1 = seed;
42 u64 h2 = seed;
43
44 const u64 c1 = 0x87c37b91114253d5llu;
45 const u64 c2 = 0x4cf5ad432745937fllu;
46
47 // Body
48
49 const u64* blocks = (const u64*)(data);
50
51 for (size_t i = 0; i < nblocks; i++) {
52 u64 k1 = getblock64(blocks, i * 2 + 0);
53 u64 k2 = getblock64(blocks, i * 2 + 1);
54
55 k1 *= c1;
56 k1 = _rotl64(k1, 31);
57 k1 *= c2;
58 h1 ^= k1;
59
60 h1 = _rotl64(h1, 27);
61 h1 += h2;
62 h1 = h1 * 5 + 0x52dce729;
63
64 k2 *= c2;
65 k2 = _rotl64(k2, 33);
66 k2 *= c1;
67 h2 ^= k2;
68
69 h2 = _rotl64(h2, 31);
70 h2 += h1;
71 h2 = h2 * 5 + 0x38495ab5;
72 }
73
74 // Tail
75
76 const u8* tail = (const u8*)(data + nblocks * 16);
77
78 u64 k1 = 0;
79 u64 k2 = 0;
80
81 switch (len & 15) {
82 case 15:
83 k2 ^= ((u64)tail[14]) << 48;
84 case 14:
85 k2 ^= ((u64)tail[13]) << 40;
86 case 13:
87 k2 ^= ((u64)tail[12]) << 32;
88 case 12:
89 k2 ^= ((u64)tail[11]) << 24;
90 case 11:
91 k2 ^= ((u64)tail[10]) << 16;
92 case 10:
93 k2 ^= ((u64)tail[9]) << 8;
94 case 9:
95 k2 ^= ((u64)tail[8]) << 0;
96 k2 *= c2;
97 k2 = _rotl64(k2, 33);
98 k2 *= c1;
99 h2 ^= k2;
100
101 case 8:
102 k1 ^= ((u64)tail[7]) << 56;
103 case 7:
104 k1 ^= ((u64)tail[6]) << 48;
105 case 6:
106 k1 ^= ((u64)tail[5]) << 40;
107 case 5:
108 k1 ^= ((u64)tail[4]) << 32;
109 case 4:
110 k1 ^= ((u64)tail[3]) << 24;
111 case 3:
112 k1 ^= ((u64)tail[2]) << 16;
113 case 2:
114 k1 ^= ((u64)tail[1]) << 8;
115 case 1:
116 k1 ^= ((u64)tail[0]) << 0;
117 k1 *= c1;
118 k1 = _rotl64(k1, 31);
119 k1 *= c2;
120 h1 ^= k1;
121 };
122
123 // Finalization
124
125 h1 ^= len;
126 h2 ^= len;
127
128 h1 += h2;
129 h2 += h1;
130
131 h1 = fmix64(h1);
132 h2 = fmix64(h2);
133
134 h1 += h2;
135 h2 += h1;
136
137 ((u64*)out)[0] = h1;
138 ((u64*)out)[1] = h2;
139}
140
141} // namespace Common
diff --git a/src/common/hash.h b/src/common/hash.h
index ee2560dad..73c326980 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -5,12 +5,12 @@
 #pragma once

 #include <cstddef>
+#include <cstring>
+#include "common/cityhash.h"
 #include "common/common_types.h"

 namespace Common {

-void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
-
 /**
  * Computes a 64-bit hash over the specified block of data
  * @param data Block of data to compute hash over
@@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
  * @returns 64-bit hash value that was computed over the data block
  */
 static inline u64 ComputeHash64(const void* data, size_t len) {
-    u64 res[2];
-    MurmurHash3_128(data, len, 0, res);
-    return res[0];
+    return CityHash64(static_cast<const char*>(data), len);
+}
+
+/**
+ * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical
+ * that either the struct includes no padding, or that any padding is initialized to a known value
+ * by memsetting the struct to 0 before filling it in.
+ */
+template <typename T>
+static inline u64 ComputeStructHash64(const T& data) {
+    static_assert(std::is_trivially_copyable<T>(),
+                  "Type passed to ComputeStructHash64 must be trivially copyable");
+    return ComputeHash64(&data, sizeof(data));
 }

+/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
+template <typename T>
+struct HashableStruct {
+    // In addition to being trivially copyable, T must also have a trivial default constructor,
+    // because any member initialization would be overridden by memset
+    static_assert(std::is_trivial<T>(), "Type passed to HashableStruct must be trivial");
+    /*
+     * We use a union because "implicitly-defined copy/move constructor for a union X copies the
+     * object representation of X." and "implicitly-defined copy assignment operator for a union X
+     * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy.
+     * This is important because the padding bytes are included in the hash and comparison between
+     * objects.
+     */
+    union {
+        T state;
+    };
+
+    HashableStruct() {
+        // Memset structure to zero padding bits, so that they will be deterministic when hashing
+        std::memset(&state, 0, sizeof(T));
+    }
+
+    bool operator==(const HashableStruct<T>& o) const {
+        return std::memcmp(&state, &o.state, sizeof(T)) == 0;
+    };
+
+    bool operator!=(const HashableStruct<T>& o) const {
+        return !(*this == o);
+    };
+
+    size_t Hash() const {
+        return Common::ComputeStructHash64(state);
+    }
+};
+
 } // namespace Common
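To show how the new helpers are meant to be used, here is a hedged sketch of a lookup key built on HashableStruct; the ShaderCacheKey type and its field names are made up for this example and are not part of the commit:

#include <cstddef>
#include "common/hash.h"

// Example key type: trivial, fixed-size members only, as HashableStruct requires.
struct ShaderCacheKey {
    unsigned program_id;
    unsigned tex_format;
    bool depth_write;
};

// The union member is zero-filled in the constructor, so any padding bytes
// hash and compare deterministically.
using HashableShaderCacheKey = Common::HashableStruct<ShaderCacheKey>;

std::size_t HashKey(unsigned program, unsigned format, bool depth_write) {
    HashableShaderCacheKey key;
    key.state.program_id = program;
    key.state.tex_format = format;
    key.state.depth_write = depth_write;
    return key.Hash();
}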
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a710c4bc5..281810357 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_compute.cpp
     engines/maxwell_compute.h
+    engines/shader_bytecode.h
     gpu.cpp
     gpu.h
     macro_interpreter.cpp
@@ -27,6 +28,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_shader_decompiler.h
     renderer_opengl/gl_shader_gen.cpp
     renderer_opengl/gl_shader_gen.h
+    renderer_opengl/gl_shader_manager.cpp
+    renderer_opengl/gl_shader_manager.h
     renderer_opengl/gl_shader_util.cpp
     renderer_opengl/gl_shader_util.h
     renderer_opengl/gl_state.cpp
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 98b39b2ff..9c6236c39 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -427,14 +427,11 @@ public:
                 BitField<0, 1, u32> enable;
                 BitField<4, 4, ShaderProgram> program;
             };
-            u32 start_id;
-            INSERT_PADDING_WORDS(1);
-            u32 gpr_alloc;
-            ShaderStage type;
-            INSERT_PADDING_WORDS(9);
+            u32 offset;
+            INSERT_PADDING_WORDS(14);
         } shader_config[MaxShaderProgram];

-        INSERT_PADDING_WORDS(0x8C);
+        INSERT_PADDING_WORDS(0x80);

         struct {
             u32 cb_size;
@@ -507,6 +504,7 @@ public:
     };

     State state{};
+    MemoryManager& memory_manager;

     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
@@ -521,8 +519,6 @@ public:
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;

 private:
-    MemoryManager& memory_manager;
-
     std::unordered_map<u32, std::vector<u32>> uploaded_macros;

     /// Macro method that is currently being executed / being fed parameters.
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
new file mode 100644
index 000000000..eff0c35a1
--- /dev/null
+++ b/src/video_core/engines/shader_bytecode.h
@@ -0,0 +1,327 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include <string>
9#include "common/bit_field.h"
10
11namespace Tegra {
12namespace Shader {
13
14struct Register {
15 Register() = default;
16
17 constexpr Register(u64 value) : value(value) {}
18
19 constexpr u64 GetIndex() const {
20 return value;
21 }
22
23 constexpr operator u64() const {
24 return value;
25 }
26
27 template <typename T>
28 constexpr u64 operator-(const T& oth) const {
29 return value - oth;
30 }
31
32 template <typename T>
33 constexpr u64 operator&(const T& oth) const {
34 return value & oth;
35 }
36
37 constexpr u64 operator&(const Register& oth) const {
38 return value & oth.value;
39 }
40
41 constexpr u64 operator~() const {
42 return ~value;
43 }
44
45private:
46 u64 value;
47};
48
49union Attribute {
50 Attribute() = default;
51
52 constexpr Attribute(u64 value) : value(value) {}
53
54 enum class Index : u64 {
55 Position = 7,
56 Attribute_0 = 8,
57 };
58
59 union {
60 BitField<22, 2, u64> element;
61 BitField<24, 6, Index> index;
62 BitField<47, 3, u64> size;
63 } fmt20;
64
65 union {
66 BitField<30, 2, u64> element;
67 BitField<32, 6, Index> index;
68 } fmt28;
69
70 BitField<39, 8, u64> reg;
71 u64 value;
72};
73
74union Uniform {
75 BitField<20, 14, u64> offset;
76 BitField<34, 5, u64> index;
77};
78
79union OpCode {
80 enum class Id : u64 {
81 TEXS = 0x6C,
82 IPA = 0xE0,
83 FFMA_IMM = 0x65,
84 FFMA_CR = 0x93,
85 FFMA_RC = 0xA3,
86 FFMA_RR = 0xB3,
87
88 FADD_C = 0x98B,
89 FMUL_C = 0x98D,
90 MUFU = 0xA10,
91 FADD_R = 0xB8B,
92 FMUL_R = 0xB8D,
93 LD_A = 0x1DFB,
94 ST_A = 0x1DFE,
95
96 FSETP_R = 0x5BB,
97 FSETP_C = 0x4BB,
98 EXIT = 0xE30,
99 KIL = 0xE33,
100
101 FMUL_IMM = 0x70D,
102 FMUL_IMM_x = 0x72D,
103 FADD_IMM = 0x70B,
104 FADD_IMM_x = 0x72B,
105 };
106
107 enum class Type {
108 Trivial,
109 Arithmetic,
110 Ffma,
111 Flow,
112 Memory,
113 Unknown,
114 };
115
116 struct Info {
117 Type type;
118 std::string name;
119 };
120
121 OpCode() = default;
122
123 constexpr OpCode(Id value) : value(static_cast<u64>(value)) {}
124
125 constexpr OpCode(u64 value) : value{value} {}
126
127 constexpr Id EffectiveOpCode() const {
128 switch (op1) {
129 case Id::TEXS:
130 return op1;
131 }
132
133 switch (op2) {
134 case Id::IPA:
135 return op2;
136 }
137
138 switch (op3) {
139 case Id::FFMA_IMM:
140 case Id::FFMA_CR:
141 case Id::FFMA_RC:
142 case Id::FFMA_RR:
143 return op3;
144 }
145
146 switch (op4) {
147 case Id::EXIT:
148 case Id::FSETP_R:
149 case Id::FSETP_C:
150 case Id::KIL:
151 return op4;
152 }
153
154 switch (op5) {
155 case Id::MUFU:
156 case Id::LD_A:
157 case Id::ST_A:
158 case Id::FADD_R:
159 case Id::FADD_C:
160 case Id::FMUL_R:
161 case Id::FMUL_C:
162 return op5;
163
164 case Id::FMUL_IMM:
165 case Id::FMUL_IMM_x:
166 return Id::FMUL_IMM;
167
168 case Id::FADD_IMM:
169 case Id::FADD_IMM_x:
170 return Id::FADD_IMM;
171 }
172
173 return static_cast<Id>(value);
174 }
175
176 static const Info& GetInfo(const OpCode& opcode) {
177 static const std::map<Id, Info> info_table{BuildInfoTable()};
178 const auto& search{info_table.find(opcode.EffectiveOpCode())};
179 if (search != info_table.end()) {
180 return search->second;
181 }
182
183 static const Info unknown{Type::Unknown, "UNK"};
184 return unknown;
185 }
186
187 constexpr operator Id() const {
188 return static_cast<Id>(value);
189 }
190
191 constexpr OpCode operator<<(size_t bits) const {
192 return value << bits;
193 }
194
195 constexpr OpCode operator>>(size_t bits) const {
196 return value >> bits;
197 }
198
199 template <typename T>
200 constexpr u64 operator-(const T& oth) const {
201 return value - oth;
202 }
203
204 constexpr u64 operator&(const OpCode& oth) const {
205 return value & oth.value;
206 }
207
208 constexpr u64 operator~() const {
209 return ~value;
210 }
211
212 static std::map<Id, Info> BuildInfoTable() {
213 std::map<Id, Info> info_table;
214 info_table[Id::TEXS] = {Type::Memory, "texs"};
215 info_table[Id::LD_A] = {Type::Memory, "ld_a"};
216 info_table[Id::ST_A] = {Type::Memory, "st_a"};
217 info_table[Id::MUFU] = {Type::Arithmetic, "mufu"};
218 info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"};
219 info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"};
220 info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"};
221 info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"};
222 info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"};
223 info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"};
224 info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"};
225 info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
226 info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
227 info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
228 info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
229 info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
230 info_table[Id::EXIT] = {Type::Trivial, "exit"};
231 info_table[Id::IPA] = {Type::Trivial, "ipa"};
232 info_table[Id::KIL] = {Type::Flow, "kil"};
233 return info_table;
234 }
235
236 BitField<57, 7, Id> op1;
237 BitField<56, 8, Id> op2;
238 BitField<55, 9, Id> op3;
239 BitField<52, 12, Id> op4;
240 BitField<51, 13, Id> op5;
241 u64 value;
242};
243static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
244
245} // namespace Shader
246} // namespace Tegra
247
248namespace std {
249
250// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330.
251template <>
252struct make_unsigned<Tegra::Shader::Attribute> {
253 using type = Tegra::Shader::Attribute;
254};
255
256template <>
257struct make_unsigned<Tegra::Shader::Register> {
258 using type = Tegra::Shader::Register;
259};
260
261template <>
262struct make_unsigned<Tegra::Shader::OpCode> {
263 using type = Tegra::Shader::OpCode;
264};
265
266} // namespace std
267
268namespace Tegra {
269namespace Shader {
270
271enum class Pred : u64 {
272 UnusedIndex = 0x7,
273 NeverExecute = 0xf,
274};
275
276enum class SubOp : u64 {
277 Cos = 0x0,
278 Sin = 0x1,
279 Ex2 = 0x2,
280 Lg2 = 0x3,
281 Rcp = 0x4,
282 Rsq = 0x5,
283};
284
285union Instruction {
286 Instruction& operator=(const Instruction& instr) {
287 hex = instr.hex;
288 return *this;
289 }
290
291 OpCode opcode;
292 BitField<0, 8, Register> gpr0;
293 BitField<8, 8, Register> gpr8;
294 BitField<16, 4, Pred> pred;
295 BitField<20, 8, Register> gpr20;
296 BitField<20, 7, SubOp> sub_op;
297 BitField<28, 8, Register> gpr28;
298 BitField<36, 13, u64> imm36;
299 BitField<39, 8, Register> gpr39;
300
301 union {
302 BitField<45, 1, u64> negate_b;
303 BitField<46, 1, u64> abs_a;
304 BitField<48, 1, u64> negate_a;
305 BitField<49, 1, u64> abs_b;
306 BitField<50, 1, u64> abs_d;
307 } alu;
308
309 union {
310 BitField<48, 1, u64> negate_b;
311 BitField<49, 1, u64> negate_c;
312 } ffma;
313
314 BitField<60, 1, u64> is_b_gpr;
315 BitField<59, 1, u64> is_c_gpr;
316
317 Attribute attribute;
318 Uniform uniform;
319
320 u64 hex;
321};
322static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
323static_assert(std::is_standard_layout<Instruction>::value,
324 "Structure does not have standard layout");
325
326} // namespace Shader
327} // namespace Tegra
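A short decoding sketch using the types defined above. The instruction word is a hand-picked placeholder: per the opcode table above, its op4 field (bits 52-63) is 0xE30, which maps to EXIT, and gpr0 (bits 0-7) is 15; it is not real shader output.

#include <cstdio>
#include "video_core/engines/shader_bytecode.h"

int main() {
    using namespace Tegra::Shader;

    Instruction instr;
    instr.hex = 0xE30000000000000FULL; // placeholder word: op4 == 0xE30 (EXIT), gpr0 == 15

    // Look up the effective opcode and print the destination register index.
    const OpCode::Info& info = OpCode::GetInfo(instr.opcode);
    std::printf("opcode: %s, gpr0: r%u\n", info.name.c_str(),
                static_cast<unsigned>(instr.gpr0.Value().GetIndex()));
    return 0;
}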
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f217a265b..f75d4c658 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));

-enum class UniformBindings : GLuint { Common, VS, FS };
-
-static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
-                                         size_t expected_size) {
-    GLuint ub_index = glGetUniformBlockIndex(shader, name);
-    if (ub_index != GL_INVALID_INDEX) {
-        GLint ub_size = 0;
-        glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
-        ASSERT_MSG(ub_size == expected_size,
-                   "Uniform block size did not match! Got %d, expected %zu",
-                   static_cast<int>(ub_size), expected_size);
-        glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
-    }
-}
-
-static void SetShaderUniformBlockBindings(GLuint shader) {
-    SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
-                                 sizeof(RasterizerOpenGL::UniformData));
-    SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS,
-                                 sizeof(RasterizerOpenGL::VSUniformData));
-    SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS,
-                                 sizeof(RasterizerOpenGL::FSUniformData));
-}
-
 RasterizerOpenGL::RasterizerOpenGL() {
-    shader_dirty = true;
-
     has_ARB_buffer_storage = false;
     has_ARB_direct_state_access = false;
     has_ARB_separate_shader_objects = false;
@@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() {
         }
     }

+    ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
+
     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
     state.clip_distance[0] = true;

@@ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() {
     state.draw.uniform_buffer = uniform_buffer.handle;
     state.Apply();

-    glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW);
-    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
-
-    uniform_block_data.dirty = true;
-
     // Create render framebuffer
     framebuffer.Create();

-    if (has_ARB_separate_shader_objects) {
-        hw_vao.Create();
-        hw_vao_enabled_attributes.fill(false);
+    hw_vao.Create();
+    hw_vao_enabled_attributes.fill(false);

-        stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
-        stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
-        state.draw.vertex_buffer = stream_buffer->GetHandle();
+    stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
+    stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
+    state.draw.vertex_buffer = stream_buffer->GetHandle();

-        pipeline.Create();
-        state.draw.program_pipeline = pipeline.handle;
-        state.draw.shader_program = 0;
-        state.draw.vertex_array = hw_vao.handle;
-        state.Apply();
+    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
+
+    state.draw.shader_program = 0;
+    state.draw.vertex_array = hw_vao.handle;
+    state.Apply();

-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());

-        vs_uniform_buffer.Create();
-        glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle);
-        glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
-        glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle);
-    } else {
-        UNREACHABLE();
+    for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
+        auto& buffer = uniform_buffers[index];
+        buffer.Create();
+        glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
+        glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
+                     GL_STREAM_COPY);
+        glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
     }

     accelerate_draw = AccelDraw::Disabled;
@@ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     buffer_offset += data_size;
 }

-void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) {
-    MICROPROFILE_SCOPE(OpenGL_VS);
-    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
-    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle);
-}
+void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
+    // Helper function for uploading uniform data
+    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
+        if (has_ARB_direct_state_access) {
+            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
+        } else {
+            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
+            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
+        }
+    };

-void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) {
-    MICROPROFILE_SCOPE(OpenGL_FS);
-    UNREACHABLE();
-}
+    auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
+    ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");

-bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
-    if (!has_ARB_separate_shader_objects) {
-        UNREACHABLE();
-        return false;
+    for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
+        ptr_pos += sizeof(GLShader::MaxwellUniformData);
+
+        auto& shader_config = gpu.regs.shader_config[index];
+        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
+
+        // VertexB program is always enabled, despite bit setting
+        const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB};
+
+        // Skip stages that are not enabled
+        if (!is_enabled) {
+            continue;
+        }
+
+        // Upload uniform data as one UBO per stage
+        const auto& stage = index - 1; // Stage indices are 0 - 5
+        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        copy_buffer(uniform_buffers[stage].handle, ubo_offset,
+                    sizeof(GLShader::MaxwellUniformData));
+        GLShader::MaxwellUniformData* ub_ptr =
+            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
+        ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
+
+        // Fetch program code from memory
+        GLShader::ProgramCode program_code;
+        const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
+        const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)};
+        Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+        GLShader::ShaderSetup setup{std::move(program_code)};
+
+        switch (program) {
+        case Maxwell::ShaderProgram::VertexB: {
+            GLShader::MaxwellVSConfig vs_config{setup};
+            shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
+            break;
+        }
+        case Maxwell::ShaderProgram::Fragment: {
+            GLShader::MaxwellFSConfig fs_config{setup};
+            shader_program_manager->UseProgrammableFragmentShader(fs_config, setup);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index,
+                         shader_config.enable.Value(), shader_config.offset);
+            UNREACHABLE();
+        }
     }

+    shader_program_manager->UseTrivialGeometryShader();
+}
+
+bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
     DrawArrays();
-
     return true;
 }

@@ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() {
     // Sync and bind the texture surfaces
     BindTextures();

-    // Sync and bind the shader
-    if (shader_dirty) {
-        SetShader();
-        shader_dirty = false;
-    }
-
-    // Sync the uniform data
-    if (uniform_block_data.dirty) {
-        glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data);
-        uniform_block_data.dirty = false;
-    }
-
     // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
     // scissor test to prevent drawing outside of the framebuffer region
     state.scissor.enabled = true;
@@ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() {
     if (is_indexed) {
         UNREACHABLE();
     }
-    buffer_size += sizeof(VSUniformData);
+
+    // Uniform space for the 5 shader stages
+    buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;

     size_t ptr_pos = 0;
     u8* buffer_ptr;
@@ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() {
         UNREACHABLE();
     }

-    SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]),
-                      buffer_offset + static_cast<GLintptr>(ptr_pos));
-    const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
-    ptr_pos += sizeof(VSUniformData);
+    SetupShaders(buffer_ptr, buffer_offset, ptr_pos);

     stream_buffer->Unmap();

-    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
-        if (has_ARB_direct_state_access) {
-            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
-        } else {
-            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
-            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
-        }
-    };
-
-    copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData));
-
-    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle);
+    shader_program_manager->ApplyTo(state);
+    state.Apply();

     if (is_indexed) {
         UNREACHABLE();
@@ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
     }
 }

-void RasterizerOpenGL::SetShader() {
-    // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
-    // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
-    // shaders.
-
-    static constexpr char vertex_shader[] = R"(
-#version 150 core
-
-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
-
-void main() {
-    // Multiply input position by the rotscale part of the matrix and then manually translate by
-    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
-    // to `vec3(vert_position.xy, 1.0)`
-    gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
-    frag_tex_coord = vert_tex_coord;
-}
-)";
-
-    static constexpr char fragment_shader[] = R"(
-#version 150 core
-
-in vec2 frag_tex_coord;
-out vec4 color;
-
-uniform sampler2D tex[32];
-
-void main() {
-    color = texture(tex[0], frag_tex_coord);
-}
-)";
-
-    if (current_shader) {
-        return;
-    }
-
-    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
-
-    current_shader = &test_shader;
-    if (has_ARB_separate_shader_objects) {
-        test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true);
-        glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
-    } else {
-        UNREACHABLE();
-    }
-
-    state.draw.shader_program = test_shader.shader.handle;
-    state.Apply();
-
-    for (u32 texture = 0; texture < texture_samplers.size(); ++texture) {
-        // Set the texture samplers to correspond to different texture units
-        std::string uniform_name = "tex[" + std::to_string(texture) + "]";
-        GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str());
-        if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
-        }
-    }
-
-    if (has_ARB_separate_shader_objects) {
-        state.draw.shader_program = 0;
-        state.Apply();
-    }
-}
-
 void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
                                                const Surface& depth_surface, bool has_stencil) {
     state.draw.draw_framebuffer = framebuffer.handle;
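The copy_buffer lambda introduced in SetupShaders above picks between two GL paths for the same copy. A stand-alone sketch of that pattern follows; the function name, parameters, and the glad loader header are assumptions of this example, not part of the commit:

#include <glad/glad.h>

// Copy `size` bytes starting at `src_offset` out of the stream buffer into a
// per-stage uniform buffer, preferring ARB_direct_state_access when available.
void CopyUniformBlock(bool has_direct_state_access, GLuint stream_buffer_handle,
                      GLuint ubo_handle, GLintptr src_offset, GLsizeiptr size) {
    if (has_direct_state_access) {
        // DSA path: no binding points are disturbed.
        glCopyNamedBufferSubData(stream_buffer_handle, ubo_handle, src_offset, 0, size);
    } else {
        // Fallback path: the stream buffer is assumed to be bound to GL_ARRAY_BUFFER,
        // as it is during DrawArrays() in the rasterizer above.
        glBindBuffer(GL_COPY_WRITE_BUFFER, ubo_handle);
        glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, src_offset, 0, size);
    }
}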
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d868bf421..71c21c69b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -15,10 +15,12 @@
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/vector_math.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

@@ -45,7 +47,7 @@ public:
     /// OpenGL shader generated for a given Maxwell register state
     struct MaxwellShader {
         /// OpenGL shader resource
-        OGLShader shader;
+        OGLProgram shader;
     };

     struct VertexShader {
@@ -56,34 +58,6 @@ public:
         OGLShader shader;
     };

-    /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-    // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
-    // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-    // Not following that rule will cause problems on some AMD drivers.
-    struct UniformData {};
-
-    // static_assert(
-    //     sizeof(UniformData) == 0x460,
-    //     "The size of the UniformData structure has changed, update the structure in the shader");
-    static_assert(sizeof(UniformData) < 16384,
-                  "UniformData structure must be less than 16kb as per the OpenGL spec");
-
-    struct VSUniformData {};
-    // static_assert(
-    //     sizeof(VSUniformData) == 1856,
-    //     "The size of the VSUniformData structure has changed, update the structure in the
-    //     shader");
-    static_assert(sizeof(VSUniformData) < 16384,
-                  "VSUniformData structure must be less than 16kb as per the OpenGL spec");
-
-    struct FSUniformData {};
-    // static_assert(
-    //     sizeof(FSUniformData) == 1856,
-    //     "The size of the FSUniformData structure has changed, update the structure in the
-    //     shader");
-    static_assert(sizeof(FSUniformData) < 16384,
-                  "FSUniformData structure must be less than 16kb as per the OpenGL spec");
-
 private:
     class SamplerInfo {
     public:
@@ -122,9 +96,6 @@ private:
     /// Syncs the clip coefficients to match the guest state
     void SyncClipCoef();

-    /// Sets the OpenGL shader in accordance with the current guest state
-    void SetShader();
-
     /// Syncs the cull mode to match the guest state
     void SyncCullMode();

@@ -152,23 +123,12 @@ private:

     RasterizerCacheOpenGL res_cache;

-    /// Shader used for test renderering - to be removed once we have emulated shaders
-    MaxwellShader test_shader{};
-
-    const MaxwellShader* current_shader{};
-    bool shader_dirty{};
-
-    struct {
-        UniformData data;
-        bool dirty;
-    } uniform_block_data = {};
-
-    OGLPipeline pipeline;
+    std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
     OGLVertexArray sw_vao;
     OGLVertexArray hw_vao;
     std::array<bool, 16> hw_vao_enabled_attributes;

-    std::array<SamplerInfo, 32> texture_samplers;
+    std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
     static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
     std::unique_ptr<OGLStreamBuffer> vertex_buffer;
     OGLBuffer uniform_buffer;
@@ -182,19 +142,9 @@ private:
182 void AnalyzeVertexArray(bool is_indexed); 142 void AnalyzeVertexArray(bool is_indexed);
183 void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); 143 void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
184 144
185 OGLBuffer vs_uniform_buffer; 145 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
186 std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map;
187 std::unordered_map<std::string, VertexShader> vs_shader_cache;
188 OGLShader vs_default_shader;
189
190 void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset);
191
192 OGLBuffer fs_uniform_buffer;
193 std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map;
194 std::unordered_map<std::string, FragmentShader> fs_shader_cache;
195 OGLShader fs_default_shader;
196 146
197 void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); 147 void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);
198 148
199 enum class AccelDraw { Disabled, Arrays, Indexed }; 149 enum class AccelDraw { Disabled, Arrays, Indexed };
200 AccelDraw accelerate_draw; 150 AccelDraw accelerate_draw;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5cbafa2e7..213b20a21 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -818,7 +818,7 @@ void main() {
818 color = texelFetch(tbo, tbo_offset).rabg; 818 color = texelFetch(tbo, tbo_offset).rabg;
819} 819}
820)"; 820)";
821 d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); 821 d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);
822 822
823 OpenGLState state = OpenGLState::GetCurState(); 823 OpenGLState state = OpenGLState::GetCurState();
824 GLuint old_program = state.draw.shader_program; 824 GLuint old_program = state.draw.shader_program;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 06524fc59..e7ce506cf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -334,7 +334,7 @@ private:
334 OGLVertexArray attributeless_vao; 334 OGLVertexArray attributeless_vao;
335 OGLBuffer d24s8_abgr_buffer; 335 OGLBuffer d24s8_abgr_buffer;
336 GLsizeiptr d24s8_abgr_buffer_size; 336 GLsizeiptr d24s8_abgr_buffer_size;
337 OGLShader d24s8_abgr_shader; 337 OGLProgram d24s8_abgr_shader;
338 GLint d24s8_abgr_tbo_size_u_id; 338 GLint d24s8_abgr_tbo_size_u_id;
339 GLint d24s8_abgr_viewport_u_id; 339 GLint d24s8_abgr_viewport_u_id;
340}; 340};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 7da5e74d1..2f0e7ac1a 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -13,14 +13,16 @@
13class OGLTexture : private NonCopyable { 13class OGLTexture : private NonCopyable {
14public: 14public:
15 OGLTexture() = default; 15 OGLTexture() = default;
16 OGLTexture(OGLTexture&& o) { 16
17 std::swap(handle, o.handle); 17 OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {}
18 } 18
19 ~OGLTexture() { 19 ~OGLTexture() {
20 Release(); 20 Release();
21 } 21 }
22
22 OGLTexture& operator=(OGLTexture&& o) { 23 OGLTexture& operator=(OGLTexture&& o) {
23 std::swap(handle, o.handle); 24 Release();
25 handle = std::exchange(o.handle, 0);
24 return *this; 26 return *this;
25 } 27 }
26 28
@@ -46,14 +48,16 @@ public:
46class OGLSampler : private NonCopyable { 48class OGLSampler : private NonCopyable {
47public: 49public:
48 OGLSampler() = default; 50 OGLSampler() = default;
49 OGLSampler(OGLSampler&& o) { 51
50 std::swap(handle, o.handle); 52 OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {}
51 } 53
52 ~OGLSampler() { 54 ~OGLSampler() {
53 Release(); 55 Release();
54 } 56 }
57
55 OGLSampler& operator=(OGLSampler&& o) { 58 OGLSampler& operator=(OGLSampler&& o) {
56 std::swap(handle, o.handle); 59 Release();
60 handle = std::exchange(o.handle, 0);
57 return *this; 61 return *this;
58 } 62 }
59 63
@@ -79,25 +83,71 @@ public:
79class OGLShader : private NonCopyable { 83class OGLShader : private NonCopyable {
80public: 84public:
81 OGLShader() = default; 85 OGLShader() = default;
82 OGLShader(OGLShader&& o) { 86
83 std::swap(handle, o.handle); 87 OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {}
84 } 88
85 ~OGLShader() { 89 ~OGLShader() {
86 Release(); 90 Release();
87 } 91 }
92
88 OGLShader& operator=(OGLShader&& o) { 93 OGLShader& operator=(OGLShader&& o) {
89 std::swap(handle, o.handle); 94 Release();
95 handle = std::exchange(o.handle, 0);
90 return *this; 96 return *this;
91 } 97 }
92 98
93 /// Creates a new internal OpenGL resource and stores the handle 99 void Create(const char* source, GLenum type) {
94 void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, 100 if (handle != 0)
95 const std::vector<const char*>& feedback_vars = {}, 101 return;
96 bool separable_program = false) { 102 if (source == nullptr)
103 return;
104 handle = GLShader::LoadShader(source, type);
105 }
106
107 void Release() {
108 if (handle == 0)
109 return;
110 glDeleteShader(handle);
111 handle = 0;
112 }
113
114 GLuint handle = 0;
115};
116
117class OGLProgram : private NonCopyable {
118public:
119 OGLProgram() = default;
120
121 OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {}
122
123 ~OGLProgram() {
124 Release();
125 }
126
127 OGLProgram& operator=(OGLProgram&& o) {
128 Release();
129 handle = std::exchange(o.handle, 0);
130 return *this;
131 }
132
133 template <typename... T>
134 void Create(bool separable_program, T... shaders) {
97 if (handle != 0) 135 if (handle != 0)
98 return; 136 return;
99 handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, 137 handle = GLShader::LoadProgram(separable_program, shaders...);
100 separable_program); 138 }
139
140 /// Creates a new internal OpenGL resource and stores the handle
141 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
142 bool separable_program = false) {
143 OGLShader vert, geo, frag;
144 if (vert_shader)
145 vert.Create(vert_shader, GL_VERTEX_SHADER);
146 if (geo_shader)
147 geo.Create(geo_shader, GL_GEOMETRY_SHADER);
148 if (frag_shader)
149 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
150 Create(separable_program, vert.handle, geo.handle, frag.handle);
101 } 151 }
102 152
103 /// Deletes the internal OpenGL resource 153 /// Deletes the internal OpenGL resource
@@ -148,14 +198,16 @@ public:
148class OGLBuffer : private NonCopyable { 198class OGLBuffer : private NonCopyable {
149public: 199public:
150 OGLBuffer() = default; 200 OGLBuffer() = default;
151 OGLBuffer(OGLBuffer&& o) { 201
152 std::swap(handle, o.handle); 202 OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {}
153 } 203
154 ~OGLBuffer() { 204 ~OGLBuffer() {
155 Release(); 205 Release();
156 } 206 }
207
157 OGLBuffer& operator=(OGLBuffer&& o) { 208 OGLBuffer& operator=(OGLBuffer&& o) {
158 std::swap(handle, o.handle); 209 Release();
210 handle = std::exchange(o.handle, 0);
159 return *this; 211 return *this;
160 } 212 }
161 213
@@ -214,14 +266,16 @@ public:
214class OGLVertexArray : private NonCopyable { 266class OGLVertexArray : private NonCopyable {
215public: 267public:
216 OGLVertexArray() = default; 268 OGLVertexArray() = default;
217 OGLVertexArray(OGLVertexArray&& o) { 269
218 std::swap(handle, o.handle); 270 OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {}
219 } 271
220 ~OGLVertexArray() { 272 ~OGLVertexArray() {
221 Release(); 273 Release();
222 } 274 }
275
223 OGLVertexArray& operator=(OGLVertexArray&& o) { 276 OGLVertexArray& operator=(OGLVertexArray&& o) {
224 std::swap(handle, o.handle); 277 Release();
278 handle = std::exchange(o.handle, 0);
225 return *this; 279 return *this;
226 } 280 }
227 281
@@ -247,14 +301,16 @@ public:
247class OGLFramebuffer : private NonCopyable { 301class OGLFramebuffer : private NonCopyable {
248public: 302public:
249 OGLFramebuffer() = default; 303 OGLFramebuffer() = default;
250 OGLFramebuffer(OGLFramebuffer&& o) { 304
251 std::swap(handle, o.handle); 305 OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {}
252 } 306
253 ~OGLFramebuffer() { 307 ~OGLFramebuffer() {
254 Release(); 308 Release();
255 } 309 }
310
256 OGLFramebuffer& operator=(OGLFramebuffer&& o) { 311 OGLFramebuffer& operator=(OGLFramebuffer&& o) {
257 std::swap(handle, o.handle); 312 Release();
313 handle = std::exchange(o.handle, 0);
258 return *this; 314 return *this;
259 } 315 }
260 316
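The wrappers in gl_resource_manager.h above now share one move pattern: the move constructor steals the handle with std::exchange, and move assignment releases the currently held object before taking ownership, so no GL object leaks. A minimal sketch of that pattern, using a hypothetical OGLHandleSketch class in place of the real GL wrappers:

#include <utility>

class OGLHandleSketch {
public:
    OGLHandleSketch() = default;
    OGLHandleSketch(OGLHandleSketch&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
    OGLHandleSketch& operator=(OGLHandleSketch&& o) noexcept {
        Release();                           // free the object we currently own
        handle = std::exchange(o.handle, 0); // then steal the other handle, leaving it empty
        return *this;
    }
    ~OGLHandleSketch() {
        Release();
    }
    void Release() {
        // The real classes call glDelete*(handle) here when handle != 0.
        handle = 0;
    }
    unsigned handle = 0; // GLuint in the real classes
};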
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 564ea8f9e..1290fa4cd 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2,57 +2,499 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <map>
6#include <set>
5#include <string> 7#include <string>
6#include <queue>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h"
9#include "video_core/renderer_opengl/gl_shader_decompiler.h" 11#include "video_core/renderer_opengl/gl_shader_decompiler.h"
10 12
11namespace Maxwell3D { 13namespace GLShader {
12namespace Shader {
13namespace Decompiler { 14namespace Decompiler {
14 15
16using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20using Tegra::Shader::SubOp;
21using Tegra::Shader::Uniform;
22
15constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 23constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
16 24
17class Impl { 25class DecompileFail : public std::runtime_error {
18public: 26public:
19 Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, 27 using std::runtime_error::runtime_error;
20 const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, 28};
21 const std::function<std::string(u32)>& inputreg_getter, 29
22 const std::function<std::string(u32)>& inputreg_getter, 30/// Describes the behaviour of the code path between a given entry point and a return point.
23 const std::string& emit_cb, const std::string& setemit_cb) 31enum class ExitMethod {
24 : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), 32 Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
25 inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), 33 AlwaysReturn, ///< All code paths reach the return point.
26 sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} 34 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
27 35 AlwaysEnd, ///< All code paths reach an END instruction.
28 std::string Decompile() { 36};
29 UNREACHABLE(); 37
30 return {}; 38/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction.
39struct Subroutine {
40 /// Generates a name suitable for GLSL source code.
41 std::string GetName() const {
42 return "sub_" + std::to_string(begin) + "_" + std::to_string(end);
43 }
44
45 u32 begin; ///< Entry point of the subroutine.
46 u32 end; ///< Return point of the subroutine.
47 ExitMethod exit_method; ///< Exit method of the subroutine.
48 std::set<u32> labels; ///< Addresses referenced by JMP instructions.
49
50 bool operator<(const Subroutine& rhs) const {
51 return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
52 }
53};
54
55/// Analyzes shader code and produces a set of subroutines.
56class ControlFlowAnalyzer {
57public:
58 ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
59 : program_code(program_code) {
60
61 // Recursively finds all subroutines.
62 const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
63 if (program_main.exit_method != ExitMethod::AlwaysEnd)
64 throw DecompileFail("Program does not always end");
65 }
66
67 std::set<Subroutine> GetSubroutines() {
68 return std::move(subroutines);
31 } 69 }
32 70
33private: 71private:
34 const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; 72 const ProgramCode& program_code;
35 const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; 73 std::set<Subroutine> subroutines;
36 u32 main_offset; 74 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
37 const std::function<std::string(u32)>& inputreg_getter; 75
38 const std::function<std::string(u32)>& outputreg_getter; 76 /// Adds and analyzes a new subroutine if it is not added yet.
39 bool sanitize_mul; 77 const Subroutine& AddSubroutine(u32 begin, u32 end) {
40 const std::string& emit_cb; 78 auto iter = subroutines.find(Subroutine{begin, end});
41 const std::string& setemit_cb; 79 if (iter != subroutines.end())
80 return *iter;
81
82 Subroutine subroutine{begin, end};
83 subroutine.exit_method = Scan(begin, end, subroutine.labels);
84 if (subroutine.exit_method == ExitMethod::Undetermined)
85 throw DecompileFail("Recursive function detected");
86 return *subroutines.insert(std::move(subroutine)).first;
87 }
88
89 /// Scans a range of code for labels and determines the exit method.
90 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
91 auto [iter, inserted] =
92 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
93 ExitMethod& exit_method = iter->second;
94 if (!inserted)
95 return exit_method;
96
97 for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
98 const Instruction instr = {program_code[offset]};
99 switch (instr.opcode.EffectiveOpCode()) {
100 case OpCode::Id::EXIT: {
101 return exit_method = ExitMethod::AlwaysEnd;
102 }
103 }
104 }
105 return exit_method = ExitMethod::AlwaysReturn;
106 }
42}; 107};
43 108
44std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, 109class ShaderWriter {
45 const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, 110public:
46 u32 main_offset, 111 void AddLine(const std::string& text) {
47 const std::function<std::string(u32)>& inputreg_getter, 112 DEBUG_ASSERT(scope >= 0);
48 const std::function<std::string(u32)>& outputreg_getter, 113 if (!text.empty()) {
49 bool sanitize_mul, const std::string& emit_cb, 114 shader_source += std::string(static_cast<size_t>(scope) * 4, ' ');
50 const std::string& setemit_cb) { 115 }
51 Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, 116 shader_source += text + '\n';
52 sanitize_mul, emit_cb, setemit_cb); 117 }
53 return impl.Decompile(); 118
119 std::string GetResult() {
120 return std::move(shader_source);
121 }
122
123 int scope = 0;
124
125private:
126 std::string shader_source;
127};
128
129class GLSLGenerator {
130public:
131 GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
132 u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
133 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
134 stage(stage) {
135
136 Generate();
137 }
138
139 std::string GetShaderCode() {
140 return declarations.GetResult() + shader.GetResult();
141 }
142
143private:
144 /// Gets the Subroutine object corresponding to the specified address.
145 const Subroutine& GetSubroutine(u32 begin, u32 end) const {
146 auto iter = subroutines.find(Subroutine{begin, end});
147 ASSERT(iter != subroutines.end());
148 return *iter;
149 }
150
151 /// Generates code representing an input attribute register.
152 std::string GetInputAttribute(Attribute::Index attribute) {
153 declr_input_attribute.insert(attribute);
154
155 const u32 index{static_cast<u32>(attribute) -
156 static_cast<u32>(Attribute::Index::Attribute_0)};
157 if (attribute >= Attribute::Index::Attribute_0) {
158 return "input_attribute_" + std::to_string(index);
159 }
160
161 LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
162 UNREACHABLE();
163 }
164
165 /// Generates code representing an output attribute register.
166 std::string GetOutputAttribute(Attribute::Index attribute) {
167 switch (attribute) {
168 case Attribute::Index::Position:
169 return "gl_Position";
170 default:
171 const u32 index{static_cast<u32>(attribute) -
172 static_cast<u32>(Attribute::Index::Attribute_0)};
173 if (attribute >= Attribute::Index::Attribute_0) {
174 declr_output_attribute.insert(attribute);
175 return "output_attribute_" + std::to_string(index);
176 }
177
178 LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
179 UNREACHABLE();
180 }
181 }
182
183 /// Generates code representing a temporary (GPR) register.
184 std::string GetRegister(const Register& reg) {
185 return *declr_register.insert("register_" + std::to_string(reg)).first;
186 }
187
188 /// Generates code representing a uniform (C buffer) register.
189 std::string GetUniform(const Uniform& reg) const {
190 std::string index = std::to_string(reg.index);
191 return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" +
192 std::to_string(reg.offset & 3) + "]";
193 }
194
195 /**
196 * Adds code that calls a subroutine.
197 * @param subroutine the subroutine to call.
198 */
199 void CallSubroutine(const Subroutine& subroutine) {
200 if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
201 shader.AddLine(subroutine.GetName() + "();");
202 shader.AddLine("return true;");
203 } else if (subroutine.exit_method == ExitMethod::Conditional) {
204 shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
205 } else {
206 shader.AddLine(subroutine.GetName() + "();");
207 }
208 }
209
210 /**
211 * Writes code that does an assignment operation.
212 * @param reg the destination register code.
213 * @param value the code representing the value to assign.
214 */
215 void SetDest(u64 elem, const std::string& reg, const std::string& value,
216 u64 dest_num_components, u64 value_num_components) {
217 std::string swizzle = ".";
218 swizzle += "xyzw"[elem];
219
220 std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
221 std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
222
223 shader.AddLine(dest + " = " + src + ";");
224 }
225
226 /**
227 * Compiles a single instruction from Tegra to GLSL.
228 * @param offset the offset of the Tegra shader instruction.
229 * @return the offset of the next instruction to execute. Usually it is the current offset
230 * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
231 */
232 u32 CompileInstr(u32 offset) {
233 const Instruction instr = {program_code[offset]};
234
235 shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);
236
237 switch (OpCode::GetInfo(instr.opcode).type) {
238 case OpCode::Type::Arithmetic: {
239 ASSERT(!instr.alu.abs_d);
240
241 std::string dest = GetRegister(instr.gpr0);
242 std::string op_a = instr.alu.negate_a ? "-" : "";
243 op_a += GetRegister(instr.gpr8);
244 if (instr.alu.abs_a) {
245 op_a = "abs(" + op_a + ")";
246 }
247
248 std::string op_b = instr.alu.negate_b ? "-" : "";
249 if (instr.is_b_gpr) {
250 op_b += GetRegister(instr.gpr20);
251 } else {
252 op_b += GetUniform(instr.uniform);
253 }
254 if (instr.alu.abs_b) {
255 op_b = "abs(" + op_b + ")";
256 }
257
258 switch (instr.opcode.EffectiveOpCode()) {
259 case OpCode::Id::FMUL_C:
260 case OpCode::Id::FMUL_R: {
261 SetDest(0, dest, op_a + " * " + op_b, 1, 1);
262 break;
263 }
264 case OpCode::Id::FADD_C:
265 case OpCode::Id::FADD_R: {
266 SetDest(0, dest, op_a + " + " + op_b, 1, 1);
267 break;
268 }
269 default: {
270 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
271 static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
272 OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
273 throw DecompileFail("Unhandled instruction");
274 break;
275 }
276 }
277 break;
278 }
279 case OpCode::Type::Ffma: {
280 ASSERT_MSG(!instr.ffma.negate_b, "untested");
281 ASSERT_MSG(!instr.ffma.negate_c, "untested");
282
283 std::string dest = GetRegister(instr.gpr0);
284 std::string op_a = GetRegister(instr.gpr8);
285
286 std::string op_b = instr.ffma.negate_b ? "-" : "";
287 op_b += GetUniform(instr.uniform);
288
289 std::string op_c = instr.ffma.negate_c ? "-" : "";
290 op_c += GetRegister(instr.gpr39);
291
292 switch (instr.opcode.EffectiveOpCode()) {
293 case OpCode::Id::FFMA_CR: {
294 SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
295 break;
296 }
297
298 default: {
299 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
300 static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
301 OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
302 throw DecompileFail("Unhandled instruction");
303 break;
304 }
305 }
306 break;
307 }
308 case OpCode::Type::Memory: {
309 std::string gpr0 = GetRegister(instr.gpr0);
310 const Attribute::Index attribute = instr.attribute.fmt20.index;
311
312 switch (instr.opcode.EffectiveOpCode()) {
313 case OpCode::Id::LD_A: {
314 ASSERT(instr.attribute.fmt20.size == 0);
315 SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
316 break;
317 }
318 case OpCode::Id::ST_A: {
319 ASSERT(instr.attribute.fmt20.size == 0);
320 SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
321 break;
322 }
323 default: {
324 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
325 static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
326 OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
327 throw DecompileFail("Unhandled instruction");
328 break;
329 }
330 }
331 break;
332 }
333
334 default: {
335 switch (instr.opcode.EffectiveOpCode()) {
336 case OpCode::Id::EXIT: {
337 shader.AddLine("return true;");
338 offset = PROGRAM_END - 1;
339 break;
340 }
341
342 default: {
343 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
344 static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
345 OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
346 throw DecompileFail("Unhandled instruction");
347 break;
348 }
349 }
350
351 break;
352 }
353 }
354
355 return offset + 1;
356 }
357
358 /**
359 * Compiles a range of instructions from Tegra to GLSL.
360 * @param begin the offset of the starting instruction.
361 * @param end the offset where the compilation should stop (exclusive).
362 * @return the offset of the next instruction to compile. PROGRAM_END if the program
363 * terminates.
364 */
365 u32 CompileRange(u32 begin, u32 end) {
366 u32 program_counter;
367 for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
368 program_counter = CompileInstr(program_counter);
369 }
370 return program_counter;
371 }
372
373 void Generate() {
374 // Add declarations for all subroutines
375 for (const auto& subroutine : subroutines) {
376 shader.AddLine("bool " + subroutine.GetName() + "();");
377 }
378 shader.AddLine("");
379
380 // Add the main entry point
381 shader.AddLine("bool exec_shader() {");
382 ++shader.scope;
383 CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
384 --shader.scope;
385 shader.AddLine("}\n");
386
387 // Add definitions for all subroutines
388 for (const auto& subroutine : subroutines) {
389 std::set<u32> labels = subroutine.labels;
390
391 shader.AddLine("bool " + subroutine.GetName() + "() {");
392 ++shader.scope;
393
394 if (labels.empty()) {
395 if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
396 shader.AddLine("return false;");
397 }
398 } else {
399 labels.insert(subroutine.begin);
400 shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
401 shader.AddLine("while (true) {");
402 ++shader.scope;
403
404 shader.AddLine("switch (jmp_to) {");
405
406 for (auto label : labels) {
407 shader.AddLine("case " + std::to_string(label) + "u: {");
408 ++shader.scope;
409
410 auto next_it = labels.lower_bound(label + 1);
411 u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
412
413 u32 compile_end = CompileRange(label, next_label);
414 if (compile_end > next_label && compile_end != PROGRAM_END) {
415 // This happens only when there is a label inside an IF/LOOP block
416 shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
417 labels.emplace(compile_end);
418 }
419
420 --shader.scope;
421 shader.AddLine("}");
422 }
423
424 shader.AddLine("default: return false;");
425 shader.AddLine("}");
426
427 --shader.scope;
428 shader.AddLine("}");
429
430 shader.AddLine("return false;");
431 }
432
433 --shader.scope;
434 shader.AddLine("}\n");
435
436 DEBUG_ASSERT(shader.scope == 0);
437 }
438
439 GenerateDeclarations();
440 }
441
442 /// Adds declarations for the registers and attributes used by the shader
443 void GenerateDeclarations() {
444 for (const auto& reg : declr_register) {
445 declarations.AddLine("float " + reg + " = 0.0;");
446 }
447 declarations.AddLine("");
448
449 for (const auto& index : declr_input_attribute) {
450 // TODO(bunnei): Use proper number of elements for these
451 declarations.AddLine("layout(location = " +
452 std::to_string(static_cast<u32>(index) -
453 static_cast<u32>(Attribute::Index::Attribute_0)) +
454 ") in vec4 " + GetInputAttribute(index) + ";");
455 }
456 declarations.AddLine("");
457
458 for (const auto& index : declr_output_attribute) {
459 // TODO(bunnei): Use proper number of elements for these
460 declarations.AddLine("layout(location = " +
461 std::to_string(static_cast<u32>(index) -
462 static_cast<u32>(Attribute::Index::Attribute_0)) +
463 ") out vec4 " + GetOutputAttribute(index) + ";");
464 }
465 declarations.AddLine("");
466 }
467
468private:
469 const std::set<Subroutine>& subroutines;
470 const ProgramCode& program_code;
471 const u32 main_offset;
472 Maxwell3D::Regs::ShaderStage stage;
473
474 ShaderWriter shader;
475 ShaderWriter declarations;
476
477 // Declarations
478 std::set<std::string> declr_register;
479 std::set<Attribute::Index> declr_input_attribute;
480 std::set<Attribute::Index> declr_output_attribute;
481}; // class GLSLGenerator
482
483std::string GetCommonDeclarations() {
484 return "bool exec_shader();";
485}
486
487boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
488 Maxwell3D::Regs::ShaderStage stage) {
489 try {
490 auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
491 GLSLGenerator generator(subroutines, program_code, main_offset, stage);
492 return generator.GetShaderCode();
493 } catch (const DecompileFail& exception) {
494 LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
495 }
496 return boost::none;
54} 497}
55 498
56} // namespace Decompiler 499} // namespace Decompiler
57} // namespace Shader 500} // namespace GLShader
58} // namespace Maxwell3D
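As a rough illustration only (this literal string is not produced by the commit), the labeled-subroutine path in GLSLGenerator::Generate() above emits GLSL shaped like the following, shown here inside a C++ raw string; register names follow GetRegister() and the jump table mirrors the switch-on-jmp_to loop:

constexpr const char* example_decompiled_subroutine = R"(
bool sub_0_4096();

bool exec_shader() {
    sub_0_4096();
    return true;
}

bool sub_0_4096() {
    uint jmp_to = 0u;
    while (true) {
        switch (jmp_to) {
        case 0u: {
            // 0: FMUL_R
            register_0 = (register_1 * register_2);
            // 1: EXIT
            return true;
        }
        default: return false;
        }
    }
    return false;
}
)";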
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 02ebfcbe8..2f4047d87 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,23 +5,20 @@
5#include <array> 5#include <array>
6#include <functional> 6#include <functional>
7#include <string> 7#include <string>
8#include <boost/optional.hpp>
8#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_opengl/gl_shader_gen.h"
9 12
10namespace Maxwell3D { 13namespace GLShader {
11namespace Shader {
12namespace Decompiler { 14namespace Decompiler {
13 15
14constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; 16using Tegra::Engines::Maxwell3D;
15constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000};
16 17
17std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, 18std::string GetCommonDeclarations();
18 const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, 19
19 u32 main_offset, 20boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
20 const std::function<std::string(u32)>& inputreg_getter, 21 Maxwell3D::Regs::ShaderStage stage);
21 const std::function<std::string(u32)>& outputreg_getter,
22 bool sanitize_mul, const std::string& emit_cb = "",
23 const std::string& setemit_cb = "");
24 22
25} // namespace Decompiler 23} // namespace Decompiler
26} // namespace Shader 24} // namespace GLShader
27} // namespace Maxwell3D
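A hedged usage sketch of the new interface (GetGLSLOrFallback is an illustrative helper, not part of this commit): callers test the boost::optional and substitute a fallback when decompilation fails:

#include <string>
#include <boost/optional.hpp>
#include "video_core/renderer_opengl/gl_shader_decompiler.h"

std::string GetGLSLOrFallback(const GLShader::ProgramCode& code, u32 main_offset,
                              Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    const boost::optional<std::string> glsl =
        GLShader::Decompiler::DecompileProgram(code, main_offset, stage);
    if (glsl) {
        return GLShader::Decompiler::GetCommonDeclarations() + '\n' + *glsl;
    }
    // Decompilation failed (logged internally); the caller should fall back to other codegen.
    return {};
}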
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8f3c98800..524c2cfb5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -7,12 +7,12 @@
7 7
8namespace GLShader { 8namespace GLShader {
9 9
10std::string GenerateVertexShader(const MaxwellVSConfig& config) { 10std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) {
11 UNREACHABLE(); 11 UNREACHABLE();
12 return {}; 12 return {};
13} 13}
14 14
15std::string GenerateFragmentShader(const MaxwellFSConfig& config) { 15std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) {
16 UNREACHABLE(); 16 UNREACHABLE();
17 return {}; 17 return {};
18} 18}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 5101e7d30..925e66ee4 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,46 +4,67 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstring> 7#include <array>
8#include <string> 8#include <string>
9#include <type_traits> 9#include <type_traits>
10#include "common/common_types.h"
10#include "common/hash.h" 11#include "common/hash.h"
11 12
12namespace GLShader { 13namespace GLShader {
13 14
14enum Attributes { 15constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
15 ATTRIBUTE_POSITION,
16 ATTRIBUTE_COLOR,
17 ATTRIBUTE_TEXCOORD0,
18 ATTRIBUTE_TEXCOORD1,
19 ATTRIBUTE_TEXCOORD2,
20 ATTRIBUTE_TEXCOORD0_W,
21 ATTRIBUTE_NORMQUAT,
22 ATTRIBUTE_VIEW,
23};
24 16
25struct MaxwellShaderConfigCommon { 17using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>;
26 explicit MaxwellShaderConfigCommon(){}; 18
19struct ShaderSetup {
20 ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
21
22 ProgramCode program_code;
23 bool program_code_hash_dirty = true;
24
25 u64 GetProgramCodeHash() {
26 if (program_code_hash_dirty) {
27 program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
28 program_code_hash_dirty = false;
29 }
30 return program_code_hash;
31 }
32
33private:
34 u64 program_code_hash{};
27}; 35};
28 36
29struct MaxwellVSConfig : MaxwellShaderConfigCommon { 37struct MaxwellShaderConfigCommon {
30 explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} 38 void Init(ShaderSetup& setup) {
39 program_hash = setup.GetProgramCodeHash();
40 }
31 41
32 bool operator==(const MaxwellVSConfig& o) const { 42 u64 program_hash;
33 return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
34 };
35}; 43};
36 44
37struct MaxwellFSConfig : MaxwellShaderConfigCommon { 45struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
38 explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} 46 explicit MaxwellVSConfig(ShaderSetup& setup) {
47 state.Init(setup);
48 }
49};
39 50
40 bool operator==(const MaxwellFSConfig& o) const { 51struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
41 return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; 52 explicit MaxwellFSConfig(ShaderSetup& setup) {
42 }; 53 state.Init(setup);
54 }
43}; 55};
44 56
45std::string GenerateVertexShader(const MaxwellVSConfig& config); 57/**
46std::string GenerateFragmentShader(const MaxwellFSConfig& config); 58 * Generates the GLSL vertex shader program source code for the given VS program
59 * @returns String of the shader source code
60 */
61std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config);
62
63/**
64 * Generates the GLSL fragment shader program source code for the given FS program
65 * @returns String of the shader source code
66 */
67std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config);
47 68
48} // namespace GLShader 69} // namespace GLShader
49 70
@@ -52,14 +73,14 @@ namespace std {
52template <> 73template <>
53struct hash<GLShader::MaxwellVSConfig> { 74struct hash<GLShader::MaxwellVSConfig> {
54 size_t operator()(const GLShader::MaxwellVSConfig& k) const { 75 size_t operator()(const GLShader::MaxwellVSConfig& k) const {
55 return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); 76 return k.Hash();
56 } 77 }
57}; 78};
58 79
59template <> 80template <>
60struct hash<GLShader::MaxwellFSConfig> { 81struct hash<GLShader::MaxwellFSConfig> {
61 size_t operator()(const GLShader::MaxwellFSConfig& k) const { 82 size_t operator()(const GLShader::MaxwellFSConfig& k) const {
62 return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); 83 return k.Hash();
63 } 84 }
64}; 85};
65 86
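A small usage sketch (values hypothetical) of the hash caching in ShaderSetup above: the 64-bit program hash is computed once and reused until program_code_hash_dirty is set again:

#include "video_core/renderer_opengl/gl_shader_gen.h"

void HashCachingExample() {
    GLShader::ProgramCode code{}; // zero-filled program of MAX_PROGRAM_CODE_LENGTH u64 words
    GLShader::ShaderSetup setup(std::move(code));

    const u64 first = setup.GetProgramCodeHash();  // computes and caches the hash
    const u64 second = setup.GetProgramCodeHash(); // returns the cached value, no rehash
    // first == second; the flag is only reset when new program code is uploaded.
    (void)first;
    (void)second;
}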
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
new file mode 100644
index 000000000..7fceedce8
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -0,0 +1,65 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/hle/kernel/process.h"
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_opengl/gl_shader_manager.h"
9
10namespace GLShader {
11
12namespace Impl {
13void SetShaderUniformBlockBinding(GLuint shader, const char* name,
14 Maxwell3D::Regs::ShaderStage binding, size_t expected_size) {
15 GLuint ub_index = glGetUniformBlockIndex(shader, name);
16 if (ub_index != GL_INVALID_INDEX) {
17 GLint ub_size = 0;
18 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
19 ASSERT_MSG(ub_size == expected_size,
20 "Uniform block size did not match! Got %d, expected %zu",
21 static_cast<int>(ub_size), expected_size);
22 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
23 }
24}
25
26void SetShaderUniformBlockBindings(GLuint shader) {
27 SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex,
28 sizeof(MaxwellUniformData));
29 SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry,
30 sizeof(MaxwellUniformData));
31 SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment,
32 sizeof(MaxwellUniformData));
33}
34
35void SetShaderSamplerBindings(GLuint shader) {
36 OpenGLState cur_state = OpenGLState::GetCurState();
37 GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
38 cur_state.Apply();
39
40 // Set the texture samplers to correspond to different texture units
41 for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
42 // Build the uniform name for this sampler and point it at its texture unit
43 std::string uniform_name = "tex[" + std::to_string(texture) + "]";
44 GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
45 if (uniform_tex != -1) {
46 glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
47 }
48 }
49
50 cur_state.draw.shader_program = old_program;
51 cur_state.Apply();
52}
53
54} // namespace Impl
55
56void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
57 const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
58 for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) {
59 const auto& const_buffer = shader_stage.const_buffers[index];
60 const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address);
61 Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer));
62 }
63}
64
65} // namespace GLShader
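For context, a hypothetical std140 uniform block (not part of this commit) of the kind SetShaderUniformBlockBinding() above looks up by name and size-checks against sizeof(MaxwellUniformData):

// Illustrative only: 16 const buffers of 4 vec4 entries each occupy 1024 bytes under std140,
// matching the MaxwellUniformData static_assert in gl_shader_manager.h.
constexpr const char* example_vs_config_block = R"(
layout (std140) uniform vs_config {
    vec4 const_buffers[16][4];
};
)";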
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
new file mode 100644
index 000000000..5c8560cf5
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -0,0 +1,151 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <tuple>
8#include <unordered_map>
9#include <boost/functional/hash.hpp>
10#include <glad/glad.h>
11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/gl_shader_gen.h"
13#include "video_core/renderer_opengl/maxwell_to_gl.h"
14
15namespace GLShader {
16
17/// Number of OpenGL texture samplers that can be used in the fragment shader
18static constexpr size_t NumTextureSamplers = 32;
19
20using Tegra::Engines::Maxwell3D;
21
22namespace Impl {
23void SetShaderUniformBlockBindings(GLuint shader);
24void SetShaderSamplerBindings(GLuint shader);
25} // namespace Impl
26
27/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
28// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
29// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
30// Not following that rule will cause problems on some AMD drivers.
31struct MaxwellUniformData {
32 void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
33
34 using ConstBuffer = std::array<GLvec4, 4>;
35 alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers;
36};
37static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect");
38static_assert(sizeof(MaxwellUniformData) < 16384,
39 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
40
41class OGLShaderStage {
42public:
43 OGLShaderStage() = default;
44
45 void Create(const char* source, GLenum type) {
46 OGLShader shader;
47 shader.Create(source, type);
48 program.Create(true, shader.handle);
49 Impl::SetShaderUniformBlockBindings(program.handle);
50 Impl::SetShaderSamplerBindings(program.handle);
51 }
52 GLuint GetHandle() const {
53 return program.handle;
54 }
55
56private:
57 OGLProgram program;
58};
59
60// TODO(wwylele): beautify this doc
61// This is a shader cache designed for translating guest shaders to GLSL shaders.
62// The double cache is needed because different KeyConfigType values, which include a hash of the
63// code region (including its leftover unused code), can generate the same GLSL code.
64template <typename KeyConfigType,
65 std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType>
66class ShaderCache {
67public:
68 ShaderCache() = default;
69
70 GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) {
71 auto map_it = shader_map.find(key);
72 if (map_it == shader_map.end()) {
73 std::string program = CodeGenerator(setup, key);
74
75 auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{});
76 OGLShaderStage& cached_shader = iter->second;
77 if (new_shader) {
78 cached_shader.Create(program.c_str(), ShaderType);
79 }
80 shader_map[key] = &cached_shader;
81 return cached_shader.GetHandle();
82 } else {
83 return map_it->second->GetHandle();
84 }
85 }
86
87private:
88 std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
89 std::unordered_map<std::string, OGLShaderStage> shader_cache;
90};
91
92using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
93
94using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
95
96class ProgramManager {
97public:
98 ProgramManager() {
99 pipeline.Create();
100 }
101
102 void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) {
103 current.vs = vertex_shaders.Get(config, setup);
104 }
105
106 void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) {
107 current.fs = fragment_shaders.Get(config, setup);
108 }
109
110 void UseTrivialGeometryShader() {
111 current.gs = 0;
112 }
113
114 void ApplyTo(OpenGLState& state) {
115 // Workaround for AMD bug
116 glUseProgramStages(pipeline.handle,
117 GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
118 0);
119
120 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs);
121 glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs);
122 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs);
123 state.draw.shader_program = 0;
124 state.draw.program_pipeline = pipeline.handle;
125 }
126
127private:
128 struct ShaderTuple {
129 GLuint vs = 0, gs = 0, fs = 0;
130 bool operator==(const ShaderTuple& rhs) const {
131 return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
132 }
133 struct Hash {
134 std::size_t operator()(const ShaderTuple& tuple) const {
135 std::size_t hash = 0;
136 boost::hash_combine(hash, tuple.vs);
137 boost::hash_combine(hash, tuple.gs);
138 boost::hash_combine(hash, tuple.fs);
139 return hash;
140 }
141 };
142 };
143 ShaderTuple current;
144 VertexShaders vertex_shaders;
145 FragmentShaders fragment_shaders;
146
147 std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
148 OGLPipeline pipeline;
149};
150
151} // namespace GLShader
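A hedged sketch of how a rasterizer is expected to drive ProgramManager per draw call (the setup_vs/setup_fs objects and the surrounding state handling are assumptions, not code from this commit):

#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"

void UseShadersSketch(GLShader::ProgramManager& manager, OpenGLState& state,
                      GLShader::ShaderSetup& setup_vs, GLShader::ShaderSetup& setup_fs) {
    // Each config hashes the corresponding program code; the caches reuse compiled stages.
    manager.UseProgrammableVertexShader(GLShader::MaxwellVSConfig{setup_vs}, setup_vs);
    manager.UseProgrammableFragmentShader(GLShader::MaxwellFSConfig{setup_fs}, setup_fs);
    manager.UseTrivialGeometryShader();

    // Bind the selected stages to the separable pipeline and point the GL state at it.
    manager.ApplyTo(state);
    state.Apply();
}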
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index a6c6204d5..8568fface 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,156 +10,41 @@
10 10
11namespace GLShader { 11namespace GLShader {
12 12
13GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, 13GLuint LoadShader(const char* source, GLenum type) {
14 const char* fragment_shader, const std::vector<const char*>& feedback_vars, 14 const char* debug_type;
15 bool separable_program) { 15 switch (type) {
16 // Create the shaders 16 case GL_VERTEX_SHADER:
17 GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; 17 debug_type = "vertex";
18 GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; 18 break;
19 GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; 19 case GL_GEOMETRY_SHADER:
20 debug_type = "geometry";
21 break;
22 case GL_FRAGMENT_SHADER:
23 debug_type = "fragment";
24 break;
25 default:
26 UNREACHABLE();
27 }
28 GLuint shader_id = glCreateShader(type);
29 glShaderSource(shader_id, 1, &source, nullptr);
30 NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
31 glCompileShader(shader_id);
20 32
21 GLint result = GL_FALSE; 33 GLint result = GL_FALSE;
22 int info_log_length; 34 GLint info_log_length;
23 35 glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
24 if (vertex_shader) { 36 glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
25 // Compile Vertex Shader
26 LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
27
28 glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
29 glCompileShader(vertex_shader_id);
30
31 // Check Vertex Shader
32 glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
33 glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
34
35 if (info_log_length > 1) {
36 std::vector<char> vertex_shader_error(info_log_length);
37 glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
38 if (result == GL_TRUE) {
39 LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
40 } else {
41 LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s",
42 &vertex_shader_error[0]);
43 }
44 }
45 }
46
47 if (geometry_shader) {
48 // Compile Geometry Shader
49 LOG_DEBUG(Render_OpenGL, "Compiling geometry shader...");
50
51 glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr);
52 glCompileShader(geometry_shader_id);
53
54 // Check Geometry Shader
55 glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result);
56 glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
57
58 if (info_log_length > 1) {
59 std::vector<char> geometry_shader_error(info_log_length);
60 glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr,
61 &geometry_shader_error[0]);
62 if (result == GL_TRUE) {
63 LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
64 } else {
65 LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s",
66 &geometry_shader_error[0]);
67 }
68 }
69 }
70
71 if (fragment_shader) {
72 // Compile Fragment Shader
73 LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
74
75 glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
76 glCompileShader(fragment_shader_id);
77
78 // Check Fragment Shader
79 glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
80 glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
81
82 if (info_log_length > 1) {
83 std::vector<char> fragment_shader_error(info_log_length);
84 glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr,
85 &fragment_shader_error[0]);
86 if (result == GL_TRUE) {
87 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
88 } else {
89 LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s",
90 &fragment_shader_error[0]);
91 }
92 }
93 }
94
95 // Link the program
96 LOG_DEBUG(Render_OpenGL, "Linking program...");
97
98 GLuint program_id = glCreateProgram();
99 if (vertex_shader) {
100 glAttachShader(program_id, vertex_shader_id);
101 }
102 if (geometry_shader) {
103 glAttachShader(program_id, geometry_shader_id);
104 }
105 if (fragment_shader) {
106 glAttachShader(program_id, fragment_shader_id);
107 }
108
109 if (!feedback_vars.empty()) {
110 auto varyings = feedback_vars;
111 glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()),
112 &varyings[0], GL_INTERLEAVED_ATTRIBS);
113 }
114
115 if (separable_program) {
116 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
117 }
118
119 glLinkProgram(program_id);
120
121 // Check the program
122 glGetProgramiv(program_id, GL_LINK_STATUS, &result);
123 glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
124 37
125 if (info_log_length > 1) { 38 if (info_log_length > 1) {
126 std::vector<char> program_error(info_log_length); 39 std::string shader_error(info_log_length, ' ');
127 glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); 40 glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
128 if (result == GL_TRUE) { 41 if (result == GL_TRUE) {
129 LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); 42 NGLOG_DEBUG(Render_OpenGL, "{}", shader_error);
130 } else { 43 } else {
131 LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); 44 NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
132 } 45 }
133 } 46 }
134 47 return shader_id;
135 // If the program linking failed at least one of the shaders was probably bad
136 if (result == GL_FALSE) {
137 if (vertex_shader) {
138 LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
139 }
140 if (geometry_shader) {
141 LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
142 }
143 if (fragment_shader) {
144 LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
145 }
146 }
147 ASSERT_MSG(result == GL_TRUE, "Shader not linked");
148
149 if (vertex_shader) {
150 glDetachShader(program_id, vertex_shader_id);
151 glDeleteShader(vertex_shader_id);
152 }
153 if (geometry_shader) {
154 glDetachShader(program_id, geometry_shader_id);
155 glDeleteShader(geometry_shader_id);
156 }
157 if (fragment_shader) {
158 glDetachShader(program_id, fragment_shader_id);
159 glDeleteShader(fragment_shader_id);
160 }
161
162 return program_id;
163} 48}
164 49
165} // namespace GLShader 50} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index fc7b5e080..a1fa9e814 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -6,18 +6,60 @@
6 6
7#include <vector> 7#include <vector>
8#include <glad/glad.h> 8#include <glad/glad.h>
9#include "common/assert.h"
10#include "common/logging/log.h"
9 11
10namespace GLShader { 12namespace GLShader {
11 13
12/** 14/**
15 * Utility function to create and compile an OpenGL GLSL shader
16 * @param source String of the GLSL shader program
17 * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
18 */
19GLuint LoadShader(const char* source, GLenum type);
20
21/**
13 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) 22 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
14 * @param vertex_shader String of the GLSL vertex shader program 23 * @param separable_program whether to create a separable program
15 * @param geometry_shader String of the GLSL geometry shader program 24 * @param shaders ID of shaders to attach to the program
16 * @param fragment_shader String of the GLSL fragment shader program 25 * @returns Handle of the newly created OpenGL program object
17 * @returns Handle of the newly created OpenGL shader object
18 */ 26 */
19GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, 27template <typename... T>
20 const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, 28GLuint LoadProgram(bool separable_program, T... shaders) {
21 bool separable_program = false); 29 // Link the program
30 NGLOG_DEBUG(Render_OpenGL, "Linking program...");
31
32 GLuint program_id = glCreateProgram();
33
34 ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
35
36 if (separable_program) {
37 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
38 }
39
40 glLinkProgram(program_id);
41
42 // Check the program
43 GLint result = GL_FALSE;
44 GLint info_log_length;
45 glGetProgramiv(program_id, GL_LINK_STATUS, &result);
46 glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
47
48 if (info_log_length > 1) {
49 std::string program_error(info_log_length, ' ');
50 glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
51 if (result == GL_TRUE) {
52 NGLOG_DEBUG(Render_OpenGL, "{}", program_error);
53 } else {
54 NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
55 }
56 }
57
58 ASSERT_MSG(result == GL_TRUE, "Shader not linked");
59
60 ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
61
62 return program_id;
63}
22 64
23} // namespace GLShader 65} // namespace GLShader
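An illustrative call pattern for the new helpers (the source strings are assumed to contain valid GLSL): handles equal to zero are skipped by the fold expression, so an absent geometry stage is simply passed as 0:

#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_shader_util.h"

GLuint BuildProgramSketch(const char* vertex_source, const char* fragment_source) {
    const GLuint vs = GLShader::LoadShader(vertex_source, GL_VERTEX_SHADER);
    const GLuint fs = GLShader::LoadShader(fragment_source, GL_FRAGMENT_SHADER);
    // No geometry shader: pass 0 and it is neither attached nor detached.
    const GLuint program = GLShader::LoadProgram(/*separable_program=*/true, vs, GLuint{0}, fs);
    // LoadProgram detaches the shaders after linking, so they can be deleted right away.
    glDeleteShader(vs);
    glDeleteShader(fs);
    return program;
}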
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 48ee80125..7909dcfc3 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -10,6 +10,14 @@
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
12 12
13using GLvec2 = std::array<GLfloat, 2>;
14using GLvec3 = std::array<GLfloat, 3>;
15using GLvec4 = std::array<GLfloat, 4>;
16
17using GLuvec2 = std::array<GLuint, 2>;
18using GLuvec3 = std::array<GLuint, 3>;
19using GLuvec4 = std::array<GLuint, 4>;
20
13namespace MaxwellToGL { 21namespace MaxwellToGL {
14 22
15using Maxwell = Tegra::Engines::Maxwell3D::Regs; 23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
39 47
40inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { 48inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
41 switch (topology) { 49 switch (topology) {
50 case Maxwell::PrimitiveTopology::Triangles:
51 return GL_TRIANGLES;
42 case Maxwell::PrimitiveTopology::TriangleStrip: 52 case Maxwell::PrimitiveTopology::TriangleStrip:
43 return GL_TRIANGLE_STRIP; 53 return GL_TRIANGLE_STRIP;
44 } 54 }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 78b50b227..5e78723a2 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -57,7 +57,7 @@ uniform sampler2D color_texture;
57void main() { 57void main() {
58 // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to 58 // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
59 // support more framebuffer pixel formats. 59 // support more framebuffer pixel formats.
60 color = texture(color_texture, frag_tex_coord).abgr; 60 color = texture(color_texture, frag_tex_coord);
61} 61}
62)"; 62)";
63 63
@@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() {
210 0.0f); 210 0.0f);
211 211
212 // Link shaders and get variable locations 212 // Link shaders and get variable locations
213 shader.Create(vertex_shader, nullptr, fragment_shader); 213 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
214 state.draw.shader_program = shader.handle; 214 state.draw.shader_program = shader.handle;
215 state.Apply(); 215 state.Apply();
216 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 216 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
@@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
311 } 311 }
312 312
313 std::array<ScreenRectVertex, 4> vertices = {{ 313 std::array<ScreenRectVertex, 4> vertices = {{
314 ScreenRectVertex(x, y, texcoords.top, right), 314 ScreenRectVertex(x, y, texcoords.top, left),
315 ScreenRectVertex(x + w, y, texcoords.bottom, right), 315 ScreenRectVertex(x + w, y, texcoords.bottom, left),
316 ScreenRectVertex(x, y + h, texcoords.top, left), 316 ScreenRectVertex(x, y + h, texcoords.top, right),
317 ScreenRectVertex(x + w, y + h, texcoords.bottom, left), 317 ScreenRectVertex(x + w, y + h, texcoords.bottom, right),
318 }}; 318 }};
319 319
320 state.texture_units[0].texture_2d = screen_info.display_texture; 320 state.texture_units[0].texture_2d = screen_info.display_texture;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c52f40037..2cc6d9a00 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -72,7 +72,7 @@ private:
72 // OpenGL object IDs 72 // OpenGL object IDs
73 OGLVertexArray vertex_array; 73 OGLVertexArray vertex_array;
74 OGLBuffer vertex_buffer; 74 OGLBuffer vertex_buffer;
75 OGLShader shader; 75 OGLProgram shader;
76 76
77 /// Display information for Switch screen 77 /// Display information for Switch screen
78 ScreenInfo screen_info; 78 ScreenInfo screen_info;
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index be0f7e22b..e0a14d48f 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
151 const u32 coarse_y = y & ~127; 151 const u32 coarse_y = y & ~127;
152 u32 morton_offset = 152 u32 morton_offset =
153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; 153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
154 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; 154 u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
155 155
156 data_ptrs[morton_to_gl] = morton_data + morton_offset; 156 data_ptrs[morton_to_gl] = morton_data + morton_offset;
157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; 157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];