diff options
| author | 2018-04-14 20:40:39 -0400 | |
|---|---|---|
| committer | 2018-04-14 20:40:39 -0400 | |
| commit | fdca7b5f7a4ca626c15e70ae6f684e88686277f5 (patch) | |
| tree | 57b8c1f1952c53d54a0c14b00543dd21302d661b /src | |
| parent | Merge pull request #323 from Hexagon12/stub-hid (diff) | |
| parent | shaders: Add NumTextureSamplers const, remove unused #pragma. (diff) | |
| download | yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.tar.gz yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.tar.xz yuzu-fdca7b5f7a4ca626c15e70ae6f684e88686277f5.zip | |
Merge pull request #329 from bunnei/shader-gen-part-1
OpenGL shader generation part 1
Diffstat (limited to 'src')
26 files changed, 1872 insertions, 642 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2ba1da195..d6eb9055b 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
| @@ -32,6 +32,8 @@ add_library(common STATIC | |||
| 32 | break_points.cpp | 32 | break_points.cpp |
| 33 | break_points.h | 33 | break_points.h |
| 34 | chunk_file.h | 34 | chunk_file.h |
| 35 | cityhash.cpp | ||
| 36 | cityhash.h | ||
| 35 | code_block.h | 37 | code_block.h |
| 36 | color.h | 38 | color.h |
| 37 | common_funcs.h | 39 | common_funcs.h |
| @@ -39,7 +41,6 @@ add_library(common STATIC | |||
| 39 | common_types.h | 41 | common_types.h |
| 40 | file_util.cpp | 42 | file_util.cpp |
| 41 | file_util.h | 43 | file_util.h |
| 42 | hash.cpp | ||
| 43 | hash.h | 44 | hash.h |
| 44 | linear_disk_cache.h | 45 | linear_disk_cache.h |
| 45 | logging/backend.cpp | 46 | logging/backend.cpp |
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 0cc0a1be0..5638bdbba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
| @@ -115,7 +115,7 @@ private: | |||
| 115 | // assignment would copy the full storage value, rather than just the bits | 115 | // assignment would copy the full storage value, rather than just the bits |
| 116 | // relevant to this particular bit field. | 116 | // relevant to this particular bit field. |
| 117 | // We don't delete it because we want BitField to be trivially copyable. | 117 | // We don't delete it because we want BitField to be trivially copyable. |
| 118 | BitField& operator=(const BitField&) = default; | 118 | constexpr BitField& operator=(const BitField&) = default; |
| 119 | 119 | ||
| 120 | // StorageType is T for non-enum types and the underlying type of T if | 120 | // StorageType is T for non-enum types and the underlying type of T if |
| 121 | // T is an enumeration. Note that T is wrapped within an enable_if in the | 121 | // T is an enumeration. Note that T is wrapped within an enable_if in the |
| @@ -166,20 +166,20 @@ public: | |||
| 166 | // so that we can use this within unions | 166 | // so that we can use this within unions |
| 167 | constexpr BitField() = default; | 167 | constexpr BitField() = default; |
| 168 | 168 | ||
| 169 | FORCE_INLINE operator T() const { | 169 | constexpr FORCE_INLINE operator T() const { |
| 170 | return Value(); | 170 | return Value(); |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | FORCE_INLINE void Assign(const T& value) { | 173 | constexpr FORCE_INLINE void Assign(const T& value) { |
| 174 | storage = (storage & ~mask) | FormatValue(value); | 174 | storage = (storage & ~mask) | FormatValue(value); |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | FORCE_INLINE T Value() const { | 177 | constexpr T Value() const { |
| 178 | return ExtractValue(storage); | 178 | return ExtractValue(storage); |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 | 181 | // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 |
| 182 | FORCE_INLINE bool ToBool() const { | 182 | constexpr FORCE_INLINE bool ToBool() const { |
| 183 | return Value() != 0; | 183 | return Value() != 0; |
| 184 | } | 184 | } |
| 185 | 185 | ||
diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp
new file mode 100644
index 000000000..de31ffbd8
--- /dev/null
+++ b/src/common/cityhash.cpp
| @@ -0,0 +1,340 @@ | |||
| 1 | // Copyright (c) 2011 Google, Inc. | ||
| 2 | // | ||
| 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 4 | // of this software and associated documentation files (the "Software"), to deal | ||
| 5 | // in the Software without restriction, including without limitation the rights | ||
| 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 7 | // copies of the Software, and to permit persons to whom the Software is | ||
| 8 | // furnished to do so, subject to the following conditions: | ||
| 9 | // | ||
| 10 | // The above copyright notice and this permission notice shall be included in | ||
| 11 | // all copies or substantial portions of the Software. | ||
| 12 | // | ||
| 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 19 | // THE SOFTWARE. | ||
| 20 | // | ||
| 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala | ||
| 22 | // | ||
| 23 | // This file provides CityHash64() and related functions. | ||
| 24 | // | ||
| 25 | // It's probably possible to create even faster hash functions by | ||
| 26 | // writing a program that systematically explores some of the space of | ||
| 27 | // possible hash functions, by using SIMD instructions, or by | ||
| 28 | // compromising on hash quality. | ||
| 29 | |||
| 30 | #include <algorithm> | ||
| 31 | #include <string.h> // for memcpy and memset | ||
| 32 | #include "cityhash.h" | ||
| 33 | #include "common/swap.h" | ||
| 34 | |||
| 35 | // #include "config.h" | ||
| 36 | #ifdef __GNUC__ | ||
| 37 | #define HAVE_BUILTIN_EXPECT 1 | ||
| 38 | #endif | ||
| 39 | #ifdef COMMON_BIG_ENDIAN | ||
| 40 | #define WORDS_BIGENDIAN 1 | ||
| 41 | #endif | ||
| 42 | |||
| 43 | using namespace std; | ||
| 44 | |||
| 45 | typedef uint8_t uint8; | ||
| 46 | typedef uint32_t uint32; | ||
| 47 | typedef uint64_t uint64; | ||
| 48 | |||
| 49 | namespace Common { | ||
| 50 | |||
| 51 | static uint64 UNALIGNED_LOAD64(const char* p) { | ||
| 52 | uint64 result; | ||
| 53 | memcpy(&result, p, sizeof(result)); | ||
| 54 | return result; | ||
| 55 | } | ||
| 56 | |||
| 57 | static uint32 UNALIGNED_LOAD32(const char* p) { | ||
| 58 | uint32 result; | ||
| 59 | memcpy(&result, p, sizeof(result)); | ||
| 60 | return result; | ||
| 61 | } | ||
| 62 | |||
| 63 | #ifdef WORDS_BIGENDIAN | ||
| 64 | #define uint32_in_expected_order(x) (swap32(x)) | ||
| 65 | #define uint64_in_expected_order(x) (swap64(x)) | ||
| 66 | #else | ||
| 67 | #define uint32_in_expected_order(x) (x) | ||
| 68 | #define uint64_in_expected_order(x) (x) | ||
| 69 | #endif | ||
| 70 | |||
| 71 | #if !defined(LIKELY) | ||
| 72 | #if HAVE_BUILTIN_EXPECT | ||
| 73 | #define LIKELY(x) (__builtin_expect(!!(x), 1)) | ||
| 74 | #else | ||
| 75 | #define LIKELY(x) (x) | ||
| 76 | #endif | ||
| 77 | #endif | ||
| 78 | |||
| 79 | static uint64 Fetch64(const char* p) { | ||
| 80 | return uint64_in_expected_order(UNALIGNED_LOAD64(p)); | ||
| 81 | } | ||
| 82 | |||
| 83 | static uint32 Fetch32(const char* p) { | ||
| 84 | return uint32_in_expected_order(UNALIGNED_LOAD32(p)); | ||
| 85 | } | ||
| 86 | |||
| 87 | // Some primes between 2^63 and 2^64 for various uses. | ||
| 88 | static const uint64 k0 = 0xc3a5c85c97cb3127ULL; | ||
| 89 | static const uint64 k1 = 0xb492b66fbe98f273ULL; | ||
| 90 | static const uint64 k2 = 0x9ae16a3b2f90404fULL; | ||
| 91 | |||
| 92 | // Bitwise right rotate. Normally this will compile to a single | ||
| 93 | // instruction, especially if the shift is a manifest constant. | ||
| 94 | static uint64 Rotate(uint64 val, int shift) { | ||
| 95 | // Avoid shifting by 64: doing so yields an undefined result. | ||
| 96 | return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); | ||
| 97 | } | ||
| 98 | |||
| 99 | static uint64 ShiftMix(uint64 val) { | ||
| 100 | return val ^ (val >> 47); | ||
| 101 | } | ||
| 102 | |||
| 103 | static uint64 HashLen16(uint64 u, uint64 v) { | ||
| 104 | return Hash128to64(uint128(u, v)); | ||
| 105 | } | ||
| 106 | |||
| 107 | static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { | ||
| 108 | // Murmur-inspired hashing. | ||
| 109 | uint64 a = (u ^ v) * mul; | ||
| 110 | a ^= (a >> 47); | ||
| 111 | uint64 b = (v ^ a) * mul; | ||
| 112 | b ^= (b >> 47); | ||
| 113 | b *= mul; | ||
| 114 | return b; | ||
| 115 | } | ||
| 116 | |||
| 117 | static uint64 HashLen0to16(const char* s, size_t len) { | ||
| 118 | if (len >= 8) { | ||
| 119 | uint64 mul = k2 + len * 2; | ||
| 120 | uint64 a = Fetch64(s) + k2; | ||
| 121 | uint64 b = Fetch64(s + len - 8); | ||
| 122 | uint64 c = Rotate(b, 37) * mul + a; | ||
| 123 | uint64 d = (Rotate(a, 25) + b) * mul; | ||
| 124 | return HashLen16(c, d, mul); | ||
| 125 | } | ||
| 126 | if (len >= 4) { | ||
| 127 | uint64 mul = k2 + len * 2; | ||
| 128 | uint64 a = Fetch32(s); | ||
| 129 | return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); | ||
| 130 | } | ||
| 131 | if (len > 0) { | ||
| 132 | uint8 a = s[0]; | ||
| 133 | uint8 b = s[len >> 1]; | ||
| 134 | uint8 c = s[len - 1]; | ||
| 135 | uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); | ||
| 136 | uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2); | ||
| 137 | return ShiftMix(y * k2 ^ z * k0) * k2; | ||
| 138 | } | ||
| 139 | return k2; | ||
| 140 | } | ||
| 141 | |||
| 142 | // This probably works well for 16-byte strings as well, but it may be overkill | ||
| 143 | // in that case. | ||
| 144 | static uint64 HashLen17to32(const char* s, size_t len) { | ||
| 145 | uint64 mul = k2 + len * 2; | ||
| 146 | uint64 a = Fetch64(s) * k1; | ||
| 147 | uint64 b = Fetch64(s + 8); | ||
| 148 | uint64 c = Fetch64(s + len - 8) * mul; | ||
| 149 | uint64 d = Fetch64(s + len - 16) * k2; | ||
| 150 | return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); | ||
| 151 | } | ||
| 152 | |||
| 153 | // Return a 16-byte hash for 48 bytes. Quick and dirty. | ||
| 154 | // Callers do best to use "random-looking" values for a and b. | ||
| 155 | static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, | ||
| 156 | uint64 b) { | ||
| 157 | a += w; | ||
| 158 | b = Rotate(b + a + z, 21); | ||
| 159 | uint64 c = a; | ||
| 160 | a += x; | ||
| 161 | a += y; | ||
| 162 | b += Rotate(a, 44); | ||
| 163 | return make_pair(a + z, b + c); | ||
| 164 | } | ||
| 165 | |||
| 166 | // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. | ||
| 167 | static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { | ||
| 168 | return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, | ||
| 169 | b); | ||
| 170 | } | ||
| 171 | |||
| 172 | // Return an 8-byte hash for 33 to 64 bytes. | ||
| 173 | static uint64 HashLen33to64(const char* s, size_t len) { | ||
| 174 | uint64 mul = k2 + len * 2; | ||
| 175 | uint64 a = Fetch64(s) * k2; | ||
| 176 | uint64 b = Fetch64(s + 8); | ||
| 177 | uint64 c = Fetch64(s + len - 24); | ||
| 178 | uint64 d = Fetch64(s + len - 32); | ||
| 179 | uint64 e = Fetch64(s + 16) * k2; | ||
| 180 | uint64 f = Fetch64(s + 24) * 9; | ||
| 181 | uint64 g = Fetch64(s + len - 8); | ||
| 182 | uint64 h = Fetch64(s + len - 16) * mul; | ||
| 183 | uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; | ||
| 184 | uint64 v = ((a + g) ^ d) + f + 1; | ||
| 185 | uint64 w = swap64((u + v) * mul) + h; | ||
| 186 | uint64 x = Rotate(e + f, 42) + c; | ||
| 187 | uint64 y = (swap64((v + w) * mul) + g) * mul; | ||
| 188 | uint64 z = e + f + c; | ||
| 189 | a = swap64((x + z) * mul + y) + b; | ||
| 190 | b = ShiftMix((z + a) * mul + d + h) * mul; | ||
| 191 | return b + x; | ||
| 192 | } | ||
| 193 | |||
| 194 | uint64 CityHash64(const char* s, size_t len) { | ||
| 195 | if (len <= 32) { | ||
| 196 | if (len <= 16) { | ||
| 197 | return HashLen0to16(s, len); | ||
| 198 | } else { | ||
| 199 | return HashLen17to32(s, len); | ||
| 200 | } | ||
| 201 | } else if (len <= 64) { | ||
| 202 | return HashLen33to64(s, len); | ||
| 203 | } | ||
| 204 | |||
| 205 | // For strings over 64 bytes we hash the end first, and then as we | ||
| 206 | // loop we keep 56 bytes of state: v, w, x, y, and z. | ||
| 207 | uint64 x = Fetch64(s + len - 40); | ||
| 208 | uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); | ||
| 209 | uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); | ||
| 210 | pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); | ||
| 211 | pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); | ||
| 212 | x = x * k1 + Fetch64(s); | ||
| 213 | |||
| 214 | // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. | ||
| 215 | len = (len - 1) & ~static_cast<size_t>(63); | ||
| 216 | do { | ||
| 217 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||
| 218 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||
| 219 | x ^= w.second; | ||
| 220 | y += v.first + Fetch64(s + 40); | ||
| 221 | z = Rotate(z + w.first, 33) * k1; | ||
| 222 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||
| 223 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||
| 224 | std::swap(z, x); | ||
| 225 | s += 64; | ||
| 226 | len -= 64; | ||
| 227 | } while (len != 0); | ||
| 228 | return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, | ||
| 229 | HashLen16(v.second, w.second) + x); | ||
| 230 | } | ||
| 231 | |||
| 232 | uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { | ||
| 233 | return CityHash64WithSeeds(s, len, k2, seed); | ||
| 234 | } | ||
| 235 | |||
| 236 | uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { | ||
| 237 | return HashLen16(CityHash64(s, len) - seed0, seed1); | ||
| 238 | } | ||
| 239 | |||
| 240 | // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings | ||
| 241 | // of any length representable in signed long. Based on City and Murmur. | ||
| 242 | static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { | ||
| 243 | uint64 a = Uint128Low64(seed); | ||
| 244 | uint64 b = Uint128High64(seed); | ||
| 245 | uint64 c = 0; | ||
| 246 | uint64 d = 0; | ||
| 247 | signed long l = static_cast<long>(len) - 16; | ||
| 248 | if (l <= 0) { // len <= 16 | ||
| 249 | a = ShiftMix(a * k1) * k1; | ||
| 250 | c = b * k1 + HashLen0to16(s, len); | ||
| 251 | d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); | ||
| 252 | } else { // len > 16 | ||
| 253 | c = HashLen16(Fetch64(s + len - 8) + k1, a); | ||
| 254 | d = HashLen16(b + len, c + Fetch64(s + len - 16)); | ||
| 255 | a += d; | ||
| 256 | do { | ||
| 257 | a ^= ShiftMix(Fetch64(s) * k1) * k1; | ||
| 258 | a *= k1; | ||
| 259 | b ^= a; | ||
| 260 | c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; | ||
| 261 | c *= k1; | ||
| 262 | d ^= c; | ||
| 263 | s += 16; | ||
| 264 | l -= 16; | ||
| 265 | } while (l > 0); | ||
| 266 | } | ||
| 267 | a = HashLen16(a, c); | ||
| 268 | b = HashLen16(d, b); | ||
| 269 | return uint128(a ^ b, HashLen16(b, a)); | ||
| 270 | } | ||
| 271 | |||
| 272 | uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { | ||
| 273 | if (len < 128) { | ||
| 274 | return CityMurmur(s, len, seed); | ||
| 275 | } | ||
| 276 | |||
| 277 | // We expect len >= 128 to be the common case. Keep 56 bytes of state: | ||
| 278 | // v, w, x, y, and z. | ||
| 279 | pair<uint64, uint64> v, w; | ||
| 280 | uint64 x = Uint128Low64(seed); | ||
| 281 | uint64 y = Uint128High64(seed); | ||
| 282 | uint64 z = len * k1; | ||
| 283 | v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); | ||
| 284 | v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); | ||
| 285 | w.first = Rotate(y + z, 35) * k1 + x; | ||
| 286 | w.second = Rotate(x + Fetch64(s + 88), 53) * k1; | ||
| 287 | |||
| 288 | // This is the same inner loop as CityHash64(), manually unrolled. | ||
| 289 | do { | ||
| 290 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||
| 291 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||
| 292 | x ^= w.second; | ||
| 293 | y += v.first + Fetch64(s + 40); | ||
| 294 | z = Rotate(z + w.first, 33) * k1; | ||
| 295 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||
| 296 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||
| 297 | std::swap(z, x); | ||
| 298 | s += 64; | ||
| 299 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||
| 300 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||
| 301 | x ^= w.second; | ||
| 302 | y += v.first + Fetch64(s + 40); | ||
| 303 | z = Rotate(z + w.first, 33) * k1; | ||
| 304 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||
| 305 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||
| 306 | std::swap(z, x); | ||
| 307 | s += 64; | ||
| 308 | len -= 128; | ||
| 309 | } while (LIKELY(len >= 128)); | ||
| 310 | x += Rotate(v.first + z, 49) * k0; | ||
| 311 | y = y * k0 + Rotate(w.second, 37); | ||
| 312 | z = z * k0 + Rotate(w.first, 27); | ||
| 313 | w.first *= 9; | ||
| 314 | v.first *= k0; | ||
| 315 | // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. | ||
| 316 | for (size_t tail_done = 0; tail_done < len;) { | ||
| 317 | tail_done += 32; | ||
| 318 | y = Rotate(x + y, 42) * k0 + v.second; | ||
| 319 | w.first += Fetch64(s + len - tail_done + 16); | ||
| 320 | x = x * k0 + w.first; | ||
| 321 | z += w.second + Fetch64(s + len - tail_done); | ||
| 322 | w.second += v.first; | ||
| 323 | v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); | ||
| 324 | v.first *= k0; | ||
| 325 | } | ||
| 326 | // At this point our 56 bytes of state should contain more than | ||
| 327 | // enough information for a strong 128-bit hash. We use two | ||
| 328 | // different 56-byte-to-8-byte hashes to get a 16-byte final result. | ||
| 329 | x = HashLen16(x, v.first); | ||
| 330 | y = HashLen16(y + z, w.first); | ||
| 331 | return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); | ||
| 332 | } | ||
| 333 | |||
| 334 | uint128 CityHash128(const char* s, size_t len) { | ||
| 335 | return len >= 16 | ||
| 336 | ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) | ||
| 337 | : CityHash128WithSeed(s, len, uint128(k0, k1)); | ||
| 338 | } | ||
| 339 | |||
| 340 | } // namespace Common | ||
diff --git a/src/common/cityhash.h b/src/common/cityhash.h
new file mode 100644
index 000000000..bcebdb150
--- /dev/null
+++ b/src/common/cityhash.h
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright (c) 2011 Google, Inc. | ||
| 2 | // | ||
| 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 4 | // of this software and associated documentation files (the "Software"), to deal | ||
| 5 | // in the Software without restriction, including without limitation the rights | ||
| 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 7 | // copies of the Software, and to permit persons to whom the Software is | ||
| 8 | // furnished to do so, subject to the following conditions: | ||
| 9 | // | ||
| 10 | // The above copyright notice and this permission notice shall be included in | ||
| 11 | // all copies or substantial portions of the Software. | ||
| 12 | // | ||
| 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 19 | // THE SOFTWARE. | ||
| 20 | // | ||
| 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala | ||
| 22 | // | ||
| 23 | // http://code.google.com/p/cityhash/ | ||
| 24 | // | ||
| 25 | // This file provides a few functions for hashing strings. All of them are | ||
| 26 | // high-quality functions in the sense that they pass standard tests such | ||
| 27 | // as Austin Appleby's SMHasher. They are also fast. | ||
| 28 | // | ||
| 29 | // For 64-bit x86 code, on short strings, we don't know of anything faster than | ||
| 30 | // CityHash64 that is of comparable quality. We believe our nearest competitor | ||
| 31 | // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash | ||
| 32 | // tables and most other hashing (excluding cryptography). | ||
| 33 | // | ||
| 34 | // For 64-bit x86 code, on long strings, the picture is more complicated. | ||
| 35 | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., | ||
| 36 | // CityHashCrc128 appears to be faster than all competitors of comparable | ||
| 37 | // quality. CityHash128 is also good but not quite as fast. We believe our | ||
| 38 | // nearest competitor is Bob Jenkins' Spooky. We don't have great data for | ||
| 39 | // other 64-bit CPUs, but for long strings we know that Spooky is slightly | ||
| 40 | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. | ||
| 41 | // Note that CityHashCrc128 is declared in citycrc.h. | ||
| 42 | // | ||
| 43 | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that | ||
| 44 | // is of comparable quality. We believe our nearest competitor is Murmur3A. | ||
| 45 | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) | ||
| 46 | // | ||
| 47 | // Functions in the CityHash family are not suitable for cryptography. | ||
| 48 | // | ||
| 49 | // Please see CityHash's README file for more details on our performance | ||
| 50 | // measurements and so on. | ||
| 51 | // | ||
| 52 | // WARNING: This code has been only lightly tested on big-endian platforms! | ||
| 53 | // It is known to work well on little-endian platforms that have a small penalty | ||
| 54 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. | ||
| 55 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; | ||
| 56 | // bug reports are welcome. | ||
| 57 | // | ||
| 58 | // By the way, for some hash functions, given strings a and b, the hash | ||
| 59 | // of a+b is easily derived from the hashes of a and b. This property | ||
| 60 | // doesn't hold for any hash functions in this file. | ||
| 61 | |||
| 62 | #pragma once | ||
| 63 | |||
| 64 | #include <utility> | ||
| 65 | #include <stdint.h> | ||
| 66 | #include <stdlib.h> // for size_t. | ||
| 67 | |||
| 68 | namespace Common { | ||
| 69 | |||
| 70 | typedef std::pair<uint64_t, uint64_t> uint128; | ||
| 71 | |||
| 72 | inline uint64_t Uint128Low64(const uint128& x) { | ||
| 73 | return x.first; | ||
| 74 | } | ||
| 75 | inline uint64_t Uint128High64(const uint128& x) { | ||
| 76 | return x.second; | ||
| 77 | } | ||
| 78 | |||
| 79 | // Hash function for a byte array. | ||
| 80 | uint64_t CityHash64(const char* buf, size_t len); | ||
| 81 | |||
| 82 | // Hash function for a byte array. For convenience, a 64-bit seed is also | ||
| 83 | // hashed into the result. | ||
| 84 | uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed); | ||
| 85 | |||
| 86 | // Hash function for a byte array. For convenience, two seeds are also | ||
| 87 | // hashed into the result. | ||
| 88 | uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1); | ||
| 89 | |||
| 90 | // Hash function for a byte array. | ||
| 91 | uint128 CityHash128(const char* s, size_t len); | ||
| 92 | |||
| 93 | // Hash function for a byte array. For convenience, a 128-bit seed is also | ||
| 94 | // hashed into the result. | ||
| 95 | uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed); | ||
| 96 | |||
| 97 | // Hash 128 input bits down to 64 bits of output. | ||
| 98 | // This is intended to be a reasonably good hash function. | ||
| 99 | inline uint64_t Hash128to64(const uint128& x) { | ||
| 100 | // Murmur-inspired hashing. | ||
| 101 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; | ||
| 102 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; | ||
| 103 | a ^= (a >> 47); | ||
| 104 | uint64_t b = (Uint128High64(x) ^ a) * kMul; | ||
| 105 | b ^= (b >> 47); | ||
| 106 | b *= kMul; | ||
| 107 | return b; | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Common | ||
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
deleted file mode 100644
index a02e9e5b9..000000000
--- a/src/common/hash.cpp
+++ /dev/null
| @@ -1,141 +0,0 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #if defined(_MSC_VER) | ||
| 6 | #include <stdlib.h> | ||
| 7 | #endif | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | ||
| 11 | |||
| 12 | namespace Common { | ||
| 13 | |||
| 14 | // MurmurHash3 was written by Austin Appleby, and is placed in the public | ||
| 15 | // domain. The author hereby disclaims copyright to this source code. | ||
| 16 | |||
| 17 | // Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do | ||
| 18 | // the conversion here | ||
| 19 | static FORCE_INLINE u64 getblock64(const u64* p, size_t i) { | ||
| 20 | return p[i]; | ||
| 21 | } | ||
| 22 | |||
| 23 | // Finalization mix - force all bits of a hash block to avalanche | ||
| 24 | static FORCE_INLINE u64 fmix64(u64 k) { | ||
| 25 | k ^= k >> 33; | ||
| 26 | k *= 0xff51afd7ed558ccdllu; | ||
| 27 | k ^= k >> 33; | ||
| 28 | k *= 0xc4ceb9fe1a85ec53llu; | ||
| 29 | k ^= k >> 33; | ||
| 30 | |||
| 31 | return k; | ||
| 32 | } | ||
| 33 | |||
| 34 | // This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit | ||
| 35 | // platforms (MurmurHash3_x64_128). It was taken from: | ||
| 36 | // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp | ||
| 37 | void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) { | ||
| 38 | const u8* data = (const u8*)key; | ||
| 39 | const size_t nblocks = len / 16; | ||
| 40 | |||
| 41 | u64 h1 = seed; | ||
| 42 | u64 h2 = seed; | ||
| 43 | |||
| 44 | const u64 c1 = 0x87c37b91114253d5llu; | ||
| 45 | const u64 c2 = 0x4cf5ad432745937fllu; | ||
| 46 | |||
| 47 | // Body | ||
| 48 | |||
| 49 | const u64* blocks = (const u64*)(data); | ||
| 50 | |||
| 51 | for (size_t i = 0; i < nblocks; i++) { | ||
| 52 | u64 k1 = getblock64(blocks, i * 2 + 0); | ||
| 53 | u64 k2 = getblock64(blocks, i * 2 + 1); | ||
| 54 | |||
| 55 | k1 *= c1; | ||
| 56 | k1 = _rotl64(k1, 31); | ||
| 57 | k1 *= c2; | ||
| 58 | h1 ^= k1; | ||
| 59 | |||
| 60 | h1 = _rotl64(h1, 27); | ||
| 61 | h1 += h2; | ||
| 62 | h1 = h1 * 5 + 0x52dce729; | ||
| 63 | |||
| 64 | k2 *= c2; | ||
| 65 | k2 = _rotl64(k2, 33); | ||
| 66 | k2 *= c1; | ||
| 67 | h2 ^= k2; | ||
| 68 | |||
| 69 | h2 = _rotl64(h2, 31); | ||
| 70 | h2 += h1; | ||
| 71 | h2 = h2 * 5 + 0x38495ab5; | ||
| 72 | } | ||
| 73 | |||
| 74 | // Tail | ||
| 75 | |||
| 76 | const u8* tail = (const u8*)(data + nblocks * 16); | ||
| 77 | |||
| 78 | u64 k1 = 0; | ||
| 79 | u64 k2 = 0; | ||
| 80 | |||
| 81 | switch (len & 15) { | ||
| 82 | case 15: | ||
| 83 | k2 ^= ((u64)tail[14]) << 48; | ||
| 84 | case 14: | ||
| 85 | k2 ^= ((u64)tail[13]) << 40; | ||
| 86 | case 13: | ||
| 87 | k2 ^= ((u64)tail[12]) << 32; | ||
| 88 | case 12: | ||
| 89 | k2 ^= ((u64)tail[11]) << 24; | ||
| 90 | case 11: | ||
| 91 | k2 ^= ((u64)tail[10]) << 16; | ||
| 92 | case 10: | ||
| 93 | k2 ^= ((u64)tail[9]) << 8; | ||
| 94 | case 9: | ||
| 95 | k2 ^= ((u64)tail[8]) << 0; | ||
| 96 | k2 *= c2; | ||
| 97 | k2 = _rotl64(k2, 33); | ||
| 98 | k2 *= c1; | ||
| 99 | h2 ^= k2; | ||
| 100 | |||
| 101 | case 8: | ||
| 102 | k1 ^= ((u64)tail[7]) << 56; | ||
| 103 | case 7: | ||
| 104 | k1 ^= ((u64)tail[6]) << 48; | ||
| 105 | case 6: | ||
| 106 | k1 ^= ((u64)tail[5]) << 40; | ||
| 107 | case 5: | ||
| 108 | k1 ^= ((u64)tail[4]) << 32; | ||
| 109 | case 4: | ||
| 110 | k1 ^= ((u64)tail[3]) << 24; | ||
| 111 | case 3: | ||
| 112 | k1 ^= ((u64)tail[2]) << 16; | ||
| 113 | case 2: | ||
| 114 | k1 ^= ((u64)tail[1]) << 8; | ||
| 115 | case 1: | ||
| 116 | k1 ^= ((u64)tail[0]) << 0; | ||
| 117 | k1 *= c1; | ||
| 118 | k1 = _rotl64(k1, 31); | ||
| 119 | k1 *= c2; | ||
| 120 | h1 ^= k1; | ||
| 121 | }; | ||
| 122 | |||
| 123 | // Finalization | ||
| 124 | |||
| 125 | h1 ^= len; | ||
| 126 | h2 ^= len; | ||
| 127 | |||
| 128 | h1 += h2; | ||
| 129 | h2 += h1; | ||
| 130 | |||
| 131 | h1 = fmix64(h1); | ||
| 132 | h2 = fmix64(h2); | ||
| 133 | |||
| 134 | h1 += h2; | ||
| 135 | h2 += h1; | ||
| 136 | |||
| 137 | ((u64*)out)[0] = h1; | ||
| 138 | ((u64*)out)[1] = h2; | ||
| 139 | } | ||
| 140 | |||
| 141 | } // namespace Common | ||
diff --git a/src/common/hash.h b/src/common/hash.h
index ee2560dad..73c326980 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
| @@ -5,12 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstring> | ||
| 9 | #include "common/cityhash.h" | ||
| 8 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 9 | 11 | ||
| 10 | namespace Common { | 12 | namespace Common { |
| 11 | 13 | ||
| 12 | void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); | ||
| 13 | |||
| 14 | /** | 14 | /** |
| 15 | * Computes a 64-bit hash over the specified block of data | 15 | * Computes a 64-bit hash over the specified block of data |
| 16 | * @param data Block of data to compute hash over | 16 | * @param data Block of data to compute hash over |
| @@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); | |||
| 18 | * @returns 64-bit hash value that was computed over the data block | 18 | * @returns 64-bit hash value that was computed over the data block |
| 19 | */ | 19 | */ |
| 20 | static inline u64 ComputeHash64(const void* data, size_t len) { | 20 | static inline u64 ComputeHash64(const void* data, size_t len) { |
| 21 | u64 res[2]; | 21 | return CityHash64(static_cast<const char*>(data), len); |
| 22 | MurmurHash3_128(data, len, 0, res); | 22 | } |
| 23 | return res[0]; | 23 | |
| 24 | /** | ||
| 25 | * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical | ||
| 26 | * that either the struct includes no padding, or that any padding is initialized to a known value | ||
| 27 | * by memsetting the struct to 0 before filling it in. | ||
| 28 | */ | ||
| 29 | template <typename T> | ||
| 30 | static inline u64 ComputeStructHash64(const T& data) { | ||
| 31 | static_assert(std::is_trivially_copyable<T>(), | ||
| 32 | "Type passed to ComputeStructHash64 must be trivially copyable"); | ||
| 33 | return ComputeHash64(&data, sizeof(data)); | ||
| 24 | } | 34 | } |
| 25 | 35 | ||
| 36 | /// A helper template that ensures the padding in a struct is initialized by memsetting to 0. | ||
| 37 | template <typename T> | ||
| 38 | struct HashableStruct { | ||
| 39 | // In addition to being trivially copyable, T must also have a trivial default constructor, | ||
| 40 | // because any member initialization would be overridden by memset | ||
| 41 | static_assert(std::is_trivial<T>(), "Type passed to HashableStruct must be trivial"); | ||
| 42 | /* | ||
| 43 | * We use a union because "implicitly-defined copy/move constructor for a union X copies the | ||
| 44 | * object representation of X." and "implicitly-defined copy assignment operator for a union X | ||
| 45 | * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy. | ||
| 46 | * This is important because the padding bytes are included in the hash and comparison between | ||
| 47 | * objects. | ||
| 48 | */ | ||
| 49 | union { | ||
| 50 | T state; | ||
| 51 | }; | ||
| 52 | |||
| 53 | HashableStruct() { | ||
| 54 | // Memset structure to zero padding bits, so that they will be deterministic when hashing | ||
| 55 | std::memset(&state, 0, sizeof(T)); | ||
| 56 | } | ||
| 57 | |||
| 58 | bool operator==(const HashableStruct<T>& o) const { | ||
| 59 | return std::memcmp(&state, &o.state, sizeof(T)) == 0; | ||
| 60 | }; | ||
| 61 | |||
| 62 | bool operator!=(const HashableStruct<T>& o) const { | ||
| 63 | return !(*this == o); | ||
| 64 | }; | ||
| 65 | |||
| 66 | size_t Hash() const { | ||
| 67 | return Common::ComputeStructHash64(state); | ||
| 68 | } | ||
| 69 | }; | ||
| 70 | |||
| 26 | } // namespace Common | 71 | } // namespace Common |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a710c4bc5..281810357 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -9,6 +9,7 @@ add_library(video_core STATIC | |||
| 9 | engines/maxwell_3d.h | 9 | engines/maxwell_3d.h |
| 10 | engines/maxwell_compute.cpp | 10 | engines/maxwell_compute.cpp |
| 11 | engines/maxwell_compute.h | 11 | engines/maxwell_compute.h |
| 12 | engines/shader_bytecode.h | ||
| 12 | gpu.cpp | 13 | gpu.cpp |
| 13 | gpu.h | 14 | gpu.h |
| 14 | macro_interpreter.cpp | 15 | macro_interpreter.cpp |
| @@ -27,6 +28,8 @@ add_library(video_core STATIC | |||
| 27 | renderer_opengl/gl_shader_decompiler.h | 28 | renderer_opengl/gl_shader_decompiler.h |
| 28 | renderer_opengl/gl_shader_gen.cpp | 29 | renderer_opengl/gl_shader_gen.cpp |
| 29 | renderer_opengl/gl_shader_gen.h | 30 | renderer_opengl/gl_shader_gen.h |
| 31 | renderer_opengl/gl_shader_manager.cpp | ||
| 32 | renderer_opengl/gl_shader_manager.h | ||
| 30 | renderer_opengl/gl_shader_util.cpp | 33 | renderer_opengl/gl_shader_util.cpp |
| 31 | renderer_opengl/gl_shader_util.h | 34 | renderer_opengl/gl_shader_util.h |
| 32 | renderer_opengl/gl_state.cpp | 35 | renderer_opengl/gl_state.cpp |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 98b39b2ff..9c6236c39 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -427,14 +427,11 @@ public: | |||
| 427 | BitField<0, 1, u32> enable; | 427 | BitField<0, 1, u32> enable; |
| 428 | BitField<4, 4, ShaderProgram> program; | 428 | BitField<4, 4, ShaderProgram> program; |
| 429 | }; | 429 | }; |
| 430 | u32 start_id; | 430 | u32 offset; |
| 431 | INSERT_PADDING_WORDS(1); | 431 | INSERT_PADDING_WORDS(14); |
| 432 | u32 gpr_alloc; | ||
| 433 | ShaderStage type; | ||
| 434 | INSERT_PADDING_WORDS(9); | ||
| 435 | } shader_config[MaxShaderProgram]; | 432 | } shader_config[MaxShaderProgram]; |
| 436 | 433 | ||
| 437 | INSERT_PADDING_WORDS(0x8C); | 434 | INSERT_PADDING_WORDS(0x80); |
| 438 | 435 | ||
| 439 | struct { | 436 | struct { |
| 440 | u32 cb_size; | 437 | u32 cb_size; |
| @@ -507,6 +504,7 @@ public: | |||
| 507 | }; | 504 | }; |
| 508 | 505 | ||
| 509 | State state{}; | 506 | State state{}; |
| 507 | MemoryManager& memory_manager; | ||
| 510 | 508 | ||
| 511 | /// Reads a register value located at the input method address | 509 | /// Reads a register value located at the input method address |
| 512 | u32 GetRegisterValue(u32 method) const; | 510 | u32 GetRegisterValue(u32 method) const; |
| @@ -521,8 +519,6 @@ public: | |||
| 521 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; | 519 | std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; |
| 522 | 520 | ||
| 523 | private: | 521 | private: |
| 524 | MemoryManager& memory_manager; | ||
| 525 | |||
| 526 | std::unordered_map<u32, std::vector<u32>> uploaded_macros; | 522 | std::unordered_map<u32, std::vector<u32>> uploaded_macros; |
| 527 | 523 | ||
| 528 | /// Macro method that is currently being executed / being fed parameters. | 524 | /// Macro method that is currently being executed / being fed parameters. |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h new file mode 100644 index 000000000..eff0c35a1 --- /dev/null +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -0,0 +1,327 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <map> | ||
| 8 | #include <string> | ||
| 9 | #include "common/bit_field.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | namespace Shader { | ||
| 13 | |||
| 14 | struct Register { | ||
| 15 | Register() = default; | ||
| 16 | |||
| 17 | constexpr Register(u64 value) : value(value) {} | ||
| 18 | |||
| 19 | constexpr u64 GetIndex() const { | ||
| 20 | return value; | ||
| 21 | } | ||
| 22 | |||
| 23 | constexpr operator u64() const { | ||
| 24 | return value; | ||
| 25 | } | ||
| 26 | |||
| 27 | template <typename T> | ||
| 28 | constexpr u64 operator-(const T& oth) const { | ||
| 29 | return value - oth; | ||
| 30 | } | ||
| 31 | |||
| 32 | template <typename T> | ||
| 33 | constexpr u64 operator&(const T& oth) const { | ||
| 34 | return value & oth; | ||
| 35 | } | ||
| 36 | |||
| 37 | constexpr u64 operator&(const Register& oth) const { | ||
| 38 | return value & oth.value; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr u64 operator~() const { | ||
| 42 | return ~value; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | u64 value; | ||
| 47 | }; | ||
| 48 | |||
| 49 | union Attribute { | ||
| 50 | Attribute() = default; | ||
| 51 | |||
| 52 | constexpr Attribute(u64 value) : value(value) {} | ||
| 53 | |||
| 54 | enum class Index : u64 { | ||
| 55 | Position = 7, | ||
| 56 | Attribute_0 = 8, | ||
| 57 | }; | ||
| 58 | |||
| 59 | union { | ||
| 60 | BitField<22, 2, u64> element; | ||
| 61 | BitField<24, 6, Index> index; | ||
| 62 | BitField<47, 3, u64> size; | ||
| 63 | } fmt20; | ||
| 64 | |||
| 65 | union { | ||
| 66 | BitField<30, 2, u64> element; | ||
| 67 | BitField<32, 6, Index> index; | ||
| 68 | } fmt28; | ||
| 69 | |||
| 70 | BitField<39, 8, u64> reg; | ||
| 71 | u64 value; | ||
| 72 | }; | ||
| 73 | |||
| 74 | union Uniform { | ||
| 75 | BitField<20, 14, u64> offset; | ||
| 76 | BitField<34, 5, u64> index; | ||
| 77 | }; | ||
| 78 | |||
| 79 | union OpCode { | ||
| 80 | enum class Id : u64 { | ||
| 81 | TEXS = 0x6C, | ||
| 82 | IPA = 0xE0, | ||
| 83 | FFMA_IMM = 0x65, | ||
| 84 | FFMA_CR = 0x93, | ||
| 85 | FFMA_RC = 0xA3, | ||
| 86 | FFMA_RR = 0xB3, | ||
| 87 | |||
| 88 | FADD_C = 0x98B, | ||
| 89 | FMUL_C = 0x98D, | ||
| 90 | MUFU = 0xA10, | ||
| 91 | FADD_R = 0xB8B, | ||
| 92 | FMUL_R = 0xB8D, | ||
| 93 | LD_A = 0x1DFB, | ||
| 94 | ST_A = 0x1DFE, | ||
| 95 | |||
| 96 | FSETP_R = 0x5BB, | ||
| 97 | FSETP_C = 0x4BB, | ||
| 98 | EXIT = 0xE30, | ||
| 99 | KIL = 0xE33, | ||
| 100 | |||
| 101 | FMUL_IMM = 0x70D, | ||
| 102 | FMUL_IMM_x = 0x72D, | ||
| 103 | FADD_IMM = 0x70B, | ||
| 104 | FADD_IMM_x = 0x72B, | ||
| 105 | }; | ||
| 106 | |||
| 107 | enum class Type { | ||
| 108 | Trivial, | ||
| 109 | Arithmetic, | ||
| 110 | Ffma, | ||
| 111 | Flow, | ||
| 112 | Memory, | ||
| 113 | Unknown, | ||
| 114 | }; | ||
| 115 | |||
| 116 | struct Info { | ||
| 117 | Type type; | ||
| 118 | std::string name; | ||
| 119 | }; | ||
| 120 | |||
| 121 | OpCode() = default; | ||
| 122 | |||
| 123 | constexpr OpCode(Id value) : value(static_cast<u64>(value)) {} | ||
| 124 | |||
| 125 | constexpr OpCode(u64 value) : value{value} {} | ||
| 126 | |||
| 127 | constexpr Id EffectiveOpCode() const { | ||
| 128 | switch (op1) { | ||
| 129 | case Id::TEXS: | ||
| 130 | return op1; | ||
| 131 | } | ||
| 132 | |||
| 133 | switch (op2) { | ||
| 134 | case Id::IPA: | ||
| 135 | return op2; | ||
| 136 | } | ||
| 137 | |||
| 138 | switch (op3) { | ||
| 139 | case Id::FFMA_IMM: | ||
| 140 | case Id::FFMA_CR: | ||
| 141 | case Id::FFMA_RC: | ||
| 142 | case Id::FFMA_RR: | ||
| 143 | return op3; | ||
| 144 | } | ||
| 145 | |||
| 146 | switch (op4) { | ||
| 147 | case Id::EXIT: | ||
| 148 | case Id::FSETP_R: | ||
| 149 | case Id::FSETP_C: | ||
| 150 | case Id::KIL: | ||
| 151 | return op4; | ||
| 152 | } | ||
| 153 | |||
| 154 | switch (op5) { | ||
| 155 | case Id::MUFU: | ||
| 156 | case Id::LD_A: | ||
| 157 | case Id::ST_A: | ||
| 158 | case Id::FADD_R: | ||
| 159 | case Id::FADD_C: | ||
| 160 | case Id::FMUL_R: | ||
| 161 | case Id::FMUL_C: | ||
| 162 | return op5; | ||
| 163 | |||
| 164 | case Id::FMUL_IMM: | ||
| 165 | case Id::FMUL_IMM_x: | ||
| 166 | return Id::FMUL_IMM; | ||
| 167 | |||
| 168 | case Id::FADD_IMM: | ||
| 169 | case Id::FADD_IMM_x: | ||
| 170 | return Id::FADD_IMM; | ||
| 171 | } | ||
| 172 | |||
| 173 | return static_cast<Id>(value); | ||
| 174 | } | ||
| 175 | |||
| 176 | static const Info& GetInfo(const OpCode& opcode) { | ||
| 177 | static const std::map<Id, Info> info_table{BuildInfoTable()}; | ||
| 178 | const auto& search{info_table.find(opcode.EffectiveOpCode())}; | ||
| 179 | if (search != info_table.end()) { | ||
| 180 | return search->second; | ||
| 181 | } | ||
| 182 | |||
| 183 | static const Info unknown{Type::Unknown, "UNK"}; | ||
| 184 | return unknown; | ||
| 185 | } | ||
| 186 | |||
| 187 | constexpr operator Id() const { | ||
| 188 | return static_cast<Id>(value); | ||
| 189 | } | ||
| 190 | |||
| 191 | constexpr OpCode operator<<(size_t bits) const { | ||
| 192 | return value << bits; | ||
| 193 | } | ||
| 194 | |||
| 195 | constexpr OpCode operator>>(size_t bits) const { | ||
| 196 | return value >> bits; | ||
| 197 | } | ||
| 198 | |||
| 199 | template <typename T> | ||
| 200 | constexpr u64 operator-(const T& oth) const { | ||
| 201 | return value - oth; | ||
| 202 | } | ||
| 203 | |||
| 204 | constexpr u64 operator&(const OpCode& oth) const { | ||
| 205 | return value & oth.value; | ||
| 206 | } | ||
| 207 | |||
| 208 | constexpr u64 operator~() const { | ||
| 209 | return ~value; | ||
| 210 | } | ||
| 211 | |||
| 212 | static std::map<Id, Info> BuildInfoTable() { | ||
| 213 | std::map<Id, Info> info_table; | ||
| 214 | info_table[Id::TEXS] = {Type::Memory, "texs"}; | ||
| 215 | info_table[Id::LD_A] = {Type::Memory, "ld_a"}; | ||
| 216 | info_table[Id::ST_A] = {Type::Memory, "st_a"}; | ||
| 217 | info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; | ||
| 218 | info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"}; | ||
| 219 | info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"}; | ||
| 220 | info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"}; | ||
| 221 | info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"}; | ||
| 222 | info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; | ||
| 223 | info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; | ||
| 224 | info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; | ||
| 225 | info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; | ||
| 226 | info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; | ||
| 227 | info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; | ||
| 228 | info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; | ||
| 229 | info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; | ||
| 230 | info_table[Id::EXIT] = {Type::Trivial, "exit"}; | ||
| 231 | info_table[Id::IPA] = {Type::Trivial, "ipa"}; | ||
| 232 | info_table[Id::KIL] = {Type::Flow, "kil"}; | ||
| 233 | return info_table; | ||
| 234 | } | ||
| 235 | |||
| 236 | BitField<57, 7, Id> op1; | ||
| 237 | BitField<56, 8, Id> op2; | ||
| 238 | BitField<55, 9, Id> op3; | ||
| 239 | BitField<52, 12, Id> op4; | ||
| 240 | BitField<51, 13, Id> op5; | ||
| 241 | u64 value; | ||
| 242 | }; | ||
| 243 | static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); | ||
| 244 | |||
| 245 | } // namespace Shader | ||
| 246 | } // namespace Tegra | ||
| 247 | |||
| 248 | namespace std { | ||
| 249 | |||
| 250 | // TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330. | ||
| 251 | template <> | ||
| 252 | struct make_unsigned<Tegra::Shader::Attribute> { | ||
| 253 | using type = Tegra::Shader::Attribute; | ||
| 254 | }; | ||
| 255 | |||
| 256 | template <> | ||
| 257 | struct make_unsigned<Tegra::Shader::Register> { | ||
| 258 | using type = Tegra::Shader::Register; | ||
| 259 | }; | ||
| 260 | |||
| 261 | template <> | ||
| 262 | struct make_unsigned<Tegra::Shader::OpCode> { | ||
| 263 | using type = Tegra::Shader::OpCode; | ||
| 264 | }; | ||
| 265 | |||
| 266 | } // namespace std | ||
| 267 | |||
| 268 | namespace Tegra { | ||
| 269 | namespace Shader { | ||
| 270 | |||
| 271 | enum class Pred : u64 { | ||
| 272 | UnusedIndex = 0x7, | ||
| 273 | NeverExecute = 0xf, | ||
| 274 | }; | ||
| 275 | |||
| 276 | enum class SubOp : u64 { | ||
| 277 | Cos = 0x0, | ||
| 278 | Sin = 0x1, | ||
| 279 | Ex2 = 0x2, | ||
| 280 | Lg2 = 0x3, | ||
| 281 | Rcp = 0x4, | ||
| 282 | Rsq = 0x5, | ||
| 283 | }; | ||
| 284 | |||
| 285 | union Instruction { | ||
| 286 | Instruction& operator=(const Instruction& instr) { | ||
| 287 | hex = instr.hex; | ||
| 288 | return *this; | ||
| 289 | } | ||
| 290 | |||
| 291 | OpCode opcode; | ||
| 292 | BitField<0, 8, Register> gpr0; | ||
| 293 | BitField<8, 8, Register> gpr8; | ||
| 294 | BitField<16, 4, Pred> pred; | ||
| 295 | BitField<20, 8, Register> gpr20; | ||
| 296 | BitField<20, 7, SubOp> sub_op; | ||
| 297 | BitField<28, 8, Register> gpr28; | ||
| 298 | BitField<36, 13, u64> imm36; | ||
| 299 | BitField<39, 8, Register> gpr39; | ||
| 300 | |||
| 301 | union { | ||
| 302 | BitField<45, 1, u64> negate_b; | ||
| 303 | BitField<46, 1, u64> abs_a; | ||
| 304 | BitField<48, 1, u64> negate_a; | ||
| 305 | BitField<49, 1, u64> abs_b; | ||
| 306 | BitField<50, 1, u64> abs_d; | ||
| 307 | } alu; | ||
| 308 | |||
| 309 | union { | ||
| 310 | BitField<48, 1, u64> negate_b; | ||
| 311 | BitField<49, 1, u64> negate_c; | ||
| 312 | } ffma; | ||
| 313 | |||
| 314 | BitField<60, 1, u64> is_b_gpr; | ||
| 315 | BitField<59, 1, u64> is_c_gpr; | ||
| 316 | |||
| 317 | Attribute attribute; | ||
| 318 | Uniform uniform; | ||
| 319 | |||
| 320 | u64 hex; | ||
| 321 | }; | ||
| 322 | static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); | ||
| 323 | static_assert(std::is_standard_layout<Instruction>::value, | ||
| 324 | "Structure does not have standard layout"); | ||
| 325 | |||
| 326 | } // namespace Shader | ||
| 327 | } // namespace Tegra | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265b..f75d4c658 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | |||
| 34 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | 34 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); |
| 35 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 35 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); |
| 36 | 36 | ||
| 37 | enum class UniformBindings : GLuint { Common, VS, FS }; | ||
| 38 | |||
| 39 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, | ||
| 40 | size_t expected_size) { | ||
| 41 | GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||
| 42 | if (ub_index != GL_INVALID_INDEX) { | ||
| 43 | GLint ub_size = 0; | ||
| 44 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 45 | ASSERT_MSG(ub_size == expected_size, | ||
| 46 | "Uniform block size did not match! Got %d, expected %zu", | ||
| 47 | static_cast<int>(ub_size), expected_size); | ||
| 48 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | static void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 53 | SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, | ||
| 54 | sizeof(RasterizerOpenGL::UniformData)); | ||
| 55 | SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, | ||
| 56 | sizeof(RasterizerOpenGL::VSUniformData)); | ||
| 57 | SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, | ||
| 58 | sizeof(RasterizerOpenGL::FSUniformData)); | ||
| 59 | } | ||
| 60 | |||
| 61 | RasterizerOpenGL::RasterizerOpenGL() { | 37 | RasterizerOpenGL::RasterizerOpenGL() { |
| 62 | shader_dirty = true; | ||
| 63 | |||
| 64 | has_ARB_buffer_storage = false; | 38 | has_ARB_buffer_storage = false; |
| 65 | has_ARB_direct_state_access = false; | 39 | has_ARB_direct_state_access = false; |
| 66 | has_ARB_separate_shader_objects = false; | 40 | has_ARB_separate_shader_objects = false; |
| @@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() { | |||
| 88 | } | 62 | } |
| 89 | } | 63 | } |
| 90 | 64 | ||
| 65 | ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); | ||
| 66 | |||
| 91 | // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 | 67 | // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 |
| 92 | state.clip_distance[0] = true; | 68 | state.clip_distance[0] = true; |
| 93 | 69 | ||
| @@ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() { | |||
| 102 | state.draw.uniform_buffer = uniform_buffer.handle; | 78 | state.draw.uniform_buffer = uniform_buffer.handle; |
| 103 | state.Apply(); | 79 | state.Apply(); |
| 104 | 80 | ||
| 105 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); | ||
| 106 | glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); | ||
| 107 | |||
| 108 | uniform_block_data.dirty = true; | ||
| 109 | |||
| 110 | // Create render framebuffer | 81 | // Create render framebuffer |
| 111 | framebuffer.Create(); | 82 | framebuffer.Create(); |
| 112 | 83 | ||
| 113 | if (has_ARB_separate_shader_objects) { | 84 | hw_vao.Create(); |
| 114 | hw_vao.Create(); | 85 | hw_vao_enabled_attributes.fill(false); |
| 115 | hw_vao_enabled_attributes.fill(false); | ||
| 116 | 86 | ||
| 117 | stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | 87 | stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); |
| 118 | stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | 88 | stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); |
| 119 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 89 | state.draw.vertex_buffer = stream_buffer->GetHandle(); |
| 120 | 90 | ||
| 121 | pipeline.Create(); | 91 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 122 | state.draw.program_pipeline = pipeline.handle; | 92 | |
| 123 | state.draw.shader_program = 0; | 93 | state.draw.shader_program = 0; |
| 124 | state.draw.vertex_array = hw_vao.handle; | 94 | state.draw.vertex_array = hw_vao.handle; |
| 125 | state.Apply(); | 95 | state.Apply(); |
| 126 | 96 | ||
| 127 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | 97 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); |
| 128 | 98 | ||
| 129 | vs_uniform_buffer.Create(); | 99 | for (unsigned index = 0; index < uniform_buffers.size(); ++index) { |
| 130 | glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); | 100 | auto& buffer = uniform_buffers[index]; |
| 131 | glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); | 101 | buffer.Create(); |
| 132 | glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); | 102 | glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); |
| 133 | } else { | 103 | glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, |
| 134 | UNREACHABLE(); | 104 | GL_STREAM_COPY); |
| 105 | glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); | ||
| 135 | } | 106 | } |
| 136 | 107 | ||
| 137 | accelerate_draw = AccelDraw::Disabled; | 108 | accelerate_draw = AccelDraw::Disabled; |
| @@ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | |||
| 200 | buffer_offset += data_size; | 171 | buffer_offset += data_size; |
| 201 | } | 172 | } |
| 202 | 173 | ||
| 203 | void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { | 174 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { |
| 204 | MICROPROFILE_SCOPE(OpenGL_VS); | 175 | // Helper function for uploading uniform data |
| 205 | LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); | 176 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { |
| 206 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); | 177 | if (has_ARB_direct_state_access) { |
| 207 | } | 178 | glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); |
| 179 | } else { | ||
| 180 | glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||
| 181 | glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||
| 182 | } | ||
| 183 | }; | ||
| 208 | 184 | ||
| 209 | void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { | 185 | auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); |
| 210 | MICROPROFILE_SCOPE(OpenGL_FS); | 186 | ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); |
| 211 | UNREACHABLE(); | ||
| 212 | } | ||
| 213 | 187 | ||
| 214 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | 188 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { |
| 215 | if (!has_ARB_separate_shader_objects) { | 189 | ptr_pos += sizeof(GLShader::MaxwellUniformData); |
| 216 | UNREACHABLE(); | 190 | |
| 217 | return false; | 191 | auto& shader_config = gpu.regs.shader_config[index]; |
| 192 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 193 | |||
| 194 | // VertexB program is always enabled, despite bit setting | ||
| 195 | const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB}; | ||
| 196 | |||
| 197 | // Skip stages that are not enabled | ||
| 198 | if (!is_enabled) { | ||
| 199 | continue; | ||
| 200 | } | ||
| 201 | |||
| 202 | // Upload uniform data as one UBO per stage | ||
| 203 | const auto& stage = index - 1; // Stage indices are 0 - 5 | ||
| 204 | const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | ||
| 205 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||
| 206 | sizeof(GLShader::MaxwellUniformData)); | ||
| 207 | GLShader::MaxwellUniformData* ub_ptr = | ||
| 208 | reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); | ||
| 209 | ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | ||
| 210 | |||
| 211 | // Fetch program code from memory | ||
| 212 | GLShader::ProgramCode program_code; | ||
| 213 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | ||
| 214 | const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; | ||
| 215 | Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||
| 216 | GLShader::ShaderSetup setup{std::move(program_code)}; | ||
| 217 | |||
| 218 | switch (program) { | ||
| 219 | case Maxwell::ShaderProgram::VertexB: { | ||
| 220 | GLShader::MaxwellVSConfig vs_config{setup}; | ||
| 221 | shader_program_manager->UseProgrammableVertexShader(vs_config, setup); | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | case Maxwell::ShaderProgram::Fragment: { | ||
| 225 | GLShader::MaxwellFSConfig fs_config{setup}; | ||
| 226 | shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | default: | ||
| 230 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, | ||
| 231 | shader_config.enable.Value(), shader_config.offset); | ||
| 232 | UNREACHABLE(); | ||
| 233 | } | ||
| 218 | } | 234 | } |
| 219 | 235 | ||
| 236 | shader_program_manager->UseTrivialGeometryShader(); | ||
| 237 | } | ||
| 238 | |||
| 239 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||
| 220 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | 240 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; |
| 221 | DrawArrays(); | 241 | DrawArrays(); |
| 222 | |||
| 223 | return true; | 242 | return true; |
| 224 | } | 243 | } |
| 225 | 244 | ||
| @@ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 280 | // Sync and bind the texture surfaces | 299 | // Sync and bind the texture surfaces |
| 281 | BindTextures(); | 300 | BindTextures(); |
| 282 | 301 | ||
| 283 | // Sync and bind the shader | ||
| 284 | if (shader_dirty) { | ||
| 285 | SetShader(); | ||
| 286 | shader_dirty = false; | ||
| 287 | } | ||
| 288 | |||
| 289 | // Sync the uniform data | ||
| 290 | if (uniform_block_data.dirty) { | ||
| 291 | glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); | ||
| 292 | uniform_block_data.dirty = false; | ||
| 293 | } | ||
| 294 | |||
| 295 | // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable | 302 | // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable |
| 296 | // scissor test to prevent drawing outside of the framebuffer region | 303 | // scissor test to prevent drawing outside of the framebuffer region |
| 297 | state.scissor.enabled = true; | 304 | state.scissor.enabled = true; |
| @@ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 311 | if (is_indexed) { | 318 | if (is_indexed) { |
| 312 | UNREACHABLE(); | 319 | UNREACHABLE(); |
| 313 | } | 320 | } |
| 314 | buffer_size += sizeof(VSUniformData); | 321 | |
| 322 | // Uniform space for the 5 shader stages | ||
| 323 | buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||
| 315 | 324 | ||
| 316 | size_t ptr_pos = 0; | 325 | size_t ptr_pos = 0; |
| 317 | u8* buffer_ptr; | 326 | u8* buffer_ptr; |
| @@ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 327 | UNREACHABLE(); | 336 | UNREACHABLE(); |
| 328 | } | 337 | } |
| 329 | 338 | ||
| 330 | SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), | 339 | SetupShaders(buffer_ptr, buffer_offset, ptr_pos); |
| 331 | buffer_offset + static_cast<GLintptr>(ptr_pos)); | ||
| 332 | const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | ||
| 333 | ptr_pos += sizeof(VSUniformData); | ||
| 334 | 340 | ||
| 335 | stream_buffer->Unmap(); | 341 | stream_buffer->Unmap(); |
| 336 | 342 | ||
| 337 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | 343 | shader_program_manager->ApplyTo(state); |
| 338 | if (has_ARB_direct_state_access) { | 344 | state.Apply(); |
| 339 | glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); | ||
| 340 | } else { | ||
| 341 | glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||
| 342 | glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||
| 343 | } | ||
| 344 | }; | ||
| 345 | |||
| 346 | copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); | ||
| 347 | |||
| 348 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); | ||
| 349 | 345 | ||
| 350 | if (is_indexed) { | 346 | if (is_indexed) { |
| 351 | UNREACHABLE(); | 347 | UNREACHABLE(); |
| @@ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||
| 531 | } | 527 | } |
| 532 | } | 528 | } |
| 533 | 529 | ||
| 534 | void RasterizerOpenGL::SetShader() { | ||
| 535 | // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to | ||
| 536 | // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell | ||
| 537 | // shaders. | ||
| 538 | |||
| 539 | static constexpr char vertex_shader[] = R"( | ||
| 540 | #version 150 core | ||
| 541 | |||
| 542 | in vec2 vert_position; | ||
| 543 | in vec2 vert_tex_coord; | ||
| 544 | out vec2 frag_tex_coord; | ||
| 545 | |||
| 546 | void main() { | ||
| 547 | // Multiply input position by the rotscale part of the matrix and then manually translate by | ||
| 548 | // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector | ||
| 549 | // to `vec3(vert_position.xy, 1.0)` | ||
| 550 | gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); | ||
| 551 | frag_tex_coord = vert_tex_coord; | ||
| 552 | } | ||
| 553 | )"; | ||
| 554 | |||
| 555 | static constexpr char fragment_shader[] = R"( | ||
| 556 | #version 150 core | ||
| 557 | |||
| 558 | in vec2 frag_tex_coord; | ||
| 559 | out vec4 color; | ||
| 560 | |||
| 561 | uniform sampler2D tex[32]; | ||
| 562 | |||
| 563 | void main() { | ||
| 564 | color = texture(tex[0], frag_tex_coord); | ||
| 565 | } | ||
| 566 | )"; | ||
| 567 | |||
| 568 | if (current_shader) { | ||
| 569 | return; | ||
| 570 | } | ||
| 571 | |||
| 572 | LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); | ||
| 573 | |||
| 574 | current_shader = &test_shader; | ||
| 575 | if (has_ARB_separate_shader_objects) { | ||
| 576 | test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); | ||
| 577 | glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); | ||
| 578 | } else { | ||
| 579 | UNREACHABLE(); | ||
| 580 | } | ||
| 581 | |||
| 582 | state.draw.shader_program = test_shader.shader.handle; | ||
| 583 | state.Apply(); | ||
| 584 | |||
| 585 | for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { | ||
| 586 | // Set the texture samplers to correspond to different texture units | ||
| 587 | std::string uniform_name = "tex[" + std::to_string(texture) + "]"; | ||
| 588 | GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); | ||
| 589 | if (uniform_tex != -1) { | ||
| 590 | glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); | ||
| 591 | } | ||
| 592 | } | ||
| 593 | |||
| 594 | if (has_ARB_separate_shader_objects) { | ||
| 595 | state.draw.shader_program = 0; | ||
| 596 | state.Apply(); | ||
| 597 | } | ||
| 598 | } | ||
| 599 | |||
| 600 | void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, | 530 | void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, |
| 601 | const Surface& depth_surface, bool has_stencil) { | 531 | const Surface& depth_surface, bool has_stencil) { |
| 602 | state.draw.draw_framebuffer = framebuffer.handle; | 532 | state.draw.draw_framebuffer = framebuffer.handle; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d868bf421..71c21c69b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -15,10 +15,12 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/hash.h" | 16 | #include "common/hash.h" |
| 17 | #include "common/vector_math.h" | 17 | #include "common/vector_math.h" |
| 18 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/rasterizer_interface.h" | 19 | #include "video_core/rasterizer_interface.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 20 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_state.h" | 24 | #include "video_core/renderer_opengl/gl_state.h" |
| 23 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 25 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 24 | 26 | ||
| @@ -45,7 +47,7 @@ public: | |||
| 45 | /// OpenGL shader generated for a given Maxwell register state | 47 | /// OpenGL shader generated for a given Maxwell register state |
| 46 | struct MaxwellShader { | 48 | struct MaxwellShader { |
| 47 | /// OpenGL shader resource | 49 | /// OpenGL shader resource |
| 48 | OGLShader shader; | 50 | OGLProgram shader; |
| 49 | }; | 51 | }; |
| 50 | 52 | ||
| 51 | struct VertexShader { | 53 | struct VertexShader { |
| @@ -56,34 +58,6 @@ public: | |||
| 56 | OGLShader shader; | 58 | OGLShader shader; |
| 57 | }; | 59 | }; |
| 58 | 60 | ||
| 59 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | ||
| 60 | // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at | ||
| 61 | // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | ||
| 62 | // Not following that rule will cause problems on some AMD drivers. | ||
| 63 | struct UniformData {}; | ||
| 64 | |||
| 65 | // static_assert( | ||
| 66 | // sizeof(UniformData) == 0x460, | ||
| 67 | // "The size of the UniformData structure has changed, update the structure in the shader"); | ||
| 68 | static_assert(sizeof(UniformData) < 16384, | ||
| 69 | "UniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 70 | |||
| 71 | struct VSUniformData {}; | ||
| 72 | // static_assert( | ||
| 73 | // sizeof(VSUniformData) == 1856, | ||
| 74 | // "The size of the VSUniformData structure has changed, update the structure in the | ||
| 75 | // shader"); | ||
| 76 | static_assert(sizeof(VSUniformData) < 16384, | ||
| 77 | "VSUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 78 | |||
| 79 | struct FSUniformData {}; | ||
| 80 | // static_assert( | ||
| 81 | // sizeof(FSUniformData) == 1856, | ||
| 82 | // "The size of the FSUniformData structure has changed, update the structure in the | ||
| 83 | // shader"); | ||
| 84 | static_assert(sizeof(FSUniformData) < 16384, | ||
| 85 | "FSUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 86 | |||
| 87 | private: | 61 | private: |
| 88 | class SamplerInfo { | 62 | class SamplerInfo { |
| 89 | public: | 63 | public: |
| @@ -122,9 +96,6 @@ private: | |||
| 122 | /// Syncs the clip coefficients to match the guest state | 96 | /// Syncs the clip coefficients to match the guest state |
| 123 | void SyncClipCoef(); | 97 | void SyncClipCoef(); |
| 124 | 98 | ||
| 125 | /// Sets the OpenGL shader in accordance with the current guest state | ||
| 126 | void SetShader(); | ||
| 127 | |||
| 128 | /// Syncs the cull mode to match the guest state | 99 | /// Syncs the cull mode to match the guest state |
| 129 | void SyncCullMode(); | 100 | void SyncCullMode(); |
| 130 | 101 | ||
| @@ -152,23 +123,12 @@ private: | |||
| 152 | 123 | ||
| 153 | RasterizerCacheOpenGL res_cache; | 124 | RasterizerCacheOpenGL res_cache; |
| 154 | 125 | ||
| 155 | /// Shader used for test renderering - to be removed once we have emulated shaders | 126 | std::unique_ptr<GLShader::ProgramManager> shader_program_manager; |
| 156 | MaxwellShader test_shader{}; | ||
| 157 | |||
| 158 | const MaxwellShader* current_shader{}; | ||
| 159 | bool shader_dirty{}; | ||
| 160 | |||
| 161 | struct { | ||
| 162 | UniformData data; | ||
| 163 | bool dirty; | ||
| 164 | } uniform_block_data = {}; | ||
| 165 | |||
| 166 | OGLPipeline pipeline; | ||
| 167 | OGLVertexArray sw_vao; | 127 | OGLVertexArray sw_vao; |
| 168 | OGLVertexArray hw_vao; | 128 | OGLVertexArray hw_vao; |
| 169 | std::array<bool, 16> hw_vao_enabled_attributes; | 129 | std::array<bool, 16> hw_vao_enabled_attributes; |
| 170 | 130 | ||
| 171 | std::array<SamplerInfo, 32> texture_samplers; | 131 | std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; |
| 172 | static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; | 132 | static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; |
| 173 | std::unique_ptr<OGLStreamBuffer> vertex_buffer; | 133 | std::unique_ptr<OGLStreamBuffer> vertex_buffer; |
| 174 | OGLBuffer uniform_buffer; | 134 | OGLBuffer uniform_buffer; |
| @@ -182,19 +142,9 @@ private: | |||
| 182 | void AnalyzeVertexArray(bool is_indexed); | 142 | void AnalyzeVertexArray(bool is_indexed); |
| 183 | void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | 143 | void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); |
| 184 | 144 | ||
| 185 | OGLBuffer vs_uniform_buffer; | 145 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; |
| 186 | std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; | ||
| 187 | std::unordered_map<std::string, VertexShader> vs_shader_cache; | ||
| 188 | OGLShader vs_default_shader; | ||
| 189 | |||
| 190 | void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); | ||
| 191 | |||
| 192 | OGLBuffer fs_uniform_buffer; | ||
| 193 | std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; | ||
| 194 | std::unordered_map<std::string, FragmentShader> fs_shader_cache; | ||
| 195 | OGLShader fs_default_shader; | ||
| 196 | 146 | ||
| 197 | void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); | 147 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); |
| 198 | 148 | ||
| 199 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 149 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 200 | AccelDraw accelerate_draw; | 150 | AccelDraw accelerate_draw; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbafa2e7..213b20a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -818,7 +818,7 @@ void main() { | |||
| 818 | color = texelFetch(tbo, tbo_offset).rabg; | 818 | color = texelFetch(tbo, tbo_offset).rabg; |
| 819 | } | 819 | } |
| 820 | )"; | 820 | )"; |
| 821 | d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); | 821 | d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); |
| 822 | 822 | ||
| 823 | OpenGLState state = OpenGLState::GetCurState(); | 823 | OpenGLState state = OpenGLState::GetCurState(); |
| 824 | GLuint old_program = state.draw.shader_program; | 824 | GLuint old_program = state.draw.shader_program; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 06524fc59..e7ce506cf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -334,7 +334,7 @@ private: | |||
| 334 | OGLVertexArray attributeless_vao; | 334 | OGLVertexArray attributeless_vao; |
| 335 | OGLBuffer d24s8_abgr_buffer; | 335 | OGLBuffer d24s8_abgr_buffer; |
| 336 | GLsizeiptr d24s8_abgr_buffer_size; | 336 | GLsizeiptr d24s8_abgr_buffer_size; |
| 337 | OGLShader d24s8_abgr_shader; | 337 | OGLProgram d24s8_abgr_shader; |
| 338 | GLint d24s8_abgr_tbo_size_u_id; | 338 | GLint d24s8_abgr_tbo_size_u_id; |
| 339 | GLint d24s8_abgr_viewport_u_id; | 339 | GLint d24s8_abgr_viewport_u_id; |
| 340 | }; | 340 | }; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 7da5e74d1..2f0e7ac1a 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -13,14 +13,16 @@ | |||
| 13 | class OGLTexture : private NonCopyable { | 13 | class OGLTexture : private NonCopyable { |
| 14 | public: | 14 | public: |
| 15 | OGLTexture() = default; | 15 | OGLTexture() = default; |
| 16 | OGLTexture(OGLTexture&& o) { | 16 | |
| 17 | std::swap(handle, o.handle); | 17 | OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {} |
| 18 | } | 18 | |
| 19 | ~OGLTexture() { | 19 | ~OGLTexture() { |
| 20 | Release(); | 20 | Release(); |
| 21 | } | 21 | } |
| 22 | |||
| 22 | OGLTexture& operator=(OGLTexture&& o) { | 23 | OGLTexture& operator=(OGLTexture&& o) { |
| 23 | std::swap(handle, o.handle); | 24 | Release(); |
| 25 | handle = std::exchange(o.handle, 0); | ||
| 24 | return *this; | 26 | return *this; |
| 25 | } | 27 | } |
| 26 | 28 | ||
| @@ -46,14 +48,16 @@ public: | |||
| 46 | class OGLSampler : private NonCopyable { | 48 | class OGLSampler : private NonCopyable { |
| 47 | public: | 49 | public: |
| 48 | OGLSampler() = default; | 50 | OGLSampler() = default; |
| 49 | OGLSampler(OGLSampler&& o) { | 51 | |
| 50 | std::swap(handle, o.handle); | 52 | OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {} |
| 51 | } | 53 | |
| 52 | ~OGLSampler() { | 54 | ~OGLSampler() { |
| 53 | Release(); | 55 | Release(); |
| 54 | } | 56 | } |
| 57 | |||
| 55 | OGLSampler& operator=(OGLSampler&& o) { | 58 | OGLSampler& operator=(OGLSampler&& o) { |
| 56 | std::swap(handle, o.handle); | 59 | Release(); |
| 60 | handle = std::exchange(o.handle, 0); | ||
| 57 | return *this; | 61 | return *this; |
| 58 | } | 62 | } |
| 59 | 63 | ||
| @@ -79,25 +83,71 @@ public: | |||
| 79 | class OGLShader : private NonCopyable { | 83 | class OGLShader : private NonCopyable { |
| 80 | public: | 84 | public: |
| 81 | OGLShader() = default; | 85 | OGLShader() = default; |
| 82 | OGLShader(OGLShader&& o) { | 86 | |
| 83 | std::swap(handle, o.handle); | 87 | OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {} |
| 84 | } | 88 | |
| 85 | ~OGLShader() { | 89 | ~OGLShader() { |
| 86 | Release(); | 90 | Release(); |
| 87 | } | 91 | } |
| 92 | |||
| 88 | OGLShader& operator=(OGLShader&& o) { | 93 | OGLShader& operator=(OGLShader&& o) { |
| 89 | std::swap(handle, o.handle); | 94 | Release(); |
| 95 | handle = std::exchange(o.handle, 0); | ||
| 90 | return *this; | 96 | return *this; |
| 91 | } | 97 | } |
| 92 | 98 | ||
| 93 | /// Creates a new internal OpenGL resource and stores the handle | 99 | void Create(const char* source, GLenum type) { |
| 94 | void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, | 100 | if (handle != 0) |
| 95 | const std::vector<const char*>& feedback_vars = {}, | 101 | return; |
| 96 | bool separable_program = false) { | 102 | if (source == nullptr) |
| 103 | return; | ||
| 104 | handle = GLShader::LoadShader(source, type); | ||
| 105 | } | ||
| 106 | |||
| 107 | void Release() { | ||
| 108 | if (handle == 0) | ||
| 109 | return; | ||
| 110 | glDeleteShader(handle); | ||
| 111 | handle = 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | GLuint handle = 0; | ||
| 115 | }; | ||
| 116 | |||
| 117 | class OGLProgram : private NonCopyable { | ||
| 118 | public: | ||
| 119 | OGLProgram() = default; | ||
| 120 | |||
| 121 | OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} | ||
| 122 | |||
| 123 | ~OGLProgram() { | ||
| 124 | Release(); | ||
| 125 | } | ||
| 126 | |||
| 127 | OGLProgram& operator=(OGLProgram&& o) { | ||
| 128 | Release(); | ||
| 129 | handle = std::exchange(o.handle, 0); | ||
| 130 | return *this; | ||
| 131 | } | ||
| 132 | |||
| 133 | template <typename... T> | ||
| 134 | void Create(bool separable_program, T... shaders) { | ||
| 97 | if (handle != 0) | 135 | if (handle != 0) |
| 98 | return; | 136 | return; |
| 99 | handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, | 137 | handle = GLShader::LoadProgram(separable_program, shaders...); |
| 100 | separable_program); | 138 | } |
| 139 | |||
| 140 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 141 | void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, | ||
| 142 | bool separable_program = false) { | ||
| 143 | OGLShader vert, geo, frag; | ||
| 144 | if (vert_shader) | ||
| 145 | vert.Create(vert_shader, GL_VERTEX_SHADER); | ||
| 146 | if (geo_shader) | ||
| 147 | geo.Create(geo_shader, GL_GEOMETRY_SHADER); | ||
| 148 | if (frag_shader) | ||
| 149 | frag.Create(frag_shader, GL_FRAGMENT_SHADER); | ||
| 150 | Create(separable_program, vert.handle, geo.handle, frag.handle); | ||
| 101 | } | 151 | } |
| 102 | 152 | ||
| 103 | /// Deletes the internal OpenGL resource | 153 | /// Deletes the internal OpenGL resource |
| @@ -148,14 +198,16 @@ public: | |||
| 148 | class OGLBuffer : private NonCopyable { | 198 | class OGLBuffer : private NonCopyable { |
| 149 | public: | 199 | public: |
| 150 | OGLBuffer() = default; | 200 | OGLBuffer() = default; |
| 151 | OGLBuffer(OGLBuffer&& o) { | 201 | |
| 152 | std::swap(handle, o.handle); | 202 | OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {} |
| 153 | } | 203 | |
| 154 | ~OGLBuffer() { | 204 | ~OGLBuffer() { |
| 155 | Release(); | 205 | Release(); |
| 156 | } | 206 | } |
| 207 | |||
| 157 | OGLBuffer& operator=(OGLBuffer&& o) { | 208 | OGLBuffer& operator=(OGLBuffer&& o) { |
| 158 | std::swap(handle, o.handle); | 209 | Release(); |
| 210 | handle = std::exchange(o.handle, 0); | ||
| 159 | return *this; | 211 | return *this; |
| 160 | } | 212 | } |
| 161 | 213 | ||
| @@ -214,14 +266,16 @@ public: | |||
| 214 | class OGLVertexArray : private NonCopyable { | 266 | class OGLVertexArray : private NonCopyable { |
| 215 | public: | 267 | public: |
| 216 | OGLVertexArray() = default; | 268 | OGLVertexArray() = default; |
| 217 | OGLVertexArray(OGLVertexArray&& o) { | 269 | |
| 218 | std::swap(handle, o.handle); | 270 | OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {} |
| 219 | } | 271 | |
| 220 | ~OGLVertexArray() { | 272 | ~OGLVertexArray() { |
| 221 | Release(); | 273 | Release(); |
| 222 | } | 274 | } |
| 275 | |||
| 223 | OGLVertexArray& operator=(OGLVertexArray&& o) { | 276 | OGLVertexArray& operator=(OGLVertexArray&& o) { |
| 224 | std::swap(handle, o.handle); | 277 | Release(); |
| 278 | handle = std::exchange(o.handle, 0); | ||
| 225 | return *this; | 279 | return *this; |
| 226 | } | 280 | } |
| 227 | 281 | ||
| @@ -247,14 +301,16 @@ public: | |||
| 247 | class OGLFramebuffer : private NonCopyable { | 301 | class OGLFramebuffer : private NonCopyable { |
| 248 | public: | 302 | public: |
| 249 | OGLFramebuffer() = default; | 303 | OGLFramebuffer() = default; |
| 250 | OGLFramebuffer(OGLFramebuffer&& o) { | 304 | |
| 251 | std::swap(handle, o.handle); | 305 | OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {} |
| 252 | } | 306 | |
| 253 | ~OGLFramebuffer() { | 307 | ~OGLFramebuffer() { |
| 254 | Release(); | 308 | Release(); |
| 255 | } | 309 | } |
| 310 | |||
| 256 | OGLFramebuffer& operator=(OGLFramebuffer&& o) { | 311 | OGLFramebuffer& operator=(OGLFramebuffer&& o) { |
| 257 | std::swap(handle, o.handle); | 312 | Release(); |
| 313 | handle = std::exchange(o.handle, 0); | ||
| 258 | return *this; | 314 | return *this; |
| 259 | } | 315 | } |
| 260 | 316 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 564ea8f9e..1290fa4cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2,57 +2,499 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <map> | ||
| 6 | #include <set> | ||
| 5 | #include <string> | 7 | #include <string> |
| 6 | #include <queue> | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 11 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 10 | 12 | ||
| 11 | namespace Maxwell3D { | 13 | namespace GLShader { |
| 12 | namespace Shader { | ||
| 13 | namespace Decompiler { | 14 | namespace Decompiler { |
| 14 | 15 | ||
| 16 | using Tegra::Shader::Attribute; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | using Tegra::Shader::SubOp; | ||
| 21 | using Tegra::Shader::Uniform; | ||
| 22 | |||
| 15 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 23 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; |
| 16 | 24 | ||
| 17 | class Impl { | 25 | class DecompileFail : public std::runtime_error { |
| 18 | public: | 26 | public: |
| 19 | Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | 27 | using std::runtime_error::runtime_error; |
| 20 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, | 28 | }; |
| 21 | const std::function<std::string(u32)>& inputreg_getter, | 29 | |
| 22 | const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, | 30 | /// Describes the behaviour of code path of a given entry point and a return point. |
| 23 | const std::string& emit_cb, const std::string& setemit_cb) | 31 | enum class ExitMethod { |
| 24 | : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), | 32 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. |
| 25 | inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), | 33 | AlwaysReturn, ///< All code paths reach the return point. |
| 26 | sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} | 34 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. |
| 27 | 35 | AlwaysEnd, ///< All code paths reach a END instruction. | |
| 28 | std::string Decompile() { | 36 | }; |
| 29 | UNREACHABLE(); | 37 | |
| 30 | return {}; | 38 | /// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. |
| 39 | struct Subroutine { | ||
| 40 | /// Generates a name suitable for GLSL source code. | ||
| 41 | std::string GetName() const { | ||
| 42 | return "sub_" + std::to_string(begin) + "_" + std::to_string(end); | ||
| 43 | } | ||
| 44 | |||
| 45 | u32 begin; ///< Entry point of the subroutine. | ||
| 46 | u32 end; ///< Return point of the subroutine. | ||
| 47 | ExitMethod exit_method; ///< Exit method of the subroutine. | ||
| 48 | std::set<u32> labels; ///< Addresses refereced by JMP instructions. | ||
| 49 | |||
| 50 | bool operator<(const Subroutine& rhs) const { | ||
| 51 | return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | |||
| 55 | /// Analyzes shader code and produces a set of subroutines. | ||
| 56 | class ControlFlowAnalyzer { | ||
| 57 | public: | ||
| 58 | ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) | ||
| 59 | : program_code(program_code) { | ||
| 60 | |||
| 61 | // Recursively finds all subroutines. | ||
| 62 | const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); | ||
| 63 | if (program_main.exit_method != ExitMethod::AlwaysEnd) | ||
| 64 | throw DecompileFail("Program does not always end"); | ||
| 65 | } | ||
| 66 | |||
| 67 | std::set<Subroutine> GetSubroutines() { | ||
| 68 | return std::move(subroutines); | ||
| 31 | } | 69 | } |
| 32 | 70 | ||
| 33 | private: | 71 | private: |
| 34 | const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; | 72 | const ProgramCode& program_code; |
| 35 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; | 73 | std::set<Subroutine> subroutines; |
| 36 | u32 main_offset; | 74 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; |
| 37 | const std::function<std::string(u32)>& inputreg_getter; | 75 | |
| 38 | const std::function<std::string(u32)>& outputreg_getter; | 76 | /// Adds and analyzes a new subroutine if it is not added yet. |
| 39 | bool sanitize_mul; | 77 | const Subroutine& AddSubroutine(u32 begin, u32 end) { |
| 40 | const std::string& emit_cb; | 78 | auto iter = subroutines.find(Subroutine{begin, end}); |
| 41 | const std::string& setemit_cb; | 79 | if (iter != subroutines.end()) |
| 80 | return *iter; | ||
| 81 | |||
| 82 | Subroutine subroutine{begin, end}; | ||
| 83 | subroutine.exit_method = Scan(begin, end, subroutine.labels); | ||
| 84 | if (subroutine.exit_method == ExitMethod::Undetermined) | ||
| 85 | throw DecompileFail("Recursive function detected"); | ||
| 86 | return *subroutines.insert(std::move(subroutine)).first; | ||
| 87 | } | ||
| 88 | |||
| 89 | /// Scans a range of code for labels and determines the exit method. | ||
| 90 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 91 | auto [iter, inserted] = | ||
| 92 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 93 | ExitMethod& exit_method = iter->second; | ||
| 94 | if (!inserted) | ||
| 95 | return exit_method; | ||
| 96 | |||
| 97 | for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { | ||
| 98 | const Instruction instr = {program_code[offset]}; | ||
| 99 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 100 | case OpCode::Id::EXIT: { | ||
| 101 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | } | ||
| 105 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 106 | } | ||
| 42 | }; | 107 | }; |
| 43 | 108 | ||
| 44 | std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | 109 | class ShaderWriter { |
| 45 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | 110 | public: |
| 46 | u32 main_offset, | 111 | void AddLine(const std::string& text) { |
| 47 | const std::function<std::string(u32)>& inputreg_getter, | 112 | DEBUG_ASSERT(scope >= 0); |
| 48 | const std::function<std::string(u32)>& outputreg_getter, | 113 | if (!text.empty()) { |
| 49 | bool sanitize_mul, const std::string& emit_cb, | 114 | shader_source += std::string(static_cast<size_t>(scope) * 4, ' '); |
| 50 | const std::string& setemit_cb) { | 115 | } |
| 51 | Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, | 116 | shader_source += text + '\n'; |
| 52 | sanitize_mul, emit_cb, setemit_cb); | 117 | } |
| 53 | return impl.Decompile(); | 118 | |
| 119 | std::string GetResult() { | ||
| 120 | return std::move(shader_source); | ||
| 121 | } | ||
| 122 | |||
| 123 | int scope = 0; | ||
| 124 | |||
| 125 | private: | ||
| 126 | std::string shader_source; | ||
| 127 | }; | ||
| 128 | |||
| 129 | class GLSLGenerator { | ||
| 130 | public: | ||
| 131 | GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, | ||
| 132 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage) | ||
| 133 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||
| 134 | stage(stage) { | ||
| 135 | |||
| 136 | Generate(); | ||
| 137 | } | ||
| 138 | |||
| 139 | std::string GetShaderCode() { | ||
| 140 | return declarations.GetResult() + shader.GetResult(); | ||
| 141 | } | ||
| 142 | |||
| 143 | private: | ||
| 144 | /// Gets the Subroutine object corresponding to the specified address. | ||
| 145 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||
| 146 | auto iter = subroutines.find(Subroutine{begin, end}); | ||
| 147 | ASSERT(iter != subroutines.end()); | ||
| 148 | return *iter; | ||
| 149 | } | ||
| 150 | |||
| 151 | /// Generates code representing an input attribute register. | ||
| 152 | std::string GetInputAttribute(Attribute::Index attribute) { | ||
| 153 | declr_input_attribute.insert(attribute); | ||
| 154 | |||
| 155 | const u32 index{static_cast<u32>(attribute) - | ||
| 156 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 157 | if (attribute >= Attribute::Index::Attribute_0) { | ||
| 158 | return "input_attribute_" + std::to_string(index); | ||
| 159 | } | ||
| 160 | |||
| 161 | LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); | ||
| 162 | UNREACHABLE(); | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Generates code representing an output attribute register. | ||
| 166 | std::string GetOutputAttribute(Attribute::Index attribute) { | ||
| 167 | switch (attribute) { | ||
| 168 | case Attribute::Index::Position: | ||
| 169 | return "gl_Position"; | ||
| 170 | default: | ||
| 171 | const u32 index{static_cast<u32>(attribute) - | ||
| 172 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 173 | if (attribute >= Attribute::Index::Attribute_0) { | ||
| 174 | declr_output_attribute.insert(attribute); | ||
| 175 | return "output_attribute_" + std::to_string(index); | ||
| 176 | } | ||
| 177 | |||
| 178 | LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); | ||
| 179 | UNREACHABLE(); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | /// Generates code representing a temporary (GPR) register. | ||
| 184 | std::string GetRegister(const Register& reg) { | ||
| 185 | return *declr_register.insert("register_" + std::to_string(reg)).first; | ||
| 186 | } | ||
| 187 | |||
| 188 | /// Generates code representing a uniform (C buffer) register. | ||
| 189 | std::string GetUniform(const Uniform& reg) const { | ||
| 190 | std::string index = std::to_string(reg.index); | ||
| 191 | return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" + | ||
| 192 | std::to_string(reg.offset & 3) + "]"; | ||
| 193 | } | ||
| 194 | |||
| 195 | /** | ||
| 196 | * Adds code that calls a subroutine. | ||
| 197 | * @param subroutine the subroutine to call. | ||
| 198 | */ | ||
| 199 | void CallSubroutine(const Subroutine& subroutine) { | ||
| 200 | if (subroutine.exit_method == ExitMethod::AlwaysEnd) { | ||
| 201 | shader.AddLine(subroutine.GetName() + "();"); | ||
| 202 | shader.AddLine("return true;"); | ||
| 203 | } else if (subroutine.exit_method == ExitMethod::Conditional) { | ||
| 204 | shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); | ||
| 205 | } else { | ||
| 206 | shader.AddLine(subroutine.GetName() + "();"); | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | /** | ||
| 211 | * Writes code that does an assignment operation. | ||
| 212 | * @param reg the destination register code. | ||
| 213 | * @param value the code representing the value to assign. | ||
| 214 | */ | ||
| 215 | void SetDest(u64 elem, const std::string& reg, const std::string& value, | ||
| 216 | u64 dest_num_components, u64 value_num_components) { | ||
| 217 | std::string swizzle = "."; | ||
| 218 | swizzle += "xyzw"[elem]; | ||
| 219 | |||
| 220 | std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); | ||
| 221 | std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); | ||
| 222 | |||
| 223 | shader.AddLine(dest + " = " + src + ";"); | ||
| 224 | } | ||
| 225 | |||
| 226 | /** | ||
| 227 | * Compiles a single instruction from Tegra to GLSL. | ||
| 228 | * @param offset the offset of the Tegra shader instruction. | ||
| 229 | * @return the offset of the next instruction to execute. Usually it is the current offset | ||
| 230 | * + 1. If the current instruction always terminates the program, returns PROGRAM_END. | ||
| 231 | */ | ||
| 232 | u32 CompileInstr(u32 offset) { | ||
| 233 | const Instruction instr = {program_code[offset]}; | ||
| 234 | |||
| 235 | shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); | ||
| 236 | |||
| 237 | switch (OpCode::GetInfo(instr.opcode).type) { | ||
| 238 | case OpCode::Type::Arithmetic: { | ||
| 239 | ASSERT(!instr.alu.abs_d); | ||
| 240 | |||
| 241 | std::string dest = GetRegister(instr.gpr0); | ||
| 242 | std::string op_a = instr.alu.negate_a ? "-" : ""; | ||
| 243 | op_a += GetRegister(instr.gpr8); | ||
| 244 | if (instr.alu.abs_a) { | ||
| 245 | op_a = "abs(" + op_a + ")"; | ||
| 246 | } | ||
| 247 | |||
| 248 | std::string op_b = instr.alu.negate_b ? "-" : ""; | ||
| 249 | if (instr.is_b_gpr) { | ||
| 250 | op_b += GetRegister(instr.gpr20); | ||
| 251 | } else { | ||
| 252 | op_b += GetUniform(instr.uniform); | ||
| 253 | } | ||
| 254 | if (instr.alu.abs_b) { | ||
| 255 | op_b = "abs(" + op_b + ")"; | ||
| 256 | } | ||
| 257 | |||
| 258 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 259 | case OpCode::Id::FMUL_C: | ||
| 260 | case OpCode::Id::FMUL_R: { | ||
| 261 | SetDest(0, dest, op_a + " * " + op_b, 1, 1); | ||
| 262 | break; | ||
| 263 | } | ||
| 264 | case OpCode::Id::FADD_C: | ||
| 265 | case OpCode::Id::FADD_R: { | ||
| 266 | SetDest(0, dest, op_a + " + " + op_b, 1, 1); | ||
| 267 | break; | ||
| 268 | } | ||
| 269 | default: { | ||
| 270 | LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | ||
| 271 | static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||
| 272 | OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||
| 273 | throw DecompileFail("Unhandled instruction"); | ||
| 274 | break; | ||
| 275 | } | ||
| 276 | } | ||
| 277 | break; | ||
| 278 | } | ||
| 279 | case OpCode::Type::Ffma: { | ||
| 280 | ASSERT_MSG(!instr.ffma.negate_b, "untested"); | ||
| 281 | ASSERT_MSG(!instr.ffma.negate_c, "untested"); | ||
| 282 | |||
| 283 | std::string dest = GetRegister(instr.gpr0); | ||
| 284 | std::string op_a = GetRegister(instr.gpr8); | ||
| 285 | |||
| 286 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||
| 287 | op_b += GetUniform(instr.uniform); | ||
| 288 | |||
| 289 | std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||
| 290 | op_c += GetRegister(instr.gpr39); | ||
| 291 | |||
| 292 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 293 | case OpCode::Id::FFMA_CR: { | ||
| 294 | SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); | ||
| 295 | break; | ||
| 296 | } | ||
| 297 | |||
| 298 | default: { | ||
| 299 | LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", | ||
| 300 | static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||
| 301 | OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||
| 302 | throw DecompileFail("Unhandled instruction"); | ||
| 303 | break; | ||
| 304 | } | ||
| 305 | } | ||
| 306 | break; | ||
| 307 | } | ||
| 308 | case OpCode::Type::Memory: { | ||
| 309 | std::string gpr0 = GetRegister(instr.gpr0); | ||
| 310 | const Attribute::Index attribute = instr.attribute.fmt20.index; | ||
| 311 | |||
| 312 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 313 | case OpCode::Id::LD_A: { | ||
| 314 | ASSERT(instr.attribute.fmt20.size == 0); | ||
| 315 | SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); | ||
| 316 | break; | ||
| 317 | } | ||
| 318 | case OpCode::Id::ST_A: { | ||
| 319 | ASSERT(instr.attribute.fmt20.size == 0); | ||
| 320 | SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); | ||
| 321 | break; | ||
| 322 | } | ||
| 323 | default: { | ||
| 324 | LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", | ||
| 325 | static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||
| 326 | OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||
| 327 | throw DecompileFail("Unhandled instruction"); | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | } | ||
| 331 | break; | ||
| 332 | } | ||
| 333 | |||
| 334 | default: { | ||
| 335 | switch (instr.opcode.EffectiveOpCode()) { | ||
| 336 | case OpCode::Id::EXIT: { | ||
| 337 | shader.AddLine("return true;"); | ||
| 338 | offset = PROGRAM_END - 1; | ||
| 339 | break; | ||
| 340 | } | ||
| 341 | |||
| 342 | default: { | ||
| 343 | LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||
| 344 | static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||
| 345 | OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||
| 346 | throw DecompileFail("Unhandled instruction"); | ||
| 347 | break; | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | break; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | |||
| 355 | return offset + 1; | ||
| 356 | } | ||
| 357 | |||
| 358 | /** | ||
| 359 | * Compiles a range of instructions from Tegra to GLSL. | ||
| 360 | * @param begin the offset of the starting instruction. | ||
| 361 | * @param end the offset where the compilation should stop (exclusive). | ||
| 362 | * @return the offset of the next instruction to compile. PROGRAM_END if the program | ||
| 363 | * terminates. | ||
| 364 | */ | ||
| 365 | u32 CompileRange(u32 begin, u32 end) { | ||
| 366 | u32 program_counter; | ||
| 367 | for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { | ||
| 368 | program_counter = CompileInstr(program_counter); | ||
| 369 | } | ||
| 370 | return program_counter; | ||
| 371 | } | ||
| 372 | |||
| 373 | void Generate() { | ||
| 374 | // Add declarations for all subroutines | ||
| 375 | for (const auto& subroutine : subroutines) { | ||
| 376 | shader.AddLine("bool " + subroutine.GetName() + "();"); | ||
| 377 | } | ||
| 378 | shader.AddLine(""); | ||
| 379 | |||
| 380 | // Add the main entry point | ||
| 381 | shader.AddLine("bool exec_shader() {"); | ||
| 382 | ++shader.scope; | ||
| 383 | CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); | ||
| 384 | --shader.scope; | ||
| 385 | shader.AddLine("}\n"); | ||
| 386 | |||
| 387 | // Add definitions for all subroutines | ||
| 388 | for (const auto& subroutine : subroutines) { | ||
| 389 | std::set<u32> labels = subroutine.labels; | ||
| 390 | |||
| 391 | shader.AddLine("bool " + subroutine.GetName() + "() {"); | ||
| 392 | ++shader.scope; | ||
| 393 | |||
| 394 | if (labels.empty()) { | ||
| 395 | if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { | ||
| 396 | shader.AddLine("return false;"); | ||
| 397 | } | ||
| 398 | } else { | ||
| 399 | labels.insert(subroutine.begin); | ||
| 400 | shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); | ||
| 401 | shader.AddLine("while (true) {"); | ||
| 402 | ++shader.scope; | ||
| 403 | |||
| 404 | shader.AddLine("switch (jmp_to) {"); | ||
| 405 | |||
| 406 | for (auto label : labels) { | ||
| 407 | shader.AddLine("case " + std::to_string(label) + "u: {"); | ||
| 408 | ++shader.scope; | ||
| 409 | |||
| 410 | auto next_it = labels.lower_bound(label + 1); | ||
| 411 | u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; | ||
| 412 | |||
| 413 | u32 compile_end = CompileRange(label, next_label); | ||
| 414 | if (compile_end > next_label && compile_end != PROGRAM_END) { | ||
| 415 | // This happens only when there is a label inside a IF/LOOP block | ||
| 416 | shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); | ||
| 417 | labels.emplace(compile_end); | ||
| 418 | } | ||
| 419 | |||
| 420 | --shader.scope; | ||
| 421 | shader.AddLine("}"); | ||
| 422 | } | ||
| 423 | |||
| 424 | shader.AddLine("default: return false;"); | ||
| 425 | shader.AddLine("}"); | ||
| 426 | |||
| 427 | --shader.scope; | ||
| 428 | shader.AddLine("}"); | ||
| 429 | |||
| 430 | shader.AddLine("return false;"); | ||
| 431 | } | ||
| 432 | |||
| 433 | --shader.scope; | ||
| 434 | shader.AddLine("}\n"); | ||
| 435 | |||
| 436 | DEBUG_ASSERT(shader.scope == 0); | ||
| 437 | } | ||
| 438 | |||
| 439 | GenerateDeclarations(); | ||
| 440 | } | ||
| 441 | |||
| 442 | /// Add declarations for registers | ||
| 443 | void GenerateDeclarations() { | ||
| 444 | for (const auto& reg : declr_register) { | ||
| 445 | declarations.AddLine("float " + reg + " = 0.0;"); | ||
| 446 | } | ||
| 447 | declarations.AddLine(""); | ||
| 448 | |||
| 449 | for (const auto& index : declr_input_attribute) { | ||
| 450 | // TODO(bunnei): Use proper number of elements for these | ||
| 451 | declarations.AddLine("layout(location = " + | ||
| 452 | std::to_string(static_cast<u32>(index) - | ||
| 453 | static_cast<u32>(Attribute::Index::Attribute_0)) + | ||
| 454 | ") in vec4 " + GetInputAttribute(index) + ";"); | ||
| 455 | } | ||
| 456 | declarations.AddLine(""); | ||
| 457 | |||
| 458 | for (const auto& index : declr_output_attribute) { | ||
| 459 | // TODO(bunnei): Use proper number of elements for these | ||
| 460 | declarations.AddLine("layout(location = " + | ||
| 461 | std::to_string(static_cast<u32>(index) - | ||
| 462 | static_cast<u32>(Attribute::Index::Attribute_0)) + | ||
| 463 | ") out vec4 " + GetOutputAttribute(index) + ";"); | ||
| 464 | } | ||
| 465 | declarations.AddLine(""); | ||
| 466 | } | ||
| 467 | |||
| 468 | private: | ||
| 469 | const std::set<Subroutine>& subroutines; | ||
| 470 | const ProgramCode& program_code; | ||
| 471 | const u32 main_offset; | ||
| 472 | Maxwell3D::Regs::ShaderStage stage; | ||
| 473 | |||
| 474 | ShaderWriter shader; | ||
| 475 | ShaderWriter declarations; | ||
| 476 | |||
| 477 | // Declarations | ||
| 478 | std::set<std::string> declr_register; | ||
| 479 | std::set<Attribute::Index> declr_input_attribute; | ||
| 480 | std::set<Attribute::Index> declr_output_attribute; | ||
| 481 | }; // namespace Decompiler | ||
| 482 | |||
| 483 | std::string GetCommonDeclarations() { | ||
| 484 | return "bool exec_shader();"; | ||
| 485 | } | ||
| 486 | |||
| 487 | boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | ||
| 488 | Maxwell3D::Regs::ShaderStage stage) { | ||
| 489 | try { | ||
| 490 | auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); | ||
| 491 | GLSLGenerator generator(subroutines, program_code, main_offset, stage); | ||
| 492 | return generator.GetShaderCode(); | ||
| 493 | } catch (const DecompileFail& exception) { | ||
| 494 | LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); | ||
| 495 | } | ||
| 496 | return boost::none; | ||
| 54 | } | 497 | } |
| 55 | 498 | ||
| 56 | } // namespace Decompiler | 499 | } // namespace Decompiler |
| 57 | } // namespace Shader | 500 | } // namespace GLShader |
| 58 | } // namespace Maxwell3D | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02ebfcbe8..2f4047d87 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -5,23 +5,20 @@ | |||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <functional> | 6 | #include <functional> |
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <boost/optional.hpp> | ||
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 9 | 12 | ||
| 10 | namespace Maxwell3D { | 13 | namespace GLShader { |
| 11 | namespace Shader { | ||
| 12 | namespace Decompiler { | 14 | namespace Decompiler { |
| 13 | 15 | ||
| 14 | constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; | 16 | using Tegra::Engines::Maxwell3D; |
| 15 | constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; | ||
| 16 | 17 | ||
| 17 | std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | 18 | std::string GetCommonDeclarations(); |
| 18 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | 19 | |
| 19 | u32 main_offset, | 20 | boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, |
| 20 | const std::function<std::string(u32)>& inputreg_getter, | 21 | Maxwell3D::Regs::ShaderStage stage); |
| 21 | const std::function<std::string(u32)>& outputreg_getter, | ||
| 22 | bool sanitize_mul, const std::string& emit_cb = "", | ||
| 23 | const std::string& setemit_cb = ""); | ||
| 24 | 22 | ||
| 25 | } // namespace Decompiler | 23 | } // namespace Decompiler |
| 26 | } // namespace Shader | 24 | } // namespace GLShader |
| 27 | } // namespace Maxwell3D | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8f3c98800..524c2cfb5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -7,12 +7,12 @@ | |||
| 7 | 7 | ||
| 8 | namespace GLShader { | 8 | namespace GLShader { |
| 9 | 9 | ||
| 10 | std::string GenerateVertexShader(const MaxwellVSConfig& config) { | 10 | std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { |
| 11 | UNREACHABLE(); | 11 | UNREACHABLE(); |
| 12 | return {}; | 12 | return {}; |
| 13 | } | 13 | } |
| 14 | 14 | ||
| 15 | std::string GenerateFragmentShader(const MaxwellFSConfig& config) { | 15 | std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { |
| 16 | UNREACHABLE(); | 16 | UNREACHABLE(); |
| 17 | return {}; | 17 | return {}; |
| 18 | } | 18 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 5101e7d30..925e66ee4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -4,46 +4,67 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstring> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | 11 | #include "common/hash.h" |
| 11 | 12 | ||
| 12 | namespace GLShader { | 13 | namespace GLShader { |
| 13 | 14 | ||
| 14 | enum Attributes { | 15 | constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; |
| 15 | ATTRIBUTE_POSITION, | ||
| 16 | ATTRIBUTE_COLOR, | ||
| 17 | ATTRIBUTE_TEXCOORD0, | ||
| 18 | ATTRIBUTE_TEXCOORD1, | ||
| 19 | ATTRIBUTE_TEXCOORD2, | ||
| 20 | ATTRIBUTE_TEXCOORD0_W, | ||
| 21 | ATTRIBUTE_NORMQUAT, | ||
| 22 | ATTRIBUTE_VIEW, | ||
| 23 | }; | ||
| 24 | 16 | ||
| 25 | struct MaxwellShaderConfigCommon { | 17 | using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; |
| 26 | explicit MaxwellShaderConfigCommon(){}; | 18 | |
| 19 | struct ShaderSetup { | ||
| 20 | ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} | ||
| 21 | |||
| 22 | ProgramCode program_code; | ||
| 23 | bool program_code_hash_dirty = true; | ||
| 24 | |||
| 25 | u64 GetProgramCodeHash() { | ||
| 26 | if (program_code_hash_dirty) { | ||
| 27 | program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); | ||
| 28 | program_code_hash_dirty = false; | ||
| 29 | } | ||
| 30 | return program_code_hash; | ||
| 31 | } | ||
| 32 | |||
| 33 | private: | ||
| 34 | u64 program_code_hash{}; | ||
| 27 | }; | 35 | }; |
| 28 | 36 | ||
| 29 | struct MaxwellVSConfig : MaxwellShaderConfigCommon { | 37 | struct MaxwellShaderConfigCommon { |
| 30 | explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} | 38 | void Init(ShaderSetup& setup) { |
| 39 | program_hash = setup.GetProgramCodeHash(); | ||
| 40 | } | ||
| 31 | 41 | ||
| 32 | bool operator==(const MaxwellVSConfig& o) const { | 42 | u64 program_hash; |
| 33 | return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; | ||
| 34 | }; | ||
| 35 | }; | 43 | }; |
| 36 | 44 | ||
| 37 | struct MaxwellFSConfig : MaxwellShaderConfigCommon { | 45 | struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { |
| 38 | explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} | 46 | explicit MaxwellVSConfig(ShaderSetup& setup) { |
| 47 | state.Init(setup); | ||
| 48 | } | ||
| 49 | }; | ||
| 39 | 50 | ||
| 40 | bool operator==(const MaxwellFSConfig& o) const { | 51 | struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { |
| 41 | return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; | 52 | explicit MaxwellFSConfig(ShaderSetup& setup) { |
| 42 | }; | 53 | state.Init(setup); |
| 54 | } | ||
| 43 | }; | 55 | }; |
| 44 | 56 | ||
| 45 | std::string GenerateVertexShader(const MaxwellVSConfig& config); | 57 | /** |
| 46 | std::string GenerateFragmentShader(const MaxwellFSConfig& config); | 58 | * Generates the GLSL vertex shader program source code for the given VS program |
| 59 | * @returns String of the shader source code | ||
| 60 | */ | ||
| 61 | std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); | ||
| 62 | |||
| 63 | /** | ||
| 64 | * Generates the GLSL fragment shader program source code for the given FS program | ||
| 65 | * @returns String of the shader source code | ||
| 66 | */ | ||
| 67 | std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); | ||
| 47 | 68 | ||
| 48 | } // namespace GLShader | 69 | } // namespace GLShader |
| 49 | 70 | ||
| @@ -52,14 +73,14 @@ namespace std { | |||
| 52 | template <> | 73 | template <> |
| 53 | struct hash<GLShader::MaxwellVSConfig> { | 74 | struct hash<GLShader::MaxwellVSConfig> { |
| 54 | size_t operator()(const GLShader::MaxwellVSConfig& k) const { | 75 | size_t operator()(const GLShader::MaxwellVSConfig& k) const { |
| 55 | return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); | 76 | return k.Hash(); |
| 56 | } | 77 | } |
| 57 | }; | 78 | }; |
| 58 | 79 | ||
| 59 | template <> | 80 | template <> |
| 60 | struct hash<GLShader::MaxwellFSConfig> { | 81 | struct hash<GLShader::MaxwellFSConfig> { |
| 61 | size_t operator()(const GLShader::MaxwellFSConfig& k) const { | 82 | size_t operator()(const GLShader::MaxwellFSConfig& k) const { |
| 62 | return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); | 83 | return k.Hash(); |
| 63 | } | 84 | } |
| 64 | }; | 85 | }; |
| 65 | 86 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 000000000..7fceedce8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hle/kernel/process.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 9 | |||
| 10 | namespace GLShader { | ||
| 11 | |||
| 12 | namespace Impl { | ||
| 13 | void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||
| 14 | Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { | ||
| 15 | GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||
| 16 | if (ub_index != GL_INVALID_INDEX) { | ||
| 17 | GLint ub_size = 0; | ||
| 18 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 19 | ASSERT_MSG(ub_size == expected_size, | ||
| 20 | "Uniform block size did not match! Got %d, expected %zu", | ||
| 21 | static_cast<int>(ub_size), expected_size); | ||
| 22 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 27 | SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, | ||
| 28 | sizeof(MaxwellUniformData)); | ||
| 29 | SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 30 | sizeof(MaxwellUniformData)); | ||
| 31 | SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, | ||
| 32 | sizeof(MaxwellUniformData)); | ||
| 33 | } | ||
| 34 | |||
| 35 | void SetShaderSamplerBindings(GLuint shader) { | ||
| 36 | OpenGLState cur_state = OpenGLState::GetCurState(); | ||
| 37 | GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); | ||
| 38 | cur_state.Apply(); | ||
| 39 | |||
| 40 | // Set the texture samplers to correspond to different texture units | ||
| 41 | for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { | ||
| 42 | // Set the texture samplers to correspond to different texture units | ||
| 43 | std::string uniform_name = "tex[" + std::to_string(texture) + "]"; | ||
| 44 | GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); | ||
| 45 | if (uniform_tex != -1) { | ||
| 46 | glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | cur_state.draw.shader_program = old_program; | ||
| 51 | cur_state.Apply(); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Impl | ||
| 55 | |||
| 56 | void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { | ||
| 57 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||
| 58 | for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) { | ||
| 59 | const auto& const_buffer = shader_stage.const_buffers[index]; | ||
| 60 | const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address); | ||
| 61 | Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer)); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | } // namespace GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 000000000..5c8560cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <tuple> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <boost/functional/hash.hpp> | ||
| 10 | #include <glad/glad.h> | ||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 13 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 14 | |||
| 15 | namespace GLShader { | ||
| 16 | |||
| 17 | /// Number of OpenGL texture samplers that can be used in the fragment shader | ||
| 18 | static constexpr size_t NumTextureSamplers = 32; | ||
| 19 | |||
| 20 | using Tegra::Engines::Maxwell3D; | ||
| 21 | |||
| 22 | namespace Impl { | ||
| 23 | void SetShaderUniformBlockBindings(GLuint shader); | ||
| 24 | void SetShaderSamplerBindings(GLuint shader); | ||
| 25 | } // namespace Impl | ||
| 26 | |||
| 27 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | ||
| 28 | // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at | ||
| 29 | // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | ||
| 30 | // Not following that rule will cause problems on some AMD drivers. | ||
| 31 | struct MaxwellUniformData { | ||
| 32 | void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); | ||
| 33 | |||
| 34 | using ConstBuffer = std::array<GLvec4, 4>; | ||
| 35 | alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers; | ||
| 36 | }; | ||
| 37 | static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect"); | ||
| 38 | static_assert(sizeof(MaxwellUniformData) < 16384, | ||
| 39 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 40 | |||
| 41 | class OGLShaderStage { | ||
| 42 | public: | ||
| 43 | OGLShaderStage() = default; | ||
| 44 | |||
| 45 | void Create(const char* source, GLenum type) { | ||
| 46 | OGLShader shader; | ||
| 47 | shader.Create(source, type); | ||
| 48 | program.Create(true, shader.handle); | ||
| 49 | Impl::SetShaderUniformBlockBindings(program.handle); | ||
| 50 | Impl::SetShaderSamplerBindings(program.handle); | ||
| 51 | } | ||
| 52 | GLuint GetHandle() const { | ||
| 53 | return program.handle; | ||
| 54 | } | ||
| 55 | |||
| 56 | private: | ||
| 57 | OGLProgram program; | ||
| 58 | }; | ||
| 59 | |||
| 60 | // TODO(wwylele): beautify this doc | ||
| 61 | // This is a shader cache designed for translating PICA shader to GLSL shader. | ||
| 62 | // The double cache is needed because diffent KeyConfigType, which includes a hash of the code | ||
| 63 | // region (including its leftover unused code) can generate the same GLSL code. | ||
| 64 | template <typename KeyConfigType, | ||
| 65 | std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType> | ||
| 66 | class ShaderCache { | ||
| 67 | public: | ||
| 68 | ShaderCache() = default; | ||
| 69 | |||
| 70 | GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) { | ||
| 71 | auto map_it = shader_map.find(key); | ||
| 72 | if (map_it == shader_map.end()) { | ||
| 73 | std::string program = CodeGenerator(setup, key); | ||
| 74 | |||
| 75 | auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{}); | ||
| 76 | OGLShaderStage& cached_shader = iter->second; | ||
| 77 | if (new_shader) { | ||
| 78 | cached_shader.Create(program.c_str(), ShaderType); | ||
| 79 | } | ||
| 80 | shader_map[key] = &cached_shader; | ||
| 81 | return cached_shader.GetHandle(); | ||
| 82 | } else { | ||
| 83 | return map_it->second->GetHandle(); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | private: | ||
| 88 | std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; | ||
| 89 | std::unordered_map<std::string, OGLShaderStage> shader_cache; | ||
| 90 | }; | ||
| 91 | |||
| 92 | using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; | ||
| 93 | |||
| 94 | using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; | ||
| 95 | |||
| 96 | class ProgramManager { | ||
| 97 | public: | ||
| 98 | ProgramManager() { | ||
| 99 | pipeline.Create(); | ||
| 100 | } | ||
| 101 | |||
| 102 | void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) { | ||
| 103 | current.vs = vertex_shaders.Get(config, setup); | ||
| 104 | } | ||
| 105 | |||
| 106 | void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { | ||
| 107 | current.fs = fragment_shaders.Get(config, setup); | ||
| 108 | } | ||
| 109 | |||
| 110 | void UseTrivialGeometryShader() { | ||
| 111 | current.gs = 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | void ApplyTo(OpenGLState& state) { | ||
| 115 | // Workaround for AMD bug | ||
| 116 | glUseProgramStages(pipeline.handle, | ||
| 117 | GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, | ||
| 118 | 0); | ||
| 119 | |||
| 120 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); | ||
| 121 | glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); | ||
| 122 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); | ||
| 123 | state.draw.shader_program = 0; | ||
| 124 | state.draw.program_pipeline = pipeline.handle; | ||
| 125 | } | ||
| 126 | |||
| 127 | private: | ||
| 128 | struct ShaderTuple { | ||
| 129 | GLuint vs = 0, gs = 0, fs = 0; | ||
| 130 | bool operator==(const ShaderTuple& rhs) const { | ||
| 131 | return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); | ||
| 132 | } | ||
| 133 | struct Hash { | ||
| 134 | std::size_t operator()(const ShaderTuple& tuple) const { | ||
| 135 | std::size_t hash = 0; | ||
| 136 | boost::hash_combine(hash, tuple.vs); | ||
| 137 | boost::hash_combine(hash, tuple.gs); | ||
| 138 | boost::hash_combine(hash, tuple.fs); | ||
| 139 | return hash; | ||
| 140 | } | ||
| 141 | }; | ||
| 142 | }; | ||
| 143 | ShaderTuple current; | ||
| 144 | VertexShaders vertex_shaders; | ||
| 145 | FragmentShaders fragment_shaders; | ||
| 146 | |||
| 147 | std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; | ||
| 148 | OGLPipeline pipeline; | ||
| 149 | }; | ||
| 150 | |||
| 151 | } // namespace GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a6c6204d5..8568fface 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,156 +10,41 @@ | |||
| 10 | 10 | ||
| 11 | namespace GLShader { | 11 | namespace GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, | 13 | GLuint LoadShader(const char* source, GLenum type) { |
| 14 | const char* fragment_shader, const std::vector<const char*>& feedback_vars, | 14 | const char* debug_type; |
| 15 | bool separable_program) { | 15 | switch (type) { |
| 16 | // Create the shaders | 16 | case GL_VERTEX_SHADER: |
| 17 | GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; | 17 | debug_type = "vertex"; |
| 18 | GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; | 18 | break; |
| 19 | GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; | 19 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | ||
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | ||
| 23 | debug_type = "fragment"; | ||
| 24 | break; | ||
| 25 | default: | ||
| 26 | UNREACHABLE(); | ||
| 27 | } | ||
| 28 | GLuint shader_id = glCreateShader(type); | ||
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | ||
| 30 | NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | ||
| 31 | glCompileShader(shader_id); | ||
| 20 | 32 | ||
| 21 | GLint result = GL_FALSE; | 33 | GLint result = GL_FALSE; |
| 22 | int info_log_length; | 34 | GLint info_log_length; |
| 23 | 35 | glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); | |
| 24 | if (vertex_shader) { | 36 | glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); |
| 25 | // Compile Vertex Shader | ||
| 26 | LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); | ||
| 27 | |||
| 28 | glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); | ||
| 29 | glCompileShader(vertex_shader_id); | ||
| 30 | |||
| 31 | // Check Vertex Shader | ||
| 32 | glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); | ||
| 33 | glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 34 | |||
| 35 | if (info_log_length > 1) { | ||
| 36 | std::vector<char> vertex_shader_error(info_log_length); | ||
| 37 | glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); | ||
| 38 | if (result == GL_TRUE) { | ||
| 39 | LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); | ||
| 40 | } else { | ||
| 41 | LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", | ||
| 42 | &vertex_shader_error[0]); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | if (geometry_shader) { | ||
| 48 | // Compile Geometry Shader | ||
| 49 | LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); | ||
| 50 | |||
| 51 | glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); | ||
| 52 | glCompileShader(geometry_shader_id); | ||
| 53 | |||
| 54 | // Check Geometry Shader | ||
| 55 | glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); | ||
| 56 | glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 57 | |||
| 58 | if (info_log_length > 1) { | ||
| 59 | std::vector<char> geometry_shader_error(info_log_length); | ||
| 60 | glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, | ||
| 61 | &geometry_shader_error[0]); | ||
| 62 | if (result == GL_TRUE) { | ||
| 63 | LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); | ||
| 64 | } else { | ||
| 65 | LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", | ||
| 66 | &geometry_shader_error[0]); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | if (fragment_shader) { | ||
| 72 | // Compile Fragment Shader | ||
| 73 | LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); | ||
| 74 | |||
| 75 | glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); | ||
| 76 | glCompileShader(fragment_shader_id); | ||
| 77 | |||
| 78 | // Check Fragment Shader | ||
| 79 | glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); | ||
| 80 | glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 81 | |||
| 82 | if (info_log_length > 1) { | ||
| 83 | std::vector<char> fragment_shader_error(info_log_length); | ||
| 84 | glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, | ||
| 85 | &fragment_shader_error[0]); | ||
| 86 | if (result == GL_TRUE) { | ||
| 87 | LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); | ||
| 88 | } else { | ||
| 89 | LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", | ||
| 90 | &fragment_shader_error[0]); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | // Link the program | ||
| 96 | LOG_DEBUG(Render_OpenGL, "Linking program..."); | ||
| 97 | |||
| 98 | GLuint program_id = glCreateProgram(); | ||
| 99 | if (vertex_shader) { | ||
| 100 | glAttachShader(program_id, vertex_shader_id); | ||
| 101 | } | ||
| 102 | if (geometry_shader) { | ||
| 103 | glAttachShader(program_id, geometry_shader_id); | ||
| 104 | } | ||
| 105 | if (fragment_shader) { | ||
| 106 | glAttachShader(program_id, fragment_shader_id); | ||
| 107 | } | ||
| 108 | |||
| 109 | if (!feedback_vars.empty()) { | ||
| 110 | auto varyings = feedback_vars; | ||
| 111 | glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), | ||
| 112 | &varyings[0], GL_INTERLEAVED_ATTRIBS); | ||
| 113 | } | ||
| 114 | |||
| 115 | if (separable_program) { | ||
| 116 | glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 117 | } | ||
| 118 | |||
| 119 | glLinkProgram(program_id); | ||
| 120 | |||
| 121 | // Check the program | ||
| 122 | glGetProgramiv(program_id, GL_LINK_STATUS, &result); | ||
| 123 | glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 124 | 37 | ||
| 125 | if (info_log_length > 1) { | 38 | if (info_log_length > 1) { |
| 126 | std::vector<char> program_error(info_log_length); | 39 | std::string shader_error(info_log_length, ' '); |
| 127 | glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); | 40 | glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); |
| 128 | if (result == GL_TRUE) { | 41 | if (result == GL_TRUE) { |
| 129 | LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); | 42 | NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); |
| 130 | } else { | 43 | } else { |
| 131 | LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); | 44 | NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); |
| 132 | } | 45 | } |
| 133 | } | 46 | } |
| 134 | 47 | return shader_id; | |
| 135 | // If the program linking failed at least one of the shaders was probably bad | ||
| 136 | if (result == GL_FALSE) { | ||
| 137 | if (vertex_shader) { | ||
| 138 | LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); | ||
| 139 | } | ||
| 140 | if (geometry_shader) { | ||
| 141 | LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); | ||
| 142 | } | ||
| 143 | if (fragment_shader) { | ||
| 144 | LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | ASSERT_MSG(result == GL_TRUE, "Shader not linked"); | ||
| 148 | |||
| 149 | if (vertex_shader) { | ||
| 150 | glDetachShader(program_id, vertex_shader_id); | ||
| 151 | glDeleteShader(vertex_shader_id); | ||
| 152 | } | ||
| 153 | if (geometry_shader) { | ||
| 154 | glDetachShader(program_id, geometry_shader_id); | ||
| 155 | glDeleteShader(geometry_shader_id); | ||
| 156 | } | ||
| 157 | if (fragment_shader) { | ||
| 158 | glDetachShader(program_id, fragment_shader_id); | ||
| 159 | glDeleteShader(fragment_shader_id); | ||
| 160 | } | ||
| 161 | |||
| 162 | return program_id; | ||
| 163 | } | 48 | } |
| 164 | 49 | ||
| 165 | } // namespace GLShader | 50 | } // namespace GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index fc7b5e080..a1fa9e814 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h | |||
| @@ -6,18 +6,60 @@ | |||
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/logging/log.h" | ||
| 9 | 11 | ||
| 10 | namespace GLShader { | 12 | namespace GLShader { |
| 11 | 13 | ||
| 12 | /** | 14 | /** |
| 15 | * Utility function to create and compile an OpenGL GLSL shader | ||
| 16 | * @param source String of the GLSL shader program | ||
| 17 | * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) | ||
| 18 | */ | ||
| 19 | GLuint LoadShader(const char* source, GLenum type); | ||
| 20 | |||
| 21 | /** | ||
| 13 | * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) | 22 | * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) |
| 14 | * @param vertex_shader String of the GLSL vertex shader program | 23 | * @param separable_program whether to create a separable program |
| 15 | * @param geometry_shader String of the GLSL geometry shader program | 24 | * @param shaders ID of shaders to attach to the program |
| 16 | * @param fragment_shader String of the GLSL fragment shader program | 25 | * @returns Handle of the newly created OpenGL program object |
| 17 | * @returns Handle of the newly created OpenGL shader object | ||
| 18 | */ | 26 | */ |
| 19 | GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, | 27 | template <typename... T> |
| 20 | const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, | 28 | GLuint LoadProgram(bool separable_program, T... shaders) { |
| 21 | bool separable_program = false); | 29 | // Link the program |
| 30 | NGLOG_DEBUG(Render_OpenGL, "Linking program..."); | ||
| 31 | |||
| 32 | GLuint program_id = glCreateProgram(); | ||
| 33 | |||
| 34 | ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); | ||
| 35 | |||
| 36 | if (separable_program) { | ||
| 37 | glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 38 | } | ||
| 39 | |||
| 40 | glLinkProgram(program_id); | ||
| 41 | |||
| 42 | // Check the program | ||
| 43 | GLint result = GL_FALSE; | ||
| 44 | GLint info_log_length; | ||
| 45 | glGetProgramiv(program_id, GL_LINK_STATUS, &result); | ||
| 46 | glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 47 | |||
| 48 | if (info_log_length > 1) { | ||
| 49 | std::string program_error(info_log_length, ' '); | ||
| 50 | glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); | ||
| 51 | if (result == GL_TRUE) { | ||
| 52 | NGLOG_DEBUG(Render_OpenGL, "{}", program_error); | ||
| 53 | } else { | ||
| 54 | NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | ASSERT_MSG(result == GL_TRUE, "Shader not linked"); | ||
| 59 | |||
| 60 | ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); | ||
| 61 | |||
| 62 | return program_id; | ||
| 63 | } | ||
| 22 | 64 | ||
| 23 | } // namespace GLShader | 65 | } // namespace GLShader |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 48ee80125..7909dcfc3 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -10,6 +10,14 @@ | |||
| 10 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 12 | 12 | ||
| 13 | using GLvec2 = std::array<GLfloat, 2>; | ||
| 14 | using GLvec3 = std::array<GLfloat, 3>; | ||
| 15 | using GLvec4 = std::array<GLfloat, 4>; | ||
| 16 | |||
| 17 | using GLuvec2 = std::array<GLuint, 2>; | ||
| 18 | using GLuvec3 = std::array<GLuint, 3>; | ||
| 19 | using GLuvec4 = std::array<GLuint, 4>; | ||
| 20 | |||
| 13 | namespace MaxwellToGL { | 21 | namespace MaxwellToGL { |
| 14 | 22 | ||
| 15 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 39 | 47 | ||
| 40 | inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | 48 | inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { |
| 41 | switch (topology) { | 49 | switch (topology) { |
| 50 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 51 | return GL_TRIANGLES; | ||
| 42 | case Maxwell::PrimitiveTopology::TriangleStrip: | 52 | case Maxwell::PrimitiveTopology::TriangleStrip: |
| 43 | return GL_TRIANGLE_STRIP; | 53 | return GL_TRIANGLE_STRIP; |
| 44 | } | 54 | } |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 78b50b227..5e78723a2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -57,7 +57,7 @@ uniform sampler2D color_texture; | |||
| 57 | void main() { | 57 | void main() { |
| 58 | // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to | 58 | // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to |
| 59 | // support more framebuffer pixel formats. | 59 | // support more framebuffer pixel formats. |
| 60 | color = texture(color_texture, frag_tex_coord).abgr; | 60 | color = texture(color_texture, frag_tex_coord); |
| 61 | } | 61 | } |
| 62 | )"; | 62 | )"; |
| 63 | 63 | ||
| @@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 210 | 0.0f); | 210 | 0.0f); |
| 211 | 211 | ||
| 212 | // Link shaders and get variable locations | 212 | // Link shaders and get variable locations |
| 213 | shader.Create(vertex_shader, nullptr, fragment_shader); | 213 | shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); |
| 214 | state.draw.shader_program = shader.handle; | 214 | state.draw.shader_program = shader.handle; |
| 215 | state.Apply(); | 215 | state.Apply(); |
| 216 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 216 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |
| @@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 311 | } | 311 | } |
| 312 | 312 | ||
| 313 | std::array<ScreenRectVertex, 4> vertices = {{ | 313 | std::array<ScreenRectVertex, 4> vertices = {{ |
| 314 | ScreenRectVertex(x, y, texcoords.top, right), | 314 | ScreenRectVertex(x, y, texcoords.top, left), |
| 315 | ScreenRectVertex(x + w, y, texcoords.bottom, right), | 315 | ScreenRectVertex(x + w, y, texcoords.bottom, left), |
| 316 | ScreenRectVertex(x, y + h, texcoords.top, left), | 316 | ScreenRectVertex(x, y + h, texcoords.top, right), |
| 317 | ScreenRectVertex(x + w, y + h, texcoords.bottom, left), | 317 | ScreenRectVertex(x + w, y + h, texcoords.bottom, right), |
| 318 | }}; | 318 | }}; |
| 319 | 319 | ||
| 320 | state.texture_units[0].texture_2d = screen_info.display_texture; | 320 | state.texture_units[0].texture_2d = screen_info.display_texture; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c52f40037..2cc6d9a00 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -72,7 +72,7 @@ private: | |||
| 72 | // OpenGL object IDs | 72 | // OpenGL object IDs |
| 73 | OGLVertexArray vertex_array; | 73 | OGLVertexArray vertex_array; |
| 74 | OGLBuffer vertex_buffer; | 74 | OGLBuffer vertex_buffer; |
| 75 | OGLShader shader; | 75 | OGLProgram shader; |
| 76 | 76 | ||
| 77 | /// Display information for Switch screen | 77 | /// Display information for Switch screen |
| 78 | ScreenInfo screen_info; | 78 | ScreenInfo screen_info; |
diff --git a/src/video_core/utils.h b/src/video_core/utils.h index be0f7e22b..e0a14d48f 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h | |||
| @@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe | |||
| 151 | const u32 coarse_y = y & ~127; | 151 | const u32 coarse_y = y & ~127; |
| 152 | u32 morton_offset = | 152 | u32 morton_offset = |
| 153 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | 153 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; |
| 154 | u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; | 154 | u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; |
| 155 | 155 | ||
| 156 | data_ptrs[morton_to_gl] = morton_data + morton_offset; | 156 | data_ptrs[morton_to_gl] = morton_data + morton_offset; |
| 157 | data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | 157 | data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; |