diff options
27 files changed, 543 insertions, 362 deletions
diff --git a/.gitmodules b/.gitmodules index 059512902..db0905b3d 100644 --- a/.gitmodules +++ b/.gitmodules | |||
| @@ -9,4 +9,4 @@ | |||
| 9 | url = https://github.com/neobrain/nihstro.git | 9 | url = https://github.com/neobrain/nihstro.git |
| 10 | [submodule "soundtouch"] | 10 | [submodule "soundtouch"] |
| 11 | path = externals/soundtouch | 11 | path = externals/soundtouch |
| 12 | url = https://github.com/citra-emu/soundtouch.git | 12 | url = https://github.com/citra-emu/ext-soundtouch.git |
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp index daaf6e3f3..30552fe26 100644 --- a/src/audio_core/hle/source.cpp +++ b/src/audio_core/hle/source.cpp | |||
| @@ -126,13 +126,13 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_l | |||
| 126 | if (config.simple_filter_dirty) { | 126 | if (config.simple_filter_dirty) { |
| 127 | config.simple_filter_dirty.Assign(0); | 127 | config.simple_filter_dirty.Assign(0); |
| 128 | state.filters.Configure(config.simple_filter); | 128 | state.filters.Configure(config.simple_filter); |
| 129 | LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update"); | 129 | LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | if (config.biquad_filter_dirty) { | 132 | if (config.biquad_filter_dirty) { |
| 133 | config.biquad_filter_dirty.Assign(0); | 133 | config.biquad_filter_dirty.Assign(0); |
| 134 | state.filters.Configure(config.biquad_filter); | 134 | state.filters.Configure(config.biquad_filter); |
| 135 | LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update"); | 135 | LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id); |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | if (config.interpolation_dirty) { | 138 | if (config.interpolation_dirty) { |
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp index 1402f8e79..9c80f7ec9 100644 --- a/src/citra_qt/debugger/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics_tracing.cpp | |||
| @@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() { | |||
| 74 | std::array<u32, 4 * 16> default_attributes; | 74 | std::array<u32, 4 * 16> default_attributes; |
| 75 | for (unsigned i = 0; i < 16; ++i) { | 75 | for (unsigned i = 0; i < 16; ++i) { |
| 76 | for (unsigned comp = 0; comp < 3; ++comp) { | 76 | for (unsigned comp = 0; comp < 3; ++comp) { |
| 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); | 77 | default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 854f6ff16..391666d35 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp | |||
| @@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d | |||
| 501 | info.labels.insert({ entry_point, "main" }); | 501 | info.labels.insert({ entry_point, "main" }); |
| 502 | 502 | ||
| 503 | // Generate debug information | 503 | // Generate debug information |
| 504 | debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); | 504 | debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); |
| 505 | 505 | ||
| 506 | // Reload widget state | 506 | // Reload widget state |
| 507 | for (int attr = 0; attr < num_attributes; ++attr) { | 507 | for (int attr = 0; attr < num_attributes; ++attr) { |
diff --git a/src/common/swap.h b/src/common/swap.h index a7c37bc44..1749bd7a4 100644 --- a/src/common/swap.h +++ b/src/common/swap.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | #include <sys/endian.h> | 25 | #include <sys/endian.h> |
| 26 | #endif | 26 | #endif |
| 27 | 27 | ||
| 28 | #include <cstring> | ||
| 29 | |||
| 28 | #include "common/common_types.h" | 30 | #include "common/common_types.h" |
| 29 | 31 | ||
| 30 | // GCC 4.6+ | 32 | // GCC 4.6+ |
| @@ -58,9 +60,6 @@ | |||
| 58 | 60 | ||
| 59 | namespace Common { | 61 | namespace Common { |
| 60 | 62 | ||
| 61 | inline u8 swap8(u8 _data) {return _data;} | ||
| 62 | inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];} | ||
| 63 | |||
| 64 | #ifdef _MSC_VER | 63 | #ifdef _MSC_VER |
| 65 | inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} | 64 | inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} |
| 66 | inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} | 65 | inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} |
| @@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 | |||
| 92 | #endif | 91 | #endif |
| 93 | 92 | ||
| 94 | inline float swapf(float f) { | 93 | inline float swapf(float f) { |
| 95 | union { | 94 | static_assert(sizeof(u32) == sizeof(float), |
| 96 | float f; | 95 | "float must be the same size as uint32_t."); |
| 97 | unsigned int u32; | ||
| 98 | } dat1, dat2; | ||
| 99 | |||
| 100 | dat1.f = f; | ||
| 101 | dat2.u32 = swap32(dat1.u32); | ||
| 102 | 96 | ||
| 103 | return dat2.f; | 97 | u32 value; |
| 104 | } | 98 | std::memcpy(&value, &f, sizeof(u32)); |
| 105 | |||
| 106 | inline double swapd(double f) { | ||
| 107 | union { | ||
| 108 | double f; | ||
| 109 | unsigned long long u64; | ||
| 110 | } dat1, dat2; | ||
| 111 | 99 | ||
| 112 | dat1.f = f; | 100 | value = swap32(value); |
| 113 | dat2.u64 = swap64(dat1.u64); | 101 | std::memcpy(&f, &value, sizeof(u32)); |
| 114 | 102 | ||
| 115 | return dat2.f; | 103 | return f; |
| 116 | } | 104 | } |
| 117 | 105 | ||
| 118 | inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} | 106 | inline double swapd(double f) { |
| 119 | inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} | 107 | static_assert(sizeof(u64) == sizeof(double), |
| 120 | inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} | 108 | "double must be the same size as uint64_t."); |
| 121 | |||
| 122 | template <int count> | ||
| 123 | void swap(u8*); | ||
| 124 | 109 | ||
| 125 | template <> | 110 | u64 value; |
| 126 | inline void swap<1>(u8* data) { } | 111 | std::memcpy(&value, &f, sizeof(u64)); |
| 127 | 112 | ||
| 128 | template <> | 113 | value = swap64(value); |
| 129 | inline void swap<2>(u8* data) { | 114 | std::memcpy(&f, &value, sizeof(u64)); |
| 130 | *reinterpret_cast<u16*>(data) = swap16(data); | ||
| 131 | } | ||
| 132 | |||
| 133 | template <> | ||
| 134 | inline void swap<4>(u8* data) { | ||
| 135 | *reinterpret_cast<u32*>(data) = swap32(data); | ||
| 136 | } | ||
| 137 | 115 | ||
| 138 | template <> | 116 | return f; |
| 139 | inline void swap<8>(u8* data) { | ||
| 140 | *reinterpret_cast<u64*>(data) = swap64(data); | ||
| 141 | } | 117 | } |
| 142 | 118 | ||
| 143 | } // Namespace Common | 119 | } // Namespace Common |
| @@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) { | |||
| 534 | template <typename T> | 510 | template <typename T> |
| 535 | struct swap_64_t { | 511 | struct swap_64_t { |
| 536 | static T swap(T x) { | 512 | static T swap(T x) { |
| 537 | return (T)Common::swap64(*(u64 *)&x); | 513 | return static_cast<T>(Common::swap64(x)); |
| 538 | } | 514 | } |
| 539 | }; | 515 | }; |
| 540 | 516 | ||
| 541 | template <typename T> | 517 | template <typename T> |
| 542 | struct swap_32_t { | 518 | struct swap_32_t { |
| 543 | static T swap(T x) { | 519 | static T swap(T x) { |
| 544 | return (T)Common::swap32(*(u32 *)&x); | 520 | return static_cast<T>(Common::swap32(x)); |
| 545 | } | 521 | } |
| 546 | }; | 522 | }; |
| 547 | 523 | ||
| 548 | template <typename T> | 524 | template <typename T> |
| 549 | struct swap_16_t { | 525 | struct swap_16_t { |
| 550 | static T swap(T x) { | 526 | static T swap(T x) { |
| 551 | return (T)Common::swap16(*(u16 *)&x); | 527 | return static_cast<T>(Common::swap16(x)); |
| 552 | } | 528 | } |
| 553 | }; | 529 | }; |
| 554 | 530 | ||
| 555 | template <typename T> | 531 | template <typename T> |
| 556 | struct swap_float_t { | 532 | struct swap_float_t { |
| 557 | static T swap(T x) { | 533 | static T swap(T x) { |
| 558 | return (T)Common::swapf(*(float *)&x); | 534 | return static_cast<T>(Common::swapf(x)); |
| 559 | } | 535 | } |
| 560 | }; | 536 | }; |
| 561 | 537 | ||
| 562 | template <typename T> | 538 | template <typename T> |
| 563 | struct swap_double_t { | 539 | struct swap_double_t { |
| 564 | static T swap(T x) { | 540 | static T swap(T x) { |
| 565 | return (T)Common::swapd(*(double *)&x); | 541 | return static_cast<T>(Common::swapd(x)); |
| 566 | } | 542 | } |
| 567 | }; | 543 | }; |
| 568 | 544 | ||
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index a3581132c..13492a08b 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp | |||
| @@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e | |||
| 93 | context.cpu_registers[0] = arg; | 93 | context.cpu_registers[0] = arg; |
| 94 | context.pc = entry_point; | 94 | context.pc = entry_point; |
| 95 | context.sp = stack_top; | 95 | context.sp = stack_top; |
| 96 | context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode | 96 | context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { | 99 | void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { |
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 1360ee845..820b19e1a 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp | |||
| @@ -437,7 +437,7 @@ static void HandleSetThread() { | |||
| 437 | * | 437 | * |
| 438 | * @param signal Signal to be sent to client. | 438 | * @param signal Signal to be sent to client. |
| 439 | */ | 439 | */ |
| 440 | void SendSignal(u32 signal) { | 440 | static void SendSignal(u32 signal) { |
| 441 | if (gdbserver_socket == -1) { | 441 | if (gdbserver_socket == -1) { |
| 442 | return; | 442 | return; |
| 443 | } | 443 | } |
| @@ -713,7 +713,7 @@ static void Continue() { | |||
| 713 | * @param addr Address of breakpoint. | 713 | * @param addr Address of breakpoint. |
| 714 | * @param len Length of breakpoint. | 714 | * @param len Length of breakpoint. |
| 715 | */ | 715 | */ |
| 716 | bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { | 716 | static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { |
| 717 | std::map<u32, Breakpoint>& p = GetBreakpointList(type); | 717 | std::map<u32, Breakpoint>& p = GetBreakpointList(type); |
| 718 | 718 | ||
| 719 | Breakpoint breakpoint; | 719 | Breakpoint breakpoint; |
| @@ -907,7 +907,7 @@ void ToggleServer(bool status) { | |||
| 907 | } | 907 | } |
| 908 | } | 908 | } |
| 909 | 909 | ||
| 910 | void Init(u16 port) { | 910 | static void Init(u16 port) { |
| 911 | if (!g_server_enabled) { | 911 | if (!g_server_enabled) { |
| 912 | // Set the halt loop to false in case the user enabled the gdbstub mid-execution. | 912 | // Set the halt loop to false in case the user enabled the gdbstub mid-execution. |
| 913 | // This way the CPU can still execute normally. | 913 | // This way the CPU can still execute normally. |
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 6f1f0856c..4be20db22 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp | |||
| @@ -108,7 +108,6 @@ struct MemoryArea { | |||
| 108 | // We don't declare the IO regions in here since its handled by other means. | 108 | // We don't declare the IO regions in here since its handled by other means. |
| 109 | static MemoryArea memory_areas[] = { | 109 | static MemoryArea memory_areas[] = { |
| 110 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | 110 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) |
| 111 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | ||
| 112 | }; | 111 | }; |
| 113 | 112 | ||
| 114 | } | 113 | } |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index a06afef2b..d781ef32c 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -142,8 +142,11 @@ public: | |||
| 142 | 142 | ||
| 143 | MemoryRegionInfo* memory_region = nullptr; | 143 | MemoryRegionInfo* memory_region = nullptr; |
| 144 | 144 | ||
| 145 | /// Bitmask of the used TLS slots | 145 | /// The Thread Local Storage area is allocated as processes create threads, |
| 146 | std::bitset<300> used_tls_slots; | 146 | /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part |
| 147 | /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask. | ||
| 148 | /// This vector will grow as more pages are allocated for new threads. | ||
| 149 | std::vector<std::bitset<8>> tls_slots; | ||
| 147 | 150 | ||
| 148 | VAddr GetLinearHeapAreaAddress() const; | 151 | VAddr GetLinearHeapAreaAddress() const; |
| 149 | VAddr GetLinearHeapBase() const; | 152 | VAddr GetLinearHeapBase() const; |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6dc95d0f1..68f026918 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -117,9 +117,10 @@ void Thread::Stop() { | |||
| 117 | } | 117 | } |
| 118 | wait_objects.clear(); | 118 | wait_objects.clear(); |
| 119 | 119 | ||
| 120 | Kernel::g_current_process->used_tls_slots[tls_index] = false; | 120 | // Mark the TLS slot in the thread's page as free. |
| 121 | g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; | 121 | u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE; |
| 122 | g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; | 122 | u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; |
| 123 | Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot); | ||
| 123 | 124 | ||
| 124 | HLE::Reschedule(__func__); | 125 | HLE::Reschedule(__func__); |
| 125 | } | 126 | } |
| @@ -366,6 +367,31 @@ static void DebugThreadQueue() { | |||
| 366 | } | 367 | } |
| 367 | } | 368 | } |
| 368 | 369 | ||
| 370 | /** | ||
| 371 | * Finds a free location for the TLS section of a thread. | ||
| 372 | * @param tls_slots The TLS page array of the thread's owner process. | ||
| 373 | * Returns a tuple of (page, slot, alloc_needed) where: | ||
| 374 | * page: The index of the first allocated TLS page that has free slots. | ||
| 375 | * slot: The index of the first free slot in the indicated page. | ||
| 376 | * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). | ||
| 377 | */ | ||
| 378 | std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) { | ||
| 379 | // Iterate over all the allocated pages, and try to find one where not all slots are used. | ||
| 380 | for (unsigned page = 0; page < tls_slots.size(); ++page) { | ||
| 381 | const auto& page_tls_slots = tls_slots[page]; | ||
| 382 | if (!page_tls_slots.all()) { | ||
| 383 | // We found a page with at least one free slot, find which slot it is | ||
| 384 | for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) { | ||
| 385 | if (!page_tls_slots.test(slot)) { | ||
| 386 | return std::make_tuple(page, slot, false); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | return std::make_tuple(0, 0, true); | ||
| 393 | } | ||
| 394 | |||
| 369 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, | 395 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, |
| 370 | u32 arg, s32 processor_id, VAddr stack_top) { | 396 | u32 arg, s32 processor_id, VAddr stack_top) { |
| 371 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { | 397 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { |
| @@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 403 | thread->name = std::move(name); | 429 | thread->name = std::move(name); |
| 404 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); | 430 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); |
| 405 | thread->owner_process = g_current_process; | 431 | thread->owner_process = g_current_process; |
| 406 | thread->tls_index = -1; | ||
| 407 | thread->waitsynch_waited = false; | 432 | thread->waitsynch_waited = false; |
| 408 | 433 | ||
| 409 | // Find the next available TLS index, and mark it as used | 434 | // Find the next available TLS index, and mark it as used |
| 410 | auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; | 435 | auto& tls_slots = Kernel::g_current_process->tls_slots; |
| 411 | for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { | 436 | bool needs_allocation = true; |
| 412 | if (used_tls_slots[i] == false) { | 437 | u32 available_page; // Which allocated page has free space |
| 413 | thread->tls_index = i; | 438 | u32 available_slot; // Which slot within the page is free |
| 414 | used_tls_slots[i] = true; | 439 | |
| 415 | break; | 440 | std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots); |
| 441 | |||
| 442 | if (needs_allocation) { | ||
| 443 | // There are no already-allocated pages with free slots, lets allocate a new one. | ||
| 444 | // TLS pages are allocated from the BASE region in the linear heap. | ||
| 445 | MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE); | ||
| 446 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 447 | |||
| 448 | if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) { | ||
| 449 | LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread"); | ||
| 450 | return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent); | ||
| 416 | } | 451 | } |
| 452 | |||
| 453 | u32 offset = linheap_memory->size(); | ||
| 454 | |||
| 455 | // Allocate some memory from the end of the linear heap for this region. | ||
| 456 | linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0); | ||
| 457 | memory_region->used += Memory::PAGE_SIZE; | ||
| 458 | Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE; | ||
| 459 | |||
| 460 | tls_slots.emplace_back(0); // The page is completely available at the start | ||
| 461 | available_page = tls_slots.size() - 1; | ||
| 462 | available_slot = 0; // Use the first slot in the new page | ||
| 463 | |||
| 464 | auto& vm_manager = Kernel::g_current_process->vm_manager; | ||
| 465 | vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); | ||
| 466 | |||
| 467 | // Map the page to the current process' address space. | ||
| 468 | // TODO(Subv): Find the correct MemoryState for this region. | ||
| 469 | vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE, | ||
| 470 | linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private); | ||
| 417 | } | 471 | } |
| 418 | 472 | ||
| 419 | ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); | 473 | // Mark the slot as used |
| 420 | g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; | 474 | tls_slots[available_page].set(available_slot); |
| 421 | g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; | 475 | thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE; |
| 422 | 476 | ||
| 423 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 477 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 424 | // to initialize the context | 478 | // to initialize the context |
| @@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 509 | context.cpu_registers[1] = output; | 563 | context.cpu_registers[1] = output; |
| 510 | } | 564 | } |
| 511 | 565 | ||
| 512 | VAddr Thread::GetTLSAddress() const { | ||
| 513 | return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE; | ||
| 514 | } | ||
| 515 | |||
| 516 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 566 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 517 | 567 | ||
| 518 | void ThreadingInit() { | 568 | void ThreadingInit() { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 97ba57fc5..deab5d5a6 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -127,7 +127,7 @@ public: | |||
| 127 | * Returns the Thread Local Storage address of the current thread | 127 | * Returns the Thread Local Storage address of the current thread |
| 128 | * @returns VAddr of the thread's TLS | 128 | * @returns VAddr of the thread's TLS |
| 129 | */ | 129 | */ |
| 130 | VAddr GetTLSAddress() const; | 130 | VAddr GetTLSAddress() const { return tls_address; } |
| 131 | 131 | ||
| 132 | Core::ThreadContext context; | 132 | Core::ThreadContext context; |
| 133 | 133 | ||
| @@ -144,7 +144,7 @@ public: | |||
| 144 | 144 | ||
| 145 | s32 processor_id; | 145 | s32 processor_id; |
| 146 | 146 | ||
| 147 | s32 tls_index; ///< Index of the Thread Local Storage of the thread | 147 | VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread |
| 148 | 148 | ||
| 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait | 149 | bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait |
| 150 | 150 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index 9caa3c3f5..126d60471 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -100,15 +100,9 @@ enum : VAddr { | |||
| 100 | SHARED_PAGE_SIZE = 0x00001000, | 100 | SHARED_PAGE_SIZE = 0x00001000, |
| 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, | 101 | SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, |
| 102 | 102 | ||
| 103 | // TODO(yuriks): The size of this area is dynamic, the kernel grows | ||
| 104 | // it as more and more threads are created. For now we'll just use a | ||
| 105 | // hardcoded value. | ||
| 106 | /// Area where TLS (Thread-Local Storage) buffers are allocated. | 103 | /// Area where TLS (Thread-Local Storage) buffers are allocated. |
| 107 | TLS_AREA_VADDR = 0x1FF82000, | 104 | TLS_AREA_VADDR = 0x1FF82000, |
| 108 | TLS_ENTRY_SIZE = 0x200, | 105 | TLS_ENTRY_SIZE = 0x200, |
| 109 | TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size | ||
| 110 | TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, | ||
| 111 | |||
| 112 | 106 | ||
| 113 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. | 107 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. |
| 114 | NEW_LINEAR_HEAP_VADDR = 0x30000000, | 108 | NEW_LINEAR_HEAP_VADDR = 0x30000000, |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 2bc747102..db99ce666 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -75,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 75 | viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); | 75 | viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); |
| 76 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); | 76 | viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); |
| 77 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); | 77 | viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); |
| 78 | viewport.zscale = float24::FromRaw(regs.viewport_depth_range); | ||
| 79 | viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); | ||
| 80 | 78 | ||
| 81 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | 79 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; |
| 82 | vtx.color *= inv_w; | 80 | vtx.color *= inv_w; |
| @@ -89,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 89 | 87 | ||
| 90 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; | 88 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |
| 91 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | 89 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; |
| 92 | vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; | 90 | vtx.screenpos[2] = vtx.pos.z * inv_w; |
| 93 | } | 91 | } |
| 94 | 92 | ||
| 95 | void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { | 93 | void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index dd1379503..ad0da796e 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 128 | 128 | ||
| 129 | // TODO: Verify that this actually modifies the register! | 129 | // TODO: Verify that this actually modifies the register! |
| 130 | if (setup.index < 15) { | 130 | if (setup.index < 15) { |
| 131 | g_state.vs.default_attributes[setup.index] = attribute; | 131 | g_state.vs_default_attributes[setup.index] = attribute; |
| 132 | setup.index++; | 132 | setup.index++; |
| 133 | } else { | 133 | } else { |
| 134 | // Put each attribute into an immediate input buffer. | 134 | // Put each attribute into an immediate input buffer. |
| @@ -144,12 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 144 | immediate_attribute_id = 0; | 144 | immediate_attribute_id = 0; |
| 145 | 145 | ||
| 146 | Shader::UnitState<false> shader_unit; | 146 | Shader::UnitState<false> shader_unit; |
| 147 | Shader::Setup(); | 147 | g_state.vs.Setup(); |
| 148 | 148 | ||
| 149 | // Send to vertex shader | 149 | // Send to vertex shader |
| 150 | if (g_debug_context) | 150 | if (g_debug_context) |
| 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); | 151 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); |
| 152 | Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); | 152 | Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); |
| 153 | 153 | ||
| 154 | // Send to renderer | 154 | // Send to renderer |
| 155 | using Pica::Shader::OutputVertex; | 155 | using Pica::Shader::OutputVertex; |
| @@ -237,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 237 | vertex_cache_ids.fill(-1); | 237 | vertex_cache_ids.fill(-1); |
| 238 | 238 | ||
| 239 | Shader::UnitState<false> shader_unit; | 239 | Shader::UnitState<false> shader_unit; |
| 240 | Shader::Setup(); | 240 | g_state.vs.Setup(); |
| 241 | 241 | ||
| 242 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 242 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
| 243 | { | 243 | { |
| @@ -274,7 +274,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 274 | // Send to vertex shader | 274 | // Send to vertex shader |
| 275 | if (g_debug_context) | 275 | if (g_debug_context) |
| 276 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); | 276 | g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); |
| 277 | output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); | 277 | output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); |
| 278 | 278 | ||
| 279 | if (is_indexed) { | 279 | if (is_indexed) { |
| 280 | vertex_cache[vertex_cache_pos] = output; | 280 | vertex_cache[vertex_cache_pos] = output; |
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index be82cf4b5..ec78f9593 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp | |||
| @@ -500,7 +500,7 @@ void Init() { | |||
| 500 | } | 500 | } |
| 501 | 501 | ||
| 502 | void Shutdown() { | 502 | void Shutdown() { |
| 503 | Shader::Shutdown(); | 503 | Shader::ClearCache(); |
| 504 | } | 504 | } |
| 505 | 505 | ||
| 506 | template <typename T> | 506 | template <typename T> |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5891fb72a..86c0a0096 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -70,7 +70,7 @@ struct Regs { | |||
| 70 | INSERT_PADDING_WORDS(0x9); | 70 | INSERT_PADDING_WORDS(0x9); |
| 71 | 71 | ||
| 72 | BitField<0, 24, u32> viewport_depth_range; // float24 | 72 | BitField<0, 24, u32> viewport_depth_range; // float24 |
| 73 | BitField<0, 24, u32> viewport_depth_far_plane; // float24 | 73 | BitField<0, 24, u32> viewport_depth_near_plane; // float24 |
| 74 | 74 | ||
| 75 | BitField<0, 3, u32> vs_output_total; | 75 | BitField<0, 3, u32> vs_output_total; |
| 76 | 76 | ||
| @@ -122,9 +122,31 @@ struct Regs { | |||
| 122 | BitField<16, 10, s32> y; | 122 | BitField<16, 10, s32> y; |
| 123 | } viewport_corner; | 123 | } viewport_corner; |
| 124 | 124 | ||
| 125 | INSERT_PADDING_WORDS(0x17); | 125 | INSERT_PADDING_WORDS(0x1); |
| 126 | |||
| 127 | //TODO: early depth | ||
| 128 | INSERT_PADDING_WORDS(0x1); | ||
| 129 | |||
| 130 | INSERT_PADDING_WORDS(0x2); | ||
| 131 | |||
| 132 | enum DepthBuffering : u32 { | ||
| 133 | WBuffering = 0, | ||
| 134 | ZBuffering = 1, | ||
| 135 | }; | ||
| 136 | BitField< 0, 1, DepthBuffering> depthmap_enable; | ||
| 137 | |||
| 138 | INSERT_PADDING_WORDS(0x12); | ||
| 126 | 139 | ||
| 127 | struct TextureConfig { | 140 | struct TextureConfig { |
| 141 | enum TextureType : u32 { | ||
| 142 | Texture2D = 0, | ||
| 143 | TextureCube = 1, | ||
| 144 | Shadow2D = 2, | ||
| 145 | Projection2D = 3, | ||
| 146 | ShadowCube = 4, | ||
| 147 | Disabled = 5, | ||
| 148 | }; | ||
| 149 | |||
| 128 | enum WrapMode : u32 { | 150 | enum WrapMode : u32 { |
| 129 | ClampToEdge = 0, | 151 | ClampToEdge = 0, |
| 130 | ClampToBorder = 1, | 152 | ClampToBorder = 1, |
| @@ -155,6 +177,7 @@ struct Regs { | |||
| 155 | BitField< 2, 1, TextureFilter> min_filter; | 177 | BitField< 2, 1, TextureFilter> min_filter; |
| 156 | BitField< 8, 2, WrapMode> wrap_t; | 178 | BitField< 8, 2, WrapMode> wrap_t; |
| 157 | BitField<12, 2, WrapMode> wrap_s; | 179 | BitField<12, 2, WrapMode> wrap_s; |
| 180 | BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. | ||
| 158 | }; | 181 | }; |
| 159 | 182 | ||
| 160 | INSERT_PADDING_WORDS(0x1); | 183 | INSERT_PADDING_WORDS(0x1); |
| @@ -1279,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40); | |||
| 1279 | ASSERT_REG_POSITION(viewport_size_x, 0x41); | 1302 | ASSERT_REG_POSITION(viewport_size_x, 0x41); |
| 1280 | ASSERT_REG_POSITION(viewport_size_y, 0x43); | 1303 | ASSERT_REG_POSITION(viewport_size_y, 0x43); |
| 1281 | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); | 1304 | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); |
| 1282 | ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); | 1305 | ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e); |
| 1283 | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); | 1306 | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); |
| 1284 | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); | 1307 | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); |
| 1285 | ASSERT_REG_POSITION(viewport_corner, 0x68); | 1308 | ASSERT_REG_POSITION(viewport_corner, 0x68); |
| 1309 | ASSERT_REG_POSITION(depthmap_enable, 0x6D); | ||
| 1286 | ASSERT_REG_POSITION(texture0_enable, 0x80); | 1310 | ASSERT_REG_POSITION(texture0_enable, 0x80); |
| 1287 | ASSERT_REG_POSITION(texture0, 0x81); | 1311 | ASSERT_REG_POSITION(texture0, 0x81); |
| 1288 | ASSERT_REG_POSITION(texture0_format, 0x8e); | 1312 | ASSERT_REG_POSITION(texture0_format, 0x8e); |
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 1059c6ae4..495174c25 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h | |||
| @@ -25,6 +25,8 @@ struct State { | |||
| 25 | Shader::ShaderSetup vs; | 25 | Shader::ShaderSetup vs; |
| 26 | Shader::ShaderSetup gs; | 26 | Shader::ShaderSetup gs; |
| 27 | 27 | ||
| 28 | std::array<Math::Vec4<float24>, 16> vs_default_attributes; | ||
| 29 | |||
| 28 | struct { | 30 | struct { |
| 29 | union LutEntry { | 31 | union LutEntry { |
| 30 | // Used for raw access | 32 | // Used for raw access |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index df67b9081..65168f05a 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -442,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 442 | 442 | ||
| 443 | DEBUG_ASSERT(0 != texture.config.address); | 443 | DEBUG_ASSERT(0 != texture.config.address); |
| 444 | 444 | ||
| 445 | int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 445 | float24 u = uv[i].u(); |
| 446 | int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 446 | float24 v = uv[i].v(); |
| 447 | |||
| 448 | // Only unit 0 respects the texturing type (according to 3DBrew) | ||
| 449 | // TODO: Refactor so cubemaps and shadowmaps can be handled | ||
| 450 | if (i == 0) { | ||
| 451 | switch(texture.config.type) { | ||
| 452 | case Regs::TextureConfig::Texture2D: | ||
| 453 | break; | ||
| 454 | case Regs::TextureConfig::Projection2D: { | ||
| 455 | auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); | ||
| 456 | u /= tc0_w; | ||
| 457 | v /= tc0_w; | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | default: | ||
| 461 | // TODO: Change to LOG_ERROR when more types are handled. | ||
| 462 | LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type); | ||
| 463 | UNIMPLEMENTED(); | ||
| 464 | break; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 468 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | ||
| 469 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | ||
| 470 | |||
| 471 | |||
| 447 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 472 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { |
| 448 | switch (mode) { | 473 | switch (mode) { |
| 449 | case Regs::TextureConfig::ClampToEdge: | 474 | case Regs::TextureConfig::ClampToEdge: |
| @@ -862,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 862 | } | 887 | } |
| 863 | } | 888 | } |
| 864 | 889 | ||
| 890 | // interpolated_z = z / w | ||
| 891 | float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + | ||
| 892 | v1.screenpos[2].ToFloat32() * w1 + | ||
| 893 | v2.screenpos[2].ToFloat32() * w2) / wsum; | ||
| 894 | |||
| 895 | // Not fully accurate. About 3 bits in precision are missing. | ||
| 896 | // Z-Buffer (z / w * scale + offset) | ||
| 897 | float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); | ||
| 898 | float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); | ||
| 899 | float depth = interpolated_z_over_w * depth_scale + depth_offset; | ||
| 900 | |||
| 901 | // Potentially switch to W-Buffer | ||
| 902 | if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { | ||
| 903 | |||
| 904 | // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) | ||
| 905 | depth *= interpolated_w_inverse.ToFloat32() * wsum; | ||
| 906 | } | ||
| 907 | |||
| 908 | // Clamp the result | ||
| 909 | depth = MathUtil::Clamp(depth, 0.0f, 1.0f); | ||
| 910 | |||
| 911 | // Convert float to integer | ||
| 865 | unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); | 912 | unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); |
| 866 | u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + | 913 | u32 z = (u32)(depth * ((1 << num_bits) - 1)); |
| 867 | v1.screenpos[2].ToFloat32() * w1 + | ||
| 868 | v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); | ||
| 869 | 914 | ||
| 870 | if (output_merger.depth_test_enable) { | 915 | if (output_merger.depth_test_enable) { |
| 871 | u32 ref_z = GetDepth(x >> 4, y >> 4); | 916 | u32 ref_z = GetDepth(x >> 4, y >> 4); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0b471dfd2..ed2e2f3ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { | |||
| 76 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); | 76 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); |
| 77 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); | 77 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); |
| 78 | 78 | ||
| 79 | glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); | ||
| 80 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); | ||
| 81 | |||
| 79 | glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); | 82 | glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); |
| 80 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); | 83 | glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); |
| 81 | 84 | ||
| @@ -256,10 +259,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 256 | 259 | ||
| 257 | // Depth modifiers | 260 | // Depth modifiers |
| 258 | case PICA_REG_INDEX(viewport_depth_range): | 261 | case PICA_REG_INDEX(viewport_depth_range): |
| 259 | case PICA_REG_INDEX(viewport_depth_far_plane): | 262 | case PICA_REG_INDEX(viewport_depth_near_plane): |
| 260 | SyncDepthModifiers(); | 263 | SyncDepthModifiers(); |
| 261 | break; | 264 | break; |
| 262 | 265 | ||
| 266 | // Depth buffering | ||
| 267 | case PICA_REG_INDEX(depthmap_enable): | ||
| 268 | shader_dirty = true; | ||
| 269 | break; | ||
| 270 | |||
| 263 | // Blending | 271 | // Blending |
| 264 | case PICA_REG_INDEX(output_merger.alphablend_enable): | 272 | case PICA_REG_INDEX(output_merger.alphablend_enable): |
| 265 | SyncBlendEnabled(); | 273 | SyncBlendEnabled(); |
| @@ -314,6 +322,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
| 314 | SyncLogicOp(); | 322 | SyncLogicOp(); |
| 315 | break; | 323 | break; |
| 316 | 324 | ||
| 325 | // Texture 0 type | ||
| 326 | case PICA_REG_INDEX(texture0.type): | ||
| 327 | shader_dirty = true; | ||
| 328 | break; | ||
| 329 | |||
| 317 | // TEV stages | 330 | // TEV stages |
| 318 | case PICA_REG_INDEX(tev_stage0.color_source1): | 331 | case PICA_REG_INDEX(tev_stage0.color_source1): |
| 319 | case PICA_REG_INDEX(tev_stage0.color_modifier1): | 332 | case PICA_REG_INDEX(tev_stage0.color_modifier1): |
| @@ -910,10 +923,10 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 910 | } | 923 | } |
| 911 | 924 | ||
| 912 | void RasterizerOpenGL::SyncDepthModifiers() { | 925 | void RasterizerOpenGL::SyncDepthModifiers() { |
| 913 | float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); | 926 | float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); |
| 914 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; | 927 | float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); |
| 915 | 928 | ||
| 916 | // TODO: Implement scale modifier | 929 | uniform_block_data.data.depth_scale = depth_scale; |
| 917 | uniform_block_data.data.depth_offset = depth_offset; | 930 | uniform_block_data.data.depth_offset = depth_offset; |
| 918 | uniform_block_data.dirty = true; | 931 | uniform_block_data.dirty = true; |
| 919 | } | 932 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 82fa61742..eed00011a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -39,140 +39,185 @@ struct ScreenInfo; | |||
| 39 | * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where | 39 | * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where |
| 40 | * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) | 40 | * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) |
| 41 | * two separate shaders sharing the same key. | 41 | * two separate shaders sharing the same key. |
| 42 | * | ||
| 43 | * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." | ||
| 44 | * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." | ||
| 45 | * = Bytewise copy instead of memberwise copy. | ||
| 46 | * This is important because the padding bytes are included in the hash and comparison between objects. | ||
| 42 | */ | 47 | */ |
| 43 | struct PicaShaderConfig { | 48 | union PicaShaderConfig { |
| 49 | |||
| 44 | /// Construct a PicaShaderConfig with the current Pica register configuration. | 50 | /// Construct a PicaShaderConfig with the current Pica register configuration. |
| 45 | static PicaShaderConfig CurrentConfig() { | 51 | static PicaShaderConfig CurrentConfig() { |
| 46 | PicaShaderConfig res; | 52 | PicaShaderConfig res; |
| 53 | |||
| 54 | auto& state = res.state; | ||
| 55 | std::memset(&state, 0, sizeof(PicaShaderConfig::State)); | ||
| 56 | |||
| 47 | const auto& regs = Pica::g_state.regs; | 57 | const auto& regs = Pica::g_state.regs; |
| 48 | 58 | ||
| 49 | res.alpha_test_func = regs.output_merger.alpha_test.enable ? | 59 | state.depthmap_enable = regs.depthmap_enable; |
| 60 | |||
| 61 | state.alpha_test_func = regs.output_merger.alpha_test.enable ? | ||
| 50 | regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; | 62 | regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; |
| 51 | 63 | ||
| 64 | state.texture0_type = regs.texture0.type; | ||
| 65 | |||
| 52 | // Copy relevant tev stages fields. | 66 | // Copy relevant tev stages fields. |
| 53 | // We don't sync const_color here because of the high variance, it is a | 67 | // We don't sync const_color here because of the high variance, it is a |
| 54 | // shader uniform instead. | 68 | // shader uniform instead. |
| 55 | const auto& tev_stages = regs.GetTevStages(); | 69 | const auto& tev_stages = regs.GetTevStages(); |
| 56 | DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); | 70 | DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); |
| 57 | for (size_t i = 0; i < tev_stages.size(); i++) { | 71 | for (size_t i = 0; i < tev_stages.size(); i++) { |
| 58 | const auto& tev_stage = tev_stages[i]; | 72 | const auto& tev_stage = tev_stages[i]; |
| 59 | res.tev_stages[i].sources_raw = tev_stage.sources_raw; | 73 | state.tev_stages[i].sources_raw = tev_stage.sources_raw; |
| 60 | res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; | 74 | state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; |
| 61 | res.tev_stages[i].ops_raw = tev_stage.ops_raw; | 75 | state.tev_stages[i].ops_raw = tev_stage.ops_raw; |
| 62 | res.tev_stages[i].scales_raw = tev_stage.scales_raw; | 76 | state.tev_stages[i].scales_raw = tev_stage.scales_raw; |
| 63 | } | 77 | } |
| 64 | 78 | ||
| 65 | res.combiner_buffer_input = | 79 | state.combiner_buffer_input = |
| 66 | regs.tev_combiner_buffer_input.update_mask_rgb.Value() | | 80 | regs.tev_combiner_buffer_input.update_mask_rgb.Value() | |
| 67 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; | 81 | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; |
| 68 | 82 | ||
| 69 | // Fragment lighting | 83 | // Fragment lighting |
| 70 | 84 | ||
| 71 | res.lighting.enable = !regs.lighting.disable; | 85 | state.lighting.enable = !regs.lighting.disable; |
| 72 | res.lighting.src_num = regs.lighting.num_lights + 1; | 86 | state.lighting.src_num = regs.lighting.num_lights + 1; |
| 73 | 87 | ||
| 74 | for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { | 88 | for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { |
| 75 | unsigned num = regs.lighting.light_enable.GetNum(light_index); | 89 | unsigned num = regs.lighting.light_enable.GetNum(light_index); |
| 76 | const auto& light = regs.lighting.light[num]; | 90 | const auto& light = regs.lighting.light[num]; |
| 77 | res.lighting.light[light_index].num = num; | 91 | state.lighting.light[light_index].num = num; |
| 78 | res.lighting.light[light_index].directional = light.directional != 0; | 92 | state.lighting.light[light_index].directional = light.directional != 0; |
| 79 | res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; | 93 | state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; |
| 80 | res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); | 94 | state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); |
| 81 | res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); | 95 | state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); |
| 82 | res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); | 96 | state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); |
| 83 | } | 97 | } |
| 84 | 98 | ||
| 85 | res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; | 99 | state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; |
| 86 | res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; | 100 | state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; |
| 87 | res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); | 101 | state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); |
| 88 | res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); | 102 | state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); |
| 89 | 103 | ||
| 90 | res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; | 104 | state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; |
| 91 | res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; | 105 | state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; |
| 92 | res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); | 106 | state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); |
| 93 | res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); | 107 | state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); |
| 94 | 108 | ||
| 95 | res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; | 109 | state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; |
| 96 | res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; | 110 | state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; |
| 97 | res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); | 111 | state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); |
| 98 | res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); | 112 | state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); |
| 99 | 113 | ||
| 100 | res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; | 114 | state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; |
| 101 | res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; | 115 | state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; |
| 102 | res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); | 116 | state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); |
| 103 | res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); | 117 | state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); |
| 104 | 118 | ||
| 105 | res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; | 119 | state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; |
| 106 | res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; | 120 | state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; |
| 107 | res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); | 121 | state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); |
| 108 | res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); | 122 | state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); |
| 109 | 123 | ||
| 110 | res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; | 124 | state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; |
| 111 | res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; | 125 | state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; |
| 112 | res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); | 126 | state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); |
| 113 | res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); | 127 | state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); |
| 114 | 128 | ||
| 115 | res.lighting.config = regs.lighting.config; | 129 | state.lighting.config = regs.lighting.config; |
| 116 | res.lighting.fresnel_selector = regs.lighting.fresnel_selector; | 130 | state.lighting.fresnel_selector = regs.lighting.fresnel_selector; |
| 117 | res.lighting.bump_mode = regs.lighting.bump_mode; | 131 | state.lighting.bump_mode = regs.lighting.bump_mode; |
| 118 | res.lighting.bump_selector = regs.lighting.bump_selector; | 132 | state.lighting.bump_selector = regs.lighting.bump_selector; |
| 119 | res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; | 133 | state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; |
| 120 | res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; | 134 | state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; |
| 121 | 135 | ||
| 122 | return res; | 136 | return res; |
| 123 | } | 137 | } |
| 124 | 138 | ||
| 125 | bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { | 139 | bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { |
| 126 | return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); | 140 | return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); |
| 127 | } | 141 | } |
| 128 | 142 | ||
| 129 | bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { | 143 | bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { |
| 130 | return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); | 144 | return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); |
| 131 | } | 145 | } |
| 132 | 146 | ||
| 133 | bool operator ==(const PicaShaderConfig& o) const { | 147 | bool operator ==(const PicaShaderConfig& o) const { |
| 134 | return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; | 148 | return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; |
| 135 | }; | 149 | }; |
| 136 | 150 | ||
| 137 | Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; | 151 | // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC. |
| 138 | std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; | 152 | // This makes BitField not TC when used in a union or struct so we have to resort |
| 139 | u8 combiner_buffer_input = 0; | 153 | // to this ugly hack. |
| 154 | // Once that bug is fixed we can use Pica::Regs::TevStageConfig here. | ||
| 155 | // Doesn't include const_color because we don't sync it, see comment in CurrentConfig() | ||
| 156 | struct TevStageConfigRaw { | ||
| 157 | u32 sources_raw; | ||
| 158 | u32 modifiers_raw; | ||
| 159 | u32 ops_raw; | ||
| 160 | u32 scales_raw; | ||
| 161 | explicit operator Pica::Regs::TevStageConfig() const noexcept { | ||
| 162 | Pica::Regs::TevStageConfig stage; | ||
| 163 | stage.sources_raw = sources_raw; | ||
| 164 | stage.modifiers_raw = modifiers_raw; | ||
| 165 | stage.ops_raw = ops_raw; | ||
| 166 | stage.const_color = 0; | ||
| 167 | stage.scales_raw = scales_raw; | ||
| 168 | return stage; | ||
| 169 | } | ||
| 170 | }; | ||
| 140 | 171 | ||
| 141 | struct { | 172 | struct State { |
| 142 | struct { | 173 | |
| 143 | unsigned num = 0; | 174 | Pica::Regs::CompareFunc alpha_test_func; |
| 144 | bool directional = false; | 175 | Pica::Regs::TextureConfig::TextureType texture0_type; |
| 145 | bool two_sided_diffuse = false; | 176 | std::array<TevStageConfigRaw, 6> tev_stages; |
| 146 | bool dist_atten_enable = false; | 177 | u8 combiner_buffer_input; |
| 147 | GLfloat dist_atten_scale = 0.0f; | 178 | |
| 148 | GLfloat dist_atten_bias = 0.0f; | 179 | Pica::Regs::DepthBuffering depthmap_enable; |
| 149 | } light[8]; | ||
| 150 | |||
| 151 | bool enable = false; | ||
| 152 | unsigned src_num = 0; | ||
| 153 | Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; | ||
| 154 | unsigned bump_selector = 0; | ||
| 155 | bool bump_renorm = false; | ||
| 156 | bool clamp_highlights = false; | ||
| 157 | |||
| 158 | Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; | ||
| 159 | Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; | ||
| 160 | 180 | ||
| 161 | struct { | 181 | struct { |
| 162 | bool enable = false; | 182 | struct { |
| 163 | bool abs_input = false; | 183 | unsigned num; |
| 164 | Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; | 184 | bool directional; |
| 165 | float scale = 1.0f; | 185 | bool two_sided_diffuse; |
| 166 | } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; | 186 | bool dist_atten_enable; |
| 167 | } lighting; | 187 | GLfloat dist_atten_scale; |
| 188 | GLfloat dist_atten_bias; | ||
| 189 | } light[8]; | ||
| 190 | |||
| 191 | bool enable; | ||
| 192 | unsigned src_num; | ||
| 193 | Pica::Regs::LightingBumpMode bump_mode; | ||
| 194 | unsigned bump_selector; | ||
| 195 | bool bump_renorm; | ||
| 196 | bool clamp_highlights; | ||
| 197 | |||
| 198 | Pica::Regs::LightingConfig config; | ||
| 199 | Pica::Regs::LightingFresnelSelector fresnel_selector; | ||
| 200 | |||
| 201 | struct { | ||
| 202 | bool enable; | ||
| 203 | bool abs_input; | ||
| 204 | Pica::Regs::LightingLutInput type; | ||
| 205 | float scale; | ||
| 206 | } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; | ||
| 207 | } lighting; | ||
| 208 | |||
| 209 | } state; | ||
| 168 | }; | 210 | }; |
| 211 | #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | ||
| 212 | static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable"); | ||
| 213 | #endif | ||
| 169 | 214 | ||
| 170 | namespace std { | 215 | namespace std { |
| 171 | 216 | ||
| 172 | template <> | 217 | template <> |
| 173 | struct hash<PicaShaderConfig> { | 218 | struct hash<PicaShaderConfig> { |
| 174 | size_t operator()(const PicaShaderConfig& k) const { | 219 | size_t operator()(const PicaShaderConfig& k) const { |
| 175 | return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); | 220 | return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State)); |
| 176 | } | 221 | } |
| 177 | }; | 222 | }; |
| 178 | 223 | ||
| @@ -239,6 +284,7 @@ private: | |||
| 239 | tex_coord1[1] = v.tc1.y.ToFloat32(); | 284 | tex_coord1[1] = v.tc1.y.ToFloat32(); |
| 240 | tex_coord2[0] = v.tc2.x.ToFloat32(); | 285 | tex_coord2[0] = v.tc2.x.ToFloat32(); |
| 241 | tex_coord2[1] = v.tc2.y.ToFloat32(); | 286 | tex_coord2[1] = v.tc2.y.ToFloat32(); |
| 287 | tex_coord0_w = v.tc0_w.ToFloat32(); | ||
| 242 | normquat[0] = v.quat.x.ToFloat32(); | 288 | normquat[0] = v.quat.x.ToFloat32(); |
| 243 | normquat[1] = v.quat.y.ToFloat32(); | 289 | normquat[1] = v.quat.y.ToFloat32(); |
| 244 | normquat[2] = v.quat.z.ToFloat32(); | 290 | normquat[2] = v.quat.z.ToFloat32(); |
| @@ -259,6 +305,7 @@ private: | |||
| 259 | GLfloat tex_coord0[2]; | 305 | GLfloat tex_coord0[2]; |
| 260 | GLfloat tex_coord1[2]; | 306 | GLfloat tex_coord1[2]; |
| 261 | GLfloat tex_coord2[2]; | 307 | GLfloat tex_coord2[2]; |
| 308 | GLfloat tex_coord0_w; | ||
| 262 | GLfloat normquat[4]; | 309 | GLfloat normquat[4]; |
| 263 | GLfloat view[3]; | 310 | GLfloat view[3]; |
| 264 | }; | 311 | }; |
| @@ -277,6 +324,7 @@ private: | |||
| 277 | GLvec4 const_color[6]; | 324 | GLvec4 const_color[6]; |
| 278 | GLvec4 tev_combiner_buffer_color; | 325 | GLvec4 tev_combiner_buffer_color; |
| 279 | GLint alphatest_ref; | 326 | GLint alphatest_ref; |
| 327 | GLfloat depth_scale; | ||
| 280 | GLfloat depth_offset; | 328 | GLfloat depth_offset; |
| 281 | alignas(16) GLvec3 lighting_global_ambient; | 329 | alignas(16) GLvec3 lighting_global_ambient; |
| 282 | LightSrc light_src[8]; | 330 | LightSrc light_src[8]; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9011caa39..71d60e69c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -32,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { | |||
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | /// Writes the specified TEV stage source component(s) | 34 | /// Writes the specified TEV stage source component(s) |
| 35 | static void AppendSource(std::string& out, TevStageConfig::Source source, | 35 | static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, |
| 36 | const std::string& index_name) { | 36 | const std::string& index_name) { |
| 37 | const auto& state = config.state; | ||
| 37 | using Source = TevStageConfig::Source; | 38 | using Source = TevStageConfig::Source; |
| 38 | switch (source) { | 39 | switch (source) { |
| 39 | case Source::PrimaryColor: | 40 | case Source::PrimaryColor: |
| @@ -46,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, | |||
| 46 | out += "secondary_fragment_color"; | 47 | out += "secondary_fragment_color"; |
| 47 | break; | 48 | break; |
| 48 | case Source::Texture0: | 49 | case Source::Texture0: |
| 49 | out += "texture(tex[0], texcoord[0])"; | 50 | // Only unit 0 respects the texturing type (according to 3DBrew) |
| 51 | switch(state.texture0_type) { | ||
| 52 | case Pica::Regs::TextureConfig::Texture2D: | ||
| 53 | out += "texture(tex[0], texcoord[0])"; | ||
| 54 | break; | ||
| 55 | case Pica::Regs::TextureConfig::Projection2D: | ||
| 56 | out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; | ||
| 57 | break; | ||
| 58 | default: | ||
| 59 | out += "texture(tex[0], texcoord[0])"; | ||
| 60 | LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type)); | ||
| 61 | UNIMPLEMENTED(); | ||
| 62 | break; | ||
| 63 | } | ||
| 50 | break; | 64 | break; |
| 51 | case Source::Texture1: | 65 | case Source::Texture1: |
| 52 | out += "texture(tex[1], texcoord[1])"; | 66 | out += "texture(tex[1], texcoord[1])"; |
| @@ -71,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, | |||
| 71 | } | 85 | } |
| 72 | 86 | ||
| 73 | /// Writes the color components to use for the specified TEV stage color modifier | 87 | /// Writes the color components to use for the specified TEV stage color modifier |
| 74 | static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, | 88 | static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, |
| 75 | TevStageConfig::Source source, const std::string& index_name) { | 89 | TevStageConfig::Source source, const std::string& index_name) { |
| 76 | using ColorModifier = TevStageConfig::ColorModifier; | 90 | using ColorModifier = TevStageConfig::ColorModifier; |
| 77 | switch (modifier) { | 91 | switch (modifier) { |
| 78 | case ColorModifier::SourceColor: | 92 | case ColorModifier::SourceColor: |
| 79 | AppendSource(out, source, index_name); | 93 | AppendSource(out, config, source, index_name); |
| 80 | out += ".rgb"; | 94 | out += ".rgb"; |
| 81 | break; | 95 | break; |
| 82 | case ColorModifier::OneMinusSourceColor: | 96 | case ColorModifier::OneMinusSourceColor: |
| 83 | out += "vec3(1.0) - "; | 97 | out += "vec3(1.0) - "; |
| 84 | AppendSource(out, source, index_name); | 98 | AppendSource(out, config, source, index_name); |
| 85 | out += ".rgb"; | 99 | out += ".rgb"; |
| 86 | break; | 100 | break; |
| 87 | case ColorModifier::SourceAlpha: | 101 | case ColorModifier::SourceAlpha: |
| 88 | AppendSource(out, source, index_name); | 102 | AppendSource(out, config, source, index_name); |
| 89 | out += ".aaa"; | 103 | out += ".aaa"; |
| 90 | break; | 104 | break; |
| 91 | case ColorModifier::OneMinusSourceAlpha: | 105 | case ColorModifier::OneMinusSourceAlpha: |
| 92 | out += "vec3(1.0) - "; | 106 | out += "vec3(1.0) - "; |
| 93 | AppendSource(out, source, index_name); | 107 | AppendSource(out, config, source, index_name); |
| 94 | out += ".aaa"; | 108 | out += ".aaa"; |
| 95 | break; | 109 | break; |
| 96 | case ColorModifier::SourceRed: | 110 | case ColorModifier::SourceRed: |
| 97 | AppendSource(out, source, index_name); | 111 | AppendSource(out, config, source, index_name); |
| 98 | out += ".rrr"; | 112 | out += ".rrr"; |
| 99 | break; | 113 | break; |
| 100 | case ColorModifier::OneMinusSourceRed: | 114 | case ColorModifier::OneMinusSourceRed: |
| 101 | out += "vec3(1.0) - "; | 115 | out += "vec3(1.0) - "; |
| 102 | AppendSource(out, source, index_name); | 116 | AppendSource(out, config, source, index_name); |
| 103 | out += ".rrr"; | 117 | out += ".rrr"; |
| 104 | break; | 118 | break; |
| 105 | case ColorModifier::SourceGreen: | 119 | case ColorModifier::SourceGreen: |
| 106 | AppendSource(out, source, index_name); | 120 | AppendSource(out, config, source, index_name); |
| 107 | out += ".ggg"; | 121 | out += ".ggg"; |
| 108 | break; | 122 | break; |
| 109 | case ColorModifier::OneMinusSourceGreen: | 123 | case ColorModifier::OneMinusSourceGreen: |
| 110 | out += "vec3(1.0) - "; | 124 | out += "vec3(1.0) - "; |
| 111 | AppendSource(out, source, index_name); | 125 | AppendSource(out, config, source, index_name); |
| 112 | out += ".ggg"; | 126 | out += ".ggg"; |
| 113 | break; | 127 | break; |
| 114 | case ColorModifier::SourceBlue: | 128 | case ColorModifier::SourceBlue: |
| 115 | AppendSource(out, source, index_name); | 129 | AppendSource(out, config, source, index_name); |
| 116 | out += ".bbb"; | 130 | out += ".bbb"; |
| 117 | break; | 131 | break; |
| 118 | case ColorModifier::OneMinusSourceBlue: | 132 | case ColorModifier::OneMinusSourceBlue: |
| 119 | out += "vec3(1.0) - "; | 133 | out += "vec3(1.0) - "; |
| 120 | AppendSource(out, source, index_name); | 134 | AppendSource(out, config, source, index_name); |
| 121 | out += ".bbb"; | 135 | out += ".bbb"; |
| 122 | break; | 136 | break; |
| 123 | default: | 137 | default: |
| @@ -128,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier | |||
| 128 | } | 142 | } |
| 129 | 143 | ||
| 130 | /// Writes the alpha component to use for the specified TEV stage alpha modifier | 144 | /// Writes the alpha component to use for the specified TEV stage alpha modifier |
| 131 | static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, | 145 | static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, |
| 132 | TevStageConfig::Source source, const std::string& index_name) { | 146 | TevStageConfig::Source source, const std::string& index_name) { |
| 133 | using AlphaModifier = TevStageConfig::AlphaModifier; | 147 | using AlphaModifier = TevStageConfig::AlphaModifier; |
| 134 | switch (modifier) { | 148 | switch (modifier) { |
| 135 | case AlphaModifier::SourceAlpha: | 149 | case AlphaModifier::SourceAlpha: |
| 136 | AppendSource(out, source, index_name); | 150 | AppendSource(out, config, source, index_name); |
| 137 | out += ".a"; | 151 | out += ".a"; |
| 138 | break; | 152 | break; |
| 139 | case AlphaModifier::OneMinusSourceAlpha: | 153 | case AlphaModifier::OneMinusSourceAlpha: |
| 140 | out += "1.0 - "; | 154 | out += "1.0 - "; |
| 141 | AppendSource(out, source, index_name); | 155 | AppendSource(out, config, source, index_name); |
| 142 | out += ".a"; | 156 | out += ".a"; |
| 143 | break; | 157 | break; |
| 144 | case AlphaModifier::SourceRed: | 158 | case AlphaModifier::SourceRed: |
| 145 | AppendSource(out, source, index_name); | 159 | AppendSource(out, config, source, index_name); |
| 146 | out += ".r"; | 160 | out += ".r"; |
| 147 | break; | 161 | break; |
| 148 | case AlphaModifier::OneMinusSourceRed: | 162 | case AlphaModifier::OneMinusSourceRed: |
| 149 | out += "1.0 - "; | 163 | out += "1.0 - "; |
| 150 | AppendSource(out, source, index_name); | 164 | AppendSource(out, config, source, index_name); |
| 151 | out += ".r"; | 165 | out += ".r"; |
| 152 | break; | 166 | break; |
| 153 | case AlphaModifier::SourceGreen: | 167 | case AlphaModifier::SourceGreen: |
| 154 | AppendSource(out, source, index_name); | 168 | AppendSource(out, config, source, index_name); |
| 155 | out += ".g"; | 169 | out += ".g"; |
| 156 | break; | 170 | break; |
| 157 | case AlphaModifier::OneMinusSourceGreen: | 171 | case AlphaModifier::OneMinusSourceGreen: |
| 158 | out += "1.0 - "; | 172 | out += "1.0 - "; |
| 159 | AppendSource(out, source, index_name); | 173 | AppendSource(out, config, source, index_name); |
| 160 | out += ".g"; | 174 | out += ".g"; |
| 161 | break; | 175 | break; |
| 162 | case AlphaModifier::SourceBlue: | 176 | case AlphaModifier::SourceBlue: |
| 163 | AppendSource(out, source, index_name); | 177 | AppendSource(out, config, source, index_name); |
| 164 | out += ".b"; | 178 | out += ".b"; |
| 165 | break; | 179 | break; |
| 166 | case AlphaModifier::OneMinusSourceBlue: | 180 | case AlphaModifier::OneMinusSourceBlue: |
| 167 | out += "1.0 - "; | 181 | out += "1.0 - "; |
| 168 | AppendSource(out, source, index_name); | 182 | AppendSource(out, config, source, index_name); |
| 169 | out += ".b"; | 183 | out += ".b"; |
| 170 | break; | 184 | break; |
| 171 | default: | 185 | default: |
| @@ -287,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { | |||
| 287 | 301 | ||
| 288 | /// Writes the code to emulate the specified TEV stage | 302 | /// Writes the code to emulate the specified TEV stage |
| 289 | static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { | 303 | static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { |
| 290 | auto& stage = config.tev_stages[index]; | 304 | const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); |
| 291 | if (!IsPassThroughTevStage(stage)) { | 305 | if (!IsPassThroughTevStage(stage)) { |
| 292 | std::string index_name = std::to_string(index); | 306 | std::string index_name = std::to_string(index); |
| 293 | 307 | ||
| 294 | out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; | 308 | out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; |
| 295 | AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); | 309 | AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); |
| 296 | out += ", "; | 310 | out += ", "; |
| 297 | AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); | 311 | AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); |
| 298 | out += ", "; | 312 | out += ", "; |
| 299 | AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); | 313 | AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); |
| 300 | out += ");\n"; | 314 | out += ");\n"; |
| 301 | 315 | ||
| 302 | out += "vec3 color_output_" + index_name + " = "; | 316 | out += "vec3 color_output_" + index_name + " = "; |
| @@ -304,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi | |||
| 304 | out += ";\n"; | 318 | out += ";\n"; |
| 305 | 319 | ||
| 306 | out += "float alpha_results_" + index_name + "[3] = float[3]("; | 320 | out += "float alpha_results_" + index_name + "[3] = float[3]("; |
| 307 | AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); | 321 | AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); |
| 308 | out += ", "; | 322 | out += ", "; |
| 309 | AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); | 323 | AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); |
| 310 | out += ", "; | 324 | out += ", "; |
| 311 | AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); | 325 | AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); |
| 312 | out += ");\n"; | 326 | out += ");\n"; |
| 313 | 327 | ||
| 314 | out += "float alpha_output_" + index_name + " = "; | 328 | out += "float alpha_output_" + index_name + " = "; |
| @@ -331,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi | |||
| 331 | 345 | ||
| 332 | /// Writes the code to emulate fragment lighting | 346 | /// Writes the code to emulate fragment lighting |
| 333 | static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | 347 | static void WriteLighting(std::string& out, const PicaShaderConfig& config) { |
| 348 | const auto& lighting = config.state.lighting; | ||
| 349 | |||
| 334 | // Define lighting globals | 350 | // Define lighting globals |
| 335 | out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" | 351 | out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" |
| 336 | "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" | 352 | "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" |
| @@ -338,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 338 | "vec3 refl_value = vec3(0.0);\n"; | 354 | "vec3 refl_value = vec3(0.0);\n"; |
| 339 | 355 | ||
| 340 | // Compute fragment normals | 356 | // Compute fragment normals |
| 341 | if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { | 357 | if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { |
| 342 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture | 358 | // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture |
| 343 | std::string bump_selector = std::to_string(config.lighting.bump_selector); | 359 | std::string bump_selector = std::to_string(lighting.bump_selector); |
| 344 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; | 360 | out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; |
| 345 | 361 | ||
| 346 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result | 362 | // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result |
| 347 | if (config.lighting.bump_renorm) { | 363 | if (lighting.bump_renorm) { |
| 348 | std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; | 364 | std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; |
| 349 | out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; | 365 | out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; |
| 350 | } | 366 | } |
| 351 | } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { | 367 | } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { |
| 352 | // Bump mapping is enabled using a tangent map | 368 | // Bump mapping is enabled using a tangent map |
| 353 | LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); | 369 | LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); |
| 354 | UNIMPLEMENTED(); | 370 | UNIMPLEMENTED(); |
| @@ -361,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 361 | out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; | 377 | out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; |
| 362 | 378 | ||
| 363 | // Gets the index into the specified lookup table for specular lighting | 379 | // Gets the index into the specified lookup table for specular lighting |
| 364 | auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { | 380 | auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) { |
| 365 | const std::string half_angle = "normalize(normalize(view) + light_vector)"; | 381 | const std::string half_angle = "normalize(normalize(view) + light_vector)"; |
| 366 | std::string index; | 382 | std::string index; |
| 367 | switch (input) { | 383 | switch (input) { |
| @@ -389,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 389 | 405 | ||
| 390 | if (abs) { | 406 | if (abs) { |
| 391 | // LUT index is in the range of (0.0, 1.0) | 407 | // LUT index is in the range of (0.0, 1.0) |
| 392 | index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; | 408 | index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; |
| 393 | return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; | 409 | return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; |
| 394 | } else { | 410 | } else { |
| 395 | // LUT index is in the range of (-1.0, 1.0) | 411 | // LUT index is in the range of (-1.0, 1.0) |
| @@ -407,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 407 | }; | 423 | }; |
| 408 | 424 | ||
| 409 | // Write the code to emulate each enabled light | 425 | // Write the code to emulate each enabled light |
| 410 | for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { | 426 | for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { |
| 411 | const auto& light_config = config.lighting.light[light_index]; | 427 | const auto& light_config = lighting.light[light_index]; |
| 412 | std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; | 428 | std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; |
| 413 | 429 | ||
| 414 | // Compute light vector (directional or positional) | 430 | // Compute light vector (directional or positional) |
| @@ -432,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 432 | } | 448 | } |
| 433 | 449 | ||
| 434 | // If enabled, clamp specular component if lighting result is negative | 450 | // If enabled, clamp specular component if lighting result is negative |
| 435 | std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; | 451 | std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; |
| 436 | 452 | ||
| 437 | // Specular 0 component | 453 | // Specular 0 component |
| 438 | std::string d0_lut_value = "1.0"; | 454 | std::string d0_lut_value = "1.0"; |
| 439 | if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { | 455 | if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { |
| 440 | // Lookup specular "distribution 0" LUT value | 456 | // Lookup specular "distribution 0" LUT value |
| 441 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); | 457 | std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); |
| 442 | d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; | 458 | d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; |
| 443 | } | 459 | } |
| 444 | std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; | 460 | std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; |
| 445 | 461 | ||
| 446 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used | 462 | // If enabled, lookup ReflectRed value, otherwise, 1.0 is used |
| 447 | if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { | 463 | if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { |
| 448 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); | 464 | std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); |
| 449 | std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; | 465 | std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; |
| 450 | out += "refl_value.r = " + value + ";\n"; | 466 | out += "refl_value.r = " + value + ";\n"; |
| 451 | } else { | 467 | } else { |
| 452 | out += "refl_value.r = 1.0;\n"; | 468 | out += "refl_value.r = 1.0;\n"; |
| 453 | } | 469 | } |
| 454 | 470 | ||
| 455 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used | 471 | // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used |
| 456 | if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { | 472 | if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { |
| 457 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); | 473 | std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); |
| 458 | std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; | 474 | std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; |
| 459 | out += "refl_value.g = " + value + ";\n"; | 475 | out += "refl_value.g = " + value + ";\n"; |
| 460 | } else { | 476 | } else { |
| 461 | out += "refl_value.g = refl_value.r;\n"; | 477 | out += "refl_value.g = refl_value.r;\n"; |
| 462 | } | 478 | } |
| 463 | 479 | ||
| 464 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used | 480 | // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used |
| 465 | if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { | 481 | if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { |
| 466 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); | 482 | std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); |
| 467 | std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; | 483 | std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; |
| 468 | out += "refl_value.b = " + value + ";\n"; | 484 | out += "refl_value.b = " + value + ";\n"; |
| 469 | } else { | 485 | } else { |
| 470 | out += "refl_value.b = refl_value.r;\n"; | 486 | out += "refl_value.b = refl_value.r;\n"; |
| @@ -472,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 472 | 488 | ||
| 473 | // Specular 1 component | 489 | // Specular 1 component |
| 474 | std::string d1_lut_value = "1.0"; | 490 | std::string d1_lut_value = "1.0"; |
| 475 | if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { | 491 | if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { |
| 476 | // Lookup specular "distribution 1" LUT value | 492 | // Lookup specular "distribution 1" LUT value |
| 477 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); | 493 | std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); |
| 478 | d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; | 494 | d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; |
| 479 | } | 495 | } |
| 480 | std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; | 496 | std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; |
| 481 | 497 | ||
| 482 | // Fresnel | 498 | // Fresnel |
| 483 | if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { | 499 | if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { |
| 484 | // Lookup fresnel LUT value | 500 | // Lookup fresnel LUT value |
| 485 | std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); | 501 | std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); |
| 486 | std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; | 502 | std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; |
| 487 | 503 | ||
| 488 | // Enabled for difffuse lighting alpha component | 504 | // Enabled for difffuse lighting alpha component |
| 489 | if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || | 505 | if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || |
| 490 | config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) | 506 | lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) |
| 491 | out += "diffuse_sum.a *= " + value + ";\n"; | 507 | out += "diffuse_sum.a *= " + value + ";\n"; |
| 492 | 508 | ||
| 493 | // Enabled for the specular lighting alpha component | 509 | // Enabled for the specular lighting alpha component |
| 494 | if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || | 510 | if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || |
| 495 | config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) | 511 | lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) |
| 496 | out += "specular_sum.a *= " + value + ";\n"; | 512 | out += "specular_sum.a *= " + value + ";\n"; |
| 497 | } | 513 | } |
| 498 | 514 | ||
| @@ -510,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { | |||
| 510 | } | 526 | } |
| 511 | 527 | ||
| 512 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { | 528 | std::string GenerateFragmentShader(const PicaShaderConfig& config) { |
| 529 | const auto& state = config.state; | ||
| 530 | |||
| 513 | std::string out = R"( | 531 | std::string out = R"( |
| 514 | #version 330 core | 532 | #version 330 core |
| 515 | #define NUM_TEV_STAGES 6 | 533 | #define NUM_TEV_STAGES 6 |
| @@ -519,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { | |||
| 519 | 537 | ||
| 520 | in vec4 primary_color; | 538 | in vec4 primary_color; |
| 521 | in vec2 texcoord[3]; | 539 | in vec2 texcoord[3]; |
| 540 | in float texcoord0_w; | ||
| 522 | in vec4 normquat; | 541 | in vec4 normquat; |
| 523 | in vec3 view; | 542 | in vec3 view; |
| 524 | 543 | ||
| @@ -536,6 +555,7 @@ layout (std140) uniform shader_data { | |||
| 536 | vec4 const_color[NUM_TEV_STAGES]; | 555 | vec4 const_color[NUM_TEV_STAGES]; |
| 537 | vec4 tev_combiner_buffer_color; | 556 | vec4 tev_combiner_buffer_color; |
| 538 | int alphatest_ref; | 557 | int alphatest_ref; |
| 558 | float depth_scale; | ||
| 539 | float depth_offset; | 559 | float depth_offset; |
| 540 | vec3 lighting_global_ambient; | 560 | vec3 lighting_global_ambient; |
| 541 | LightSrc light_src[NUM_LIGHTS]; | 561 | LightSrc light_src[NUM_LIGHTS]; |
| @@ -555,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 555 | )"; | 575 | )"; |
| 556 | 576 | ||
| 557 | // Do not do any sort of processing if it's obvious we're not going to pass the alpha test | 577 | // Do not do any sort of processing if it's obvious we're not going to pass the alpha test |
| 558 | if (config.alpha_test_func == Regs::CompareFunc::Never) { | 578 | if (state.alpha_test_func == Regs::CompareFunc::Never) { |
| 559 | out += "discard; }"; | 579 | out += "discard; }"; |
| 560 | return out; | 580 | return out; |
| 561 | } | 581 | } |
| 562 | 582 | ||
| 563 | if (config.lighting.enable) | 583 | if (state.lighting.enable) |
| 564 | WriteLighting(out, config); | 584 | WriteLighting(out, config); |
| 565 | 585 | ||
| 566 | out += "vec4 combiner_buffer = vec4(0.0);\n"; | 586 | out += "vec4 combiner_buffer = vec4(0.0);\n"; |
| 567 | out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; | 587 | out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; |
| 568 | out += "vec4 last_tex_env_out = vec4(0.0);\n"; | 588 | out += "vec4 last_tex_env_out = vec4(0.0);\n"; |
| 569 | 589 | ||
| 570 | for (size_t index = 0; index < config.tev_stages.size(); ++index) | 590 | for (size_t index = 0; index < state.tev_stages.size(); ++index) |
| 571 | WriteTevStage(out, config, (unsigned)index); | 591 | WriteTevStage(out, config, (unsigned)index); |
| 572 | 592 | ||
| 573 | if (config.alpha_test_func != Regs::CompareFunc::Always) { | 593 | if (state.alpha_test_func != Regs::CompareFunc::Always) { |
| 574 | out += "if ("; | 594 | out += "if ("; |
| 575 | AppendAlphaTestCondition(out, config.alpha_test_func); | 595 | AppendAlphaTestCondition(out, state.alpha_test_func); |
| 576 | out += ") discard;\n"; | 596 | out += ") discard;\n"; |
| 577 | } | 597 | } |
| 578 | 598 | ||
| 579 | out += "color = last_tex_env_out;\n"; | 599 | out += "color = last_tex_env_out;\n"; |
| 580 | out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; | 600 | |
| 601 | out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; | ||
| 602 | out += "float depth = z_over_w * depth_scale + depth_offset;\n"; | ||
| 603 | if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { | ||
| 604 | out += "depth /= gl_FragCoord.w;\n"; | ||
| 605 | } | ||
| 606 | out += "gl_FragDepth = depth;\n"; | ||
| 607 | |||
| 608 | out += "}"; | ||
| 581 | 609 | ||
| 582 | return out; | 610 | return out; |
| 583 | } | 611 | } |
| @@ -585,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
| 585 | std::string GenerateVertexShader() { | 613 | std::string GenerateVertexShader() { |
| 586 | std::string out = "#version 330 core\n"; | 614 | std::string out = "#version 330 core\n"; |
| 587 | 615 | ||
| 588 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; | 616 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; |
| 589 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; | 617 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; |
| 590 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; | 618 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; |
| 591 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; | 619 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; |
| 592 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; | 620 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; |
| 593 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; | 621 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; |
| 594 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; | 622 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; |
| 623 | out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; | ||
| 595 | 624 | ||
| 596 | out += R"( | 625 | out += R"( |
| 597 | out vec4 primary_color; | 626 | out vec4 primary_color; |
| 598 | out vec2 texcoord[3]; | 627 | out vec2 texcoord[3]; |
| 628 | out float texcoord0_w; | ||
| 599 | out vec4 normquat; | 629 | out vec4 normquat; |
| 600 | out vec3 view; | 630 | out vec3 view; |
| 601 | 631 | ||
| @@ -604,6 +634,7 @@ void main() { | |||
| 604 | texcoord[0] = vert_texcoord0; | 634 | texcoord[0] = vert_texcoord0; |
| 605 | texcoord[1] = vert_texcoord1; | 635 | texcoord[1] = vert_texcoord1; |
| 606 | texcoord[2] = vert_texcoord2; | 636 | texcoord[2] = vert_texcoord2; |
| 637 | texcoord0_w = vert_texcoord0_w; | ||
| 607 | normquat = vert_normquat; | 638 | normquat = vert_normquat; |
| 608 | view = vert_view; | 639 | view = vert_view; |
| 609 | gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); | 640 | gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3eb07d57a..bef3249cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <string> | 7 | #include <string> |
| 8 | 8 | ||
| 9 | struct PicaShaderConfig; | 9 | union PicaShaderConfig; |
| 10 | 10 | ||
| 11 | namespace GLShader { | 11 | namespace GLShader { |
| 12 | 12 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 097242f6f..f59912f79 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h | |||
| @@ -14,6 +14,7 @@ enum Attributes { | |||
| 14 | ATTRIBUTE_TEXCOORD0, | 14 | ATTRIBUTE_TEXCOORD0, |
| 15 | ATTRIBUTE_TEXCOORD1, | 15 | ATTRIBUTE_TEXCOORD1, |
| 16 | ATTRIBUTE_TEXCOORD2, | 16 | ATTRIBUTE_TEXCOORD2, |
| 17 | ATTRIBUTE_TEXCOORD0_W, | ||
| 17 | ATTRIBUTE_NORMQUAT, | 18 | ATTRIBUTE_NORMQUAT, |
| 18 | ATTRIBUTE_VIEW, | 19 | ATTRIBUTE_VIEW, |
| 19 | }; | 20 | }; |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 65dcc9156..e93a9d92a 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -35,7 +35,13 @@ static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; | |||
| 35 | static const JitShader* jit_shader; | 35 | static const JitShader* jit_shader; |
| 36 | #endif // ARCHITECTURE_x86_64 | 36 | #endif // ARCHITECTURE_x86_64 |
| 37 | 37 | ||
| 38 | void Setup() { | 38 | void ClearCache() { |
| 39 | #ifdef ARCHITECTURE_x86_64 | ||
| 40 | shader_map.clear(); | ||
| 41 | #endif // ARCHITECTURE_x86_64 | ||
| 42 | } | ||
| 43 | |||
| 44 | void ShaderSetup::Setup() { | ||
| 39 | #ifdef ARCHITECTURE_x86_64 | 45 | #ifdef ARCHITECTURE_x86_64 |
| 40 | if (VideoCore::g_shader_jit_enabled) { | 46 | if (VideoCore::g_shader_jit_enabled) { |
| 41 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 47 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| @@ -54,20 +60,13 @@ void Setup() { | |||
| 54 | #endif // ARCHITECTURE_x86_64 | 60 | #endif // ARCHITECTURE_x86_64 |
| 55 | } | 61 | } |
| 56 | 62 | ||
| 57 | void Shutdown() { | 63 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 58 | #ifdef ARCHITECTURE_x86_64 | ||
| 59 | shader_map.clear(); | ||
| 60 | #endif // ARCHITECTURE_x86_64 | ||
| 61 | } | ||
| 62 | |||
| 63 | MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); | ||
| 64 | 64 | ||
| 65 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 66 | auto& config = g_state.regs.vs; | 66 | auto& config = g_state.regs.vs; |
| 67 | 67 | ||
| 68 | MICROPROFILE_SCOPE(GPU_VertexShader); | 68 | MICROPROFILE_SCOPE(GPU_Shader); |
| 69 | 69 | ||
| 70 | state.program_counter = config.main_offset; | ||
| 71 | state.debug.max_offset = 0; | 70 | state.debug.max_offset = 0; |
| 72 | state.debug.max_opdesc_id = 0; | 71 | state.debug.max_opdesc_id = 0; |
| 73 | 72 | ||
| @@ -140,10 +139,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr | |||
| 140 | return ret; | 139 | return ret; |
| 141 | } | 140 | } |
| 142 | 141 | ||
| 143 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { | 142 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { |
| 144 | UnitState<true> state; | 143 | UnitState<true> state; |
| 145 | 144 | ||
| 146 | state.program_counter = config.main_offset; | ||
| 147 | state.debug.max_offset = 0; | 145 | state.debug.max_offset = 0; |
| 148 | state.debug.max_opdesc_id = 0; | 146 | state.debug.max_opdesc_id = 0; |
| 149 | 147 | ||
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 56b83bfeb..983e4a967 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -43,7 +43,8 @@ struct OutputVertex { | |||
| 43 | Math::Vec4<float24> color; | 43 | Math::Vec4<float24> color; |
| 44 | Math::Vec2<float24> tc0; | 44 | Math::Vec2<float24> tc0; |
| 45 | Math::Vec2<float24> tc1; | 45 | Math::Vec2<float24> tc1; |
| 46 | INSERT_PADDING_WORDS(2); | 46 | float24 tc0_w; |
| 47 | INSERT_PADDING_WORDS(1); | ||
| 47 | Math::Vec3<float24> view; | 48 | Math::Vec3<float24> view; |
| 48 | INSERT_PADDING_WORDS(1); | 49 | INSERT_PADDING_WORDS(1); |
| 49 | Math::Vec2<float24> tc2; | 50 | Math::Vec2<float24> tc2; |
| @@ -83,23 +84,6 @@ struct OutputVertex { | |||
| 83 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 84 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 84 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 85 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 85 | 86 | ||
| 86 | /// Vertex shader memory | ||
| 87 | struct ShaderSetup { | ||
| 88 | struct { | ||
| 89 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are | ||
| 90 | // therefore required to be 16-byte aligned. | ||
| 91 | alignas(16) Math::Vec4<float24> f[96]; | ||
| 92 | |||
| 93 | std::array<bool, 16> b; | ||
| 94 | std::array<Math::Vec4<u8>, 4> i; | ||
| 95 | } uniforms; | ||
| 96 | |||
| 97 | Math::Vec4<float24> default_attributes[16]; | ||
| 98 | |||
| 99 | std::array<u32, 1024> program_code; | ||
| 100 | std::array<u32, 1024> swizzle_data; | ||
| 101 | }; | ||
| 102 | |||
| 103 | // Helper structure used to keep track of data useful for inspection of shader emulation | 87 | // Helper structure used to keep track of data useful for inspection of shader emulation |
| 104 | template<bool full_debugging> | 88 | template<bool full_debugging> |
| 105 | struct DebugData; | 89 | struct DebugData; |
| @@ -288,29 +272,12 @@ struct UnitState { | |||
| 288 | } registers; | 272 | } registers; |
| 289 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | 273 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); |
| 290 | 274 | ||
| 291 | u32 program_counter; | ||
| 292 | bool conditional_code[2]; | 275 | bool conditional_code[2]; |
| 293 | 276 | ||
| 294 | // Two Address registers and one loop counter | 277 | // Two Address registers and one loop counter |
| 295 | // TODO: How many bits do these actually have? | 278 | // TODO: How many bits do these actually have? |
| 296 | s32 address_registers[3]; | 279 | s32 address_registers[3]; |
| 297 | 280 | ||
| 298 | enum { | ||
| 299 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 300 | }; | ||
| 301 | |||
| 302 | struct CallStackElement { | ||
| 303 | u32 final_address; // Address upon which we jump to return_address | ||
| 304 | u32 return_address; // Where to jump when leaving scope | ||
| 305 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 306 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 307 | // TODO: Should this be a signed value? Does it even matter? | ||
| 308 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 309 | }; | ||
| 310 | |||
| 311 | // TODO: Is there a maximal size for this? | ||
| 312 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 313 | |||
| 314 | DebugData<Debug> debug; | 281 | DebugData<Debug> debug; |
| 315 | 282 | ||
| 316 | static size_t InputOffset(const SourceRegister& reg) { | 283 | static size_t InputOffset(const SourceRegister& reg) { |
| @@ -342,33 +309,49 @@ struct UnitState { | |||
| 342 | } | 309 | } |
| 343 | }; | 310 | }; |
| 344 | 311 | ||
| 345 | /** | 312 | /// Clears the shader cache |
| 346 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per | 313 | void ClearCache(); |
| 347 | * vertex, which would happen within the `Run` function). | ||
| 348 | */ | ||
| 349 | void Setup(); | ||
| 350 | 314 | ||
| 351 | /// Performs any cleanup when the emulator is shutdown | 315 | struct ShaderSetup { |
| 352 | void Shutdown(); | ||
| 353 | 316 | ||
| 354 | /** | 317 | struct { |
| 355 | * Runs the currently setup shader | 318 | // The float uniforms are accessed by the shader JIT using SSE instructions, and are |
| 356 | * @param state Shader unit state, must be setup per shader and per shader unit | 319 | // therefore required to be 16-byte aligned. |
| 357 | * @param input Input vertex into the shader | 320 | alignas(16) Math::Vec4<float24> f[96]; |
| 358 | * @param num_attributes The number of vertex shader attributes | ||
| 359 | * @return The output vertex, after having been processed by the vertex shader | ||
| 360 | */ | ||
| 361 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | ||
| 362 | 321 | ||
| 363 | /** | 322 | std::array<bool, 16> b; |
| 364 | * Produce debug information based on the given shader and input vertex | 323 | std::array<Math::Vec4<u8>, 4> i; |
| 365 | * @param input Input vertex into the shader | 324 | } uniforms; |
| 366 | * @param num_attributes The number of vertex shader attributes | 325 | |
| 367 | * @param config Configuration object for the shader pipeline | 326 | std::array<u32, 1024> program_code; |
| 368 | * @param setup Setup object for the shader pipeline | 327 | std::array<u32, 1024> swizzle_data; |
| 369 | * @return Debug information for this shader with regards to the given vertex | 328 | |
| 370 | */ | 329 | /** |
| 371 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); | 330 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per |
| 331 | * vertex, which would happen within the `Run` function). | ||
| 332 | */ | ||
| 333 | void Setup(); | ||
| 334 | |||
| 335 | /** | ||
| 336 | * Runs the currently setup shader | ||
| 337 | * @param state Shader unit state, must be setup per shader and per shader unit | ||
| 338 | * @param input Input vertex into the shader | ||
| 339 | * @param num_attributes The number of vertex shader attributes | ||
| 340 | * @return The output vertex, after having been processed by the vertex shader | ||
| 341 | */ | ||
| 342 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | ||
| 343 | |||
| 344 | /** | ||
| 345 | * Produce debug information based on the given shader and input vertex | ||
| 346 | * @param input Input vertex into the shader | ||
| 347 | * @param num_attributes The number of vertex shader attributes | ||
| 348 | * @param config Configuration object for the shader pipeline | ||
| 349 | * @param setup Setup object for the shader pipeline | ||
| 350 | * @return Debug information for this shader with regards to the given vertex | ||
| 351 | */ | ||
| 352 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); | ||
| 353 | |||
| 354 | }; | ||
| 372 | 355 | ||
| 373 | } // namespace Shader | 356 | } // namespace Shader |
| 374 | 357 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 7710f7fbc..3a827d11f 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -29,8 +29,24 @@ namespace Pica { | |||
| 29 | 29 | ||
| 30 | namespace Shader { | 30 | namespace Shader { |
| 31 | 31 | ||
| 32 | constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF; | ||
| 33 | |||
| 34 | struct CallStackElement { | ||
| 35 | u32 final_address; // Address upon which we jump to return_address | ||
| 36 | u32 return_address; // Where to jump when leaving scope | ||
| 37 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 38 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 39 | // TODO: Should this be a signed value? Does it even matter? | ||
| 40 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 41 | }; | ||
| 42 | |||
| 32 | template<bool Debug> | 43 | template<bool Debug> |
| 33 | void RunInterpreter(UnitState<Debug>& state) { | 44 | void RunInterpreter(UnitState<Debug>& state) { |
| 45 | // TODO: Is there a maximal size for this? | ||
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 47 | |||
| 48 | u32 program_counter = g_state.regs.vs.main_offset; | ||
| 49 | |||
| 34 | const auto& uniforms = g_state.vs.uniforms; | 50 | const auto& uniforms = g_state.vs.uniforms; |
| 35 | const auto& swizzle_data = g_state.vs.swizzle_data; | 51 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 36 | const auto& program_code = g_state.vs.program_code; | 52 | const auto& program_code = g_state.vs.program_code; |
| @@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 41 | unsigned iteration = 0; | 57 | unsigned iteration = 0; |
| 42 | bool exit_loop = false; | 58 | bool exit_loop = false; |
| 43 | while (!exit_loop) { | 59 | while (!exit_loop) { |
| 44 | if (!state.call_stack.empty()) { | 60 | if (!call_stack.empty()) { |
| 45 | auto& top = state.call_stack.back(); | 61 | auto& top = call_stack.back(); |
| 46 | if (state.program_counter == top.final_address) { | 62 | if (program_counter == top.final_address) { |
| 47 | state.address_registers[2] += top.loop_increment; | 63 | state.address_registers[2] += top.loop_increment; |
| 48 | 64 | ||
| 49 | if (top.repeat_counter-- == 0) { | 65 | if (top.repeat_counter-- == 0) { |
| 50 | state.program_counter = top.return_address; | 66 | program_counter = top.return_address; |
| 51 | state.call_stack.pop_back(); | 67 | call_stack.pop_back(); |
| 52 | } else { | 68 | } else { |
| 53 | state.program_counter = top.loop_address; | 69 | program_counter = top.loop_address; |
| 54 | } | 70 | } |
| 55 | 71 | ||
| 56 | // TODO: Is "trying again" accurate to hardware? | 72 | // TODO: Is "trying again" accurate to hardware? |
| @@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 58 | } | 74 | } |
| 59 | } | 75 | } |
| 60 | 76 | ||
| 61 | const Instruction instr = { program_code[state.program_counter] }; | 77 | const Instruction instr = { program_code[program_counter] }; |
| 62 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 78 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 63 | 79 | ||
| 64 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, | 80 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 65 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 81 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 66 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 82 | program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 67 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 83 | ASSERT(call_stack.size() < call_stack.capacity()); |
| 68 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 84 | call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 69 | }; | 85 | }; // must not be static: the closure captures this invocation's program_counter/call_stack by reference |
| 70 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); |
| 71 | if (iteration > 0) | 87 | if (iteration > 0) |
| 72 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | 88 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); |
| 73 | 89 | ||
| 74 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 90 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); |
| 75 | 91 | ||
| 76 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 92 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 77 | switch (source_reg.GetRegisterType()) { | 93 | switch (source_reg.GetRegisterType()) { |
| @@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 519 | case OpCode::Id::JMPC: | 535 | case OpCode::Id::JMPC: |
| 520 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 536 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 521 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 537 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 522 | state.program_counter = instr.flow_control.dest_offset - 1; | 538 | program_counter = instr.flow_control.dest_offset - 1; |
| 523 | } | 539 | } |
| 524 | break; | 540 | break; |
| 525 | 541 | ||
| @@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 527 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 543 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 528 | 544 | ||
| 529 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { | 545 | if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { |
| 530 | state.program_counter = instr.flow_control.dest_offset - 1; | 546 | program_counter = instr.flow_control.dest_offset - 1; |
| 531 | } | 547 | } |
| 532 | break; | 548 | break; |
| 533 | 549 | ||
| @@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 535 | call(state, | 551 | call(state, |
| 536 | instr.flow_control.dest_offset, | 552 | instr.flow_control.dest_offset, |
| 537 | instr.flow_control.num_instructions, | 553 | instr.flow_control.num_instructions, |
| 538 | state.program_counter + 1, 0, 0); | 554 | program_counter + 1, 0, 0); |
| 539 | break; | 555 | break; |
| 540 | 556 | ||
| 541 | case OpCode::Id::CALLU: | 557 | case OpCode::Id::CALLU: |
| @@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 544 | call(state, | 560 | call(state, |
| 545 | instr.flow_control.dest_offset, | 561 | instr.flow_control.dest_offset, |
| 546 | instr.flow_control.num_instructions, | 562 | instr.flow_control.num_instructions, |
| 547 | state.program_counter + 1, 0, 0); | 563 | program_counter + 1, 0, 0); |
| 548 | } | 564 | } |
| 549 | break; | 565 | break; |
| 550 | 566 | ||
| @@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 554 | call(state, | 570 | call(state, |
| 555 | instr.flow_control.dest_offset, | 571 | instr.flow_control.dest_offset, |
| 556 | instr.flow_control.num_instructions, | 572 | instr.flow_control.num_instructions, |
| 557 | state.program_counter + 1, 0, 0); | 573 | program_counter + 1, 0, 0); |
| 558 | } | 574 | } |
| 559 | break; | 575 | break; |
| 560 | 576 | ||
| @@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 565 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 581 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 566 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 582 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 567 | call(state, | 583 | call(state, |
| 568 | state.program_counter + 1, | 584 | program_counter + 1, |
| 569 | instr.flow_control.dest_offset - state.program_counter - 1, | 585 | instr.flow_control.dest_offset - program_counter - 1, |
| 570 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 586 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 571 | } else { | 587 | } else { |
| 572 | call(state, | 588 | call(state, |
| @@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 584 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | 600 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); |
| 585 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 601 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 586 | call(state, | 602 | call(state, |
| 587 | state.program_counter + 1, | 603 | program_counter + 1, |
| 588 | instr.flow_control.dest_offset - state.program_counter - 1, | 604 | instr.flow_control.dest_offset - program_counter - 1, |
| 589 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 605 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 590 | } else { | 606 | } else { |
| 591 | call(state, | 607 | call(state, |
| @@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 607 | 623 | ||
| 608 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 624 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); |
| 609 | call(state, | 625 | call(state, |
| 610 | state.program_counter + 1, | 626 | program_counter + 1, |
| 611 | instr.flow_control.dest_offset - state.program_counter + 1, | 627 | instr.flow_control.dest_offset - program_counter + 1, |
| 612 | instr.flow_control.dest_offset + 1, | 628 | instr.flow_control.dest_offset + 1, |
| 613 | loop_param.x, | 629 | loop_param.x, |
| 614 | loop_param.z); | 630 | loop_param.z); |
| @@ -625,7 +641,7 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 625 | } | 641 | } |
| 626 | } | 642 | } |
| 627 | 643 | ||
| 628 | ++state.program_counter; | 644 | ++program_counter; |
| 629 | ++iteration; | 645 | ++iteration; |
| 630 | } | 646 | } |
| 631 | } | 647 | } |
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 21ae52949..83896814f 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp | |||
| @@ -124,7 +124,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I | |||
| 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | 124 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |
| 125 | } else if (vertex_attribute_is_default[i]) { | 125 | } else if (vertex_attribute_is_default[i]) { |
| 126 | // Load the default attribute if we're configured to do so | 126 | // Load the default attribute if we're configured to do so |
| 127 | input.attr[i] = g_state.vs.default_attributes[i]; | 127 | input.attr[i] = g_state.vs_default_attributes[i]; |
| 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | 128 | LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |
| 129 | i, vertex, index, | 129 | i, vertex, index, |
| 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | 130 | input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |