summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules2
-rw-r--r--src/audio_core/hle/source.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp2
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp2
-rw-r--r--src/common/swap.h68
-rw-r--r--src/core/arm/dyncom/arm_dyncom.cpp2
-rw-r--r--src/core/gdbstub/gdbstub.cpp6
-rw-r--r--src/core/hle/kernel/memory.cpp1
-rw-r--r--src/core/hle/kernel/process.h7
-rw-r--r--src/core/hle/kernel/thread.cpp84
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/memory.h6
-rw-r--r--src/video_core/clipper.cpp4
-rw-r--r--src/video_core/command_processor.cpp10
-rw-r--r--src/video_core/pica.cpp2
-rw-r--r--src/video_core/pica.h30
-rw-r--r--src/video_core/pica_state.h2
-rw-r--r--src/video_core/rasterizer.cpp55
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h218
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp177
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h1
-rw-r--r--src/video_core/shader/shader.cpp24
-rw-r--r--src/video_core/shader/shader.h101
-rw-r--r--src/video_core/shader/shader_interpreter.cpp68
-rw-r--r--src/video_core/vertex_loader.cpp2
27 files changed, 543 insertions, 362 deletions
diff --git a/.gitmodules b/.gitmodules
index 059512902..db0905b3d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -9,4 +9,4 @@
9 url = https://github.com/neobrain/nihstro.git 9 url = https://github.com/neobrain/nihstro.git
10[submodule "soundtouch"] 10[submodule "soundtouch"]
11 path = externals/soundtouch 11 path = externals/soundtouch
12 url = https://github.com/citra-emu/soundtouch.git 12 url = https://github.com/citra-emu/ext-soundtouch.git
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index daaf6e3f3..30552fe26 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -126,13 +126,13 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_l
126 if (config.simple_filter_dirty) { 126 if (config.simple_filter_dirty) {
127 config.simple_filter_dirty.Assign(0); 127 config.simple_filter_dirty.Assign(0);
128 state.filters.Configure(config.simple_filter); 128 state.filters.Configure(config.simple_filter);
129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update"); 129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id);
130 } 130 }
131 131
132 if (config.biquad_filter_dirty) { 132 if (config.biquad_filter_dirty) {
133 config.biquad_filter_dirty.Assign(0); 133 config.biquad_filter_dirty.Assign(0);
134 state.filters.Configure(config.biquad_filter); 134 state.filters.Configure(config.biquad_filter);
135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update"); 135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id);
136 } 136 }
137 137
138 if (config.interpolation_dirty) { 138 if (config.interpolation_dirty) {
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index 1402f8e79..9c80f7ec9 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -74,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() {
74 std::array<u32, 4 * 16> default_attributes; 74 std::array<u32, 4 * 16> default_attributes;
75 for (unsigned i = 0; i < 16; ++i) { 75 for (unsigned i = 0; i < 16; ++i) {
76 for (unsigned comp = 0; comp < 3; ++comp) { 76 for (unsigned comp = 0; comp < 3; ++comp) {
77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); 77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
78 } 78 }
79 } 79 }
80 80
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 854f6ff16..391666d35 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
501 info.labels.insert({ entry_point, "main" }); 501 info.labels.insert({ entry_point, "main" });
502 502
503 // Generate debug information 503 // Generate debug information
504 debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); 504 debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
505 505
506 // Reload widget state 506 // Reload widget state
507 for (int attr = 0; attr < num_attributes; ++attr) { 507 for (int attr = 0; attr < num_attributes; ++attr) {
diff --git a/src/common/swap.h b/src/common/swap.h
index a7c37bc44..1749bd7a4 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -25,6 +25,8 @@
25 #include <sys/endian.h> 25 #include <sys/endian.h>
26#endif 26#endif
27 27
28#include <cstring>
29
28#include "common/common_types.h" 30#include "common/common_types.h"
29 31
30// GCC 4.6+ 32// GCC 4.6+
@@ -58,9 +60,6 @@
58 60
59namespace Common { 61namespace Common {
60 62
61inline u8 swap8(u8 _data) {return _data;}
62inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
63
64#ifdef _MSC_VER 63#ifdef _MSC_VER
65inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} 64inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
66inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} 65inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
@@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
92#endif 91#endif
93 92
94inline float swapf(float f) { 93inline float swapf(float f) {
95 union { 94 static_assert(sizeof(u32) == sizeof(float),
96 float f; 95 "float must be the same size as uint32_t.");
97 unsigned int u32;
98 } dat1, dat2;
99
100 dat1.f = f;
101 dat2.u32 = swap32(dat1.u32);
102 96
103 return dat2.f; 97 u32 value;
104} 98 std::memcpy(&value, &f, sizeof(u32));
105
106inline double swapd(double f) {
107 union {
108 double f;
109 unsigned long long u64;
110 } dat1, dat2;
111 99
112 dat1.f = f; 100 value = swap32(value);
113 dat2.u64 = swap64(dat1.u64); 101 std::memcpy(&f, &value, sizeof(u32));
114 102
115 return dat2.f; 103 return f;
116} 104}
117 105
118inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} 106inline double swapd(double f) {
119inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} 107 static_assert(sizeof(u64) == sizeof(double),
120inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} 108 "double must be the same size as uint64_t.");
121
122template <int count>
123void swap(u8*);
124 109
125template <> 110 u64 value;
126inline void swap<1>(u8* data) { } 111 std::memcpy(&value, &f, sizeof(u64));
127 112
128template <> 113 value = swap64(value);
129inline void swap<2>(u8* data) { 114 std::memcpy(&f, &value, sizeof(u64));
130 *reinterpret_cast<u16*>(data) = swap16(data);
131}
132
133template <>
134inline void swap<4>(u8* data) {
135 *reinterpret_cast<u32*>(data) = swap32(data);
136}
137 115
138template <> 116 return f;
139inline void swap<8>(u8* data) {
140 *reinterpret_cast<u64*>(data) = swap64(data);
141} 117}
142 118
143} // Namespace Common 119} // Namespace Common
@@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) {
534template <typename T> 510template <typename T>
535struct swap_64_t { 511struct swap_64_t {
536 static T swap(T x) { 512 static T swap(T x) {
537 return (T)Common::swap64(*(u64 *)&x); 513 return static_cast<T>(Common::swap64(x));
538 } 514 }
539}; 515};
540 516
541template <typename T> 517template <typename T>
542struct swap_32_t { 518struct swap_32_t {
543 static T swap(T x) { 519 static T swap(T x) {
544 return (T)Common::swap32(*(u32 *)&x); 520 return static_cast<T>(Common::swap32(x));
545 } 521 }
546}; 522};
547 523
548template <typename T> 524template <typename T>
549struct swap_16_t { 525struct swap_16_t {
550 static T swap(T x) { 526 static T swap(T x) {
551 return (T)Common::swap16(*(u16 *)&x); 527 return static_cast<T>(Common::swap16(x));
552 } 528 }
553}; 529};
554 530
555template <typename T> 531template <typename T>
556struct swap_float_t { 532struct swap_float_t {
557 static T swap(T x) { 533 static T swap(T x) {
558 return (T)Common::swapf(*(float *)&x); 534 return static_cast<T>(Common::swapf(x));
559 } 535 }
560}; 536};
561 537
562template <typename T> 538template <typename T>
563struct swap_double_t { 539struct swap_double_t {
564 static T swap(T x) { 540 static T swap(T x) {
565 return (T)Common::swapd(*(double *)&x); 541 return static_cast<T>(Common::swapd(x));
566 } 542 }
567}; 543};
568 544
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index a3581132c..13492a08b 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e
93 context.cpu_registers[0] = arg; 93 context.cpu_registers[0] = arg;
94 context.pc = entry_point; 94 context.pc = entry_point;
95 context.sp = stack_top; 95 context.sp = stack_top;
96 context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode 96 context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode
97} 97}
98 98
99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { 99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) {
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 1360ee845..820b19e1a 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -437,7 +437,7 @@ static void HandleSetThread() {
437 * 437 *
438 * @param signal Signal to be sent to client. 438 * @param signal Signal to be sent to client.
439 */ 439 */
440void SendSignal(u32 signal) { 440static void SendSignal(u32 signal) {
441 if (gdbserver_socket == -1) { 441 if (gdbserver_socket == -1) {
442 return; 442 return;
443 } 443 }
@@ -713,7 +713,7 @@ static void Continue() {
713 * @param addr Address of breakpoint. 713 * @param addr Address of breakpoint.
714 * @param len Length of breakpoint. 714 * @param len Length of breakpoint.
715 */ 715 */
716bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { 716static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) {
717 std::map<u32, Breakpoint>& p = GetBreakpointList(type); 717 std::map<u32, Breakpoint>& p = GetBreakpointList(type);
718 718
719 Breakpoint breakpoint; 719 Breakpoint breakpoint;
@@ -907,7 +907,7 @@ void ToggleServer(bool status) {
907 } 907 }
908} 908}
909 909
910void Init(u16 port) { 910static void Init(u16 port) {
911 if (!g_server_enabled) { 911 if (!g_server_enabled) {
912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution. 912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution.
913 // This way the CPU can still execute normally. 913 // This way the CPU can still execute normally.
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 6f1f0856c..4be20db22 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -108,7 +108,6 @@ struct MemoryArea {
108// We don't declare the IO regions in here since its handled by other means. 108// We don't declare the IO regions in here since its handled by other means.
109static MemoryArea memory_areas[] = { 109static MemoryArea memory_areas[] = {
110 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) 110 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
111 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
112}; 111};
113 112
114} 113}
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index a06afef2b..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -142,8 +142,11 @@ public:
142 142
143 MemoryRegionInfo* memory_region = nullptr; 143 MemoryRegionInfo* memory_region = nullptr;
144 144
145 /// Bitmask of the used TLS slots 145 /// The Thread Local Storage area is allocated as processes create threads,
146 std::bitset<300> used_tls_slots; 146 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
147 /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask.
148 /// This vector will grow as more pages are allocated for new threads.
149 std::vector<std::bitset<8>> tls_slots;
147 150
148 VAddr GetLinearHeapAreaAddress() const; 151 VAddr GetLinearHeapAreaAddress() const;
149 VAddr GetLinearHeapBase() const; 152 VAddr GetLinearHeapBase() const;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6dc95d0f1..68f026918 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
117 } 117 }
118 wait_objects.clear(); 118 wait_objects.clear();
119 119
120 Kernel::g_current_process->used_tls_slots[tls_index] = false; 120 // Mark the TLS slot in the thread's page as free.
121 g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; 121 u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
122 g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; 122 u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
123 Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
123 124
124 HLE::Reschedule(__func__); 125 HLE::Reschedule(__func__);
125} 126}
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
366 } 367 }
367} 368}
368 369
370/**
371 * Finds a free location for the TLS section of a thread.
372 * @param tls_slots The TLS page array of the thread's owner process.
373 * Returns a tuple of (page, slot, alloc_needed) where:
374 * page: The index of the first allocated TLS page that has free slots.
375 * slot: The index of the first free slot in the indicated page.
376 * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
377 */
378std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) {
379 // Iterate over all the allocated pages, and try to find one where not all slots are used.
380 for (unsigned page = 0; page < tls_slots.size(); ++page) {
381 const auto& page_tls_slots = tls_slots[page];
382 if (!page_tls_slots.all()) {
383 // We found a page with at least one free slot, find which slot it is
384 for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
385 if (!page_tls_slots.test(slot)) {
386 return std::make_tuple(page, slot, false);
387 }
388 }
389 }
390 }
391
392 return std::make_tuple(0, 0, true);
393}
394
369ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, 395ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
370 u32 arg, s32 processor_id, VAddr stack_top) { 396 u32 arg, s32 processor_id, VAddr stack_top) {
371 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { 397 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
403 thread->name = std::move(name); 429 thread->name = std::move(name);
404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 430 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
405 thread->owner_process = g_current_process; 431 thread->owner_process = g_current_process;
406 thread->tls_index = -1;
407 thread->waitsynch_waited = false; 432 thread->waitsynch_waited = false;
408 433
409 // Find the next available TLS index, and mark it as used 434 // Find the next available TLS index, and mark it as used
410 auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; 435 auto& tls_slots = Kernel::g_current_process->tls_slots;
411 for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { 436 bool needs_allocation = true;
412 if (used_tls_slots[i] == false) { 437 u32 available_page; // Which allocated page has free space
413 thread->tls_index = i; 438 u32 available_slot; // Which slot within the page is free
414 used_tls_slots[i] = true; 439
415 break; 440 std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
441
442 if (needs_allocation) {
443 // There are no already-allocated pages with free slots, lets allocate a new one.
444 // TLS pages are allocated from the BASE region in the linear heap.
445 MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
446 auto& linheap_memory = memory_region->linear_heap_memory;
447
448 if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
449 LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
450 return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
416 } 451 }
452
453 u32 offset = linheap_memory->size();
454
455 // Allocate some memory from the end of the linear heap for this region.
456 linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
457 memory_region->used += Memory::PAGE_SIZE;
458 Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
459
460 tls_slots.emplace_back(0); // The page is completely available at the start
461 available_page = tls_slots.size() - 1;
462 available_slot = 0; // Use the first slot in the new page
463
464 auto& vm_manager = Kernel::g_current_process->vm_manager;
465 vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
466
467 // Map the page to the current process' address space.
468 // TODO(Subv): Find the correct MemoryState for this region.
469 vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
470 linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
417 } 471 }
418 472
419 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); 473 // Mark the slot as used
420 g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; 474 tls_slots[available_page].set(available_slot);
421 g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; 475 thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
422 476
423 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 477 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
424 // to initialize the context 478 // to initialize the context
@@ -509,10 +563,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
509 context.cpu_registers[1] = output; 563 context.cpu_registers[1] = output;
510} 564}
511 565
512VAddr Thread::GetTLSAddress() const {
513 return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
514}
515
516//////////////////////////////////////////////////////////////////////////////////////////////////// 566////////////////////////////////////////////////////////////////////////////////////////////////////
517 567
518void ThreadingInit() { 568void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 97ba57fc5..deab5d5a6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
127 * Returns the Thread Local Storage address of the current thread 127 * Returns the Thread Local Storage address of the current thread
128 * @returns VAddr of the thread's TLS 128 * @returns VAddr of the thread's TLS
129 */ 129 */
130 VAddr GetTLSAddress() const; 130 VAddr GetTLSAddress() const { return tls_address; }
131 131
132 Core::ThreadContext context; 132 Core::ThreadContext context;
133 133
@@ -144,7 +144,7 @@ public:
144 144
145 s32 processor_id; 145 s32 processor_id;
146 146
147 s32 tls_index; ///< Index of the Thread Local Storage of the thread 147 VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
148 148
149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait 149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
150 150
diff --git a/src/core/memory.h b/src/core/memory.h
index 9caa3c3f5..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
100 SHARED_PAGE_SIZE = 0x00001000, 100 SHARED_PAGE_SIZE = 0x00001000,
101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, 101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
102 102
103 // TODO(yuriks): The size of this area is dynamic, the kernel grows
104 // it as more and more threads are created. For now we'll just use a
105 // hardcoded value.
106 /// Area where TLS (Thread-Local Storage) buffers are allocated. 103 /// Area where TLS (Thread-Local Storage) buffers are allocated.
107 TLS_AREA_VADDR = 0x1FF82000, 104 TLS_AREA_VADDR = 0x1FF82000,
108 TLS_ENTRY_SIZE = 0x200, 105 TLS_ENTRY_SIZE = 0x200,
109 TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
110 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112 106
113 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 107 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114 NEW_LINEAR_HEAP_VADDR = 0x30000000, 108 NEW_LINEAR_HEAP_VADDR = 0x30000000,
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 2bc747102..db99ce666 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -75,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx)
75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
78 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
79 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
80 78
81 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
82 vtx.color *= inv_w; 80 vtx.color *= inv_w;
@@ -89,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx)
89 87
90 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
91 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
92 vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; 90 vtx.screenpos[2] = vtx.pos.z * inv_w;
93} 91}
94 92
95void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index dd1379503..ad0da796e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -128,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
128 128
129 // TODO: Verify that this actually modifies the register! 129 // TODO: Verify that this actually modifies the register!
130 if (setup.index < 15) { 130 if (setup.index < 15) {
131 g_state.vs.default_attributes[setup.index] = attribute; 131 g_state.vs_default_attributes[setup.index] = attribute;
132 setup.index++; 132 setup.index++;
133 } else { 133 } else {
134 // Put each attribute into an immediate input buffer. 134 // Put each attribute into an immediate input buffer.
@@ -144,12 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
144 immediate_attribute_id = 0; 144 immediate_attribute_id = 0;
145 145
146 Shader::UnitState<false> shader_unit; 146 Shader::UnitState<false> shader_unit;
147 Shader::Setup(); 147 g_state.vs.Setup();
148 148
149 // Send to vertex shader 149 // Send to vertex shader
150 if (g_debug_context) 150 if (g_debug_context)
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); 151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); 152 Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
153 153
154 // Send to renderer 154 // Send to renderer
155 using Pica::Shader::OutputVertex; 155 using Pica::Shader::OutputVertex;
@@ -237,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
237 vertex_cache_ids.fill(-1); 237 vertex_cache_ids.fill(-1);
238 238
239 Shader::UnitState<false> shader_unit; 239 Shader::UnitState<false> shader_unit;
240 Shader::Setup(); 240 g_state.vs.Setup();
241 241
242 for (unsigned int index = 0; index < regs.num_vertices; ++index) 242 for (unsigned int index = 0; index < regs.num_vertices; ++index)
243 { 243 {
@@ -274,7 +274,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
274 // Send to vertex shader 274 // Send to vertex shader
275 if (g_debug_context) 275 if (g_debug_context)
276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); 276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
277 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); 277 output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
278 278
279 if (is_indexed) { 279 if (is_indexed) {
280 vertex_cache[vertex_cache_pos] = output; 280 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index be82cf4b5..ec78f9593 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -500,7 +500,7 @@ void Init() {
500} 500}
501 501
502void Shutdown() { 502void Shutdown() {
503 Shader::Shutdown(); 503 Shader::ClearCache();
504} 504}
505 505
506template <typename T> 506template <typename T>
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 5891fb72a..86c0a0096 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -70,7 +70,7 @@ struct Regs {
70 INSERT_PADDING_WORDS(0x9); 70 INSERT_PADDING_WORDS(0x9);
71 71
72 BitField<0, 24, u32> viewport_depth_range; // float24 72 BitField<0, 24, u32> viewport_depth_range; // float24
73 BitField<0, 24, u32> viewport_depth_far_plane; // float24 73 BitField<0, 24, u32> viewport_depth_near_plane; // float24
74 74
75 BitField<0, 3, u32> vs_output_total; 75 BitField<0, 3, u32> vs_output_total;
76 76
@@ -122,9 +122,31 @@ struct Regs {
122 BitField<16, 10, s32> y; 122 BitField<16, 10, s32> y;
123 } viewport_corner; 123 } viewport_corner;
124 124
125 INSERT_PADDING_WORDS(0x17); 125 INSERT_PADDING_WORDS(0x1);
126
127 //TODO: early depth
128 INSERT_PADDING_WORDS(0x1);
129
130 INSERT_PADDING_WORDS(0x2);
131
132 enum DepthBuffering : u32 {
133 WBuffering = 0,
134 ZBuffering = 1,
135 };
136 BitField< 0, 1, DepthBuffering> depthmap_enable;
137
138 INSERT_PADDING_WORDS(0x12);
126 139
127 struct TextureConfig { 140 struct TextureConfig {
141 enum TextureType : u32 {
142 Texture2D = 0,
143 TextureCube = 1,
144 Shadow2D = 2,
145 Projection2D = 3,
146 ShadowCube = 4,
147 Disabled = 5,
148 };
149
128 enum WrapMode : u32 { 150 enum WrapMode : u32 {
129 ClampToEdge = 0, 151 ClampToEdge = 0,
130 ClampToBorder = 1, 152 ClampToBorder = 1,
@@ -155,6 +177,7 @@ struct Regs {
155 BitField< 2, 1, TextureFilter> min_filter; 177 BitField< 2, 1, TextureFilter> min_filter;
156 BitField< 8, 2, WrapMode> wrap_t; 178 BitField< 8, 2, WrapMode> wrap_t;
157 BitField<12, 2, WrapMode> wrap_s; 179 BitField<12, 2, WrapMode> wrap_s;
180 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew.
158 }; 181 };
159 182
160 INSERT_PADDING_WORDS(0x1); 183 INSERT_PADDING_WORDS(0x1);
@@ -1279,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40);
1279ASSERT_REG_POSITION(viewport_size_x, 0x41); 1302ASSERT_REG_POSITION(viewport_size_x, 0x41);
1280ASSERT_REG_POSITION(viewport_size_y, 0x43); 1303ASSERT_REG_POSITION(viewport_size_y, 0x43);
1281ASSERT_REG_POSITION(viewport_depth_range, 0x4d); 1304ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
1282ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); 1305ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
1283ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 1306ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
1284ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 1307ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
1285ASSERT_REG_POSITION(viewport_corner, 0x68); 1308ASSERT_REG_POSITION(viewport_corner, 0x68);
1309ASSERT_REG_POSITION(depthmap_enable, 0x6D);
1286ASSERT_REG_POSITION(texture0_enable, 0x80); 1310ASSERT_REG_POSITION(texture0_enable, 0x80);
1287ASSERT_REG_POSITION(texture0, 0x81); 1311ASSERT_REG_POSITION(texture0, 0x81);
1288ASSERT_REG_POSITION(texture0_format, 0x8e); 1312ASSERT_REG_POSITION(texture0_format, 0x8e);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 1059c6ae4..495174c25 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -25,6 +25,8 @@ struct State {
25 Shader::ShaderSetup vs; 25 Shader::ShaderSetup vs;
26 Shader::ShaderSetup gs; 26 Shader::ShaderSetup gs;
27 27
28 std::array<Math::Vec4<float24>, 16> vs_default_attributes;
29
28 struct { 30 struct {
29 union LutEntry { 31 union LutEntry {
30 // Used for raw access 32 // Used for raw access
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index df67b9081..65168f05a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -442,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
442 442
443 DEBUG_ASSERT(0 != texture.config.address); 443 DEBUG_ASSERT(0 != texture.config.address);
444 444
445 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 445 float24 u = uv[i].u();
446 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 446 float24 v = uv[i].v();
447
448 // Only unit 0 respects the texturing type (according to 3DBrew)
449 // TODO: Refactor so cubemaps and shadowmaps can be handled
450 if (i == 0) {
451 switch(texture.config.type) {
452 case Regs::TextureConfig::Texture2D:
453 break;
454 case Regs::TextureConfig::Projection2D: {
455 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
456 u /= tc0_w;
457 v /= tc0_w;
458 break;
459 }
460 default:
461 // TODO: Change to LOG_ERROR when more types are handled.
462 LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type);
463 UNIMPLEMENTED();
464 break;
465 }
466 }
467
468 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
469 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
470
471
447 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 472 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
448 switch (mode) { 473 switch (mode) {
449 case Regs::TextureConfig::ClampToEdge: 474 case Regs::TextureConfig::ClampToEdge:
@@ -862,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
862 } 887 }
863 } 888 }
864 889
890 // interpolated_z = z / w
891 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
892 v1.screenpos[2].ToFloat32() * w1 +
893 v2.screenpos[2].ToFloat32() * w2) / wsum;
894
895 // Not fully accurate. About 3 bits in precision are missing.
896 // Z-Buffer (z / w * scale + offset)
897 float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
898 float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
899 float depth = interpolated_z_over_w * depth_scale + depth_offset;
900
901 // Potentially switch to W-Buffer
902 if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
903
904 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
905 depth *= interpolated_w_inverse.ToFloat32() * wsum;
906 }
907
908 // Clamp the result
909 depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
910
911 // Convert float to integer
865 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); 912 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
866 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + 913 u32 z = (u32)(depth * ((1 << num_bits) - 1));
867 v1.screenpos[2].ToFloat32() * w1 +
868 v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
869 914
870 if (output_merger.depth_test_enable) { 915 if (output_merger.depth_test_enable) {
871 u32 ref_z = GetDepth(x >> 4, y >> 4); 916 u32 ref_z = GetDepth(x >> 4, y >> 4);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0b471dfd2..ed2e2f3ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
78 78
79 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
81
79 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
81 84
@@ -256,10 +259,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
256 259
257 // Depth modifiers 260 // Depth modifiers
258 case PICA_REG_INDEX(viewport_depth_range): 261 case PICA_REG_INDEX(viewport_depth_range):
259 case PICA_REG_INDEX(viewport_depth_far_plane): 262 case PICA_REG_INDEX(viewport_depth_near_plane):
260 SyncDepthModifiers(); 263 SyncDepthModifiers();
261 break; 264 break;
262 265
266 // Depth buffering
267 case PICA_REG_INDEX(depthmap_enable):
268 shader_dirty = true;
269 break;
270
263 // Blending 271 // Blending
264 case PICA_REG_INDEX(output_merger.alphablend_enable): 272 case PICA_REG_INDEX(output_merger.alphablend_enable):
265 SyncBlendEnabled(); 273 SyncBlendEnabled();
@@ -314,6 +322,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
314 SyncLogicOp(); 322 SyncLogicOp();
315 break; 323 break;
316 324
325 // Texture 0 type
326 case PICA_REG_INDEX(texture0.type):
327 shader_dirty = true;
328 break;
329
317 // TEV stages 330 // TEV stages
318 case PICA_REG_INDEX(tev_stage0.color_source1): 331 case PICA_REG_INDEX(tev_stage0.color_source1):
319 case PICA_REG_INDEX(tev_stage0.color_modifier1): 332 case PICA_REG_INDEX(tev_stage0.color_modifier1):
@@ -910,10 +923,10 @@ void RasterizerOpenGL::SyncCullMode() {
910} 923}
911 924
912void RasterizerOpenGL::SyncDepthModifiers() { 925void RasterizerOpenGL::SyncDepthModifiers() {
913 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 926 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
914 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 927 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
915 928
916 // TODO: Implement scale modifier 929 uniform_block_data.data.depth_scale = depth_scale;
917 uniform_block_data.data.depth_offset = depth_offset; 930 uniform_block_data.data.depth_offset = depth_offset;
918 uniform_block_data.dirty = true; 931 uniform_block_data.dirty = true;
919} 932}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82fa61742..eed00011a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -39,140 +39,185 @@ struct ScreenInfo;
39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where 39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
41 * two separate shaders sharing the same key. 41 * two separate shaders sharing the same key.
42 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X."
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X."
45 * = Bytewise copy instead of memberwise copy.
46 * This is important because the padding bytes are included in the hash and comparison between objects.
42 */ 47 */
43struct PicaShaderConfig { 48union PicaShaderConfig {
49
44 /// Construct a PicaShaderConfig with the current Pica register configuration. 50 /// Construct a PicaShaderConfig with the current Pica register configuration.
45 static PicaShaderConfig CurrentConfig() { 51 static PicaShaderConfig CurrentConfig() {
46 PicaShaderConfig res; 52 PicaShaderConfig res;
53
54 auto& state = res.state;
55 std::memset(&state, 0, sizeof(PicaShaderConfig::State));
56
47 const auto& regs = Pica::g_state.regs; 57 const auto& regs = Pica::g_state.regs;
48 58
49 res.alpha_test_func = regs.output_merger.alpha_test.enable ? 59 state.depthmap_enable = regs.depthmap_enable;
60
61 state.alpha_test_func = regs.output_merger.alpha_test.enable ?
50 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 62 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
51 63
64 state.texture0_type = regs.texture0.type;
65
52 // Copy relevant tev stages fields. 66 // Copy relevant tev stages fields.
53 // We don't sync const_color here because of the high variance, it is a 67 // We don't sync const_color here because of the high variance, it is a
54 // shader uniform instead. 68 // shader uniform instead.
55 const auto& tev_stages = regs.GetTevStages(); 69 const auto& tev_stages = regs.GetTevStages();
56 DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); 70 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
57 for (size_t i = 0; i < tev_stages.size(); i++) { 71 for (size_t i = 0; i < tev_stages.size(); i++) {
58 const auto& tev_stage = tev_stages[i]; 72 const auto& tev_stage = tev_stages[i];
59 res.tev_stages[i].sources_raw = tev_stage.sources_raw; 73 state.tev_stages[i].sources_raw = tev_stage.sources_raw;
60 res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; 74 state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
61 res.tev_stages[i].ops_raw = tev_stage.ops_raw; 75 state.tev_stages[i].ops_raw = tev_stage.ops_raw;
62 res.tev_stages[i].scales_raw = tev_stage.scales_raw; 76 state.tev_stages[i].scales_raw = tev_stage.scales_raw;
63 } 77 }
64 78
65 res.combiner_buffer_input = 79 state.combiner_buffer_input =
66 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 80 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
67 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 81 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
68 82
69 // Fragment lighting 83 // Fragment lighting
70 84
71 res.lighting.enable = !regs.lighting.disable; 85 state.lighting.enable = !regs.lighting.disable;
72 res.lighting.src_num = regs.lighting.num_lights + 1; 86 state.lighting.src_num = regs.lighting.num_lights + 1;
73 87
74 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { 88 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
75 unsigned num = regs.lighting.light_enable.GetNum(light_index); 89 unsigned num = regs.lighting.light_enable.GetNum(light_index);
76 const auto& light = regs.lighting.light[num]; 90 const auto& light = regs.lighting.light[num];
77 res.lighting.light[light_index].num = num; 91 state.lighting.light[light_index].num = num;
78 res.lighting.light[light_index].directional = light.directional != 0; 92 state.lighting.light[light_index].directional = light.directional != 0;
79 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; 93 state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
80 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 94 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
81 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); 95 state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
82 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); 96 state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
83 } 97 }
84 98
85 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; 99 state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
86 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; 100 state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
87 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); 101 state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
88 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); 102 state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
89 103
90 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; 104 state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
91 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; 105 state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
92 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); 106 state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
93 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); 107 state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
94 108
95 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; 109 state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
96 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; 110 state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
97 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); 111 state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
98 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); 112 state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
99 113
100 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; 114 state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
101 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; 115 state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
102 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); 116 state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
103 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); 117 state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
104 118
105 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; 119 state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
106 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; 120 state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
107 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); 121 state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
108 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); 122 state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
109 123
110 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; 124 state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
111 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; 125 state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
112 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); 126 state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
113 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); 127 state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
114 128
115 res.lighting.config = regs.lighting.config; 129 state.lighting.config = regs.lighting.config;
116 res.lighting.fresnel_selector = regs.lighting.fresnel_selector; 130 state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
117 res.lighting.bump_mode = regs.lighting.bump_mode; 131 state.lighting.bump_mode = regs.lighting.bump_mode;
118 res.lighting.bump_selector = regs.lighting.bump_selector; 132 state.lighting.bump_selector = regs.lighting.bump_selector;
119 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; 133 state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
120 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; 134 state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
121 135
122 return res; 136 return res;
123 } 137 }
124 138
125 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 139 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
126 return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); 140 return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
127 } 141 }
128 142
129 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { 143 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
130 return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); 144 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
131 } 145 }
132 146
133 bool operator ==(const PicaShaderConfig& o) const { 147 bool operator ==(const PicaShaderConfig& o) const {
134 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 148 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
135 }; 149 };
136 150
137 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; 151 // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC.
138 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; 152 // This makes BitField not TC when used in a union or struct so we have to resort
139 u8 combiner_buffer_input = 0; 153 // to this ugly hack.
154 // Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
155 // Doesn't include const_color because we don't sync it, see comment in CurrentConfig()
156 struct TevStageConfigRaw {
157 u32 sources_raw;
158 u32 modifiers_raw;
159 u32 ops_raw;
160 u32 scales_raw;
161 explicit operator Pica::Regs::TevStageConfig() const noexcept {
162 Pica::Regs::TevStageConfig stage;
163 stage.sources_raw = sources_raw;
164 stage.modifiers_raw = modifiers_raw;
165 stage.ops_raw = ops_raw;
166 stage.const_color = 0;
167 stage.scales_raw = scales_raw;
168 return stage;
169 }
170 };
140 171
141 struct { 172 struct State {
142 struct { 173
143 unsigned num = 0; 174 Pica::Regs::CompareFunc alpha_test_func;
144 bool directional = false; 175 Pica::Regs::TextureConfig::TextureType texture0_type;
145 bool two_sided_diffuse = false; 176 std::array<TevStageConfigRaw, 6> tev_stages;
146 bool dist_atten_enable = false; 177 u8 combiner_buffer_input;
147 GLfloat dist_atten_scale = 0.0f; 178
148 GLfloat dist_atten_bias = 0.0f; 179 Pica::Regs::DepthBuffering depthmap_enable;
149 } light[8];
150
151 bool enable = false;
152 unsigned src_num = 0;
153 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
154 unsigned bump_selector = 0;
155 bool bump_renorm = false;
156 bool clamp_highlights = false;
157
158 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
159 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
160 180
161 struct { 181 struct {
162 bool enable = false; 182 struct {
163 bool abs_input = false; 183 unsigned num;
164 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; 184 bool directional;
165 float scale = 1.0f; 185 bool two_sided_diffuse;
166 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; 186 bool dist_atten_enable;
167 } lighting; 187 GLfloat dist_atten_scale;
188 GLfloat dist_atten_bias;
189 } light[8];
190
191 bool enable;
192 unsigned src_num;
193 Pica::Regs::LightingBumpMode bump_mode;
194 unsigned bump_selector;
195 bool bump_renorm;
196 bool clamp_highlights;
197
198 Pica::Regs::LightingConfig config;
199 Pica::Regs::LightingFresnelSelector fresnel_selector;
200
201 struct {
202 bool enable;
203 bool abs_input;
204 Pica::Regs::LightingLutInput type;
205 float scale;
206 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
207 } lighting;
208
209 } state;
168}; 210};
211#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
212static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable");
213#endif
169 214
170namespace std { 215namespace std {
171 216
172template <> 217template <>
173struct hash<PicaShaderConfig> { 218struct hash<PicaShaderConfig> {
174 size_t operator()(const PicaShaderConfig& k) const { 219 size_t operator()(const PicaShaderConfig& k) const {
175 return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); 220 return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State));
176 } 221 }
177}; 222};
178 223
@@ -239,6 +284,7 @@ private:
239 tex_coord1[1] = v.tc1.y.ToFloat32(); 284 tex_coord1[1] = v.tc1.y.ToFloat32();
240 tex_coord2[0] = v.tc2.x.ToFloat32(); 285 tex_coord2[0] = v.tc2.x.ToFloat32();
241 tex_coord2[1] = v.tc2.y.ToFloat32(); 286 tex_coord2[1] = v.tc2.y.ToFloat32();
287 tex_coord0_w = v.tc0_w.ToFloat32();
242 normquat[0] = v.quat.x.ToFloat32(); 288 normquat[0] = v.quat.x.ToFloat32();
243 normquat[1] = v.quat.y.ToFloat32(); 289 normquat[1] = v.quat.y.ToFloat32();
244 normquat[2] = v.quat.z.ToFloat32(); 290 normquat[2] = v.quat.z.ToFloat32();
@@ -259,6 +305,7 @@ private:
259 GLfloat tex_coord0[2]; 305 GLfloat tex_coord0[2];
260 GLfloat tex_coord1[2]; 306 GLfloat tex_coord1[2];
261 GLfloat tex_coord2[2]; 307 GLfloat tex_coord2[2];
308 GLfloat tex_coord0_w;
262 GLfloat normquat[4]; 309 GLfloat normquat[4];
263 GLfloat view[3]; 310 GLfloat view[3];
264 }; 311 };
@@ -277,6 +324,7 @@ private:
277 GLvec4 const_color[6]; 324 GLvec4 const_color[6];
278 GLvec4 tev_combiner_buffer_color; 325 GLvec4 tev_combiner_buffer_color;
279 GLint alphatest_ref; 326 GLint alphatest_ref;
327 GLfloat depth_scale;
280 GLfloat depth_offset; 328 GLfloat depth_offset;
281 alignas(16) GLvec3 lighting_global_ambient; 329 alignas(16) GLvec3 lighting_global_ambient;
282 LightSrc light_src[8]; 330 LightSrc light_src[8];
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9011caa39..71d60e69c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -32,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
32} 32}
33 33
34/// Writes the specified TEV stage source component(s) 34/// Writes the specified TEV stage source component(s)
35static void AppendSource(std::string& out, TevStageConfig::Source source, 35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source,
36 const std::string& index_name) { 36 const std::string& index_name) {
37 const auto& state = config.state;
37 using Source = TevStageConfig::Source; 38 using Source = TevStageConfig::Source;
38 switch (source) { 39 switch (source) {
39 case Source::PrimaryColor: 40 case Source::PrimaryColor:
@@ -46,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
46 out += "secondary_fragment_color"; 47 out += "secondary_fragment_color";
47 break; 48 break;
48 case Source::Texture0: 49 case Source::Texture0:
49 out += "texture(tex[0], texcoord[0])"; 50 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])";
54 break;
55 case Pica::Regs::TextureConfig::Projection2D:
56 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
57 break;
58 default:
59 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED();
62 break;
63 }
50 break; 64 break;
51 case Source::Texture1: 65 case Source::Texture1:
52 out += "texture(tex[1], texcoord[1])"; 66 out += "texture(tex[1], texcoord[1])";
@@ -71,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
71} 85}
72 86
73/// Writes the color components to use for the specified TEV stage color modifier 87/// Writes the color components to use for the specified TEV stage color modifier
74static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, 88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier,
75 TevStageConfig::Source source, const std::string& index_name) { 89 TevStageConfig::Source source, const std::string& index_name) {
76 using ColorModifier = TevStageConfig::ColorModifier; 90 using ColorModifier = TevStageConfig::ColorModifier;
77 switch (modifier) { 91 switch (modifier) {
78 case ColorModifier::SourceColor: 92 case ColorModifier::SourceColor:
79 AppendSource(out, source, index_name); 93 AppendSource(out, config, source, index_name);
80 out += ".rgb"; 94 out += ".rgb";
81 break; 95 break;
82 case ColorModifier::OneMinusSourceColor: 96 case ColorModifier::OneMinusSourceColor:
83 out += "vec3(1.0) - "; 97 out += "vec3(1.0) - ";
84 AppendSource(out, source, index_name); 98 AppendSource(out, config, source, index_name);
85 out += ".rgb"; 99 out += ".rgb";
86 break; 100 break;
87 case ColorModifier::SourceAlpha: 101 case ColorModifier::SourceAlpha:
88 AppendSource(out, source, index_name); 102 AppendSource(out, config, source, index_name);
89 out += ".aaa"; 103 out += ".aaa";
90 break; 104 break;
91 case ColorModifier::OneMinusSourceAlpha: 105 case ColorModifier::OneMinusSourceAlpha:
92 out += "vec3(1.0) - "; 106 out += "vec3(1.0) - ";
93 AppendSource(out, source, index_name); 107 AppendSource(out, config, source, index_name);
94 out += ".aaa"; 108 out += ".aaa";
95 break; 109 break;
96 case ColorModifier::SourceRed: 110 case ColorModifier::SourceRed:
97 AppendSource(out, source, index_name); 111 AppendSource(out, config, source, index_name);
98 out += ".rrr"; 112 out += ".rrr";
99 break; 113 break;
100 case ColorModifier::OneMinusSourceRed: 114 case ColorModifier::OneMinusSourceRed:
101 out += "vec3(1.0) - "; 115 out += "vec3(1.0) - ";
102 AppendSource(out, source, index_name); 116 AppendSource(out, config, source, index_name);
103 out += ".rrr"; 117 out += ".rrr";
104 break; 118 break;
105 case ColorModifier::SourceGreen: 119 case ColorModifier::SourceGreen:
106 AppendSource(out, source, index_name); 120 AppendSource(out, config, source, index_name);
107 out += ".ggg"; 121 out += ".ggg";
108 break; 122 break;
109 case ColorModifier::OneMinusSourceGreen: 123 case ColorModifier::OneMinusSourceGreen:
110 out += "vec3(1.0) - "; 124 out += "vec3(1.0) - ";
111 AppendSource(out, source, index_name); 125 AppendSource(out, config, source, index_name);
112 out += ".ggg"; 126 out += ".ggg";
113 break; 127 break;
114 case ColorModifier::SourceBlue: 128 case ColorModifier::SourceBlue:
115 AppendSource(out, source, index_name); 129 AppendSource(out, config, source, index_name);
116 out += ".bbb"; 130 out += ".bbb";
117 break; 131 break;
118 case ColorModifier::OneMinusSourceBlue: 132 case ColorModifier::OneMinusSourceBlue:
119 out += "vec3(1.0) - "; 133 out += "vec3(1.0) - ";
120 AppendSource(out, source, index_name); 134 AppendSource(out, config, source, index_name);
121 out += ".bbb"; 135 out += ".bbb";
122 break; 136 break;
123 default: 137 default:
@@ -128,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier
128} 142}
129 143
130/// Writes the alpha component to use for the specified TEV stage alpha modifier 144/// Writes the alpha component to use for the specified TEV stage alpha modifier
131static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, 145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier,
132 TevStageConfig::Source source, const std::string& index_name) { 146 TevStageConfig::Source source, const std::string& index_name) {
133 using AlphaModifier = TevStageConfig::AlphaModifier; 147 using AlphaModifier = TevStageConfig::AlphaModifier;
134 switch (modifier) { 148 switch (modifier) {
135 case AlphaModifier::SourceAlpha: 149 case AlphaModifier::SourceAlpha:
136 AppendSource(out, source, index_name); 150 AppendSource(out, config, source, index_name);
137 out += ".a"; 151 out += ".a";
138 break; 152 break;
139 case AlphaModifier::OneMinusSourceAlpha: 153 case AlphaModifier::OneMinusSourceAlpha:
140 out += "1.0 - "; 154 out += "1.0 - ";
141 AppendSource(out, source, index_name); 155 AppendSource(out, config, source, index_name);
142 out += ".a"; 156 out += ".a";
143 break; 157 break;
144 case AlphaModifier::SourceRed: 158 case AlphaModifier::SourceRed:
145 AppendSource(out, source, index_name); 159 AppendSource(out, config, source, index_name);
146 out += ".r"; 160 out += ".r";
147 break; 161 break;
148 case AlphaModifier::OneMinusSourceRed: 162 case AlphaModifier::OneMinusSourceRed:
149 out += "1.0 - "; 163 out += "1.0 - ";
150 AppendSource(out, source, index_name); 164 AppendSource(out, config, source, index_name);
151 out += ".r"; 165 out += ".r";
152 break; 166 break;
153 case AlphaModifier::SourceGreen: 167 case AlphaModifier::SourceGreen:
154 AppendSource(out, source, index_name); 168 AppendSource(out, config, source, index_name);
155 out += ".g"; 169 out += ".g";
156 break; 170 break;
157 case AlphaModifier::OneMinusSourceGreen: 171 case AlphaModifier::OneMinusSourceGreen:
158 out += "1.0 - "; 172 out += "1.0 - ";
159 AppendSource(out, source, index_name); 173 AppendSource(out, config, source, index_name);
160 out += ".g"; 174 out += ".g";
161 break; 175 break;
162 case AlphaModifier::SourceBlue: 176 case AlphaModifier::SourceBlue:
163 AppendSource(out, source, index_name); 177 AppendSource(out, config, source, index_name);
164 out += ".b"; 178 out += ".b";
165 break; 179 break;
166 case AlphaModifier::OneMinusSourceBlue: 180 case AlphaModifier::OneMinusSourceBlue:
167 out += "1.0 - "; 181 out += "1.0 - ";
168 AppendSource(out, source, index_name); 182 AppendSource(out, config, source, index_name);
169 out += ".b"; 183 out += ".b";
170 break; 184 break;
171 default: 185 default:
@@ -287,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
287 301
288/// Writes the code to emulate the specified TEV stage 302/// Writes the code to emulate the specified TEV stage
289static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
290 auto& stage = config.tev_stages[index]; 304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
291 if (!IsPassThroughTevStage(stage)) { 305 if (!IsPassThroughTevStage(stage)) {
292 std::string index_name = std::to_string(index); 306 std::string index_name = std::to_string(index);
293 307
294 out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; 308 out += "vec3 color_results_" + index_name + "[3] = vec3[3](";
295 AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); 309 AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
296 out += ", "; 310 out += ", ";
297 AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); 311 AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
298 out += ", "; 312 out += ", ";
299 AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); 313 AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
300 out += ");\n"; 314 out += ");\n";
301 315
302 out += "vec3 color_output_" + index_name + " = "; 316 out += "vec3 color_output_" + index_name + " = ";
@@ -304,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
304 out += ";\n"; 318 out += ";\n";
305 319
306 out += "float alpha_results_" + index_name + "[3] = float[3]("; 320 out += "float alpha_results_" + index_name + "[3] = float[3](";
307 AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); 321 AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
308 out += ", "; 322 out += ", ";
309 AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); 323 AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
310 out += ", "; 324 out += ", ";
311 AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); 325 AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
312 out += ");\n"; 326 out += ");\n";
313 327
314 out += "float alpha_output_" + index_name + " = "; 328 out += "float alpha_output_" + index_name + " = ";
@@ -331,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
331 345
332/// Writes the code to emulate fragment lighting 346/// Writes the code to emulate fragment lighting
333static void WriteLighting(std::string& out, const PicaShaderConfig& config) { 347static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
348 const auto& lighting = config.state.lighting;
349
334 // Define lighting globals 350 // Define lighting globals
335 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 351 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
336 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 352 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@@ -338,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
338 "vec3 refl_value = vec3(0.0);\n"; 354 "vec3 refl_value = vec3(0.0);\n";
339 355
340 // Compute fragment normals 356 // Compute fragment normals
341 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
342 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
343 std::string bump_selector = std::to_string(config.lighting.bump_selector); 359 std::string bump_selector = std::to_string(lighting.bump_selector);
344 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
345 361
346 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
347 if (config.lighting.bump_renorm) { 363 if (lighting.bump_renorm) {
348 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
349 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
350 } 366 }
351 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
352 // Bump mapping is enabled using a tangent map 368 // Bump mapping is enabled using a tangent map
353 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); 369 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
354 UNIMPLEMENTED(); 370 UNIMPLEMENTED();
@@ -361,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
361 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
362 378
363 // Gets the index into the specified lookup table for specular lighting 379 // Gets the index into the specified lookup table for specular lighting
364 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { 380 auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) {
365 const std::string half_angle = "normalize(normalize(view) + light_vector)"; 381 const std::string half_angle = "normalize(normalize(view) + light_vector)";
366 std::string index; 382 std::string index;
367 switch (input) { 383 switch (input) {
@@ -389,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
389 405
390 if (abs) { 406 if (abs) {
391 // LUT index is in the range of (0.0, 1.0) 407 // LUT index is in the range of (0.0, 1.0)
392 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 408 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
393 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 409 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
394 } else { 410 } else {
395 // LUT index is in the range of (-1.0, 1.0) 411 // LUT index is in the range of (-1.0, 1.0)
@@ -407,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
407 }; 423 };
408 424
409 // Write the code to emulate each enabled light 425 // Write the code to emulate each enabled light
410 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { 426 for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
411 const auto& light_config = config.lighting.light[light_index]; 427 const auto& light_config = lighting.light[light_index];
412 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; 428 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
413 429
414 // Compute light vector (directional or positional) 430 // Compute light vector (directional or positional)
@@ -432,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
432 } 448 }
433 449
434 // If enabled, clamp specular component if lighting result is negative 450 // If enabled, clamp specular component if lighting result is negative
435 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 451 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
436 452
437 // Specular 0 component 453 // Specular 0 component
438 std::string d0_lut_value = "1.0"; 454 std::string d0_lut_value = "1.0";
439 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 455 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
440 // Lookup specular "distribution 0" LUT value 456 // Lookup specular "distribution 0" LUT value
441 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); 457 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
442 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 458 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
443 } 459 }
444 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 460 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
445 461
446 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 462 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
447 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 463 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
448 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); 464 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
449 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 465 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
450 out += "refl_value.r = " + value + ";\n"; 466 out += "refl_value.r = " + value + ";\n";
451 } else { 467 } else {
452 out += "refl_value.r = 1.0;\n"; 468 out += "refl_value.r = 1.0;\n";
453 } 469 }
454 470
455 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 471 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
456 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 472 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
457 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); 473 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
458 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 474 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
459 out += "refl_value.g = " + value + ";\n"; 475 out += "refl_value.g = " + value + ";\n";
460 } else { 476 } else {
461 out += "refl_value.g = refl_value.r;\n"; 477 out += "refl_value.g = refl_value.r;\n";
462 } 478 }
463 479
464 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 480 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
465 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 481 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); 482 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
467 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 483 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
468 out += "refl_value.b = " + value + ";\n"; 484 out += "refl_value.b = " + value + ";\n";
469 } else { 485 } else {
470 out += "refl_value.b = refl_value.r;\n"; 486 out += "refl_value.b = refl_value.r;\n";
@@ -472,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
472 488
473 // Specular 1 component 489 // Specular 1 component
474 std::string d1_lut_value = "1.0"; 490 std::string d1_lut_value = "1.0";
475 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 491 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
476 // Lookup specular "distribution 1" LUT value 492 // Lookup specular "distribution 1" LUT value
477 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); 493 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
478 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 494 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
479 } 495 }
480 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 496 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
481 497
482 // Fresnel 498 // Fresnel
483 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 499 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
484 // Lookup fresnel LUT value 500 // Lookup fresnel LUT value
485 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); 501 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
486 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 502 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
487 503
488 // Enabled for difffuse lighting alpha component 504 // Enabled for difffuse lighting alpha component
489 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 505 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
490 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 506 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
491 out += "diffuse_sum.a *= " + value + ";\n"; 507 out += "diffuse_sum.a *= " + value + ";\n";
492 508
493 // Enabled for the specular lighting alpha component 509 // Enabled for the specular lighting alpha component
494 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || 510 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
495 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 511 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
496 out += "specular_sum.a *= " + value + ";\n"; 512 out += "specular_sum.a *= " + value + ";\n";
497 } 513 }
498 514
@@ -510,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
510} 526}
511 527
512std::string GenerateFragmentShader(const PicaShaderConfig& config) { 528std::string GenerateFragmentShader(const PicaShaderConfig& config) {
529 const auto& state = config.state;
530
513 std::string out = R"( 531 std::string out = R"(
514#version 330 core 532#version 330 core
515#define NUM_TEV_STAGES 6 533#define NUM_TEV_STAGES 6
@@ -519,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
519 537
520in vec4 primary_color; 538in vec4 primary_color;
521in vec2 texcoord[3]; 539in vec2 texcoord[3];
540in float texcoord0_w;
522in vec4 normquat; 541in vec4 normquat;
523in vec3 view; 542in vec3 view;
524 543
@@ -536,6 +555,7 @@ layout (std140) uniform shader_data {
536 vec4 const_color[NUM_TEV_STAGES]; 555 vec4 const_color[NUM_TEV_STAGES];
537 vec4 tev_combiner_buffer_color; 556 vec4 tev_combiner_buffer_color;
538 int alphatest_ref; 557 int alphatest_ref;
558 float depth_scale;
539 float depth_offset; 559 float depth_offset;
540 vec3 lighting_global_ambient; 560 vec3 lighting_global_ambient;
541 LightSrc light_src[NUM_LIGHTS]; 561 LightSrc light_src[NUM_LIGHTS];
@@ -555,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0);
555)"; 575)";
556 576
557 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 577 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
558 if (config.alpha_test_func == Regs::CompareFunc::Never) { 578 if (state.alpha_test_func == Regs::CompareFunc::Never) {
559 out += "discard; }"; 579 out += "discard; }";
560 return out; 580 return out;
561 } 581 }
562 582
563 if (config.lighting.enable) 583 if (state.lighting.enable)
564 WriteLighting(out, config); 584 WriteLighting(out, config);
565 585
566 out += "vec4 combiner_buffer = vec4(0.0);\n"; 586 out += "vec4 combiner_buffer = vec4(0.0);\n";
567 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 587 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
568 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 588 out += "vec4 last_tex_env_out = vec4(0.0);\n";
569 589
570 for (size_t index = 0; index < config.tev_stages.size(); ++index) 590 for (size_t index = 0; index < state.tev_stages.size(); ++index)
571 WriteTevStage(out, config, (unsigned)index); 591 WriteTevStage(out, config, (unsigned)index);
572 592
573 if (config.alpha_test_func != Regs::CompareFunc::Always) { 593 if (state.alpha_test_func != Regs::CompareFunc::Always) {
574 out += "if ("; 594 out += "if (";
575 AppendAlphaTestCondition(out, config.alpha_test_func); 595 AppendAlphaTestCondition(out, state.alpha_test_func);
576 out += ") discard;\n"; 596 out += ") discard;\n";
577 } 597 }
578 598
579 out += "color = last_tex_env_out;\n"; 599 out += "color = last_tex_env_out;\n";
580 out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; 600
601 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
602 out += "float depth = z_over_w * depth_scale + depth_offset;\n";
603 if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
604 out += "depth /= gl_FragCoord.w;\n";
605 }
606 out += "gl_FragDepth = depth;\n";
607
608 out += "}";
581 609
582 return out; 610 return out;
583} 611}
@@ -585,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0);
585std::string GenerateVertexShader() { 613std::string GenerateVertexShader() {
586 std::string out = "#version 330 core\n"; 614 std::string out = "#version 330 core\n";
587 615
588 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 616 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
589 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 617 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
590 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 618 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
591 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 619 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
592 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 620 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
593 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 621 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n";
594 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 622 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
623 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
595 624
596 out += R"( 625 out += R"(
597out vec4 primary_color; 626out vec4 primary_color;
598out vec2 texcoord[3]; 627out vec2 texcoord[3];
628out float texcoord0_w;
599out vec4 normquat; 629out vec4 normquat;
600out vec3 view; 630out vec3 view;
601 631
@@ -604,6 +634,7 @@ void main() {
604 texcoord[0] = vert_texcoord0; 634 texcoord[0] = vert_texcoord0;
605 texcoord[1] = vert_texcoord1; 635 texcoord[1] = vert_texcoord1;
606 texcoord[2] = vert_texcoord2; 636 texcoord[2] = vert_texcoord2;
637 texcoord0_w = vert_texcoord0_w;
607 normquat = vert_normquat; 638 normquat = vert_normquat;
608 view = vert_view; 639 view = vert_view;
609 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 640 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3eb07d57a..bef3249cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
6 6
7#include <string> 7#include <string>
8 8
9struct PicaShaderConfig; 9union PicaShaderConfig;
10 10
11namespace GLShader { 11namespace GLShader {
12 12
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 097242f6f..f59912f79 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,7 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_TEXCOORD0_W,
17 ATTRIBUTE_NORMQUAT, 18 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW, 19 ATTRIBUTE_VIEW,
19}; 20};
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 65dcc9156..e93a9d92a 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -35,7 +35,13 @@ static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
35static const JitShader* jit_shader; 35static const JitShader* jit_shader;
36#endif // ARCHITECTURE_x86_64 36#endif // ARCHITECTURE_x86_64
37 37
38void Setup() { 38void ClearCache() {
39#ifdef ARCHITECTURE_x86_64
40 shader_map.clear();
41#endif // ARCHITECTURE_x86_64
42}
43
44void ShaderSetup::Setup() {
39#ifdef ARCHITECTURE_x86_64 45#ifdef ARCHITECTURE_x86_64
40 if (VideoCore::g_shader_jit_enabled) { 46 if (VideoCore::g_shader_jit_enabled) {
41 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 47 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
@@ -54,20 +60,13 @@ void Setup() {
54#endif // ARCHITECTURE_x86_64 60#endif // ARCHITECTURE_x86_64
55} 61}
56 62
57void Shutdown() { 63MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
58#ifdef ARCHITECTURE_x86_64
59 shader_map.clear();
60#endif // ARCHITECTURE_x86_64
61}
62
63MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
64 64
65OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
66 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
67 67
68 MICROPROFILE_SCOPE(GPU_VertexShader); 68 MICROPROFILE_SCOPE(GPU_Shader);
69 69
70 state.program_counter = config.main_offset;
71 state.debug.max_offset = 0; 70 state.debug.max_offset = 0;
72 state.debug.max_opdesc_id = 0; 71 state.debug.max_opdesc_id = 0;
73 72
@@ -140,10 +139,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
140 return ret; 139 return ret;
141} 140}
142 141
143DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 142DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
144 UnitState<true> state; 143 UnitState<true> state;
145 144
146 state.program_counter = config.main_offset;
147 state.debug.max_offset = 0; 145 state.debug.max_offset = 0;
148 state.debug.max_opdesc_id = 0; 146 state.debug.max_opdesc_id = 0;
149 147
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 56b83bfeb..983e4a967 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -43,7 +43,8 @@ struct OutputVertex {
43 Math::Vec4<float24> color; 43 Math::Vec4<float24> color;
44 Math::Vec2<float24> tc0; 44 Math::Vec2<float24> tc0;
45 Math::Vec2<float24> tc1; 45 Math::Vec2<float24> tc1;
46 INSERT_PADDING_WORDS(2); 46 float24 tc0_w;
47 INSERT_PADDING_WORDS(1);
47 Math::Vec3<float24> view; 48 Math::Vec3<float24> view;
48 INSERT_PADDING_WORDS(1); 49 INSERT_PADDING_WORDS(1);
49 Math::Vec2<float24> tc2; 50 Math::Vec2<float24> tc2;
@@ -83,23 +84,6 @@ struct OutputVertex {
83static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 84static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
84static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 85static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
85 86
86/// Vertex shader memory
87struct ShaderSetup {
88 struct {
89 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
90 // therefore required to be 16-byte aligned.
91 alignas(16) Math::Vec4<float24> f[96];
92
93 std::array<bool, 16> b;
94 std::array<Math::Vec4<u8>, 4> i;
95 } uniforms;
96
97 Math::Vec4<float24> default_attributes[16];
98
99 std::array<u32, 1024> program_code;
100 std::array<u32, 1024> swizzle_data;
101};
102
103// Helper structure used to keep track of data useful for inspection of shader emulation 87// Helper structure used to keep track of data useful for inspection of shader emulation
104template<bool full_debugging> 88template<bool full_debugging>
105struct DebugData; 89struct DebugData;
@@ -288,29 +272,12 @@ struct UnitState {
288 } registers; 272 } registers;
289 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 273 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
290 274
291 u32 program_counter;
292 bool conditional_code[2]; 275 bool conditional_code[2];
293 276
294 // Two Address registers and one loop counter 277 // Two Address registers and one loop counter
295 // TODO: How many bits do these actually have? 278 // TODO: How many bits do these actually have?
296 s32 address_registers[3]; 279 s32 address_registers[3];
297 280
298 enum {
299 INVALID_ADDRESS = 0xFFFFFFFF
300 };
301
302 struct CallStackElement {
303 u32 final_address; // Address upon which we jump to return_address
304 u32 return_address; // Where to jump when leaving scope
305 u8 repeat_counter; // How often to repeat until this call stack element is removed
306 u8 loop_increment; // Which value to add to the loop counter after an iteration
307 // TODO: Should this be a signed value? Does it even matter?
308 u32 loop_address; // The address where we'll return to after each loop iteration
309 };
310
311 // TODO: Is there a maximal size for this?
312 boost::container::static_vector<CallStackElement, 16> call_stack;
313
314 DebugData<Debug> debug; 281 DebugData<Debug> debug;
315 282
316 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
@@ -342,33 +309,49 @@ struct UnitState {
342 } 309 }
343}; 310};
344 311
345/** 312/// Clears the shader cache
346 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 313void ClearCache();
347 * vertex, which would happen within the `Run` function).
348 */
349void Setup();
350 314
351/// Performs any cleanup when the emulator is shutdown 315struct ShaderSetup {
352void Shutdown();
353 316
354/** 317 struct {
355 * Runs the currently setup shader 318 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
356 * @param state Shader unit state, must be setup per shader and per shader unit 319 // therefore required to be 16-byte aligned.
357 * @param input Input vertex into the shader 320 alignas(16) Math::Vec4<float24> f[96];
358 * @param num_attributes The number of vertex shader attributes
359 * @return The output vertex, after having been processed by the vertex shader
360 */
361OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
362 321
363/** 322 std::array<bool, 16> b;
364 * Produce debug information based on the given shader and input vertex 323 std::array<Math::Vec4<u8>, 4> i;
365 * @param input Input vertex into the shader 324 } uniforms;
366 * @param num_attributes The number of vertex shader attributes 325
367 * @param config Configuration object for the shader pipeline 326 std::array<u32, 1024> program_code;
368 * @param setup Setup object for the shader pipeline 327 std::array<u32, 1024> swizzle_data;
369 * @return Debug information for this shader with regards to the given vertex 328
370 */ 329 /**
371DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 330 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
331 * vertex, which would happen within the `Run` function).
332 */
333 void Setup();
334
335 /**
336 * Runs the currently setup shader
337 * @param state Shader unit state, must be setup per shader and per shader unit
338 * @param input Input vertex into the shader
339 * @param num_attributes The number of vertex shader attributes
340 * @return The output vertex, after having been processed by the vertex shader
341 */
342 OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
343
344 /**
345 * Produce debug information based on the given shader and input vertex
346 * @param input Input vertex into the shader
347 * @param num_attributes The number of vertex shader attributes
348 * @param config Configuration object for the shader pipeline
349 * @param setup Setup object for the shader pipeline
350 * @return Debug information for this shader with regards to the given vertex
351 */
352 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
353
354};
372 355
373} // namespace Shader 356} // namespace Shader
374 357
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 7710f7fbc..3a827d11f 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -29,8 +29,24 @@ namespace Pica {
29 29
30namespace Shader { 30namespace Shader {
31 31
32constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF;
33
34struct CallStackElement {
35 u32 final_address; // Address upon which we jump to return_address
36 u32 return_address; // Where to jump when leaving scope
37 u8 repeat_counter; // How often to repeat until this call stack element is removed
38 u8 loop_increment; // Which value to add to the loop counter after an iteration
39 // TODO: Should this be a signed value? Does it even matter?
40 u32 loop_address; // The address where we'll return to after each loop iteration
41};
42
32template<bool Debug> 43template<bool Debug>
33void RunInterpreter(UnitState<Debug>& state) { 44void RunInterpreter(UnitState<Debug>& state) {
45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack;
47
48 u32 program_counter = g_state.regs.vs.main_offset;
49
34 const auto& uniforms = g_state.vs.uniforms; 50 const auto& uniforms = g_state.vs.uniforms;
35 const auto& swizzle_data = g_state.vs.swizzle_data; 51 const auto& swizzle_data = g_state.vs.swizzle_data;
36 const auto& program_code = g_state.vs.program_code; 52 const auto& program_code = g_state.vs.program_code;
@@ -41,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) {
41 unsigned iteration = 0; 57 unsigned iteration = 0;
42 bool exit_loop = false; 58 bool exit_loop = false;
43 while (!exit_loop) { 59 while (!exit_loop) {
44 if (!state.call_stack.empty()) { 60 if (!call_stack.empty()) {
45 auto& top = state.call_stack.back(); 61 auto& top = call_stack.back();
46 if (state.program_counter == top.final_address) { 62 if (program_counter == top.final_address) {
47 state.address_registers[2] += top.loop_increment; 63 state.address_registers[2] += top.loop_increment;
48 64
49 if (top.repeat_counter-- == 0) { 65 if (top.repeat_counter-- == 0) {
50 state.program_counter = top.return_address; 66 program_counter = top.return_address;
51 state.call_stack.pop_back(); 67 call_stack.pop_back();
52 } else { 68 } else {
53 state.program_counter = top.loop_address; 69 program_counter = top.loop_address;
54 } 70 }
55 71
56 // TODO: Is "trying again" accurate to hardware? 72 // TODO: Is "trying again" accurate to hardware?
@@ -58,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) {
58 } 74 }
59 } 75 }
60 76
61 const Instruction instr = { program_code[state.program_counter] }; 77 const Instruction instr = { program_code[program_counter] };
62 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
63 79
64 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
65 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 return_offset, u8 repeat_count, u8 loop_increment) {
66 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
67 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 83 ASSERT(call_stack.size() < call_stack.capacity());
68 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
69 }; 85 };
70 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
71 if (iteration > 0) 87 if (iteration > 0)
72 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); 88 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
73 89
74 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 90 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
75 91
76 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 92 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
77 switch (source_reg.GetRegisterType()) { 93 switch (source_reg.GetRegisterType()) {
@@ -519,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) {
519 case OpCode::Id::JMPC: 535 case OpCode::Id::JMPC:
520 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
521 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
522 state.program_counter = instr.flow_control.dest_offset - 1; 538 program_counter = instr.flow_control.dest_offset - 1;
523 } 539 }
524 break; 540 break;
525 541
@@ -527,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) {
527 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
528 544
529 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
530 state.program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
531 } 547 }
532 break; 548 break;
533 549
@@ -535,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) {
535 call(state, 551 call(state,
536 instr.flow_control.dest_offset, 552 instr.flow_control.dest_offset,
537 instr.flow_control.num_instructions, 553 instr.flow_control.num_instructions,
538 state.program_counter + 1, 0, 0); 554 program_counter + 1, 0, 0);
539 break; 555 break;
540 556
541 case OpCode::Id::CALLU: 557 case OpCode::Id::CALLU:
@@ -544,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) {
544 call(state, 560 call(state,
545 instr.flow_control.dest_offset, 561 instr.flow_control.dest_offset,
546 instr.flow_control.num_instructions, 562 instr.flow_control.num_instructions,
547 state.program_counter + 1, 0, 0); 563 program_counter + 1, 0, 0);
548 } 564 }
549 break; 565 break;
550 566
@@ -554,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) {
554 call(state, 570 call(state,
555 instr.flow_control.dest_offset, 571 instr.flow_control.dest_offset,
556 instr.flow_control.num_instructions, 572 instr.flow_control.num_instructions,
557 state.program_counter + 1, 0, 0); 573 program_counter + 1, 0, 0);
558 } 574 }
559 break; 575 break;
560 576
@@ -565,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) {
565 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
566 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 582 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
567 call(state, 583 call(state,
568 state.program_counter + 1, 584 program_counter + 1,
569 instr.flow_control.dest_offset - state.program_counter - 1, 585 instr.flow_control.dest_offset - program_counter - 1,
570 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
571 } else { 587 } else {
572 call(state, 588 call(state,
@@ -584,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) {
584 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
585 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
586 call(state, 602 call(state,
587 state.program_counter + 1, 603 program_counter + 1,
588 instr.flow_control.dest_offset - state.program_counter - 1, 604 instr.flow_control.dest_offset - program_counter - 1,
589 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
590 } else { 606 } else {
591 call(state, 607 call(state,
@@ -607,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) {
607 623
608 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
609 call(state, 625 call(state,
610 state.program_counter + 1, 626 program_counter + 1,
611 instr.flow_control.dest_offset - state.program_counter + 1, 627 instr.flow_control.dest_offset - program_counter + 1,
612 instr.flow_control.dest_offset + 1, 628 instr.flow_control.dest_offset + 1,
613 loop_param.x, 629 loop_param.x,
614 loop_param.z); 630 loop_param.z);
@@ -625,7 +641,7 @@ void RunInterpreter(UnitState<Debug>& state) {
625 } 641 }
626 } 642 }
627 643
628 ++state.program_counter; 644 ++program_counter;
629 ++iteration; 645 ++iteration;
630 } 646 }
631} 647}
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index 21ae52949..83896814f 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -124,7 +124,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) { 125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so 126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i]; 127 input.attr[i] = g_state.vs_default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", 128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index, 129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),