summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt11
-rw-r--r--externals/cmake-modules/FindSDL2.cmake239
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp53
-rw-r--r--src/core/hle/kernel/address_arbiter.h2
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/core/hle/service/audio/hwopus.cpp6
-rw-r--r--src/core/hle/service/bcat/backend/backend.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp91
-rw-r--r--src/video_core/engines/maxwell_3d.h23
-rw-r--r--src/video_core/engines/shader_bytecode.h4
-rw-r--r--src/video_core/gpu.cpp16
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_state.h1
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp4
-rw-r--r--src/video_core/shader/decode/bfi.cpp2
17 files changed, 127 insertions, 337 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc782e252..44ed4196d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,15 +151,16 @@ if (ENABLE_SDL2)
151 set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") 151 set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers")
152 set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") 152 set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library")
153 set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") 153 set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll")
154 else()
155 find_package(SDL2 REQUIRED)
156 endif()
157 154
158 if (SDL2_FOUND)
159 # TODO(yuriks): Make FindSDL2.cmake export an IMPORTED library instead
160 add_library(SDL2 INTERFACE) 155 add_library(SDL2 INTERFACE)
161 target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}") 156 target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}")
162 target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}") 157 target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}")
158 else()
159 find_package(SDL2 REQUIRED)
160 include_directories(${SDL2_INCLUDE_DIRS})
161
162 add_library(SDL2 INTERFACE)
163 target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
163 endif() 164 endif()
164else() 165else()
165 set(SDL2_FOUND NO) 166 set(SDL2_FOUND NO)
diff --git a/externals/cmake-modules/FindSDL2.cmake b/externals/cmake-modules/FindSDL2.cmake
deleted file mode 100644
index 22ce752c5..000000000
--- a/externals/cmake-modules/FindSDL2.cmake
+++ /dev/null
@@ -1,239 +0,0 @@
1
2# This module defines
3# SDL2_LIBRARY, the name of the library to link against
4# SDL2_FOUND, if false, do not try to link to SDL2
5# SDL2_INCLUDE_DIR, where to find SDL.h
6# SDL2_DLL_DIR, where to find SDL2.dll if it exists
7#
8# This module responds to the flag:
9# SDL2_BUILDING_LIBRARY
10# If this is defined, then no SDL2main will be linked in because
11# only applications need main().
12# Otherwise, it is assumed you are building an application and this
13# module will attempt to locate and set the proper link flags
14# as part of the returned SDL2_LIBRARY variable.
15#
16# Don't forget to include SDLmain.h and SDLmain.m in your project for the
17# OS X framework based version. (Other versions link to -lSDL2main which
18# this module will try to find on your behalf.) Also for OS X, this
19# module will automatically add the -framework Cocoa on your behalf.
20#
21#
22# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your configuration
23# and no SDL2_LIBRARY, it means CMake did not find your SDL2 library
24# (SDL2.dll, libsdl2.so, SDL2.framework, etc).
25# Set SDL2_LIBRARY_TEMP to point to your SDL2 library, and configure again.
26# Similarly, if you see an empty SDL2MAIN_LIBRARY, you should set this value
27# as appropriate. These values are used to generate the final SDL2_LIBRARY
28# variable, but when these values are unset, SDL2_LIBRARY does not get created.
29#
30#
31# $SDL2DIR is an environment variable that would
32# correspond to the ./configure --prefix=$SDL2DIR
33# used in building SDL2.
34# l.e.galup 9-20-02
35#
36# Modified by Eric Wing.
37# Added code to assist with automated building by using environmental variables
38# and providing a more controlled/consistent search behavior.
39# Added new modifications to recognize OS X frameworks and
40# additional Unix paths (FreeBSD, etc).
41# Also corrected the header search path to follow "proper" SDL guidelines.
42# Added a search for SDL2main which is needed by some platforms.
43# Added a search for threads which is needed by some platforms.
44# Added needed compile switches for MinGW.
45#
46# On OSX, this will prefer the Framework version (if found) over others.
47# People will have to manually change the cache values of
48# SDL2_LIBRARY to override this selection or set the CMake environment
49# CMAKE_INCLUDE_PATH to modify the search paths.
50#
51# Note that the header path has changed from SDL2/SDL.h to just SDL.h
52# This needed to change because "proper" SDL convention
53# is #include "SDL.h", not <SDL2/SDL.h>. This is done for portability
54# reasons because not all systems place things in SDL2/ (see FreeBSD).
55
56#=============================================================================
57# Copyright 2003-2009 Kitware, Inc.
58#
59# Distributed under the OSI-approved BSD License (the "License").
60#
61# This software is distributed WITHOUT ANY WARRANTY; without even the
62# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
63# See the License for more information.
64#=============================================================================
65# CMake - Cross Platform Makefile Generator
66# Copyright 2000-2016 Kitware, Inc.
67# Copyright 2000-2011 Insight Software Consortium
68# All rights reserved.
69#
70# Redistribution and use in source and binary forms, with or without
71# modification, are permitted provided that the following conditions
72# are met:
73#
74# * Redistributions of source code must retain the above copyright
75# notice, this list of conditions and the following disclaimer.
76#
77# * Redistributions in binary form must reproduce the above copyright
78# notice, this list of conditions and the following disclaimer in the
79# documentation and/or other materials provided with the distribution.
80#
81# * Neither the names of Kitware, Inc., the Insight Software Consortium,
82# nor the names of their contributors may be used to endorse or promote
83# products derived from this software without specific prior written
84# permission.
85#
86# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
87# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
88# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
89# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
90# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
91# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
92# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
93# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
94# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
95# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
96# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97#
98# ------------------------------------------------------------------------------
99#
100# The above copyright and license notice applies to distributions of
101# CMake in source and binary form. Some source files contain additional
102# notices of original copyright by their contributors; see each source
103# for details. Third-party software packages supplied with CMake under
104# compatible licenses provide their own copyright notices documented in
105# corresponding subdirectories.
106#
107# ------------------------------------------------------------------------------
108#
109# CMake was initially developed by Kitware with the following sponsorship:
110#
111# * National Library of Medicine at the National Institutes of Health
112# as part of the Insight Segmentation and Registration Toolkit (ITK).
113#
114# * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
115# Visualization Initiative.
116#
117# * National Alliance for Medical Image Computing (NAMIC) is funded by the
118# National Institutes of Health through the NIH Roadmap for Medical Research,
119# Grant U54 EB005149.
120#
121# * Kitware, Inc.
122#
123
124message("<FindSDL2.cmake>")
125
126SET(SDL2_SEARCH_PATHS
127 ~/Library/Frameworks
128 /Library/Frameworks
129 /usr/local
130 /usr
131 /sw # Fink
132 /opt/local # DarwinPorts
133 /opt/csw # Blastwave
134 /opt
135 ${SDL2_PATH}
136)
137
138if(CMAKE_SIZEOF_VOID_P EQUAL 8)
139 set(VC_LIB_PATH_SUFFIX lib/x64)
140else()
141 set(VC_LIB_PATH_SUFFIX lib/x86)
142endif()
143
144FIND_LIBRARY(SDL2_LIBRARY_TEMP
145 NAMES SDL2
146 HINTS
147 $ENV{SDL2DIR}
148 PATH_SUFFIXES lib64 lib ${VC_LIB_PATH_SUFFIX}
149 PATHS ${SDL2_SEARCH_PATHS}
150)
151
152IF(SDL2_LIBRARY_TEMP)
153 if(MSVC)
154 get_filename_component(SDL2_DLL_DIR_TEMP ${SDL2_LIBRARY_TEMP} DIRECTORY)
155 if(EXISTS ${SDL2_DLL_DIR_TEMP}/SDL2.dll)
156 set(SDL2_DLL_DIR ${SDL2_DLL_DIR_TEMP})
157 unset(SDL2_DLL_DIR_TEMP)
158 endif()
159 endif()
160
161 FIND_PATH(SDL2_INCLUDE_DIR SDL.h
162 HINTS
163 $ENV{SDL2DIR}
164 PATH_SUFFIXES include/SDL2 include
165 PATHS ${SDL2_SEARCH_PATHS}
166 )
167
168 IF(NOT SDL2_BUILDING_LIBRARY)
169 IF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
170 # Non-OS X framework versions expect you to also dynamically link to
171 # SDL2main. This is mainly for Windows and OS X. Other (Unix) platforms
172 # seem to provide SDL2main for compatibility even though they don't
173 # necessarily need it.
174 FIND_LIBRARY(SDL2MAIN_LIBRARY
175 NAMES SDL2main
176 HINTS
177 $ENV{SDL2DIR}
178 PATH_SUFFIXES lib64 lib
179 PATHS ${SDL2_SEARCH_PATHS}
180 )
181 ENDIF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
182 ENDIF(NOT SDL2_BUILDING_LIBRARY)
183
184 # SDL2 may require threads on your system.
185 # The Apple build may not need an explicit flag because one of the
186 # frameworks may already provide it.
187 # But for non-OSX systems, I will use the CMake Threads package.
188 IF(NOT APPLE)
189 FIND_PACKAGE(Threads)
190 ENDIF(NOT APPLE)
191
192 # MinGW needs an additional library, mwindows
193 # Its total link flags should look like -lmingw32 -lSDL2main -lSDL2 -lmwindows
194 # (Actually on second look, I think it only needs one of the m* libraries.)
195 IF(MINGW)
196 SET(MINGW32_LIBRARY mingw32 CACHE STRING "mwindows for MinGW")
197 ENDIF(MINGW)
198
199 # For SDL2main
200 IF(NOT SDL2_BUILDING_LIBRARY)
201 IF(SDL2MAIN_LIBRARY)
202 SET(SDL2_LIBRARY_TEMP ${SDL2MAIN_LIBRARY} ${SDL2_LIBRARY_TEMP})
203 ENDIF(SDL2MAIN_LIBRARY)
204 ENDIF(NOT SDL2_BUILDING_LIBRARY)
205
206 # For OS X, SDL2 uses Cocoa as a backend so it must link to Cocoa.
207 # CMake doesn't display the -framework Cocoa string in the UI even
208 # though it actually is there if I modify a pre-used variable.
209 # I think it has something to do with the CACHE STRING.
210 # So I use a temporary variable until the end so I can set the
211 # "real" variable in one-shot.
212 IF(APPLE)
213 SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} "-framework Cocoa")
214 ENDIF(APPLE)
215
216 # For threads, as mentioned Apple doesn't need this.
217 # In fact, there seems to be a problem if I used the Threads package
218 # and try using this line, so I'm just skipping it entirely for OS X.
219 IF(NOT APPLE)
220 SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} ${CMAKE_THREAD_LIBS_INIT})
221 ENDIF(NOT APPLE)
222
223 # For MinGW library
224 IF(MINGW)
225 SET(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP})
226 ENDIF(MINGW)
227
228 # Set the final string here so the GUI reflects the final state.
229 SET(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL2 Library can be found")
230
231 # Unset the temp variable to INTERNAL so it is not seen in the CMake GUI
232 UNSET(SDL2_LIBRARY_TEMP)
233ENDIF(SDL2_LIBRARY_TEMP)
234
235message("</FindSDL2.cmake>")
236
237INCLUDE(FindPackageHandleStandardArgs)
238
239FIND_PACKAGE_HANDLE_STANDARD_ARGS(SDL2 REQUIRED_VARS SDL2_LIBRARY SDL2_INCLUDE_DIR)
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 2ea3dcb61..8475b698c 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
201void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { 201void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
202 const VAddr arb_addr = thread->GetArbiterWaitAddress(); 202 const VAddr arb_addr = thread->GetArbiterWaitAddress();
203 std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; 203 std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
204 auto it = thread_list.begin(); 204
205 while (it != thread_list.end()) { 205 const auto iter =
206 const std::shared_ptr<Thread>& current_thread = *it; 206 std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
207 if (current_thread->GetPriority() >= thread->GetPriority()) { 207 return entry->GetPriority() >= thread->GetPriority();
208 thread_list.insert(it, thread); 208 });
209 return; 209
210 } 210 if (iter == thread_list.cend()) {
211 ++it; 211 thread_list.push_back(std::move(thread));
212 } else {
213 thread_list.insert(iter, std::move(thread));
212 } 214 }
213 thread_list.push_back(std::move(thread));
214} 215}
215 216
216void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { 217void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
217 const VAddr arb_addr = thread->GetArbiterWaitAddress(); 218 const VAddr arb_addr = thread->GetArbiterWaitAddress();
218 std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; 219 std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
219 auto it = thread_list.begin(); 220
220 while (it != thread_list.end()) { 221 const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
221 const std::shared_ptr<Thread>& current_thread = *it; 222 [&thread](const auto& entry) { return thread == entry; });
222 if (current_thread.get() == thread.get()) { 223
223 thread_list.erase(it); 224 ASSERT(iter != thread_list.cend());
224 return; 225
225 } 226 thread_list.erase(iter);
226 ++it;
227 }
228 UNREACHABLE();
229} 227}
230 228
231std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { 229std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
232 std::vector<std::shared_ptr<Thread>> result; 230 VAddr address) const {
233 std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; 231 const auto iter = arb_threads.find(address);
234 auto it = thread_list.begin(); 232 if (iter == arb_threads.cend()) {
235 while (it != thread_list.end()) { 233 return {};
236 std::shared_ptr<Thread> current_thread = *it;
237 result.push_back(std::move(current_thread));
238 ++it;
239 } 234 }
240 return result; 235
236 const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
237 return {thread_list.cbegin(), thread_list.cend()};
241} 238}
242} // namespace Kernel 239} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index 386983e54..f958eee5a 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -86,7 +86,7 @@ private:
86 void RemoveThread(std::shared_ptr<Thread> thread); 86 void RemoveThread(std::shared_ptr<Thread> thread);
87 87
88 // Gets the threads waiting on an address. 88 // Gets the threads waiting on an address.
89 std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); 89 std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
90 90
91 /// List of threads waiting for an address arbiter 91 /// List of threads waiting for an address arbiter
92 std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; 92 std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 43b30dd3d..ae5f2c8bd 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -481,7 +481,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
481 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { 481 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
482 return; 482 return;
483 } 483 }
484 auto& scheduler = Core::System::GetInstance().GlobalScheduler(); 484 auto& scheduler = kernel.GlobalScheduler();
485 if (processor_id >= 0) { 485 if (processor_id >= 0) {
486 scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); 486 scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
487 } 487 }
@@ -513,7 +513,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
513} 513}
514 514
515void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { 515void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
516 auto& scheduler = Core::System::GetInstance().GlobalScheduler(); 516 auto& scheduler = kernel.GlobalScheduler();
517 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || 517 if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
518 current_priority >= THREADPRIO_COUNT) { 518 current_priority >= THREADPRIO_COUNT) {
519 return; 519 return;
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index cb839e4a2..d19513cbb 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
170 {3, nullptr, "SetContextForMultiStream"}, 170 {3, nullptr, "SetContextForMultiStream"},
171 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, 171 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
172 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, 172 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
173 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, 173 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
174 {7, nullptr, "DecodeInterleavedForMultiStream"}, 174 {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
175 {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
176 {9, nullptr, "DecodeInterleavedForMultiStream"},
175 }; 177 };
176 // clang-format on 178 // clang-format on
177 179
diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp
index 6f5ea095a..def3410cc 100644
--- a/src/core/hle/service/bcat/backend/backend.cpp
+++ b/src/core/hle/service/bcat/backend/backend.cpp
@@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name,
117} 117}
118 118
119bool NullBackend::Clear(u64 title_id) { 119bool NullBackend::Clear(u64 title_id) {
120 LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); 120 LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
121 121
122 return true; 122 return true;
123} 123}
124 124
125void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { 125void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) {
126 LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, 126 LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
127 Common::HexToString(passphrase)); 127 Common::HexToString(passphrase));
128} 128}
129 129
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..0b3e8749b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h" 11#include "video_core/engines/shader_type.h"
12#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
14#include "video_core/textures/texture.h" 15#include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
519 regs.reg_array[0xd00] = 1; 520 regs.reg_array[0xd00] = 1;
520} 521}
521 522
522void Maxwell3D::ProcessQueryGet() { 523void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
524 struct LongQueryResult {
525 u64_le value;
526 u64_le timestamp;
527 };
528 static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
523 const GPUVAddr sequence_address{regs.query.QueryAddress()}; 529 const GPUVAddr sequence_address{regs.query.QueryAddress()};
524 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 530 if (long_query) {
525 // VAddr before writing. 531 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
532 // GPU, this command may actually take a while to complete in real hardware due to GPU
533 // wait queues.
534 LongQueryResult query_result{payload, system.GPU().GetTicks()};
535 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
536 } else {
537 memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
538 }
539}
526 540
541void Maxwell3D::ProcessQueryGet() {
527 // TODO(Subv): Support the other query units. 542 // TODO(Subv): Support the other query units.
528 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 543 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
529 "Units other than CROP are unimplemented"); 544 "Units other than CROP are unimplemented");
530 545
531 u64 result = 0; 546 switch (regs.query.query_get.operation) {
532 547 case Regs::QueryOperation::Release: {
533 // TODO(Subv): Support the other query variables 548 const u64 result = regs.query.query_sequence;
534 switch (regs.query.query_get.select) { 549 StampQueryResult(result, regs.query.query_get.short_query == 0);
535 case Regs::QuerySelect::Zero:
536 // This seems to actually write the query sequence to the query address.
537 result = regs.query.query_sequence;
538 break; 550 break;
539 default:
540 result = 1;
541 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
542 static_cast<u32>(regs.query.query_get.select.Value()));
543 } 551 }
544 552 case Regs::QueryOperation::Acquire: {
545 // TODO(Subv): Research and implement how query sync conditions work. 553 // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
546 554 // to write a value that matches the current payload.
547 struct LongQueryResult { 555 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
548 u64_le value; 556 break;
549 u64_le timestamp; 557 }
550 }; 558 case Regs::QueryOperation::Counter: {
551 static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); 559 u64 result{};
552 560 switch (regs.query.query_get.select) {
553 switch (regs.query.query_get.mode) { 561 case Regs::QuerySelect::Zero:
554 case Regs::QueryMode::Write: 562 result = 0;
555 case Regs::QueryMode::Write2: { 563 break;
556 u32 sequence = regs.query.query_sequence; 564 default:
557 if (regs.query.query_get.short_query) { 565 result = 1;
558 // Write the current query sequence to the sequence address. 566 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
559 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 567 static_cast<u32>(regs.query.query_get.select.Value()));
560 // query.
561 memory_manager.Write<u32>(sequence_address, sequence);
562 } else {
563 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
564 // GPU, this command may actually take a while to complete in real hardware due to GPU
565 // wait queues.
566 LongQueryResult query_result{};
567 query_result.value = result;
568 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
569 query_result.timestamp = system.CoreTiming().GetTicks();
570 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
571 } 568 }
569 StampQueryResult(result, regs.query.query_get.short_query == 0);
570 break;
571 }
572 case Regs::QueryOperation::Trap: {
573 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
574 break;
575 }
576 default: {
577 UNIMPLEMENTED_MSG("Unknown query operation");
572 break; 578 break;
573 } 579 }
574 default:
575 UNIMPLEMENTED_MSG("Query mode {} not implemented",
576 static_cast<u32>(regs.query.query_get.mode.Value()));
577 } 580 }
578} 581}
579 582
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e437bacb7..0a2af54e5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
71 static constexpr std::size_t MaxConstBuffers = 18; 71 static constexpr std::size_t MaxConstBuffers = 18;
72 static constexpr std::size_t MaxConstBufferSize = 0x10000; 72 static constexpr std::size_t MaxConstBufferSize = 0x10000;
73 73
74 enum class QueryMode : u32 { 74 enum class QueryOperation : u32 {
75 Write = 0, 75 Release = 0,
76 Sync = 1, 76 Acquire = 1,
77 // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 77 Counter = 2,
78 // is. 78 Trap = 3,
79 Write2 = 2,
80 }; 79 };
81 80
82 enum class QueryUnit : u32 { 81 enum class QueryUnit : u32 {
@@ -862,7 +861,11 @@ public:
862 861
863 float point_size; 862 float point_size;
864 863
865 INSERT_UNION_PADDING_WORDS(0x7); 864 INSERT_UNION_PADDING_WORDS(0x1);
865
866 u32 point_sprite_enable;
867
868 INSERT_UNION_PADDING_WORDS(0x5);
866 869
867 u32 zeta_enable; 870 u32 zeta_enable;
868 871
@@ -1077,7 +1080,7 @@ public:
1077 u32 query_sequence; 1080 u32 query_sequence;
1078 union { 1081 union {
1079 u32 raw; 1082 u32 raw;
1080 BitField<0, 2, QueryMode> mode; 1083 BitField<0, 2, QueryOperation> operation;
1081 BitField<4, 1, u32> fence; 1084 BitField<4, 1, u32> fence;
1082 BitField<12, 4, QueryUnit> unit; 1085 BitField<12, 4, QueryUnit> unit;
1083 BitField<16, 1, QuerySyncCondition> sync_cond; 1086 BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1409,6 +1412,9 @@ private:
1409 /// Handles a write to the QUERY_GET register. 1412 /// Handles a write to the QUERY_GET register.
1410 void ProcessQueryGet(); 1413 void ProcessQueryGet();
1411 1414
1415 // Writes the query result accordingly
1416 void StampQueryResult(u64 payload, bool long_query);
1417
1412 // Handles Conditional Rendering 1418 // Handles Conditional Rendering
1413 void ProcessQueryCondition(); 1419 void ProcessQueryCondition();
1414 1420
@@ -1494,6 +1500,7 @@ ASSERT_REG_POSITION(vb_element_base, 0x50D);
1494ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1500ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1495ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1501ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1496ASSERT_REG_POSITION(point_size, 0x546); 1502ASSERT_REG_POSITION(point_size, 0x546);
1503ASSERT_REG_POSITION(point_sprite_enable, 0x548);
1497ASSERT_REG_POSITION(zeta_enable, 0x54E); 1504ASSERT_REG_POSITION(zeta_enable, 0x54E);
1498ASSERT_REG_POSITION(multisample_control, 0x54F); 1505ASSERT_REG_POSITION(multisample_control, 0x54F);
1499ASSERT_REG_POSITION(condition, 0x554); 1506ASSERT_REG_POSITION(condition, 0x554);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 402869fde..c9bc83cd7 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1677,11 +1677,11 @@ union Instruction {
1677 } xmad; 1677 } xmad;
1678 1678
1679 union { 1679 union {
1680 BitField<20, 14, u64> offset; 1680 BitField<20, 14, u64> shifted_offset;
1681 BitField<34, 5, u64> index; 1681 BitField<34, 5, u64> index;
1682 1682
1683 u64 GetOffset() const { 1683 u64 GetOffset() const {
1684 return offset * 4; 1684 return shifted_offset * 4;
1685 } 1685 }
1686 } cbuf34; 1686 } cbuf34;
1687 1687
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..4419ab735 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h" 8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
9#include "core/memory.h" 10#include "core/memory.h"
10#include "video_core/engines/fermi_2d.h" 11#include "video_core/engines/fermi_2d.h"
11#include "video_core/engines/kepler_compute.h" 12#include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
122 return true; 123 return true;
123} 124}
124 125
126u64 GPU::GetTicks() const {
127 // These values were reverse engineered by fincs from NVN
128 // The gpu clock is reported in units of 384/625 nanoseconds
129 constexpr u64 gpu_ticks_num = 384;
130 constexpr u64 gpu_ticks_den = 625;
131
132 const u64 cpu_ticks = system.CoreTiming().GetTicks();
133 const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
134 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
135 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
136 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
137}
138
125void GPU::FlushCommands() { 139void GPU::FlushCommands() {
126 renderer.Rasterizer().FlushCommands(); 140 renderer.Rasterizer().FlushCommands();
127} 141}
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
340 block.sequence = regs.semaphore_sequence; 354 block.sequence = regs.semaphore_sequence;
341 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 355 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
342 // CoreTiming 356 // CoreTiming
343 block.timestamp = system.CoreTiming().GetTicks(); 357 block.timestamp = GetTicks();
344 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, 358 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
345 sizeof(block)); 359 sizeof(block));
346 } else { 360 } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
192 192
193 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); 193 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
194 194
195 u64 GetTicks() const;
196
195 std::unique_lock<std::mutex> LockSync() { 197 std::unique_lock<std::mutex> LockSync() {
196 return std::unique_lock{sync_mutex}; 198 return std::unique_lock{sync_mutex};
197 } 199 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 46a7433ea..b0eb14c8b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1220,6 +1220,7 @@ void RasterizerOpenGL::SyncPointState() {
1220 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid 1220 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1221 // in OpenGL). 1221 // in OpenGL).
1222 state.point.program_control = regs.vp_point_size.enable != 0; 1222 state.point.program_control = regs.vp_point_size.enable != 0;
1223 state.point.sprite = regs.point_sprite_enable != 0;
1223 state.point.size = std::max(1.0f, regs.point_size); 1224 state.point.size = std::max(1.0f, regs.point_size);
1224} 1225}
1225 1226
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index cc185e9e1..ab1f7983c 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() {
128 128
129void OpenGLState::ApplyPointSize() { 129void OpenGLState::ApplyPointSize() {
130 Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); 130 Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
131 Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
131 if (UpdateValue(cur_state.point.size, point.size)) { 132 if (UpdateValue(cur_state.point.size, point.size)) {
132 glPointSize(point.size); 133 glPointSize(point.size);
133 } 134 }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 678e5cd89..4953eeda2 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -132,6 +132,7 @@ public:
132 132
133 struct { 133 struct {
134 bool program_control = false; // GL_PROGRAM_POINT_SIZE 134 bool program_control = false; // GL_PROGRAM_POINT_SIZE
135 bool sprite = false; // GL_POINT_SPRITE
135 GLfloat size = 1.0f; // GL_POINT_SIZE 136 GLfloat size = 1.0f; // GL_POINT_SIZE
136 } point; 137 } point;
137 138
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index e60875cc4..21366869d 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
166 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { 166 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
167 switch (opcode->get().GetId()) { 167 switch (opcode->get().GetId()) {
168 case OpCode::Id::ICMP_CR: 168 case OpCode::Id::ICMP_CR:
169 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), 169 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
170 GetRegister(instr.gpr39)}; 170 GetRegister(instr.gpr39)};
171 case OpCode::Id::ICMP_R: 171 case OpCode::Id::ICMP_R:
172 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; 172 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
173 case OpCode::Id::ICMP_RC: 173 case OpCode::Id::ICMP_RC:
174 return {GetRegister(instr.gpr39), 174 return {GetRegister(instr.gpr39),
175 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; 175 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
176 case OpCode::Id::ICMP_IMM: 176 case OpCode::Id::ICMP_IMM:
177 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; 177 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
178 default: 178 default:
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index f992bbe2a..70d1c055b 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
21 switch (opcode->get().GetId()) { 21 switch (opcode->get().GetId()) {
22 case OpCode::Id::BFI_RC: 22 case OpCode::Id::BFI_RC:
23 return {GetRegister(instr.gpr39), 23 return {GetRegister(instr.gpr39),
24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; 24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
25 case OpCode::Id::BFI_IMM_R: 25 case OpCode::Id::BFI_IMM_R:
26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; 26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
27 default: 27 default: