summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2019-06-01 19:12:00 -0400
committerGravatar ReinUsesLisp2019-06-20 21:38:33 -0300
commit60bf761afbb125abd324e4b798d18a1611b5777b (patch)
tree5321727de0bf56eacd99a3917573f36f28197ba4 /src
parenttexture_cache uncompress-compress is untopological. (diff)
downloadyuzu-60bf761afbb125abd324e4b798d18a1611b5777b.tar.gz
yuzu-60bf761afbb125abd324e4b798d18a1611b5777b.tar.xz
yuzu-60bf761afbb125abd324e4b798d18a1611b5777b.zip
texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp92
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h9
-rw-r--r--src/video_core/texture_cache/texture_cache.h40
8 files changed, 148 insertions, 12 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 65a88b06c..ad15ea54e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -28,6 +28,7 @@ Device::Device() {
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
29 has_variable_aoffi = TestVariableAoffi(); 29 has_variable_aoffi = TestVariableAoffi();
30 has_component_indexing_bug = TestComponentIndexingBug(); 30 has_component_indexing_bug = TestComponentIndexingBug();
31 is_turing_plus = GLAD_GL_NV_mesh_shader;
31} 32}
32 33
33Device::Device(std::nullptr_t) { 34Device::Device(std::nullptr_t) {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..1afe16779 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
34 return has_component_indexing_bug; 34 return has_component_indexing_bug;
35 } 35 }
36 36
37 bool IsTuringGPU() const {
38 return is_turing_plus;
39 }
40
37private: 41private:
38 static bool TestVariableAoffi(); 42 static bool TestVariableAoffi();
39 static bool TestComponentIndexingBug(); 43 static bool TestComponentIndexingBug();
@@ -43,6 +47,7 @@ private:
43 u32 max_varyings{}; 47 u32 max_varyings{};
44 bool has_variable_aoffi{}; 48 bool has_variable_aoffi{};
45 bool has_component_indexing_bug{}; 49 bool has_component_indexing_bug{};
50 bool is_turing_plus{};
46}; 51};
47 52
48} // namespace OpenGL 53} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8fe115aec..97c55f2ec 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -82,7 +82,7 @@ struct DrawParameters {
82 82
83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
84 ScreenInfo& info) 84 ScreenInfo& info)
85 : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, 85 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
86 global_cache{*this}, system{system}, screen_info{info}, 86 global_cache{*this}, system{system}, screen_info{info},
87 buffer_cache(*this, STREAM_BUFFER_SIZE) { 87 buffer_cache(*this, STREAM_BUFFER_SIZE) {
88 OpenGLState::ApplyDefaultState(); 88 OpenGLState::ApplyDefaultState();
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 9f81c15cb..a1f91d677 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -148,6 +148,14 @@ void OGLBuffer::Release() {
148 handle = 0; 148 handle = 0;
149} 149}
150 150
151void OGLBuffer::MakePersistant(std::size_t buffer_size) {
152 if (handle == 0 || buffer_size == 0)
153 return;
154
155 const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT;
156 glNamedBufferStorage(handle, static_cast<GLsizeiptr>(buffer_size), nullptr, flags);
157}
158
151void OGLSync::Create() { 159void OGLSync::Create() {
152 if (handle != 0) 160 if (handle != 0)
153 return; 161 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 310ee2bf3..f2873ef96 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -186,6 +186,9 @@ public:
186 /// Deletes the internal OpenGL resource 186 /// Deletes the internal OpenGL resource
187 void Release(); 187 void Release();
188 188
189 // Converts the buffer into a persistent storage buffer
190 void MakePersistant(std::size_t buffer_size);
191
189 GLuint handle = 0; 192 GLuint handle = 0;
190}; 193};
191 194
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e6f08a764..bddb15cb1 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/bit_util.h"
6#include "common/common_types.h" 7#include "common/common_types.h"
7#include "common/microprofile.h" 8#include "common/microprofile.h"
8#include "common/scope_exit.h" 9#include "common/scope_exit.h"
@@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
435} 436}
436 437
437TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, 438TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
438 VideoCore::RasterizerInterface& rasterizer) 439 VideoCore::RasterizerInterface& rasterizer,
440 const Device& device)
439 : TextureCacheBase{system, rasterizer} { 441 : TextureCacheBase{system, rasterizer} {
442 support_info.depth_color_image_copies = !device.IsTuringGPU();
440 src_framebuffer.Create(); 443 src_framebuffer.Create();
441 dst_framebuffer.Create(); 444 dst_framebuffer.Create();
442} 445}
@@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams
449 452
450void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, 453void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface,
451 const VideoCommon::CopyParams& copy_params) { 454 const VideoCommon::CopyParams& copy_params) {
455 if (!support_info.depth_color_image_copies) {
456 const auto& src_params = src_surface->GetSurfaceParams();
457 const auto& dst_params = dst_surface->GetSurfaceParams();
458 if (src_params.type != dst_params.type) {
459 // A fallback is needed
460 return;
461 }
462 }
452 const auto src_handle = src_surface->GetTexture(); 463 const auto src_handle = src_surface->GetTexture();
453 const auto src_target = src_surface->GetTarget(); 464 const auto src_target = src_surface->GetTarget();
454 const auto dst_handle = dst_surface->GetTexture(); 465 const auto dst_handle = dst_surface->GetTexture();
@@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view,
517 is_linear ? GL_LINEAR : GL_NEAREST); 528 is_linear ? GL_LINEAR : GL_NEAREST);
518} 529}
519 530
531void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) {
532 const auto& src_params = src_surface->GetSurfaceParams();
533 const auto& dst_params = dst_surface->GetSurfaceParams();
534
535 const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
536 const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
537
538 const std::size_t source_size = src_surface->GetHostSizeInBytes();
539 const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
540
541 const std::size_t buffer_size = std::max(source_size, dest_size);
542
543 GLuint copy_pbo_handle = FetchPBO(buffer_size);
544
545 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
546
547 if (source_format.compressed) {
548 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
549 nullptr);
550 } else {
551 glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
552 static_cast<GLsizei>(source_size), nullptr);
553 }
554 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
555
556 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
557
558 const GLsizei width = static_cast<GLsizei>(dst_params.width);
559 const GLsizei height = static_cast<GLsizei>(dst_params.height);
560 const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
561 if (dest_format.compressed) {
562 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
563 UNREACHABLE();
564 } else {
565 switch (dst_params.target) {
566 case SurfaceTarget::Texture1D:
567 glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
568 dest_format.type, nullptr);
569 break;
570 case SurfaceTarget::Texture2D:
571 glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
572 dest_format.format, dest_format.type, nullptr);
573 break;
574 case SurfaceTarget::Texture3D:
575 case SurfaceTarget::Texture2DArray:
576 case SurfaceTarget::TextureCubeArray:
577 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
578 dest_format.format, dest_format.type, nullptr);
579 break;
580 case SurfaceTarget::TextureCubemap:
581 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
582 dest_format.format, dest_format.type, nullptr);
583 break;
584 default:
585 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
586 static_cast<u32>(dst_params.target));
587 UNREACHABLE();
588 }
589 }
590 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
591
592 glTextureBarrier();
593}
594
595GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
596 if (buffer_size < 0) {
597 UNREACHABLE();
598 return 0;
599 }
600 const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
601 OGLBuffer& cp = copy_pbo_cache[l2];
602 if (cp.handle == 0) {
603 const std::size_t ceil_size = 1ULL << l2;
604 cp.Create();
605 cp.MakePersistant(ceil_size);
606 }
607 return cp.handle;
608}
609
520} // namespace OpenGL 610} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 0b333e9e3..f514f137c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -13,6 +13,7 @@
13 13
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "video_core/engines/shader_bytecode.h" 15#include "video_core/engines/shader_bytecode.h"
16#include "video_core/renderer_opengl/gl_device.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h" 17#include "video_core/renderer_opengl/gl_resource_manager.h"
17#include "video_core/texture_cache/texture_cache.h" 18#include "video_core/texture_cache/texture_cache.h"
18 19
@@ -129,7 +130,8 @@ private:
129 130
130class TextureCacheOpenGL final : public TextureCacheBase { 131class TextureCacheOpenGL final : public TextureCacheBase {
131public: 132public:
132 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); 133 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
134 const Device& device);
133 ~TextureCacheOpenGL(); 135 ~TextureCacheOpenGL();
134 136
135protected: 137protected:
@@ -141,9 +143,14 @@ protected:
141 void ImageBlit(View src_view, View dst_view, 143 void ImageBlit(View src_view, View dst_view,
142 const Tegra::Engines::Fermi2D::Config& copy_config) override; 144 const Tegra::Engines::Fermi2D::Config& copy_config) override;
143 145
146 void BufferCopy(Surface src_surface, Surface dst_surface) override;
147
144private: 148private:
149 GLuint FetchPBO(std::size_t buffer_size);
150
145 OGLFramebuffer src_framebuffer; 151 OGLFramebuffer src_framebuffer;
146 OGLFramebuffer dst_framebuffer; 152 OGLFramebuffer dst_framebuffer;
153 std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
147}; 154};
148 155
149} // namespace OpenGL 156} // namespace OpenGL
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 69ef7a2bd..e0d0e1f70 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -214,6 +214,13 @@ public:
214 } 214 }
215 215
216protected: 216protected:
217 // This structure tells the texture cache which behaviors the backend supports,
218 // so that nothing has to be assumed about the underlying hardware.
219 // The backend is RESPONSIBLE for filling in these settings on creation.
220 struct Support {
221 bool depth_color_image_copies;
222 } support_info;
223
217 TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) 224 TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
218 : system{system}, rasterizer{rasterizer} { 225 : system{system}, rasterizer{rasterizer} {
219 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 226 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
@@ -233,6 +240,10 @@ protected:
233 virtual void ImageBlit(TView src_view, TView dst_view, 240 virtual void ImageBlit(TView src_view, TView dst_view,
234 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 241 const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
235 242
243 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
244 // and reading it from a separate buffer.
245 virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0;
246
236 void Register(TSurface surface) { 247 void Register(TSurface surface) {
237 std::lock_guard lock{mutex}; 248 std::lock_guard lock{mutex};
238 249
@@ -377,9 +388,14 @@ private:
377 const SurfaceParams& params) { 388 const SurfaceParams& params) {
378 const auto gpu_addr = current_surface->GetGpuAddr(); 389 const auto gpu_addr = current_surface->GetGpuAddr();
379 TSurface new_surface = GetUncachedSurface(gpu_addr, params); 390 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
380 std::vector<CopyParams> bricks = current_surface->BreakDown(params); 391 const auto& cr_params = current_surface->GetSurfaceParams();
381 for (auto& brick : bricks) { 392 if (!support_info.depth_color_image_copies && cr_params.type != params.type) {
382 ImageCopy(current_surface, new_surface, brick); 393 BufferCopy(current_surface, new_surface);
394 } else {
395 std::vector<CopyParams> bricks = current_surface->BreakDown(params);
396 for (auto& brick : bricks) {
397 ImageCopy(current_surface, new_surface, brick);
398 }
383 } 399 }
384 Unregister(current_surface); 400 Unregister(current_surface);
385 Register(new_surface); 401 Register(new_surface);
@@ -505,7 +521,8 @@ private:
505 auto topological_result = current_surface->MatchesTopology(params); 521 auto topological_result = current_surface->MatchesTopology(params);
506 if (topological_result != MatchTopologyResult::FullMatch) { 522 if (topological_result != MatchTopologyResult::FullMatch) {
507 std::vector<TSurface> overlaps{current_surface}; 523 std::vector<TSurface> overlaps{current_surface};
508 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); 524 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
525 topological_result);
509 } 526 }
510 MatchStructureResult s_result = current_surface->MatchesStructure(params); 527 MatchStructureResult s_result = current_surface->MatchesStructure(params);
511 if (s_result != MatchStructureResult::None && 528 if (s_result != MatchStructureResult::None &&
@@ -537,7 +554,8 @@ private:
537 for (auto surface : overlaps) { 554 for (auto surface : overlaps) {
538 auto topological_result = surface->MatchesTopology(params); 555 auto topological_result = surface->MatchesTopology(params);
539 if (topological_result != MatchTopologyResult::FullMatch) { 556 if (topological_result != MatchTopologyResult::FullMatch) {
540 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); 557 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
558 topological_result);
541 } 559 }
542 } 560 }
543 561
@@ -555,7 +573,8 @@ private:
555 return *view; 573 return *view;
556 } 574 }
557 } 575 }
558 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); 576 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
577 MatchTopologyResult::FullMatch);
559 } 578 }
560 // Now we check if the candidate is a mipmap/layer of the overlap 579 // Now we check if the candidate is a mipmap/layer of the overlap
561 std::optional<TView> view = 580 std::optional<TView> view =
@@ -578,13 +597,15 @@ private:
578 pair.first->EmplaceView(params, gpu_addr, candidate_size); 597 pair.first->EmplaceView(params, gpu_addr, candidate_size);
579 if (mirage_view) 598 if (mirage_view)
580 return {pair.first, *mirage_view}; 599 return {pair.first, *mirage_view};
581 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); 600 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
601 MatchTopologyResult::FullMatch);
582 } 602 }
583 return {current_surface, *view}; 603 return {current_surface, *view};
584 } 604 }
585 // The next case is unsafe, so if we r in accurate GPU, just skip it 605 // The next case is unsafe, so if we r in accurate GPU, just skip it
586 if (Settings::values.use_accurate_gpu_emulation) { 606 if (Settings::values.use_accurate_gpu_emulation) {
587 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); 607 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
608 MatchTopologyResult::FullMatch);
588 } 609 }
589 // This is the case the texture is a part of the parent. 610 // This is the case the texture is a part of the parent.
590 if (current_surface->MatchesSubTexture(params, gpu_addr)) { 611 if (current_surface->MatchesSubTexture(params, gpu_addr)) {
@@ -601,7 +622,8 @@ private:
601 } 622 }
602 } 623 }
603 // We failed all the tests, recycle the overlaps into a new texture. 624 // We failed all the tests, recycle the overlaps into a new texture.
604 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); 625 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
626 MatchTopologyResult::FullMatch);
605 } 627 }
606 628
607 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, 629 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,